Skip to content

Commit

Permalink
GDALChecksumImage(): read by multiple of blocks for floating-point ba…
Browse files Browse the repository at this point in the history
…nds to improve performance

This is similar to optimization done in
c854d6c for the integer case.
  • Loading branch information
rouault committed Apr 24, 2024
1 parent f909e03 commit a17e970
Show file tree
Hide file tree
Showing 2 changed files with 193 additions and 28 deletions.
157 changes: 129 additions & 28 deletions alg/gdalchecksum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@

#include <cmath>
#include <cstddef>
#include <algorithm>

#include "cpl_conv.h"
#include "cpl_error.h"
#include "cpl_vsi.h"
#include "gdal.h"

CPL_CVSID("$Id$")
#include "gdal_priv.h"

/************************************************************************/
/* GDALChecksumImage() */
Expand Down Expand Up @@ -73,9 +73,132 @@ int CPL_STDCALL GDALChecksumImage(GDALRasterBandH hBand, int nXOff, int nYOff,
int iPrime = 0;
const GDALDataType eDataType = GDALGetRasterDataType(hBand);
const bool bComplex = CPL_TO_BOOL(GDALDataTypeIsComplex(eDataType));
const bool bIsFloatingPoint =
(eDataType == GDT_Float32 || eDataType == GDT_Float64 ||
eDataType == GDT_CFloat32 || eDataType == GDT_CFloat64);

const auto IntFromDouble = [](double dfVal)
{
int nVal;
if (CPLIsNan(dfVal) || CPLIsInf(dfVal))
{
// Most compilers seem to cast NaN or Inf to 0x80000000.
// but VC7 is an exception. So we force the result
// of such a cast.
nVal = 0x80000000;
}
else
{
// Standard behavior of GDALCopyWords when converting
// from floating point to Int32.
dfVal += 0.5;

if (dfVal < -2147483647.0)
nVal = -2147483647;
else if (dfVal > 2147483647)
nVal = 2147483647;
else
nVal = static_cast<GInt32>(floor(dfVal));
}
return nVal;
};

if (bIsFloatingPoint && nXOff == 0 && nYOff == 0)
{
const GDALDataType eDstDataType = bComplex ? GDT_CFloat64 : GDT_Float64;
int nBlockXSize = 0;
int nBlockYSize = 0;
GDALGetBlockSize(hBand, &nBlockXSize, &nBlockYSize);
const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstDataType);
int nChunkXSize = nBlockXSize;
const int nChunkYSize = nBlockYSize;
if (nBlockXSize < nXSize)
{
const GIntBig nMaxChunkSize =
std::max(static_cast<GIntBig>(10 * 1000 * 1000),
GDALGetCacheMax64() / 10);
if (nDstDataTypeSize > 0 &&
static_cast<GIntBig>(nXSize) * nChunkYSize <
nMaxChunkSize / nDstDataTypeSize)
{
// A full line of height nChunkYSize can fit in the maximum
// allowed memory
nChunkXSize = nXSize;
}
else
{
// Otherwise compute a size that is a multiple of nBlockXSize
nChunkXSize = static_cast<int>(std::min(
static_cast<GIntBig>(nXSize),
nBlockXSize *
std::max(static_cast<GIntBig>(1),
nMaxChunkSize /
(static_cast<GIntBig>(nBlockXSize) *
nChunkYSize * nDstDataTypeSize))));
}
}

double *padfLineData = static_cast<double *>(
VSI_MALLOC3_VERBOSE(nChunkXSize, nChunkYSize, nDstDataTypeSize));
if (padfLineData == nullptr)
{
return 0;
}
const int nValsPerIter = bComplex ? 2 : 1;

if (eDataType == GDT_Float32 || eDataType == GDT_Float64 ||
eDataType == GDT_CFloat32 || eDataType == GDT_CFloat64)
const int nYBlocks = DIV_ROUND_UP(nYSize, nChunkYSize);
const int nXBlocks = DIV_ROUND_UP(nXSize, nChunkXSize);
for (int iYBlock = 0; iYBlock < nYBlocks; ++iYBlock)
{
const int iYStart = iYBlock * nChunkYSize;
const int iYEnd =
iYBlock == nYBlocks - 1 ? nYSize : iYStart + nChunkYSize;
const int nChunkActualHeight = iYEnd - iYStart;
for (int iXBlock = 0; iXBlock < nXBlocks; ++iXBlock)
{
const int iXStart = iXBlock * nChunkXSize;
const int iXEnd =
iXBlock == nXBlocks - 1 ? nXSize : iXStart + nChunkXSize;
const int nChunkActualXSize = iXEnd - iXStart;
if (GDALRasterIO(
hBand, GF_Read, iXStart, iYStart, nChunkActualXSize,
nChunkActualHeight, padfLineData, nChunkActualXSize,
nChunkActualHeight, eDstDataType, 0, 0) != CE_None)
{
CPLError(CE_Failure, CPLE_FileIO,
"Checksum value could not be computed due to I/O "
"read error.");
nChecksum = 0;
iYBlock = nYBlocks;
break;
}
const size_t xIters =
static_cast<size_t>(nValsPerIter) * nChunkActualXSize;
for (int iY = iYStart; iY < iYEnd; ++iY)
{
// Initialize iPrime so that it is consistent with a
// per full line iteration strategy
iPrime = (nValsPerIter *
(static_cast<int64_t>(iY) * nXSize + iXStart)) %
11;
const size_t nOffset = nValsPerIter *
static_cast<size_t>(iY - iYStart) *
nChunkActualXSize;
for (size_t i = 0; i < xIters; ++i)
{
const double dfVal = padfLineData[nOffset + i];
nChecksum += IntFromDouble(dfVal) % anPrimes[iPrime++];
if (iPrime > 10)
iPrime = 0;
}
nChecksum &= 0xffff;
}
}
}

CPLFree(padfLineData);
}
else if (bIsFloatingPoint)
{
const GDALDataType eDstDataType = bComplex ? GDT_CFloat64 : GDT_Float64;

Expand All @@ -101,30 +224,8 @@ int CPL_STDCALL GDALChecksumImage(GDALRasterBandH hBand, int nXOff, int nYOff,

for (int i = 0; i < nCount; i++)
{
double dfVal = padfLineData[i];
int nVal;
if (CPLIsNan(dfVal) || CPLIsInf(dfVal))
{
// Most compilers seem to cast NaN or Inf to 0x80000000.
// but VC7 is an exception. So we force the result
// of such a cast.
nVal = 0x80000000;
}
else
{
// Standard behavior of GDALCopyWords when converting
// from floating point to Int32.
dfVal += 0.5;

if (dfVal < -2147483647.0)
nVal = -2147483647;
else if (dfVal > 2147483647)
nVal = 2147483647;
else
nVal = static_cast<GInt32>(floor(dfVal));
}

nChecksum += nVal % anPrimes[iPrime++];
const double dfVal = padfLineData[i];
nChecksum += IntFromDouble(dfVal) % anPrimes[iPrime++];
if (iPrime > 10)
iPrime = 0;

Expand Down
64 changes: 64 additions & 0 deletions autotest/alg/checksum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env pytest
###############################################################################
# Project: GDAL/OGR Test Suite
# Purpose: GDALChecksumImage() testing
# Author: Even Rouault <even.rouault @ spatialys.com>
#
###############################################################################
# Copyright (c) 2024, Even Rouault <even.rouault @ spatialys.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
###############################################################################

import pytest
from osgeo import gdal


@pytest.mark.parametrize("source", ["byte", "float32"])
def test_checksum(source):

tmpfilename = "/vsimem/tmp.tif"

src_ds = gdal.Open(f"../gcore/data/{source}.tif")
ds = gdal.GetDriverByName("GTiff").CreateCopy(tmpfilename, src_ds)
assert ds.GetRasterBand(1).Checksum() == 4672

ds = gdal.GetDriverByName("GTiff").CreateCopy(
tmpfilename, src_ds, options=["TILED=YES", "BLOCKXSIZE=16", "BLOCKYSIZE=16"]
)
assert ds.GetRasterBand(1).Checksum() == 4672

ds = gdal.GetDriverByName("GTiff").CreateCopy(
tmpfilename, src_ds, options=["TILED=YES", "BLOCKXSIZE=32", "BLOCKYSIZE=16"]
)
assert ds.GetRasterBand(1).Checksum() == 4672

ds = gdal.GetDriverByName("GTiff").CreateCopy(
tmpfilename, src_ds, options=["TILED=YES", "BLOCKXSIZE=16", "BLOCKYSIZE=32"]
)
assert ds.GetRasterBand(1).Checksum() == 4672

mem_ds = gdal.GetDriverByName("MEM").Create(
"", 21, 21, 1, src_ds.GetRasterBand(1).DataType
)
mem_ds.WriteRaster(1, 1, 20, 20, src_ds.ReadRaster())
assert mem_ds.GetRasterBand(1).Checksum(1, 1, 20, 20) == 4672
assert mem_ds.GetRasterBand(1).Checksum() == 4568

gdal.Unlink(tmpfilename)

0 comments on commit a17e970

Please sign in to comment.