From a17e9705efa4cae053efcd5ceea26cd2e69d2565 Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Wed, 24 Apr 2024 23:44:16 +0200
Subject: [PATCH] GDALChecksumImage(): read by multiple of blocks for
 floating-point bands to improve performance

This is similar to optimization done in
c854d6c97588bffa144e3daaec99ac6dd87e26d7 for the integer case.
---
 alg/gdalchecksum.cpp     | 157 ++++++++++++++++++++++++++++++++-------
 autotest/alg/checksum.py |  64 ++++++++++++++++
 2 files changed, 193 insertions(+), 28 deletions(-)
 create mode 100644 autotest/alg/checksum.py
diff --git a/alg/gdalchecksum.cpp b/alg/gdalchecksum.cpp
index fa2a534a950d..8b3bb80cda68 100644
--- a/alg/gdalchecksum.cpp
+++ b/alg/gdalchecksum.cpp
@@ -32,13 +32,13 @@
 
 #include <cmath>
 #include <cstddef>
+#include <algorithm>
 
 #include "cpl_conv.h"
 #include "cpl_error.h"
 #include "cpl_vsi.h"
 #include "gdal.h"
-
-CPL_CVSID("$Id$")
+#include "gdal_priv.h"
 
 /************************************************************************/
 /*                         GDALChecksumImage()                          */
@@ -73,9 +73,132 @@ int CPL_STDCALL GDALChecksumImage(GDALRasterBandH hBand, int nXOff, int nYOff,
     int iPrime = 0;
     const GDALDataType eDataType = GDALGetRasterDataType(hBand);
     const bool bComplex = CPL_TO_BOOL(GDALDataTypeIsComplex(eDataType));
+    const bool bIsFloatingPoint =
+        (eDataType == GDT_Float32 || eDataType == GDT_Float64 ||
+         eDataType == GDT_CFloat32 || eDataType == GDT_CFloat64);
+
+    const auto IntFromDouble = [](double dfVal)
+    {
+        int nVal;
+        if (CPLIsNan(dfVal) || CPLIsInf(dfVal))
+        {
+            // Most compilers seem to cast NaN or Inf to 0x80000000.
+            // but VC7 is an exception. So we force the result
+            // of such a cast.
+            nVal = 0x80000000;
+        }
+        else
+        {
+            // Standard behavior of GDALCopyWords when converting
+            // from floating point to Int32.
+            dfVal += 0.5;
+
+            if (dfVal < -2147483647.0)
+                nVal = -2147483647;
+            else if (dfVal > 2147483647)
+                nVal = 2147483647;
+            else
+                nVal = static_cast<GInt32>(floor(dfVal));
+        }
+        return nVal;
+    };
+
+    if (bIsFloatingPoint && nXOff == 0 && nYOff == 0)
+    {
+        const GDALDataType eDstDataType = bComplex ? GDT_CFloat64 : GDT_Float64;
+        int nBlockXSize = 0;
+        int nBlockYSize = 0;
+        GDALGetBlockSize(hBand, &nBlockXSize, &nBlockYSize);
+        const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstDataType);
+        int nChunkXSize = nBlockXSize;
+        const int nChunkYSize = nBlockYSize;
+        if (nBlockXSize < nXSize)
+        {
+            const GIntBig nMaxChunkSize =
+                std::max(static_cast<GIntBig>(10 * 1000 * 1000),
+                         GDALGetCacheMax64() / 10);
+            if (nDstDataTypeSize > 0 &&
+                static_cast<GIntBig>(nXSize) * nChunkYSize <
+                    nMaxChunkSize / nDstDataTypeSize)
+            {
+                // A full line of height nChunkYSize can fit in the maximum
+                // allowed memory
+                nChunkXSize = nXSize;
+            }
+            else
+            {
+                // Otherwise compute a size that is a multiple of nBlockXSize
+                nChunkXSize = static_cast<int>(std::min(
+                    static_cast<GIntBig>(nXSize),
+                    nBlockXSize *
+                        std::max(static_cast<GIntBig>(1),
+                                 nMaxChunkSize /
+                                     (static_cast<GIntBig>(nBlockXSize) *
+                                      nChunkYSize * nDstDataTypeSize))));
+            }
+        }
+
+        double *padfLineData = static_cast<double *>(
+            VSI_MALLOC3_VERBOSE(nChunkXSize, nChunkYSize, nDstDataTypeSize));
+        if (padfLineData == nullptr)
+        {
+            return 0;
+        }
+        const int nValsPerIter = bComplex ? 2 : 1;
 
-    if (eDataType == GDT_Float32 || eDataType == GDT_Float64 ||
-        eDataType == GDT_CFloat32 || eDataType == GDT_CFloat64)
+        const int nYBlocks = DIV_ROUND_UP(nYSize, nChunkYSize);
+        const int nXBlocks = DIV_ROUND_UP(nXSize, nChunkXSize);
+        for (int iYBlock = 0; iYBlock < nYBlocks; ++iYBlock)
+        {
+            const int iYStart = iYBlock * nChunkYSize;
+            const int iYEnd =
+                iYBlock == nYBlocks - 1 ? nYSize : iYStart + nChunkYSize;
+            const int nChunkActualHeight = iYEnd - iYStart;
+            for (int iXBlock = 0; iXBlock < nXBlocks; ++iXBlock)
+            {
+                const int iXStart = iXBlock * nChunkXSize;
+                const int iXEnd =
+                    iXBlock == nXBlocks - 1 ? nXSize : iXStart + nChunkXSize;
+                const int nChunkActualXSize = iXEnd - iXStart;
+                if (GDALRasterIO(
+                        hBand, GF_Read, iXStart, iYStart, nChunkActualXSize,
+                        nChunkActualHeight, padfLineData, nChunkActualXSize,
+                        nChunkActualHeight, eDstDataType, 0, 0) != CE_None)
+                {
+                    CPLError(CE_Failure, CPLE_FileIO,
+                             "Checksum value could not be computed due to I/O "
+                             "read error.");
+                    nChecksum = 0;
+                    iYBlock = nYBlocks;
+                    break;
+                }
+                const size_t xIters =
+                    static_cast<size_t>(nValsPerIter) * nChunkActualXSize;
+                for (int iY = iYStart; iY < iYEnd; ++iY)
+                {
+                    // Initialize iPrime so that it is consistent with a
+                    // per full line iteration strategy
+                    iPrime = (nValsPerIter *
+                              (static_cast<int64_t>(iY) * nXSize + iXStart)) %
+                             11;
+                    const size_t nOffset = nValsPerIter *
+                                           static_cast<size_t>(iY - iYStart) *
+                                           nChunkActualXSize;
+                    for (size_t i = 0; i < xIters; ++i)
+                    {
+                        const double dfVal = padfLineData[nOffset + i];
+                        nChecksum += IntFromDouble(dfVal) % anPrimes[iPrime++];
+                        if (iPrime > 10)
+                            iPrime = 0;
+                    }
+                    nChecksum &= 0xffff;
+                }
+            }
+        }
+
+        CPLFree(padfLineData);
+    }
+    else if (bIsFloatingPoint)
     {
         const GDALDataType eDstDataType = bComplex ? GDT_CFloat64 : GDT_Float64;
 
@@ -101,30 +224,8 @@ int CPL_STDCALL GDALChecksumImage(GDALRasterBandH hBand, int nXOff, int nYOff,
 
             for (int i = 0; i < nCount; i++)
             {
-                double dfVal = padfLineData[i];
-                int nVal;
-                if (CPLIsNan(dfVal) || CPLIsInf(dfVal))
-                {
-                    // Most compilers seem to cast NaN or Inf to 0x80000000.
-                    // but VC7 is an exception. So we force the result
-                    // of such a cast.
-                    nVal = 0x80000000;
-                }
-                else
-                {
-                    // Standard behavior of GDALCopyWords when converting
-                    // from floating point to Int32.
-                    dfVal += 0.5;
-
-                    if (dfVal < -2147483647.0)
-                        nVal = -2147483647;
-                    else if (dfVal > 2147483647)
-                        nVal = 2147483647;
-                    else
-                        nVal = static_cast<GInt32>(floor(dfVal));
-                }
-
-                nChecksum += nVal % anPrimes[iPrime++];
+                const double dfVal = padfLineData[i];
+                nChecksum += IntFromDouble(dfVal) % anPrimes[iPrime++];
                 if (iPrime > 10)
                     iPrime = 0;
 
diff --git a/autotest/alg/checksum.py b/autotest/alg/checksum.py
new file mode 100644
index 000000000000..46de2bc3f9b0
--- /dev/null
+++ b/autotest/alg/checksum.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env pytest
+###############################################################################
+# Project:  GDAL/OGR Test Suite
+# Purpose:  GDALChecksumImage() testing
+# Author:   Even Rouault <even.rouault @ spatialys.com>
+#
+###############################################################################
+# Copyright (c) 2024, Even Rouault <even.rouault @ spatialys.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+###############################################################################
+
+import pytest
+from osgeo import gdal
+
+
+@pytest.mark.parametrize("source", ["byte", "float32"])
+def test_checksum(source):
+
+    tmpfilename = "/vsimem/tmp.tif"
+
+    src_ds = gdal.Open(f"../gcore/data/{source}.tif")
+    ds = gdal.GetDriverByName("GTiff").CreateCopy(tmpfilename, src_ds)
+    assert ds.GetRasterBand(1).Checksum() == 4672
+
+    ds = gdal.GetDriverByName("GTiff").CreateCopy(
+        tmpfilename, src_ds, options=["TILED=YES", "BLOCKXSIZE=16", "BLOCKYSIZE=16"]
+    )
+    assert ds.GetRasterBand(1).Checksum() == 4672
+
+    ds = gdal.GetDriverByName("GTiff").CreateCopy(
+        tmpfilename, src_ds, options=["TILED=YES", "BLOCKXSIZE=32", "BLOCKYSIZE=16"]
+    )
+    assert ds.GetRasterBand(1).Checksum() == 4672
+
+    ds = gdal.GetDriverByName("GTiff").CreateCopy(
+        tmpfilename, src_ds, options=["TILED=YES", "BLOCKXSIZE=16", "BLOCKYSIZE=32"]
+    )
+    assert ds.GetRasterBand(1).Checksum() == 4672
+
+    mem_ds = gdal.GetDriverByName("MEM").Create(
+        "", 21, 21, 1, src_ds.GetRasterBand(1).DataType
+    )
+    mem_ds.WriteRaster(1, 1, 20, 20, src_ds.ReadRaster())
+    assert mem_ds.GetRasterBand(1).Checksum(1, 1, 20, 20) == 4672
+    assert mem_ds.GetRasterBand(1).Checksum() == 4568
+
+    gdal.Unlink(tmpfilename)