From 21f7a80adc5ddc0c7e27e08be3823ff6c9c720de Mon Sep 17 00:00:00 2001
From: Talley Lambert <talley.lambert@gmail.com>
Date: Mon, 15 Jul 2024 15:49:14 -0400
Subject: [PATCH] support cuda 12, rerender build (#29)

* change build config

* MNT: Re-rendered with conda-build 3.27.0, conda-smithy 3.29.0, and conda-forge-pinning 2023.11.18.11.24.55

* commit config

* updates for cuda12

* back to tex3D

* bump version

* rerender

* undo

* comment out camcor stuff

* export path

* fix path

* try remove find_package cuda

* undo cmake changes

* pin cmake

* rerender

* update cmake

* revert

* update cmake with find cudatoolkit

* change lib strategy

* MNT: Re-rendered with conda-build 24.5.1, conda-smithy 3.36.2, and conda-forge-pinning 2024.07.15.13.31.46

* add include

* drop cuda 11.2 on windows
---
 .azure-pipelines/azure-pipelines-linux.yml    |  23 ++--
 .azure-pipelines/azure-pipelines-win.yml      |  64 +++------
 .../linux_64_cuda_compiler_version9.2.yaml    |  37 ------
 ...ercuda-nvcccuda_compiler_version12.0.yaml} |  24 ++--
 ...ompilernvcccuda_compiler_version11.2.yaml} |  18 ++-
 ...compiler_version11.0.yaml => win_64_.yaml} |  18 +--
 .../win_64_cuda_compiler_version10.2.yaml     |  23 ----
 .../win_64_cuda_compiler_version11.1.yaml     |  23 ----
 .../win_64_cuda_compiler_version11.2.yaml     |  23 ----
 .github/workflows/automerge.yml               |   2 -
 .gitignore                                    |   1 +
 .scripts/build_steps.sh                       |  21 ++-
 .scripts/logging_utils.sh                     |   4 +-
 .scripts/run_docker_build.sh                  |   9 ++
 .scripts/run_win_build.bat                    | 121 ++++++++++++++++++
 README.md                                     |  11 +-
 recipe/build.sh                               |   2 +
 recipe/conda_build_config.yaml                |  49 ++++---
 recipe/meta.yaml                              |   6 +-
 src/Buffers/CMakeLists.txt                    |   3 +-
 src/CMakeLists.txt                            | 114 +++--------------
 src/RL-Biggs-Andrews.cpp                      |  46 +++----
 src/geometryTransform.cu                      |  71 +++++-----
 src/linearDecon.cpp                           |   2 +-
 src/linearDecon.h                             |  18 +--
 src/llspy.cpp                                 |  56 ++++----
 26 files changed, 359 insertions(+), 430 deletions(-)
 delete mode 100644 .ci_support/linux_64_cuda_compiler_version9.2.yaml
 rename .ci_support/{linux_64_cuda_compiler_version10.2.yaml => linux_64_cuda_compilercuda-nvcccuda_compiler_version12.0.yaml} (68%)
 rename .ci_support/{linux_64_cuda_compiler_version11.2.yaml => linux_64_cuda_compilernvcccuda_compiler_version11.2.yaml} (79%)
 rename .ci_support/{win_64_cuda_compiler_version11.0.yaml => win_64_.yaml} (59%)
 delete mode 100644 .ci_support/win_64_cuda_compiler_version10.2.yaml
 delete mode 100644 .ci_support/win_64_cuda_compiler_version11.1.yaml
 delete mode 100644 .ci_support/win_64_cuda_compiler_version11.2.yaml
 create mode 100755 .scripts/run_win_build.bat

diff --git a/.azure-pipelines/azure-pipelines-linux.yml b/.azure-pipelines/azure-pipelines-linux.yml
index ccbfe07..817ba90 100755
--- a/.azure-pipelines/azure-pipelines-linux.yml
+++ b/.azure-pipelines/azure-pipelines-linux.yml
@@ -8,28 +8,20 @@ jobs:
     vmImage: ubuntu-latest
   strategy:
     matrix:
-      linux_64_cuda_compiler_version10.2:
-        CONFIG: linux_64_cuda_compiler_version10.2
+      linux_64_cuda_compilercuda-nvcccuda_compiler_version12.0:
+        CONFIG: linux_64_cuda_compilercuda-nvcccuda_compiler_version12.0
         UPLOAD_PACKAGES: 'False'
-        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:10.2
-      linux_64_cuda_compiler_version11.2:
-        CONFIG: linux_64_cuda_compiler_version11.2
+        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cos7-x86_64
+      linux_64_cuda_compilernvcccuda_compiler_version11.2:
+        CONFIG: linux_64_cuda_compilernvcccuda_compiler_version11.2
         UPLOAD_PACKAGES: 'False'
         DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:11.2
-      linux_64_cuda_compiler_version9.2:
-        CONFIG: linux_64_cuda_compiler_version9.2
-        UPLOAD_PACKAGES: 'False'
-        DOCKER_IMAGE: quay.io/condaforge/linux-anvil-cuda:9.2
   timeoutInMinutes: 360
+  variables: {}
 
   steps:
   - checkout: self
     fetchDepth: 0
-  - script: |
-         rm -rf /opt/ghc
-         df -h
-    displayName: Manage disk space
-
   # configure qemu binfmt-misc running.  This allows us to run docker containers
   # embedded qemu-static
   - script: |
@@ -40,6 +32,9 @@ jobs:
 
   - script: |
         export CI=azure
+        export flow_run_id=azure_$(Build.BuildNumber).$(System.JobAttempt)
+        export remote_url=$(Build.Repository.Uri)
+        export sha=$(Build.SourceVersion)
         export GIT_BRANCH=$BUILD_SOURCEBRANCHNAME
         export FEEDSTOCK_NAME=$(basename ${BUILD_REPOSITORY_NAME})
         if [[ "${BUILD_REASON:-}" == "PullRequest" ]]; then
diff --git a/.azure-pipelines/azure-pipelines-win.yml b/.azure-pipelines/azure-pipelines-win.yml
index 8e62172..3e6193a 100755
--- a/.azure-pipelines/azure-pipelines-win.yml
+++ b/.azure-pipelines/azure-pipelines-win.yml
@@ -5,28 +5,21 @@
 jobs:
 - job: win
   pool:
-    vmImage: windows-2019
+    vmImage: windows-2022
   strategy:
     matrix:
-      win_64_cuda_compiler_version10.2:
-        CONFIG: win_64_cuda_compiler_version10.2
-        UPLOAD_PACKAGES: 'False'
-      win_64_cuda_compiler_version11.0:
-        CONFIG: win_64_cuda_compiler_version11.0
-        UPLOAD_PACKAGES: 'False'
-      win_64_cuda_compiler_version11.1:
-        CONFIG: win_64_cuda_compiler_version11.1
-        UPLOAD_PACKAGES: 'False'
-      win_64_cuda_compiler_version11.2:
-        CONFIG: win_64_cuda_compiler_version11.2
+      win_64_:
+        CONFIG: win_64_
         UPLOAD_PACKAGES: 'False'
   timeoutInMinutes: 360
   variables:
     CONDA_BLD_PATH: D:\\bld\\
+    UPLOAD_TEMP: D:\\tmp
 
   steps:
     - checkout: self
       fetchDepth: 0
+
     - task: PythonScript@0
       displayName: 'Download Miniforge'
       inputs:
@@ -45,42 +38,15 @@ jobs:
       displayName: Add conda to PATH
 
     - script: |
-        call activate base
-        mamba.exe install "python=3.9" conda-build conda pip conda-forge-ci-setup=3 "py-lief<0.12" -c conda-forge --strict-channel-priority --yes
-      displayName: Install conda-build
-
-    - script: set PYTHONUNBUFFERED=1
-      displayName: Set PYTHONUNBUFFERED
-
-    # Configure the VM
-    - script: |
-        call activate base
-        setup_conda_rc .\ ".\recipe" .\.ci_support\%CONFIG%.yaml
-      displayName: conda-forge CI setup
-
-    # Configure the VM.
-    - script: |
-        set "CI=azure"
-        call activate base
-        run_conda_forge_build_setup
-      displayName: conda-forge build setup
-
-    - script: |
-        call activate base
-        if EXIST LICENSE.txt (
-          copy LICENSE.txt "recipe\\recipe-scripts-license.txt"
-        )
-        conda.exe build "recipe" -m .ci_support\%CONFIG%.yaml --suppress-variables
-      displayName: Build recipe
+        call ".scripts\run_win_build.bat"
+      displayName: Run Windows build
       env:
         PYTHONUNBUFFERED: 1
-
-    - script: |
-        set "GIT_BRANCH=%BUILD_SOURCEBRANCHNAME%"
-        set "FEEDSTOCK_NAME=%BUILD_REPOSITORY_NAME:*/=%"
-        call activate base
-        upload_package  .\ ".\recipe" .ci_support\%CONFIG%.yaml
-      displayName: Upload package
-      env:
-        BINSTAR_TOKEN: $(BINSTAR_TOKEN)
-      condition: and(succeeded(), not(eq(variables['UPLOAD_PACKAGES'], 'False')), not(eq(variables['Build.Reason'], 'PullRequest')))
\ No newline at end of file
+        CONFIG: $(CONFIG)
+        CI: azure
+        flow_run_id: azure_$(Build.BuildNumber).$(System.JobAttempt)
+        remote_url: $(Build.Repository.Uri)
+        sha: $(Build.SourceVersion)
+        UPLOAD_PACKAGES: $(UPLOAD_PACKAGES)
+        UPLOAD_TEMP: $(UPLOAD_TEMP)
+        BINSTAR_TOKEN: $(BINSTAR_TOKEN)
\ No newline at end of file
diff --git a/.ci_support/linux_64_cuda_compiler_version9.2.yaml b/.ci_support/linux_64_cuda_compiler_version9.2.yaml
deleted file mode 100644
index d66ee00..0000000
--- a/.ci_support/linux_64_cuda_compiler_version9.2.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-boost_cpp:
-- 1.78.0
-c_compiler:
-- gcc
-c_compiler_version:
-- '7'
-cdt_name:
-- cos6
-channel_sources:
-- conda-forge,defaults
-channel_targets:
-- conda-forge main
-cuda_compiler:
-- nvcc
-cuda_compiler_version:
-- '9.2'
-cxx_compiler:
-- gxx
-cxx_compiler_version:
-- '7'
-docker_image:
-- quay.io/condaforge/linux-anvil-cuda:9.2
-fftw:
-- '3'
-libtiff:
-- '4'
-pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
-target_platform:
-- linux-64
-zip_keys:
-- - c_compiler_version
-  - cxx_compiler_version
-  - cuda_compiler_version
-  - cdt_name
-  - docker_image
diff --git a/.ci_support/linux_64_cuda_compiler_version10.2.yaml b/.ci_support/linux_64_cuda_compilercuda-nvcccuda_compiler_version12.0.yaml
similarity index 68%
rename from .ci_support/linux_64_cuda_compiler_version10.2.yaml
rename to .ci_support/linux_64_cuda_compilercuda-nvcccuda_compiler_version12.0.yaml
index 179619d..a37bd8b 100644
--- a/.ci_support/linux_64_cuda_compiler_version10.2.yaml
+++ b/.ci_support/linux_64_cuda_compilercuda-nvcccuda_compiler_version12.0.yaml
@@ -1,37 +1,35 @@
-boost_cpp:
-- 1.78.0
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '10'
 cdt_name:
 - cos7
 channel_sources:
-- conda-forge,defaults
+- conda-forge
 channel_targets:
 - conda-forge main
 cuda_compiler:
-- nvcc
+- cuda-nvcc
 cuda_compiler_version:
-- '10.2'
+- '12.0'
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '10'
 docker_image:
-- quay.io/condaforge/linux-anvil-cuda:10.2
+- quay.io/condaforge/linux-anvil-cos7-x86_64
 fftw:
 - '3'
+libboost_devel:
+- '1.82'
 libtiff:
-- '4'
-pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
+- '4.6'
 target_platform:
 - linux-64
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
-  - cuda_compiler_version
   - cdt_name
+  - cuda_compiler
+  - cuda_compiler_version
   - docker_image
diff --git a/.ci_support/linux_64_cuda_compiler_version11.2.yaml b/.ci_support/linux_64_cuda_compilernvcccuda_compiler_version11.2.yaml
similarity index 79%
rename from .ci_support/linux_64_cuda_compiler_version11.2.yaml
rename to .ci_support/linux_64_cuda_compilernvcccuda_compiler_version11.2.yaml
index 319239e..b533918 100644
--- a/.ci_support/linux_64_cuda_compiler_version11.2.yaml
+++ b/.ci_support/linux_64_cuda_compilernvcccuda_compiler_version11.2.yaml
@@ -1,13 +1,11 @@
-boost_cpp:
-- 1.78.0
 c_compiler:
 - gcc
 c_compiler_version:
-- '7'
+- '10'
 cdt_name:
 - cos7
 channel_sources:
-- conda-forge,defaults
+- conda-forge
 channel_targets:
 - conda-forge main
 cuda_compiler:
@@ -17,21 +15,21 @@ cuda_compiler_version:
 cxx_compiler:
 - gxx
 cxx_compiler_version:
-- '7'
+- '10'
 docker_image:
 - quay.io/condaforge/linux-anvil-cuda:11.2
 fftw:
 - '3'
+libboost_devel:
+- '1.82'
 libtiff:
-- '4'
-pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
+- '4.6'
 target_platform:
 - linux-64
 zip_keys:
 - - c_compiler_version
   - cxx_compiler_version
-  - cuda_compiler_version
   - cdt_name
+  - cuda_compiler
+  - cuda_compiler_version
   - docker_image
diff --git a/.ci_support/win_64_cuda_compiler_version11.0.yaml b/.ci_support/win_64_.yaml
similarity index 59%
rename from .ci_support/win_64_cuda_compiler_version11.0.yaml
rename to .ci_support/win_64_.yaml
index 167dba3..3b3b5c0 100644
--- a/.ci_support/win_64_cuda_compiler_version11.0.yaml
+++ b/.ci_support/win_64_.yaml
@@ -1,23 +1,23 @@
-boost_cpp:
-- 1.78.0
 c_compiler:
 - vs2019
 channel_sources:
-- conda-forge,defaults
+- conda-forge
 channel_targets:
 - conda-forge main
 cuda_compiler:
-- nvcc
+- cuda-nvcc
 cuda_compiler_version:
-- '11.0'
+- '12.0'
 cxx_compiler:
 - vs2019
 fftw:
 - '3'
+libboost_devel:
+- '1.82'
 libtiff:
-- '4'
-pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
+- '4.6'
 target_platform:
 - win-64
+zip_keys:
+- - cuda_compiler
+  - cuda_compiler_version
diff --git a/.ci_support/win_64_cuda_compiler_version10.2.yaml b/.ci_support/win_64_cuda_compiler_version10.2.yaml
deleted file mode 100644
index a070cfb..0000000
--- a/.ci_support/win_64_cuda_compiler_version10.2.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-boost_cpp:
-- 1.78.0
-c_compiler:
-- vs2019
-channel_sources:
-- conda-forge,defaults
-channel_targets:
-- conda-forge main
-cuda_compiler:
-- nvcc
-cuda_compiler_version:
-- '10.2'
-cxx_compiler:
-- vs2019
-fftw:
-- '3'
-libtiff:
-- '4'
-pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
-target_platform:
-- win-64
diff --git a/.ci_support/win_64_cuda_compiler_version11.1.yaml b/.ci_support/win_64_cuda_compiler_version11.1.yaml
deleted file mode 100644
index fa990f5..0000000
--- a/.ci_support/win_64_cuda_compiler_version11.1.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-boost_cpp:
-- 1.78.0
-c_compiler:
-- vs2019
-channel_sources:
-- conda-forge,defaults
-channel_targets:
-- conda-forge main
-cuda_compiler:
-- nvcc
-cuda_compiler_version:
-- '11.1'
-cxx_compiler:
-- vs2019
-fftw:
-- '3'
-libtiff:
-- '4'
-pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
-target_platform:
-- win-64
diff --git a/.ci_support/win_64_cuda_compiler_version11.2.yaml b/.ci_support/win_64_cuda_compiler_version11.2.yaml
deleted file mode 100644
index 2919beb..0000000
--- a/.ci_support/win_64_cuda_compiler_version11.2.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-boost_cpp:
-- 1.78.0
-c_compiler:
-- vs2019
-channel_sources:
-- conda-forge,defaults
-channel_targets:
-- conda-forge main
-cuda_compiler:
-- nvcc
-cuda_compiler_version:
-- '11.2'
-cxx_compiler:
-- vs2019
-fftw:
-- '3'
-libtiff:
-- '4'
-pin_run_as_build:
-  boost-cpp:
-    max_pin: x.x.x
-target_platform:
-- win-64
diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml
index da1368a..0535f6a 100644
--- a/.github/workflows/automerge.yml
+++ b/.github/workflows/automerge.yml
@@ -9,8 +9,6 @@ jobs:
     runs-on: ubuntu-latest
     name: automerge
     steps:
-      - name: checkout
-        uses: actions/checkout@v3
       - name: automerge-action
         id: automerge-action
         uses: conda-forge/automerge-action@main
diff --git a/.gitignore b/.gitignore
index 500bffe..8cc9e7e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ VS/cudaDeconv.vcxproj.filters
 
 build_artifacts
 # conda smithy ci-skeleton end
+cmake_build
diff --git a/.scripts/build_steps.sh b/.scripts/build_steps.sh
index 9e26040..8b6dc08 100755
--- a/.scripts/build_steps.sh
+++ b/.scripts/build_steps.sh
@@ -28,12 +28,15 @@ conda-build:
 pkgs_dirs:
   - ${FEEDSTOCK_ROOT}/build_artifacts/pkg_cache
   - /opt/conda/pkgs
+solver: libmamba
 
 CONDARC
+export CONDA_LIBMAMBA_SOLVER_NO_CHANNELS_FROM_INSTALLED=1
 
-
-mamba install --update-specs --yes --quiet --channel conda-forge \
-    conda-build pip conda-forge-ci-setup=3 "py-lief<0.12"
+mamba install --update-specs --yes --quiet --channel conda-forge --strict-channel-priority \
+    pip mamba conda-build conda-forge-ci-setup=4 "conda-build>=24.1"
+mamba update --update-specs --yes --quiet --channel conda-forge --strict-channel-priority \
+    pip mamba conda-build conda-forge-ci-setup=4 "conda-build>=24.1"
 
 # set up the condarc
 setup_conda_rc "${FEEDSTOCK_ROOT}" "${RECIPE_ROOT}" "${CONFIG_FILE}"
@@ -62,9 +65,17 @@ if [[ "${BUILD_WITH_CONDA_DEBUG:-0}" == 1 ]]; then
     # Drop into an interactive shell
     /bin/bash
 else
-    conda build "${RECIPE_ROOT}" -m "${CI_SUPPORT}/${CONFIG}.yaml" \
+    conda-build "${RECIPE_ROOT}" -m "${CI_SUPPORT}/${CONFIG}.yaml" \
         --suppress-variables ${EXTRA_CB_OPTIONS:-} \
-        --clobber-file "${CI_SUPPORT}/clobber_${CONFIG}.yaml"
+        --clobber-file "${CI_SUPPORT}/clobber_${CONFIG}.yaml" \
+        --extra-meta flow_run_id="${flow_run_id:-}" remote_url="${remote_url:-}" sha="${sha:-}"
+
+    ( startgroup "Inspecting artifacts" ) 2> /dev/null
+
+    # inspect_artifacts was only added in conda-forge-ci-setup 4.6.0
+    command -v inspect_artifacts >/dev/null 2>&1 && inspect_artifacts || echo "inspect_artifacts needs conda-forge-ci-setup >=4.6.0"
+
+    ( endgroup "Inspecting artifacts" ) 2> /dev/null
 
     ( startgroup "Uploading packages" ) 2> /dev/null
 
diff --git a/.scripts/logging_utils.sh b/.scripts/logging_utils.sh
index 57bc95c..aff009f 100644
--- a/.scripts/logging_utils.sh
+++ b/.scripts/logging_utils.sh
@@ -12,7 +12,7 @@ function startgroup {
             echo "##[group]$1";;
         travis )
             echo "$1"
-            echo -en 'travis_fold:start:'"${1// /}"'\\r';;
+            echo -en 'travis_fold:start:'"${1// /}"'\r';;
         github_actions )
             echo "::group::$1";;
         * )
@@ -28,7 +28,7 @@ function endgroup {
         azure )
             echo "##[endgroup]";;
         travis )
-            echo -en 'travis_fold:end:'"${1// /}"'\\r';;
+            echo -en 'travis_fold:end:'"${1// /}"'\r';;
         github_actions )
             echo "::endgroup::";;
     esac
diff --git a/.scripts/run_docker_build.sh b/.scripts/run_docker_build.sh
index c4079b7..a16e7e7 100755
--- a/.scripts/run_docker_build.sh
+++ b/.scripts/run_docker_build.sh
@@ -21,6 +21,12 @@ if [ -z ${FEEDSTOCK_NAME} ]; then
     export FEEDSTOCK_NAME=$(basename ${FEEDSTOCK_ROOT})
 fi
 
+if [[ "${sha:-}" == "" ]]; then
+  pushd "${FEEDSTOCK_ROOT}"
+  sha=$(git rev-parse HEAD)
+  popd
+fi
+
 docker info
 
 # In order for the conda-build process in the container to write to the mounted
@@ -91,6 +97,9 @@ docker run ${DOCKER_RUN_ARGS} \
            -e CPU_COUNT \
            -e BUILD_WITH_CONDA_DEBUG \
            -e BUILD_OUTPUT_ID \
+           -e flow_run_id \
+           -e remote_url \
+           -e sha \
            -e BINSTAR_TOKEN \
            "${DOCKER_IMAGE}" \
            bash \
diff --git a/.scripts/run_win_build.bat b/.scripts/run_win_build.bat
new file mode 100755
index 0000000..93b09d5
--- /dev/null
+++ b/.scripts/run_win_build.bat
@@ -0,0 +1,121 @@
+:: PLEASE NOTE: This script has been automatically generated by conda-smithy. Any changes here
+:: will be lost next time ``conda smithy rerender`` is run. If you would like to make permanent
+:: changes to this script, consider a proposal to conda-smithy so that other feedstocks can also
+:: benefit from the improvement.
+
+:: Note: we assume a Miniforge installation is available
+
+:: INPUTS (required environment variables)
+:: CONFIG: name of the .ci_support/*.yaml file for this job
+:: CI: azure, github_actions, or unset
+:: UPLOAD_PACKAGES: true or false
+:: UPLOAD_ON_BRANCH: true or false
+
+setlocal enableextensions enabledelayedexpansion
+
+call :start_group "Configuring conda"
+
+:: Activate the base conda environment
+call activate base
+:: Configure the solver
+set "CONDA_SOLVER=libmamba"
+if !errorlevel! neq 0 exit /b !errorlevel!
+set "CONDA_LIBMAMBA_SOLVER_NO_CHANNELS_FROM_INSTALLED=1"
+
+:: Provision the necessary dependencies to build the recipe later
+echo Installing dependencies
+mamba.exe install "python=3.10" pip mamba conda-build conda-forge-ci-setup=4 "conda-build>=24.1" -c conda-forge --strict-channel-priority --yes
+if !errorlevel! neq 0 exit /b !errorlevel!
+
+:: Set basic configuration
+echo Setting up configuration
+setup_conda_rc .\ ".\recipe" .\.ci_support\%CONFIG%.yaml
+if !errorlevel! neq 0 exit /b !errorlevel!
+echo Running build setup
+CALL run_conda_forge_build_setup
+
+
+if !errorlevel! neq 0 exit /b !errorlevel!
+
+if EXIST LICENSE.txt (
+    echo Copying feedstock license
+    copy LICENSE.txt "recipe\\recipe-scripts-license.txt"
+)
+
+if NOT [%flow_run_id%] == [] (
+    set "EXTRA_CB_OPTIONS=%EXTRA_CB_OPTIONS% --extra-meta flow_run_id=%flow_run_id% remote_url=%remote_url% sha=%sha%"
+)
+
+call :end_group
+
+:: Build the recipe
+echo Building recipe
+conda-build.exe "recipe" -m .ci_support\%CONFIG%.yaml --suppress-variables %EXTRA_CB_OPTIONS%
+if !errorlevel! neq 0 exit /b !errorlevel!
+
+call :start_group "Inspecting artifacts"
+:: inspect_artifacts was only added in conda-forge-ci-setup 4.6.0
+WHERE inspect_artifacts >nul 2>nul && inspect_artifacts || echo "inspect_artifacts needs conda-forge-ci-setup >=4.6.0"
+call :end_group
+
+:: Prepare some environment variables for the upload step
+if /i "%CI%" == "github_actions" (
+    set "FEEDSTOCK_NAME=%GITHUB_REPOSITORY:*/=%"
+    set "GIT_BRANCH=%GITHUB_REF:refs/heads/=%"
+    if /i "%GITHUB_EVENT_NAME%" == "pull_request" (
+        set "IS_PR_BUILD=True"
+    ) else (
+        set "IS_PR_BUILD=False"
+    )
+    set "TEMP=%RUNNER_TEMP%"
+)
+if /i "%CI%" == "azure" (
+    set "FEEDSTOCK_NAME=%BUILD_REPOSITORY_NAME:*/=%"
+    set "GIT_BRANCH=%BUILD_SOURCEBRANCHNAME%"
+    if /i "%BUILD_REASON%" == "PullRequest" (
+        set "IS_PR_BUILD=True"
+    ) else (
+        set "IS_PR_BUILD=False"
+    )
+    set "TEMP=%UPLOAD_TEMP%"
+)
+
+:: Validate
+
+if /i "%UPLOAD_PACKAGES%" == "true" (
+    if /i "%IS_PR_BUILD%" == "false" (
+        call :start_group "Uploading packages"
+        if not exist "%TEMP%\" md "%TEMP%"
+        set "TMP=%TEMP%"
+        upload_package  .\ ".\recipe" .ci_support\%CONFIG%.yaml
+        if !errorlevel! neq 0 exit /b !errorlevel!
+        call :end_group
+    )
+)
+
+exit
+
+:: Logging subroutines
+
+:start_group
+if /i "%CI%" == "github_actions" (
+    echo ::group::%~1
+    exit /b
+)
+if /i "%CI%" == "azure" (
+    echo ##[group]%~1
+    exit /b
+)
+echo %~1
+exit /b
+
+:end_group
+if /i "%CI%" == "github_actions" (
+    echo ::endgroup::
+    exit /b
+)
+if /i "%CI%" == "azure" (
+    echo ##[endgroup]
+    exit /b
+)
+exit /b
\ No newline at end of file
diff --git a/README.md b/README.md
index 3cd745d..7dd6540 100644
--- a/README.md
+++ b/README.md
@@ -54,17 +54,15 @@ conda install -c conda-forge cudadecon cudatoolkit=10.2
 | 11.1  | ≥ 455.23     | ≥ 456.38   |
 | 11.2  | ≥ 460.27.03  | ≥ 460.82   |
 
-
 If you run into trouble, feel free to [open an issue](https://github.com/scopetools/cudaDecon/issues) and describe your setup.
 
-
------ 
+-----------------------
 
 ## Notes
 
 * Compatible GPUs are specified in this "C:\cudaDecon\CMakeLists.txt".  This also sets up all of the linking to dependent libraries.  If you end up adding other code libraries, or changing versions, etc you will want to edit this file.  Specifically where you see the lines like : "-gencode=arch=compute_75,code=sm_75"
 
-* GPU based resources have a d_ prefix in their name such as : GPUBuffer & d_interpOTF
+* GPU based resources have a `d_` prefix in their name such as : GPUBuffer & d_interpOTF
 
 * transferConstants() is a function to send small data values from host to GPU device.
 
@@ -83,7 +81,7 @@ Running this command from an adminstrator command prompt should set the timeout
 
 * Better yet, use a second GPU.  The GPU you wish to use for computation only should use the TCC driver (must be a Titan or Tesla or other GPU that supports TCC).  This card should be initialized after the display GPU, so put the compute card in a slot that is > display card.  The TCC driver is selected with NVIDIAsmi.exe -L from an administrator cmd window to show the GPUs, then NVIDIAsmi.exe -dm 1 -i 0 to set TCC on GPU 0.  Then use `set CUDA_VISIBLE_DEVICES` to pick the GPU the deconv code should execute on.
 
----------------------
+-----------------------
 
 ## Local build instructions
 
@@ -96,7 +94,7 @@ To build the source locally, you have two options:
 With docker installed, use `.scripts/run_docker_build.sh` with one of the
 configs available in `.ci_support`, for instance:
 
-```
+```shell
 CONFIG=linux_64_cuda_compiler_version10.2 .scripts/run_docker_build.sh
 ```
 
@@ -106,7 +104,6 @@ Here we create a dedicated conda environment with all of the build dependencies
 installed, and then use cmake directly.  This method is faster and creates an
 immediately useable binary (i.e. it is better for iteration if you're changing
 the source code), but requires that you set up build dependencies correctly.
-   
 
 1. install [miniconda](https://docs.conda.io/en/latest/miniconda.html)
 2. install [cudatoolkit](https://developer.nvidia.com/cuda-10.1-download-archive-update2) (I haven't yet tried 10.2)
diff --git a/recipe/build.sh b/recipe/build.sh
index 9e020cc..504db00 100644
--- a/recipe/build.sh
+++ b/recipe/build.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+export PATH="$PATH:$BUILD_PREFIX/nvvm/bin/"
+
 mkdir cmake_build
 cd cmake_build
 cmake ${CMAKE_ARGS} -DCMAKE_BUILD_TYPE=Release ../src
diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml
index 8dd2a62..94350b5 100644
--- a/recipe/conda_build_config.yaml
+++ b/recipe/conda_build_config.yaml
@@ -1,36 +1,33 @@
 docker_image:                                     # [os.environ.get("BUILD_PLATFORM", "").startswith("linux") or (os.environ.get("CONFIG_VERSION", "1") == "1" and linux)]
-   - quay.io/condaforge/linux-anvil-cuda:9.2      # [linux64 and (os.environ.get("BUILD_PLATFORM") == "linux-64" or os.environ.get("CONFIG_VERSION", "1") == "1")]
-   - quay.io/condaforge/linux-anvil-cuda:10.2     # [linux64 and (os.environ.get("BUILD_PLATFORM") == "linux-64" or os.environ.get("CONFIG_VERSION", "1") == "1")]
    - quay.io/condaforge/linux-anvil-cuda:11.2     # [linux64 and (os.environ.get("BUILD_PLATFORM") == "linux-64" or os.environ.get("CONFIG_VERSION", "1") == "1")]
+   - quay.io/condaforge/linux-anvil-cos7-x86_64   # [linux64 and (os.environ.get("BUILD_PLATFORM") == "linux-64" or os.environ.get("CONFIG_VERSION", "1") == "1")]
 
-c_compiler_version:     # [linux]
-  - 7                   # [linux64 or aarch64]
-  - 7                   # [linux64 or aarch64]
-  - 7                   # [linux64 or aarch64]
-cxx_compiler_version:   # [linux]
-  - 7                   # [linux64 or aarch64]
-  - 7                   # [linux64 or aarch64]
-  - 7                   # [linux64 or aarch64]
-fortran_compiler_version: # [linux]
-  - 7                     # [linux64 or aarch64]
-  - 7                     # [linux64 or aarch64]
-  - 7                     # [linux64 or aarch64]
+c_compiler_version:       # [linux]
+  - 10                    # [linux64 or aarch64]
+  - 10                    # [linux64 or aarch64]
+cxx_compiler_version:     # [linux]
+  - 10                    # [linux64 or aarch64]
+  - 10                    # [linux64 or aarch64]
 
+cdt_name:                 # [linux]
+  - cos7                  # [linux64]
+  - cos7                  # [linux64]
+
+cuda_compiler:
+  - nvcc                  # [not win]
+  - cuda-nvcc
 
 cuda_compiler_version:
-  - 9.2                        # [linux64]
-  - 10.2                       # [linux64]
-  - 11.2                       # [linux64]
+  - 11.2                  # [not win]
+  - 12.0
 
-cudnn:
-  - 7                          # [linux64]
-  - 8                          # [linux64]
-  - 8                          # [linux64]
+libboost_devel:
+  - '1.82'                # [not win]
+  - '1.82'
 
-cdt_name:  # [linux]
-  - cos6   # [linux64]
-  - cos7   # [linux64]
-  - cos7   # [linux64]
+libtiff:
+  - 4.6                   # [not win]
+  - 4.6
 
 channel_sources:
-  - conda-forge,defaults
+  - conda-forge
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index dd907c2..a226906 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -21,15 +21,15 @@ requirements:
     - {{ compiler('c') }}
     - {{ compiler('cxx') }}
     - {{ compiler('cuda') }}
-    - cmake
+    - cmake >=3.23  # CMAKE_CUDA_ARCHITECTURES=all-major needs CMake 3.23+
     - make  # [not win]
     - libgomp  # [linux]
   host:
-    - boost-cpp
+    - libboost-devel
     - libtiff
     - fftw
+    - libcufft-dev # [(cuda_compiler_version or "").startswith("12")]
   run:
-    - boost-cpp
 
 test:
   commands:
diff --git a/src/Buffers/CMakeLists.txt b/src/Buffers/CMakeLists.txt
index 0fd2b32..9a28084 100644
--- a/src/Buffers/CMakeLists.txt
+++ b/src/Buffers/CMakeLists.txt
@@ -2,9 +2,10 @@ include_directories(
   "${CMAKE_CURRENT_SOURCE_DIR}"
   ${CUDA_INCLUDE_DIRS}
   "${CMAKE_SOURCE_DIR}/gtest/include"
+  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
   )
 
-CUDA_ADD_LIBRARY(
+add_library(
   Buffer
   Buffer.cpp
   bufferExample.cpp
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7d53726..f5b73e2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -10,19 +10,13 @@
 ######################################################################
 
 
-cmake_minimum_required (VERSION 3.12)
+cmake_minimum_required (VERSION 3.23) # the "all-major" value of CMAKE_CUDA_ARCHITECTURES used below needs CMake >= 3.23
 # cmake_policy(SET CMP0054 OLD) #Set IF statements to dereference variables like in CMAKE version < 3.1
 # cmake_policy(SET CMP0012 NEW) #Set IF statements to use values of numbers and booleans rather than pretend that they could be variables
 
-project (cudaDecon)
-
+project (cudaDecon VERSION 0.7.0 LANGUAGES CXX CUDA)
 set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
 
-set(VERSION_MAJOR "0")
-set(VERSION_MINOR "6")
-set(VERSION_PATCH "1")
-set(PROJECT_VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH})
-
 # set -fPIC for all targets (for shared libraries)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
@@ -103,7 +97,6 @@ endif()
 #
 ######################################################################
 
-find_package(CUDA REQUIRED)
 if (WIN32)
   set (FFTW3_INCLUDE_DIR $ENV{CONDA_PREFIX}/Library/lib)
   set (FFTW3F_LIBRARY fftw3f)
@@ -123,51 +116,9 @@ message(STATUS "FFTW3 Library: " ${FFTW3F_LIBRARY})
 
 find_package(Boost REQUIRED COMPONENTS program_options filesystem system)
 
-
-message(STATUS "CUDA version : ${CUDA_VERSION}") 
-if(NOT(CUDA_VERSION LESS 11.1))  # if CUDA version is > or = 11.1  
-  set(CUDA_NVCC_FLAGS "-O3;\
-  -lcudart;\
-  -gencode=arch=compute_61,code=sm_61;\
-  -gencode=arch=compute_62,code=sm_62;\
-  -gencode=arch=compute_70,code=sm_70;\
-  -gencode=arch=compute_72,code=sm_72;\
-  -gencode=arch=compute_75,code=sm_75,\
-  -gencode=arch=compute_75,code=compute_75,\
-  -gencode=arch=compute_80,code=compute_80,\
-  -gencode=arch=compute_86,code=compute_86")
-elseif(NOT(CUDA_VERSION LESS 10.0))  # if CUDA version is > or = 10.0
-  set(CUDA_NVCC_FLAGS "-O3;\
-  -lcudart;\
-  -gencode=arch=compute_52,code=sm_52;\ 
-  -gencode=arch=compute_61,code=sm_61;\
-  -gencode=arch=compute_62,code=sm_62;\
-  -gencode=arch=compute_70,code=sm_70;\
-  -gencode=arch=compute_72,code=sm_72;\
-  -gencode=arch=compute_75,code=sm_75,\
-  -gencode=arch=compute_75,code=compute_75")
-elseif(NOT(CUDA_VERSION LESS 8.0))  # if CUDA version is > or = 8.0
-  set(CUDA_NVCC_FLAGS "-O2;\
-  -gencode=arch=compute_30,code=sm_30;\
-  -gencode=arch=compute_35,code=sm_35;\
-  -gencode=arch=compute_37,code=sm_37;\
-  -gencode=arch=compute_50,code=sm_50;\
-  -gencode=arch=compute_52,code=sm_52;\
-  -gencode=arch=compute_61,code=sm_61") # compute capability 6.1 only avaiable in CUDA 8.0 or greater
-else()
-  set(CUDA_NVCC_FLAGS "-O2;\
-  -gencode=arch=compute_20,code=sm_20;\
-  -gencode=arch=compute_30,code=sm_30;\
-  -gencode=arch=compute_35,code=sm_35;\
-  -gencode=arch=compute_37,code=sm_37;\
-  -gencode=arch=compute_50,code=sm_50;\
-  -gencode=arch=compute_52,code=sm_52")
-                      # compute capability 6.1 only avaiable in CUDA 8.0 or greater
-endif()
-
-# Specifies which compute capabilities are supported by the built .exe http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#virtual-architecture-feature-list http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-feature-list 
-# Need to add to these if different GPUs are targeted.
-#--ptxas-options=-v;
+find_package(CUDAToolkit REQUIRED)
+message(STATUS "CUDA version : ${CMAKE_CUDA_COMPILER_VERSION}")
+set(CMAKE_CUDA_ARCHITECTURES all-major)
 
 ######################################################################
 #
@@ -182,13 +133,11 @@ include_directories(
   "${CMAKE_CURRENT_SOURCE_DIR}/Buffers"
   ${CUDA_INCLUDE_DIRS}
   ${FFTW3_INCLUDE_DIR}
-  # ${X11_INCLUDE_DIR}
+  ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
 )
 
 
 if(WIN32)
-  set(CUDA_NVCC_FLAGS
-    ${CUDA_NVCC_FLAGS};--use-local-env;)
   link_directories ( ${Boost_LIBRARY_DIRS} ${FFTW3_INCLUDE_DIR} ${TIFF_LIBRARY} )
   include_directories( ${Boost_INCLUDE_DIRS} ${FFTW3_INCLUDE_DIR}  ${TIFF_INCLUDE_DIR}
     "$ENV{NVCUDASAMPLES_ROOT}/common/inc" )
@@ -196,41 +145,24 @@ else()
   include_directories( ${CUDA_TOOLKIT_ROOT_DIR}/samples/common/inc )
 endif()
 
-
-
-
-##### Make this everything time so that linearDecon.cpp is remade each time.  This will ensure that the build date is correct #####
-#add_custom_target(invalidate_files ALL
-#                  COMMAND ${CMAKE_COMMAND} -E touch "${CMAKE_CURRENT_SOURCE_DIR}/linearDecon.cpp")
-
-
 ######################################################################
 #
 # Add build outputs and their dependencies
 #
 ######################################################################
 
-
-
 #####  libcudaDecon.dll ###########################################
 
- CUDA_ADD_LIBRARY(
-   libcudaDecon
-   SHARED
-   RL-Biggs-Andrews.cpp
-   RLgpuImpl.cu
-   geometryTransform.cu
-#   camcor.cpp
-   camcor.cu
-   )
-
+add_library(
+  libcudaDecon
+  SHARED
+  RL-Biggs-Andrews.cpp
+  RLgpuImpl.cu
+  geometryTransform.cu
+  #  camcor.cu
+  )
 set_target_properties(libcudaDecon PROPERTIES PREFIX "")
 
-add_dependencies(
-   libcudaDecon
-   Buffer
-   )
-
 
 #####  libradialft.dll ###########################################
 
@@ -240,10 +172,6 @@ add_dependencies(
    radialft_interface.cpp
    )
 
-add_dependencies(
-   libradialft
-   Buffer
-   )
 
 if (WIN32)
   set_property(TARGET libradialft PROPERTY POSITION_INDEPENDENT_CODE ON)
@@ -260,14 +188,14 @@ endif()
 
 #####  cudaDecon.exe ###########################################
 
-CUDA_ADD_EXECUTABLE(
+add_executable(
   cudaDecon
   linearDecon.cpp
   RL-Biggs-Andrews.cpp
   boostfs.cpp
   RLgpuImpl.cu
   geometryTransform.cu
-  camcor.cu
+  # camcor.cu
   )
 
 add_dependencies(
@@ -291,6 +219,7 @@ add_executable(radialft radialft-nonSIM.cpp)
 
 target_link_libraries(
   cudaDecon
+  PRIVATE
   Buffer
   ${FFTW3F_LIBRARY}
   ${FFTW3F_THREADS_LIBRARY}
@@ -300,10 +229,11 @@ target_link_libraries(
   ${Boost_PREDICATE_LIBRARY}
   ${Boost_SYSTEM_LIBRARY}
   ${TIFF_LIBRARIES}
+  CUDA::cufft
   )
 
 if (UNIX)
-  target_link_libraries(cudaDecon rt)
+  target_link_libraries(cudaDecon PRIVATE rt)
 endif ()
 
 target_link_libraries(
@@ -315,9 +245,9 @@ target_link_libraries(
   ${TIFF_LIBRARIES}
 )
 
-
 target_link_libraries(
   libcudaDecon
+  PRIVATE
   Buffer
   ${FFTW3F_LIBRARY}
   ${FFTW3F_THREADS_LIBRARY}
@@ -327,8 +257,8 @@ target_link_libraries(
   ${Boost_PREDICATE_LIBRARY}
   ${Boost_SYSTEM_LIBRARY}
   ${TIFF_LIBRARIES}
+  CUDA::cufft
 )
-CUDA_ADD_CUFFT_TO_TARGET(libcudaDecon)
 
 
 target_link_libraries(
@@ -337,8 +267,6 @@ target_link_libraries(
   ${TIFF_LIBRARIES}
 )
 
-CUDA_ADD_CUFFT_TO_TARGET(cudaDecon)
-
 # added for make install to work in conda
 set(HEADERS
   CImg.h
diff --git a/src/RL-Biggs-Andrews.cpp b/src/RL-Biggs-Andrews.cpp
index 6049bc5..b4d6ac4 100644
--- a/src/RL-Biggs-Andrews.cpp
+++ b/src/RL-Biggs-Andrews.cpp
@@ -853,29 +853,29 @@ int Affine_interface_RA(const float * const raw_data,
 
 
 
-int camcor_interface_init(int nx, int ny, int nz,
-                     const float * const camparam)
-{
-  CImg<> h_camparam(camparam, nx, ny, 3);
-  setupConst(nx, ny, nz);
-  setupCamCor(nx, ny, h_camparam.data());
-  return 1;
-}
-
-
-int camcor_interface(const unsigned short * const raw_data,
-                     int nx, int ny, int nz,
-                     unsigned short * const result)
-{
-  CImg<unsigned short> input(raw_data, nx, ny, nz);
-  CImg<unsigned> raw_image(input);
-  GPUBuffer d_correctedResult(nx * ny * nz * sizeof(unsigned short), 0, false);
-  setupData(nx, ny, nz, raw_image.data());
-  camcor_GPU(nx, ny, nz, d_correctedResult);
-  //transfer result back to host
-  cudaMemcpy(result, d_correctedResult.getPtr(), nx * ny * nz * sizeof(unsigned short), cudaMemcpyDeviceToHost);
-  return 1;
-}
+// int camcor_interface_init(int nx, int ny, int nz,
+//                      const float * const camparam)
+// {
+//   CImg<> h_camparam(camparam, nx, ny, 3);
+//   setupConst(nx, ny, nz);
+//   setupCamCor(nx, ny, h_camparam.data());
+//   return 1;
+// }
+
+
+// int camcor_interface(const unsigned short * const raw_data,
+//                      int nx, int ny, int nz,
+//                      unsigned short * const result)
+// {
+//   CImg<unsigned short> input(raw_data, nx, ny, nz);
+//   CImg<unsigned> raw_image(input);
+//   GPUBuffer d_correctedResult(nx * ny * nz * sizeof(unsigned short), 0, false);
+//   setupData(nx, ny, nz, raw_image.data());
+//   camcor_GPU(nx, ny, nz, d_correctedResult);
+//   //transfer result back to host
+//   cudaMemcpy(result, d_correctedResult.getPtr(), nx * ny * nz * sizeof(unsigned short), cudaMemcpyDeviceToHost);
+//   return 1;
+// }
 
 
 
diff --git a/src/geometryTransform.cu b/src/geometryTransform.cu
index ce9a3f3..aed3d05 100644
--- a/src/geometryTransform.cu
+++ b/src/geometryTransform.cu
@@ -177,13 +177,12 @@ __host__ void duplicateReversedStack_GPU(GPUBuffer &zExpanded, int nx, int ny, i
 }
 
 
-texture<float, cudaTextureType3D, cudaReadModeElementType> texRef;
 
 
 // Simple transformation kernel
 __global__ void transformKernel(float *output,
                                 int nx, int ny, int nz,
-                                float *mat)
+                                float *mat, cudaTextureObject_t texObj)
 {
 
     // Calculate texture coordinates
@@ -210,14 +209,14 @@ __global__ void transformKernel(float *output,
 
     // Read from texture and write to global memory
     int idx = z * (nx*ny) + y * nx + x;
-    output[idx] = tex3D(texRef, tu, tv, tw);
+    output[idx] = tex3D<float>(texObj, tu, tv, tw);
 }
 
 // Simple transformation kernel
 __global__ void transformKernelRA(float *output,
                                 int nx, int ny, int nz,
                                 float dx, float dy, float dz,
-                                float *mat)
+                                float *mat, cudaTextureObject_t texObj)
 {
 
     // Calculate texture coordinates
@@ -250,7 +249,7 @@ __global__ void transformKernelRA(float *output,
 
     // Read from texture and write to global memory
     int idx = z * (nx*ny) + y * nx + x;
-    output[idx] = tex3D(texRef, tu, tv, tw);
+    output[idx] = tex3D<float>(texObj, tu, tv, tw);
 }
 
 
@@ -259,19 +258,23 @@ __host__ void affine_GPU(cudaArray *cuArray, int nx, int ny, int nz,
                          float * result, GPUBuffer &affMat)
 {
 
-    // Allocate CUDA array in device memory
-    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(
-        32, 0, 0, 0, cudaChannelFormatKindFloat);
+    // Create texture object
+    cudaTextureObject_t texObj;
+    cudaResourceDesc resDesc;
+    memset(&resDesc, 0, sizeof(resDesc));
+    resDesc.resType = cudaResourceTypeArray;
+    resDesc.res.array.array = cuArray;
 
-    // Set texture reference parameters
-    texRef.addressMode[0] = cudaAddressModeBorder;
-    texRef.addressMode[1] = cudaAddressModeBorder;
-    texRef.addressMode[2] = cudaAddressModeBorder;
-    texRef.filterMode = cudaFilterModeLinear;
-    texRef.normalized = false;
+    cudaTextureDesc texDesc;
+    memset(&texDesc, 0, sizeof(texDesc));
+    texDesc.readMode = cudaReadModeElementType;
+    texDesc.addressMode[0] = cudaAddressModeBorder;
+    texDesc.addressMode[1] = cudaAddressModeBorder;
+    texDesc.addressMode[2] = cudaAddressModeBorder;
+    texDesc.filterMode = cudaFilterModeLinear;
+    texDesc.normalizedCoords = false;
 
-    // Bind the array to the texture reference
-    cudaBindTextureToArray(texRef, cuArray, channelDesc);
+    cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
 
     // Allocate result of transformation in device memory
     float* output;
@@ -283,9 +286,12 @@ __host__ void affine_GPU(cudaArray *cuArray, int nx, int ny, int nz,
                  (ny + dimBlock.y - 1) / dimBlock.y,
                  (nz + dimBlock.z - 1) / dimBlock.z);
 
-    transformKernel<<<dimGrid, dimBlock>>>(output, nx, ny, nz, (float *) affMat.getPtr());
+    transformKernel<<<dimGrid, dimBlock>>>(output, nx, ny, nz, (float *) affMat.getPtr(), texObj);
     CudaCheckError();
 
     //transfer result back to host
     cudaMemcpy(result, output, nz * nx * ny * sizeof(float), cudaMemcpyDeviceToHost);
+
+    // Destroy the texture object only after the blocking copy above, so the kernel is done sampling it
+    cudaDestroyTextureObject(texObj);
 
@@ -300,19 +306,23 @@ __host__ void affine_GPU_RA(cudaArray *cuArray, int nx, int ny, int nz,
                          float * result, GPUBuffer &affMat)
 {
 
-    // Allocate CUDA array in device memory
-    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(
-        32, 0, 0, 0, cudaChannelFormatKindFloat);
+    // Create texture object
+    cudaTextureObject_t texObj;
+    cudaResourceDesc resDesc;
+    memset(&resDesc, 0, sizeof(resDesc));
+    resDesc.resType = cudaResourceTypeArray;
+    resDesc.res.array.array = cuArray;
 
-    // Set texture reference parameters
-    texRef.addressMode[0] = cudaAddressModeBorder;
-    texRef.addressMode[1] = cudaAddressModeBorder;
-    texRef.addressMode[2] = cudaAddressModeBorder;
-    texRef.filterMode = cudaFilterModeLinear;
-    texRef.normalized = false;
+    cudaTextureDesc texDesc;
+    memset(&texDesc, 0, sizeof(texDesc));
+    texDesc.readMode = cudaReadModeElementType;
+    texDesc.addressMode[0] = cudaAddressModeBorder;
+    texDesc.addressMode[1] = cudaAddressModeBorder;
+    texDesc.addressMode[2] = cudaAddressModeBorder;
+    texDesc.filterMode = cudaFilterModeLinear;
+    texDesc.normalizedCoords = false;
 
-    // Bind the array to the texture reference
-    cudaBindTextureToArray(texRef, cuArray, channelDesc);
+    cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
 
     // Allocate result of transformation in device memory
     float* output;
@@ -324,9 +334,12 @@ __host__ void affine_GPU_RA(cudaArray *cuArray, int nx, int ny, int nz,
                  (ny + dimBlock.y - 1) / dimBlock.y,
                  (nz + dimBlock.z - 1) / dimBlock.z);
 
-    transformKernelRA<<<dimGrid, dimBlock>>>(output, nx, ny, nz, dx, dy, dz, (float *) affMat.getPtr());
+    transformKernelRA<<<dimGrid, dimBlock>>>(output, nx, ny, nz, dx, dy, dz, (float *) affMat.getPtr(), texObj);
     CudaCheckError();
 
     //transfer result back to host
     cudaMemcpy(result, output, nz * nx * ny * sizeof(float), cudaMemcpyDeviceToHost);
+
+    // Destroy the texture object only after the blocking copy above, so the kernel is done sampling it
+    cudaDestroyTextureObject(texObj);
 
diff --git a/src/linearDecon.cpp b/src/linearDecon.cpp
index c6a3df6..4573f37 100644
--- a/src/linearDecon.cpp
+++ b/src/linearDecon.cpp
@@ -10,7 +10,7 @@
 #pragma warning(disable : 4305) // Disregard loss of data from double to float.
 #endif
 
-std::string version_number = "0.6.2";
+std::string version_number = "0.7.0";
 CImg<> next_file_image;
 
 CImg<> ToSave;
diff --git a/src/linearDecon.h b/src/linearDecon.h
index 4f95f99..b7ec5fe 100644
--- a/src/linearDecon.h
+++ b/src/linearDecon.h
@@ -200,13 +200,13 @@ void affine_GPU(cudaArray *cuArray, int nx, int ny, int nz, float *result,
 void affine_GPU_RA(cudaArray *cuArray, int nx, int ny, int nz, float dx,
                    float dy, float dz, float *result, GPUBuffer &affMat);
 
-void camcor_GPU(int nx, int ny, int nz, GPUBuffer &outBuf);
+// void camcor_GPU(int nx, int ny, int nz, GPUBuffer &outBuf);
 
-void setupConst(int nx, int ny, int nz);
+// void setupConst(int nx, int ny, int nz);
 
-void setupCamCor(int nx, int ny, float *h_caparam);
+// void setupCamCor(int nx, int ny, float *h_caparam);
 
-void setupData(int nx, int ny, int nz, unsigned *h_data);
+// void setupData(int nx, int ny, int nz, unsigned *h_data);
 
 void cropGPU(GPUBuffer &inBuf, int nx, int ny, int nz, int new_nx, int new_ny,
              int new_nz, GPUBuffer &outBuf);
@@ -296,12 +296,12 @@ void makeNewDir(std::string subdirname);
       const float *const raw_data, int nx, int ny, int nz, float dx, float dy,
       float dz, float *const result, const float *affMat);
 
-  CUDADECON_API int camcor_interface_init(int nx, int ny, int nz,
-                                          const float *const camparam);
+  // CUDADECON_API int camcor_interface_init(int nx, int ny, int nz,
+  //                                         const float *const camparam);
 
-  CUDADECON_API int camcor_interface(const unsigned short *const raw_data,
-                                     int nx, int ny, int nz,
-                                     unsigned short *const result);
+  // CUDADECON_API int camcor_interface(const unsigned short *const raw_data,
+  //                                    int nx, int ny, int nz,
+  //                                    unsigned short *const result);
 
   //! Call this before program quits to release global GPUBuffer d_interpOTF
   CUDADECON_API void RL_cleanup();
diff --git a/src/llspy.cpp b/src/llspy.cpp
index 4000cc1..17d596e 100644
--- a/src/llspy.cpp
+++ b/src/llspy.cpp
@@ -42,12 +42,12 @@ extern "C" {
                        float * const result,
                        const float * affMat);
 
-  LLSPY_API int camcor_interface_init(int nx, int ny, int nz,
-                       const float * const camparam);
+  // LLSPY_API int camcor_interface_init(int nx, int ny, int nz,
+  //                      const float * const camparam);
 
-  LLSPY_API int camcor_interface(const unsigned short * const raw_data,
-                       int nx, int ny, int nz,
-                       unsigned short * const result);
+  // LLSPY_API int camcor_interface(const unsigned short * const raw_data,
+  //                      int nx, int ny, int nz,
+  //                      unsigned short * const result);
 
   LLSPY_API void cuda_reset();
 
@@ -66,29 +66,29 @@ void cuda_reset()
   cudaDeviceReset();
 }
 
-int camcor_interface_init(int nx, int ny, int nz,
-                     const float * const camparam)
-{
-  CImg<> h_camparam(camparam, nx, ny, 3);
-  setupConst(nx, ny, nz);
-  setupCamCor(nx, ny, h_camparam.data());
-  return 1;
-}
-
-
-int camcor_interface(const unsigned short * const raw_data,
-                     int nx, int ny, int nz,
-                     unsigned short * const result)
-{
-  CImg<unsigned short> input(raw_data, nx, ny, nz);
-  CImg<unsigned> raw_image(input);
-  GPUBuffer d_correctedResult(nx * ny * nz * sizeof(unsigned short), 0, false);
-  setupData(nx, ny, nz, raw_image.data());
-  camcor_GPU(nx, ny, nz, d_correctedResult);
-  //transfer result back to host
-  cudaMemcpy(result, d_correctedResult.getPtr(), nx * ny * nz * sizeof(unsigned short), cudaMemcpyDeviceToHost);
-  return 1;
-}
+// int camcor_interface_init(int nx, int ny, int nz,
+//                      const float * const camparam)
+// {
+//   CImg<> h_camparam(camparam, nx, ny, 3);
+//   setupConst(nx, ny, nz);
+//   setupCamCor(nx, ny, h_camparam.data());
+//   return 1;
+// }
+
+
+// int camcor_interface(const unsigned short * const raw_data,
+//                      int nx, int ny, int nz,
+//                      unsigned short * const result)
+// {
+//   CImg<unsigned short> input(raw_data, nx, ny, nz);
+//   CImg<unsigned> raw_image(input);
+//   GPUBuffer d_correctedResult(nx * ny * nz * sizeof(unsigned short), 0, false);
+//   setupData(nx, ny, nz, raw_image.data());
+//   camcor_GPU(nx, ny, nz, d_correctedResult);
+//   //transfer result back to host
+//   cudaMemcpy(result, d_correctedResult.getPtr(), nx * ny * nz * sizeof(unsigned short), cudaMemcpyDeviceToHost);
+//   return 1;
+// }