-
Notifications
You must be signed in to change notification settings - Fork 82
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Spreadinterponly (CPU and GPU) #602
base: master
Are you sure you want to change the base?
Changes from all commits
838245b
a033f3b
a67b717
d77cbb1
2094338
d0d60fe
305482b
09a9d0c
90a0675
85e1c4e
2e490a1
8170567
0d4c0e4
244791f
d8f42d6
29940c2
d13dade
2b8786f
587ddf2
bd96294
89deb03
175ef83
2acdb7a
b43e3a5
295cd10
12d1760
d5b2bad
c4a78dd
10342bb
ba1cc8a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// this is all you must include for the finufft lib... | ||
#include <finufft.h> | ||
|
||
// also used in this example... | ||
#include <cassert> | ||
#include <chrono> | ||
#include <complex> | ||
#include <cstdio> | ||
#include <stdlib.h> | ||
#include <vector> | ||
using namespace std; | ||
using namespace std::chrono; | ||
|
||
int main(int argc, char *argv[]) | ||
/* Example of double-prec spread/interp only tasks, with basic math tests. | ||
Complex I/O arrays, but recall the kernel is real. Barnett 1/8/25. | ||
|
||
The math tests are: | ||
1) for spread, check sum of spread kernel masses is as expected from sum | ||
of strengths (ie testing the zero-frequency component in NUFFT). | ||
2) for interp, check each interp kernel mass is the same as from one. | ||
|
||
Without knowing the kernel, this is about all that can be done! | ||
(Better math tests would be, ironically, to wrap the spreader/interpolator | ||
into a NUFFT and test that :) But we already have that in FINUFFT.) | ||
|
||
Compile and run (static library case): | ||
|
||
g++ spreadinterponly1d.cpp -I../include ../lib-static/libfinufft.a -o | ||
spreadinterponly1d -lfftw3 -lfftw3_omp && ./spreadinterponly1d | ||
|
||
See: spreadtestnd for usage of internal (non FINUFFT-API) spread/interp. | ||
*/ | ||
{ | ||
int M = 1e7; // number of nonuniform points | ||
int N = 1e7; // size of regular grid | ||
finufft_opts opts; | ||
finufft_default_opts(&opts); | ||
opts.spreadinterponly = 1; // task: the following two control kernel used... | ||
double tol = 1e-9; // tolerance for (real) kernel shape design only | ||
opts.upsampfac = 2.0; // pretend upsampling factor (really no upsampling) | ||
// opts.spread_kerevalmeth = 0; // would be needed for any nonstd upsampfac | ||
|
||
complex<double> I = complex<double>(0.0, 1.0); // the imaginary unit | ||
vector<double> x(M); // input | ||
vector<complex<double>> c(M); // input | ||
vector<complex<double>> F(N); // output (spread to this array) | ||
|
||
// first spread M=1 single unit-strength at the origin, only to get its total mass... | ||
x[0] = 0.0; | ||
c[0] = 1.0; | ||
int unused = 1; | ||
int ier = finufft1d1(1, &x[0], &c[0], unused, tol, N, &F[0], &opts); // warm-up | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. x.data(), c.data(), F.data() &x[0] is |
||
if (ier > 1) return ier; | ||
complex<double> kersum = 0.0; | ||
for (auto Fk : F) kersum += Fk; // kernel mass | ||
|
||
// Now generate random nonuniform points (x) and complex strengths (c)... | ||
for (int j = 0; j < M; ++j) { | ||
x[j] = M_PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi) | ||
c[j] = | ||
2 * ((double)rand() / RAND_MAX) - 1 + I * (2 * ((double)rand() / RAND_MAX) - 1); | ||
} | ||
|
||
opts.debug = 1; | ||
auto t0 = steady_clock::now(); // now spread with all M pts... (dir=1) | ||
ier = finufft1d1(M, &x[0], &c[0], unused, tol, N, &F[0], &opts); // do it | ||
double t = (steady_clock::now() - t0) / 1.0s; | ||
if (ier > 1) return ier; | ||
complex<double> csum = 0.0; // tot input strength | ||
for (auto cj : c) csum += cj; | ||
complex<double> mass = 0.0; // tot output mass | ||
for (auto Fk : F) mass += Fk; | ||
double relerr = abs(mass - kersum * csum) / abs(mass); | ||
printf("1D spread-only, double-prec, %.3g s (%.3g NU pt/sec), ier=%d, mass err %.3g\n", | ||
t, M / t, ier, relerr); | ||
|
||
for (auto &Fk : F) Fk = complex<double>{1.0, 0.0}; // unit grid input | ||
opts.debug = 0; | ||
t0 = steady_clock::now(); // now interp to all M pts... (dir=2) | ||
ier = finufft1d2(M, &x[0], &c[0], unused, tol, N, &F[0], &opts); // do it | ||
t = (steady_clock::now() - t0) / 1.0s; | ||
if (ier > 1) return ier; | ||
csum = 0.0; // tot output | ||
for (auto cj : c) csum += cj; | ||
double maxerr = 0.0; | ||
for (auto cj : c) maxerr = max(maxerr, abs(cj - kersum)); | ||
printf("1D interp-only, double-prec, %.3g s (%.3g NU pt/sec), ier=%d, max err %.3g\n", | ||
t, M / t, ier, maxerr / abs(kersum)); | ||
return 0; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -155,13 +155,6 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran | |
printf("[cufinufft] upsampfac automatically set to %.3g\n", d_plan->opts.upsampfac); | ||
} | ||
} | ||
if (d_plan->opts.gpu_spreadinterponly) { | ||
// XNOR implementation below with boolean logic. | ||
if ((d_plan->opts.upsampfac != 1) == (type != 3)) { | ||
ier = FINUFFT_ERR_SPREADONLY_UPSAMP_INVALID; | ||
goto finalize; | ||
} | ||
} | ||
/* Setup Spreader */ | ||
if ((ier = setup_spreader_for_nufft(d_plan->spopts, tol, d_plan->opts)) > 1) { | ||
// can return FINUFFT_WARN_EPS_TOO_SMALL=1, which is OK | ||
|
@@ -197,7 +190,6 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran | |
printf("[cufinufft] shared memory required for the spreader: %ld\n", mem_required); | ||
} | ||
|
||
|
||
// dynamically request the maximum amount of shared memory available | ||
// for the spreader | ||
|
||
|
@@ -235,23 +227,31 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran | |
|
||
if (type == 1 || type == 2) { | ||
CUFINUFFT_BIGINT nf1 = 1, nf2 = 1, nf3 = 1; | ||
set_nf_type12(d_plan->ms, d_plan->opts, d_plan->spopts, &nf1, | ||
d_plan->opts.gpu_obinsizex); | ||
if (dim > 1) | ||
set_nf_type12(d_plan->mt, d_plan->opts, d_plan->spopts, &nf2, | ||
d_plan->opts.gpu_obinsizey); | ||
if (dim > 2) | ||
set_nf_type12(d_plan->mu, d_plan->opts, d_plan->spopts, &nf3, | ||
d_plan->opts.gpu_obinsizez); | ||
|
||
if (d_plan->opts.gpu_spreadinterponly) { | ||
// spread/interp grid is precisely the user "mode" sizes, no upsampling | ||
nf1 = d_plan->ms; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @chaithyagr ...and here is the corresponding GPU code. At the risk of repetition, since the user allocates an N1*N2 output array, spreading could not write to any other size without segfault. Agreed? |
||
if (dim > 1) nf2 = d_plan->mt; | ||
if (dim > 2) nf3 = d_plan->mu; | ||
if (d_plan->opts.debug) { | ||
printf("[cufinufft] spreadinterponly mode: (nf1,nf2,nf3) = (%d, %d, %d)\n", nf1, | ||
nf2, nf3); | ||
} | ||
} else { // usual NUFFT with fine grid using upsampling | ||
set_nf_type12(d_plan->ms, d_plan->opts, d_plan->spopts, &nf1, | ||
d_plan->opts.gpu_obinsizex); | ||
if (dim > 1) | ||
set_nf_type12(d_plan->mt, d_plan->opts, d_plan->spopts, &nf2, | ||
d_plan->opts.gpu_obinsizey); | ||
if (dim > 2) | ||
set_nf_type12(d_plan->mu, d_plan->opts, d_plan->spopts, &nf3, | ||
d_plan->opts.gpu_obinsizez); | ||
if (d_plan->opts.debug) | ||
printf("[cufinufft] (nf1,nf2,nf3) = (%d, %d, %d)\n", nf1, nf2, nf3); | ||
} | ||
d_plan->nf1 = nf1; | ||
d_plan->nf2 = nf2; | ||
d_plan->nf3 = nf3; | ||
d_plan->nf = nf1 * nf2 * nf3; | ||
if (d_plan->opts.debug) { | ||
printf("[cufinufft] (nf1,nf2,nf3) = (%d, %d, %d)\n", d_plan->nf1, d_plan->nf2, | ||
d_plan->nf3); | ||
} | ||
|
||
using namespace cufinufft::memtransfer; | ||
switch (d_plan->dim) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,8 +9,10 @@ typedef struct finufft_opts { // defaults see finufft_core.cpp:finufft_default_o | |
// sphinx tag (don't remove): @opts_start | ||
// FINUFFT options: | ||
// data handling opts... | ||
int modeord; // (type 1,2 only): 0 CMCL-style increasing mode order | ||
// 1 FFT-style mode order | ||
int modeord; // (type 1,2 only): 0 CMCL-style increasing mode order | ||
// 1 FFT-style mode order | ||
int spreadinterponly; // 0 do actual NUFFT | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as per comment for modeord (type1, 2 only) should be specified here too |
||
// 1 only spread (if type 1) or interpolate (type 2) | ||
|
||
// diagnostic opts... | ||
int debug; // 0 silent, 1 some timing/debug, or 2 more | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking at the rendered docs here: https://github.com/flatironinstitute/finufft/blob/spreadinterponly/docs/opts.rst
modeord has a nice itemize. I think is worth doing the same.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe: if
1
: It does not perform a NUFFT!hijacking -> adapting
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if hacky -> unconventional | unorthodox