-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathMCGPU-PET.cu
3356 lines (2762 loc) · 180 KB
/
MCGPU-PET.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//!!MCGPU-PET!! Changes for PET marked with !!MCGPU-PET!! (ABS, 2016-07-02)
// *****************************************************************************
// *** MCGPU-PET_v0.1 (based on MC-GPU_v1.3) ***
// *** ***
// *** Distribution: https://github.com/DIDSR/MCGPU-PET ***
// *** ***
// *** Authors: ***
// *** ***
// *** MCGPU code foundation and PET source sampling implemented by: ***
// *** ***
// *** - Andreu Badal (Andreu.Badal-Soler[at]fda.hhs.gov) ***
// *** Division of Imaging and Applied Mathematics ***
// *** Office of Science and Engineering Laboratories ***
// *** Center for Devices and Radiological Health ***
// *** U.S. Food and Drug Administration ***
// *** ***
// *** ***
// *** PET detector model and sinogram reporting implemented by: ***
// *** ***
// *** - Joaquin L. Herraiz and Alejandro López-Montes ***
// *** Complutense University of Madrid, EMFTEL, Grupo Fisica Nuclear***
// *** and IPARCOS; Instituto de Investigacion Sanitaria Hospital ***
// *** Clinico San Carlos (IdiSSC), Madrid, Spain ***
// *** ***
// *** Code presented at the IEEE NSS MIC 2021 conference: ***
// *** ***
// *** M-07-01 – GPU-accelerated Monte Carlo-Based Scatter and ***
// *** Prompt-Gamma Corrections in PET, A. López-Montes, J. Cabello, ***
// *** M. Conti, A. Badal, J. L. Herraiz ***
// *** ***
// *** ***
// *** Last update: 2022/02/02 ***
// *****************************************************************************
// ** DISCLAIMER **
//
// This software and documentation (the "Software") were developed at the Food and
// Drug Administration (FDA) by employees of the Federal Government in the course
// of their official duties. Pursuant to Title 17, Section 105 of the United States
// Code, this work is not subject to copyright protection and is in the public
// domain. Permission is hereby granted, free of charge, to any person obtaining a
// copy of the Software, to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, or sell copies of the Software or derivatives, and to permit persons
// to whom the Software is furnished to do so. FDA assumes no responsibility
// whatsoever for use by other parties of the Software, its source code,
// documentation or compiled executables, and makes no guarantees, expressed or
// implied, about its quality, reliability, or any other characteristic. Further,
// use of this code in no way implies endorsement by the FDA or confers any
// advantage in regulatory decisions. Although this software can be redistributed
// and/or modified freely, we ask that any derivative works bear some notice that
// they are derived from it, and any modified versions bear some notice that they
// have been modified.
// *** Include header file with the structures and functions declarations
#include <MCGPU-PET.h>
// *** Include the computing kernel:
#include <MCGPU-PET_kernel.cu>
////////////////////////////////////////////////////////////////////////////////
//! Main program of MC-GPU: initialize the simulation enviroment, launch the GPU
//! kernels that perform the x ray transport and report the final results.
//! This function reads the description of the simulation from an external file
//! given in the command line. This input file defines the number of particles to
//! simulate, the characteristics of the x-ray source and the detector, the number
//! and spacing of the projections (if simulating a CT), the location of the
//! material files containing the interaction mean free paths, and the location
//! of the voxelized geometry file.
//!
//! @author Andreu Badal
//!
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
printf("narg= %i \n",argc);
unsigned int *PETA; // JLH (PETA=1 --> True&Scatter. PETA=0 --> Bg)
PETA = (unsigned int*)malloc(2*sizeof(unsigned int));
PETA[0]=1; // JLH (PETA=1 --> True&Scatter)
if (argc==3) PETA[0] = 0; // JLH (PETA=0 --> Bg)
// -- Start time counter:
time_t current_time = time(NULL); // Get current time (in seconds)
clock_t clock_start, clock_end, clock_start_beginning; // (requires standard header <time.h>)
clock_start = clock(); // Get current clock counter
clock_start_beginning = clock_start;
#ifdef USING_MPI
// -- Using MPI to access multiple GPUs to simulate the x-ray projection image:
int myID = -88, numprocs = -99, return_reduce = -1;
MPI_Init(&argc, &argv); // Init MPI and get the current thread ID
MPI_Comm_rank(MPI_COMM_WORLD, &myID);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
char MPI_processor_name[81];
int resultlen = -1;
MPI_Get_processor_name(MPI_processor_name, &resultlen);
char* char_time = ctime(¤t_time); char_time[19] = '\0'; // The time is located betwen the characters 11 and 19.
if (numprocs>1)
{
printf(" >> MPI run (myId=%d, numprocs=%d) on processor \"%s\" (time: %s) <<\n", myID, numprocs, MPI_processor_name, &char_time[11]);
fflush(stdout); // Clear the screen output buffer
MPI_Barrier(MPI_COMM_WORLD); // Synchronize MPI threads
MASTER_THREAD printf(" -- Time spent initializing the MPI world (MPI_Barrier): %.3f s\n", ((double)(clock()-clock_start))/CLOCKS_PER_SEC);
}
#else
int myID = 0, numprocs = 1; // Only one CPU thread used when MPI is not activated (multiple projections will be simulated sequentially).
#endif
MASTER_THREAD
{
printf("\n\n *****************************************************************************\n");
printf( " *** ***\n");
printf( " *** MCGPU-PET_v0.1 (based on MC-GPU_v1.3) ***\n");
printf( " *** ***\n");
printf( " *** -Distribution: ***\n");
printf( " *** https://github.com/DIDSR/MCGPU-PET ***\n");
printf( " *** ***\n");
printf( " *** -Authors: ***\n");
printf( " *** MCGPU code foundation and PET source sampling implemented by: ***\n");
printf( " *** - Andreu Badal (Andreu.Badal-Soler[at]fda.hhs.gov) ***\n");
printf( " *** Division of Imaging and Applied Mathematics ***\n");
printf( " *** Office of Science and Engineering Laboratories ***\n");
printf( " *** Center for Devices and Radiological Health ***\n");
printf( " *** U.S. Food and Drug Administration ***\n");
printf( " *** ***\n");
printf( " *** PET detector model and sinogram reporting implemented by: ***\n");
printf( " *** - Joaquin L. Herraiz and Alejandro López-Montes ***\n");
printf( " *** Complutense University of Madrid, EMFTEL, Grupo Fisica ***\n");
printf( " *** Nuclear and IPARCOS; Instituto de Investigacion ***\n");
printf( " *** Sanitaria Hospital Clinico San Carlos (IdiSSC), Spain ***\n");
printf( " *** ***\n");
printf( " *** -Code presented at the IEEE NSS MIC 2021 conference: ***\n");
printf( " *** M-07-01 – GPU-accelerated Monte Carlo-Based Scatter and ***\n");
printf( " *** Prompt-Gamma Corrections in PET, A. López-Montes, J. Cabello, ***\n");
printf( " *** M. Conti, A. Badal, J. L. Herraiz ***\n");
printf( " *** ***\n");
printf( " *** Last update: 2022/02/02 ***\n");
printf( " *****************************************************************************\n\n");
printf("****** Code execution started on: %s\n\n", ctime(¤t_time));
fflush(stdout);
}
#ifdef USING_CUDA
// The "MASTER_THREAD" macro prints the messages just once when using MPI threads (it has no effect if MPI is not used): MASTER_THREAD == "if(0==myID)"
MASTER_THREAD printf ("\n *** CUDA SIMULATION IN THE GPU ***\n");
#else
MASTER_THREAD printf ("\n *** SIMULATION IN THE CPU ***\n");
#endif
MASTER_THREAD printf("\n -- INITIALIZATION phase:\n");
MASTER_THREAD fflush(stdout); // Clear the screen output buffer for the master thread
///////////////////////////////////////////////////////////////////////////////////////////////////
// *** Declare the arrays and structures that will contain the simulation data:
struct voxel_struct voxel_data; // Define the geometric constants of the voxel file
struct detector_struct detector_data; // Define an x ray detector
struct source_struct source_data; // Define the particles source
struct source_energy_struct source_energy_data; // Define the source energy spectrum
struct linear_interp mfp_table_data; // Constant data for the linear interpolation
struct compton_struct compton_table; // Structure containing Compton sampling data (to be copied to CONSTANT memory)
struct rayleigh_struct rayleigh_table; // Structure containing Rayleigh sampling data (to be copied to CONSTANT memory)
float3 *voxel_mat_dens = NULL; // Poiter where voxels array will be allocated
unsigned int voxel_mat_dens_bytes = 0; // Size (in bytes) of the voxels array (using unsigned int to allocate up to 4.2GBytes)
float density_max[MAX_MATERIALS];
float density_nominal[MAX_MATERIALS];
PSF_element_struct *PSF = NULL; // Poiter where the PSF array will be allocated // !!MCGPU-PET!!
int index_PSF = 0; // Counter of how many PSF elements have been used
unsigned long long int PSF_bytes = 0; // Size of the PSF array in bytes
int mfp_table_bytes = -1, mfp_Woodcock_table_bytes = -1; // Size of the table arrays
float2 *mfp_Woodcock_table = NULL; // Linear interpolation data for the Woodcock mean free path [cm]
float3 *mfp_table_a = NULL, *mfp_table_b = NULL; // Linear interpolation data for 3 different interactions:
// (1) inverse total mean free path (divided by density, cm^2/g)
// (2) inverse Compton mean free path (divided by density, cm^2/g)
// (3) inverse Rayleigh mean free path (divided by density, cm^2/g)
short int dose_ROI_x_min, dose_ROI_x_max, dose_ROI_y_min, dose_ROI_y_max, dose_ROI_z_min, dose_ROI_z_max; // Coordinates of the dose region of interest (ROI)
ulonglong2 *voxels_Edep = NULL; // Poiter where the voxel energy deposition array will be allocated
int voxels_Edep_bytes = 0; // Size of the voxel Edep array
ulonglong2 materials_dose[MAX_MATERIALS]; // Array for tally_materials_dose. !!tally_materials_dose!!
int kk;
for(kk=0;kk<MAX_MATERIALS;kk++)
{
materials_dose[kk].x = 0; // Initializing data !!tally_materials_dose!!
materials_dose[kk].y = 0;
density_nominal[kk] =-1.0f;
}
clock_t clock_kernel; // Using only cpu timers after CUDA 5.0
double time_total_MC_init_report = 0.0, time_elapsed_MC_loop = 0.0;
unsigned long long int total_histories;
int histories_per_thread, seed_input, num_threads_per_block, gpu_id, num_projections=1;
float fact_object;
float E_resol;
float E_low;
float E_high;
float FOVZ;
int NROWS,NCRYSTALS,NANGLES,NRAD,NZS,NBINS,RES,NVOXS,NE,MRD,SPAN,NSEG,NSINOS;
int flag_material_dose = -2;
double D_angle=-1.0, angularROI_0=0.0, angularROI_1=360.0, initial_angle=0.0, SRotAxisD=-1.0, vertical_translation_per_projection=0.0;
char file_name_voxels[250], file_name_materials[MAX_MATERIALS][250], file_name_output[250], file_dose_output[250], file_name_espc[250];
// *** Read the input file given in the command line and return the significant data:
read_input(argc, argv, myID, &total_histories, &seed_input, &gpu_id, &num_threads_per_block, &histories_per_thread, &detector_data, &PSF, &PSF_bytes, &source_data, &source_energy_data, file_name_voxels, file_name_materials, file_name_output, file_name_espc, &num_projections, &D_angle, &angularROI_0, &angularROI_1, &initial_angle, &voxels_Edep, &voxels_Edep_bytes, file_dose_output, &dose_ROI_x_min, &dose_ROI_x_max, &dose_ROI_y_min, &dose_ROI_y_max, &dose_ROI_z_min, &dose_ROI_z_max, &SRotAxisD, &vertical_translation_per_projection, &flag_material_dose, &fact_object, &E_resol, &E_low, &E_high, &FOVZ, &NROWS, &NCRYSTALS, &NANGLES, &NRAD, &NZS, &NBINS, &RES, &NVOXS, &NE, &MRD, &SPAN, &NSEG, &NSINOS) ;
//!!MCGPU-PET!!
// // *** Read the energy spectrum and initialize its sampling with the Walker aliasing method:
// MASTER_THREAD printf(" -- Reading the energy spectrum and initializing the Walker aliasing sampling algorithm.\n");
// float mean_energy_spectrum = 0.0f;
// init_energy_spectrum(file_name_espc, &source_energy_data, &mean_energy_spectrum);
// *** Output some of the data read to make sure everything was correctly read:
MASTER_THREAD
{
printf(" PET acquistion time = %lf s\n", source_data.acquisition_time_ps*1.0e-12); // !!MCGPU-PET!!
printf(" mean life isotope = %f\n", source_data.mean_life);
for (kk=0; kk<MAX_MATERIALS; kk++)
if (source_data.activity[kk]>0.0f)
printf(" Activity material %d = %f Bq\n", kk+1, source_data.activity[kk]);
printf(" Input voxel file = %s\n", file_name_voxels);
printf(" initial random seed = %d\n", seed_input);
if (dose_ROI_x_max>-1)
{
printf(" Output dose file = %s\n", file_dose_output);
printf(" Input region of interest dose = X[%d,%d], Y[%d,%d], Z[%d,%d]\n", dose_ROI_x_min+1, dose_ROI_x_max+1, dose_ROI_y_min+1, dose_ROI_y_max+1, dose_ROI_z_min+1, dose_ROI_z_max+1); // Show ROI with index=1 for the first voxel instead of 0.
}
fflush(stdout);
}
// !!MCGPU-PET!! Not using the cone-beam CT trajectory option from MCGPU
// // *** Set the detectors and sources for the CT trajectory (if needed, ie, for more than one projection):
// if (num_projections != 1)
// {
// set_CT_trajectory(myID, num_projections, D_angle, angularROI_0, angularROI_1, SRotAxisD, source_data, detector_data, vertical_translation_per_projection);
// }
fflush(stdout);
printf("\n ----- Factor OBJECT = %f \n", fact_object);
// *** Read the voxel data and allocate the density map matrix. Return the maximum density:
load_voxels(myID, file_name_voxels, density_max, &fact_object, &voxel_data, &voxel_mat_dens, &voxel_mat_dens_bytes, &dose_ROI_x_max, &dose_ROI_y_max, &dose_ROI_z_max);
MASTER_THREAD printf(" Total CPU memory allocated for voxels vector and data structures = %lf Mbytes\n", (double)(voxel_mat_dens_bytes+PSF_bytes+sizeof(struct voxel_struct)+sizeof(struct source_struct)+sizeof(struct detector_struct)+sizeof(struct linear_interp)+2*mfp_table_bytes+sizeof(struct rayleigh_struct)+sizeof(struct compton_struct))/(1024.0*1024.0));
MASTER_THREAD fflush(stdout);
// *** Read the material mean free paths and set the interaction table in a "linear_interp" structure:
load_material(myID, file_name_materials, density_max, density_nominal, &mfp_table_data, &mfp_Woodcock_table, &mfp_Woodcock_table_bytes, &mfp_table_a, &mfp_table_b, &mfp_table_bytes, &rayleigh_table, &compton_table);
//--- SINOGRAM --
int NVOX_SIM = voxel_data.num_voxels.x*voxel_data.num_voxels.y*voxel_data.num_voxels.z;
//--- VARIABLES DEFINED IN .h FILE
// -- HOST ---
int *True = NULL;
True = (int*)malloc(NBINS*sizeof(int));
int *Scatter = NULL;
Scatter = (int*)malloc(NBINS*sizeof(int));
int *Imagen_T = NULL;
Imagen_T = (int*)malloc(NVOX_SIM*sizeof(int));
int *Imagen_SC = NULL;
Imagen_SC = (int*)malloc(NVOX_SIM*sizeof(int));
int *Energy_Spectrum = NULL;
Energy_Spectrum = (int*)malloc(NE*sizeof(int));
memset(True, 0, NBINS*4);
memset(Scatter, 0, NBINS*4);
memset(Imagen_T, 0, NVOX_SIM*4);
memset(Imagen_SC, 0, NVOX_SIM*4);
memset(Energy_Spectrum, 0, NE*4);
// -- DEVICE (=GPU)---
int *True_dev = NULL;
int *Scatter_dev = NULL;
int *Imagen_T_dev = NULL;
int *Imagen_SC_dev = NULL;
int *Energy_Spectrum_dev = NULL;
//int* PETA_dev = NULL; //JLH
//unsigned int h_ff[] = {90, 50, 100};
//---- END OF SINOGRAM DEFINITION ------------------------------ APRIL 18
if (detector_data.PSF_radius < 0.0f) // !!MCGPU-PET!!
{
detector_data.PSF_center.x = voxel_data.size_bbox.x*0.5f;
detector_data.PSF_center.y = voxel_data.size_bbox.y*0.5f;
detector_data.PSF_center.z = voxel_data.size_bbox.z*0.5f;
detector_data.PSF_radius = -detector_data.PSF_radius;
}
printf("\n\n ** INPUT PET DETECTOR PARAMETERS **\n");
printf( " Phase Space File (PSF) size = %d\n", detector_data.PSF_size);
printf( " PSF detector center = (%.5f,%.5f,%.5f) cm\n", detector_data.PSF_center.x, detector_data.PSF_center.y, detector_data.PSF_center.z);
printf( " PSF detector height and radius = %.5f, %.5f cm\n", detector_data.PSF_height, detector_data.PSF_radius);
printf( " Output PSF file = %s\n", file_name_output);
if (detector_data.tally_TYPE==1)
printf(" PSF will include only True coincidences (Scatter not reported).\n");
else if (detector_data.tally_TYPE==2)
printf(" PSF will include only Scatter coincidences (Trues not reported).\n");
else
printf(" PSF will include True and Scatter coincidences.\n");
printf("\n\n Detector energy resolution = %f\n", E_resol);
printf( " Low, high energy window threshold = %f , %f\n\n\n", E_low, E_high);
// -- Check that the input material tables and the x-ray source are consistent:
if ( (ANNIHILATION_PHOTON_ENERGY > (mfp_table_data.e0 + (mfp_table_data.num_values-1)/mfp_table_data.ide)) ) // !!MCGPU-PET!!
{
MASTER_THREAD printf("\n\n\n !!ERROR!! The input material files do not have data up to %f keV, please update material files.\n\n", ANNIHILATION_PHOTON_ENERGY*1e-3); // !!MCGPU-PET!!
#ifdef USING_MPI
MPI_Finalize();
#endif
exit(-1);
}
// int source_voxels0=0, source_voxels1=0;
// -- Pre-compute the total mass of each material present in the voxel phantom (to be used in "report_materials_dose"):
double voxel_volume = 1.0 / ( ((double)voxel_data.inv_voxel_size.x) * ((double)voxel_data.inv_voxel_size.y) * ((double)voxel_data.inv_voxel_size.z) );
double mass_materials[MAX_MATERIALS];
for(kk=0; kk<MAX_MATERIALS; kk++)
mass_materials[kk] = 0.0;
for(kk=0; kk<(voxel_data.num_voxels.x*voxel_data.num_voxels.y*voxel_data.num_voxels.z); kk++) // For each voxel in the geometry
{
mass_materials[((int)voxel_mat_dens[kk].x)-1] += ((double)voxel_mat_dens[kk].y)*voxel_volume; // Add material mass = density*volume
// if (((int)voxel_mat_dens[kk].x) == source_data.material[0]) // !!MCGPU-PET!!
// source_voxels0++;
// else if (((int)voxel_mat_dens[kk].x) == source_data.material[1]) // !!MCGPU-PET!!
// source_voxels1++;
}
// MASTER_THREAD printf(" Activity for each one of the %d source voxels with material %d = %f Bq\n", source_voxels0, source_data.material[0], source_data.activity[0]);
// MASTER_THREAD printf(" Activity for each one of the %d source voxels with material %d = %f Bq\n", source_voxels1, source_data.material[1], source_data.activity[1]);
// *** Initialize the GPU using the NVIDIA CUDA libraries, if USING_CUDA parameter defined at compile time:
#ifdef USING_CUDA
// -- Declare the pointers to the device global memory, when using the GPU:
float3 *voxel_mat_dens_device = NULL;
float2 *mfp_Woodcock_table_device = NULL;
float3 *mfp_table_a_device = NULL,
*mfp_table_b_device = NULL;
struct rayleigh_struct *rayleigh_table_device = NULL;
struct compton_struct *compton_table_device = NULL;
ulonglong2 *voxels_Edep_device = NULL;
struct detector_struct *detector_data_device = NULL;
struct source_struct *source_data_device = NULL;
ulonglong2 *materials_dose_device = NULL; // !!tally_materials_dose!!
int* seed_input_device = NULL; // Store latest random seed used in GPU in global memory to continue random sequence in consecutive projections. !!DBTv1.4!!
unsigned long long int* total_histories_device = NULL; // Store the total number of histories simulated in the kernel !!MCGPU-PET!!
PSF_element_struct *PSF_device = NULL; // Poiter where the PSF array will be allocated in GPU memory // !!MCGPU-PET!!
int *index_PSF_device = NULL; // Poiter where the PSF counter will be allocated in GPU memory
// -- Sets the CUDA enabled GPU that will be used in the simulation, and allocate and copies the simulation data in the GPU global and constant memories.
init_CUDA_device(&gpu_id, myID, numprocs, &voxel_data, &source_data, &source_energy_data, &detector_data, &mfp_table_data, /*Variables GPU constant memory*/
voxel_mat_dens, &voxel_mat_dens_device, voxel_mat_dens_bytes, /*Variables GPU global memory*/
PSF, &PSF_device, PSF_bytes, &index_PSF_device,
mfp_Woodcock_table, &mfp_Woodcock_table_device, mfp_Woodcock_table_bytes,
mfp_table_a, mfp_table_b, &mfp_table_a_device, &mfp_table_b_device, mfp_table_bytes,
&rayleigh_table, &rayleigh_table_device,
&compton_table, &compton_table_device, &detector_data_device, &source_data_device,
voxels_Edep, &voxels_Edep_device, voxels_Edep_bytes, &dose_ROI_x_min, &dose_ROI_x_max, &dose_ROI_y_min, &dose_ROI_y_max, &dose_ROI_z_min, &dose_ROI_z_max,
materials_dose, &materials_dose_device, flag_material_dose, &seed_input_device, &seed_input, &total_histories_device, num_projections, NVOX_SIM, &True_dev,
&Scatter_dev, &Imagen_T_dev, &Imagen_SC_dev, &Energy_Spectrum_dev, &NBINS, &NE);
// SINOGRAM -- Aseguro que inician en zero --
checkCudaErrors(cudaMemcpy(True_dev, True, NBINS*sizeof(int), cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(Scatter_dev, Scatter, NBINS*sizeof(int), cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(Imagen_T_dev, Imagen_T, NVOX_SIM*sizeof(int), cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(Imagen_SC_dev, Imagen_SC, NVOX_SIM*sizeof(int), cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpy(Energy_Spectrum_dev, Energy_Spectrum, NE*sizeof(int), cudaMemcpyHostToDevice));
checkCudaErrors(cudaMemcpyToSymbol(PETA_DEV, PETA, 2*sizeof(unsigned int),0,cudaMemcpyHostToDevice));
// -- Constant data already moved to the GPU: clean up unnecessary RAM memory
free(mfp_Woodcock_table);
free(mfp_table_a);
free(mfp_table_b);
if (0!=myID) // Keep the geometry data for the MPI root because the voxel densities are still needed to compute the final doses
free(voxel_mat_dens);
#endif
MASTER_THREAD
{
current_time=time(NULL);
printf("\n -- INITIALIZATION finished: elapsed time = %.3f s. \n\n", ((double)(clock()-clock_start))/CLOCKS_PER_SEC);
}
#ifdef USING_MPI
fflush(stdout);
MPI_Barrier(MPI_COMM_WORLD); // Synchronize MPI threads before starting the MC phase.
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////
MASTER_THREAD
{
current_time=time(NULL);
printf("\n\n -- MONTE CARLO LOOP phase. Time: %s\n\n", ctime(¤t_time));
fflush(stdout);
}
clock_start = clock(); // Start the CPU clock
// Prevent simulating blocks for voxels at the top of the phantom (z) where there is no activity:
//int last_z_active = find_last_z_active(&voxel_data, &source_data, voxel_mat_dens); // !!MCGPU-PET!! v.0.2
int last_z_active = voxel_data.num_voxels.z; //JLH
// ORIGINAL: dim3 blocks(voxel_data.num_voxels.x, voxel_data.num_voxels.y, voxel_data.num_voxels.z); // !!MCGPU-PET!! Launch one block per voxel!!
dim3 blocks(voxel_data.num_voxels.x, voxel_data.num_voxels.y, last_z_active); // !!MCGPU-PET!! Launch one block per voxel, except for the top Z layers without any activity
dim3 threads(num_threads_per_block, 1, 1);
#ifdef USING_MPI
if (numprocs>1)
{
update_seed_PRNG(myID, (unsigned long long int)(123456789012), &seed_input); // Set the random number seed far from any other MPI thread (myID)
printf("\n ==> CUDA (MPI process #%d in \"%s\"): simulating %d x %d x %d blocks of %d threads (random seed: %d).\n", myID, MPI_processor_name, blocks.x, blocks.y, blocks.z, threads.x, seed_input);
checkCudaErrors(cudaMemcpy(seed_input_device, &seed_input, sizeof(int), cudaMemcpyHostToDevice)); // Upload initial seed value to GPU memory. !!DBTv1.4!!
MPI_Barrier(MPI_COMM_WORLD); // Synchronize MPI threads to better organize output
}
else
printf("\n ==> LAUNCHING CUDA KERNEL: simulating %d x %d x %d blocks of %d threads.\n\n\n", blocks.x, blocks.y, blocks.z, threads.x);
#else
printf("\n\n ==> CUDA: simulating %d x %d x %d blocks of %d threads.\n\n\n", blocks.x, blocks.y, blocks.z, threads.x);
#endif
fflush(stdout);
clock_kernel = clock();
// -- Launch Monte Carlo simulation kernel:
//!!MCGPU-PET!!: For PET we use acquisition_time_ps instead of histories_per_thread to finish simulation, and we don't have multiple projections (num_p):
track_particles<<<blocks,threads>>>(total_histories_device, seed_input_device, PSF_device, index_PSF_device, voxels_Edep_device, voxel_mat_dens_device, mfp_Woodcock_table_device, mfp_table_a_device, mfp_table_b_device, rayleigh_table_device, compton_table_device, detector_data_device, source_data_device, materials_dose_device, True_dev, Scatter_dev, Imagen_T_dev, Imagen_SC_dev, Energy_Spectrum_dev, E_resol, E_low, E_high, FOVZ, NROWS, NCRYSTALS, NANGLES, NRAD, NZS, NBINS, RES, NVOXS, NE, MRD, SPAN, NSINOS); // !!MCGPU-PET!!
cudaDeviceSynchronize(); // Force the runtime to wait until the GPU kernel is completed
getLastCudaError("\n\n !!Kernel execution failed while simulating particle tracks!! \n\n\n"); // Check if kernel execution generated any error
///////////////////////////////////////////////////////////////////////////////////////////////////
// Get current time and calculate execution time in the MC loop:
time_elapsed_MC_loop = ((double)(clock()-clock_start))/CLOCKS_PER_SEC;
printf("\n\n -- MONTE CARLO LOOP finished: time tallied in main program: %.3f s\n", time_elapsed_MC_loop);
// *** Simulation finished! Move simulation results from GPU to CPU:
// -- Copy the simulated Phase Space File data and other variables from the GPU memory to the CPU:
checkCudaErrors(cudaMemcpy(&total_histories, total_histories_device, sizeof(unsigned long long int), cudaMemcpyDeviceToHost)); // Download the total number of simulated histories !!MCGPU-PET!!
checkCudaErrors(cudaMemcpy(&index_PSF, index_PSF_device, sizeof(int), cudaMemcpyDeviceToHost)); // Download the total number of elements added to the PSF !!MCGPU-PET!!
printf( " Total number of histories simulated = %lld (note: each annihilation produces 2 histories)\n", total_histories); // !!MCGPU-PET!!
if (index_PSF <= detector_data.PSF_size)
{
printf( " Number of coincidences stored in the PSF = %d\n\n\n", index_PSF/2); // !!MCGPU-PET!!
}
else
{
printf( "\n WARNING: %d photons arrived at the detector but only %d elements could be stored in the PSF!!\n", index_PSF, detector_data.PSF_size); // !!MCGPU-PET!!
printf( " Edit the input file to increase the PSF size, or reduce the source activity.\n\n");
index_PSF = detector_data.PSF_size; // The kernel will not save more than the max value of elements allocated
}
clock_t clock_PSF_1 = clock(); // !!October2017!!
checkCudaErrors(cudaMemcpy(PSF, PSF_device, index_PSF*sizeof(PSF_element_struct), cudaMemcpyDeviceToHost) ); // Download the new elements added to the PSF !!MCGPU-PET!!
clock_t clock_PSF_2 = clock();
// -- Report PSF file to disk:
if (detector_data.tally_PSF_SINOGRAM==0||detector_data.tally_PSF_SINOGRAM==1) {
MASTER_THREAD report_PSF(file_name_output, PSF, index_PSF, total_histories, time_elapsed_MC_loop, &source_data, &detector_data, file_name_voxels); // !!MCGPU-PET!!
}
clock_t clock_PSF_3 = clock();
MASTER_THREAD printf(" -- Time spent downloading the PSF from the GPU to the CPU: %.4f s ; time spent reporting PSF to disk: %.4f s\n\n", ((double)(clock_PSF_2-clock_PSF_1))/CLOCKS_PER_SEC, ((double)(clock_PSF_3-clock_PSF_2))/CLOCKS_PER_SEC); // !!October2017!!
// SINOGRAM // !!March2018!!
checkCudaErrors(cudaMemcpy(True, True_dev, NBINS*sizeof(int), cudaMemcpyDeviceToHost)); // Download the True Sinogram !!MCGPU-PET!!
checkCudaErrors(cudaMemcpy(Scatter, Scatter_dev, NBINS*sizeof(int), cudaMemcpyDeviceToHost)); // Download the Scatter Sinogram !!MCGPU-PET!!
checkCudaErrors(cudaMemcpy(Imagen_T, Imagen_T_dev, NVOX_SIM*sizeof(int), cudaMemcpyDeviceToHost)); // Download the Image !!MCGPU-PET!!
checkCudaErrors(cudaMemcpy(Imagen_SC, Imagen_SC_dev, NVOX_SIM*sizeof(int), cudaMemcpyDeviceToHost)); // Download the Image !!MCGPU-PET!!
checkCudaErrors(cudaMemcpy(Energy_Spectrum, Energy_Spectrum_dev, NE*sizeof(int), cudaMemcpyDeviceToHost)); // Download the Spectrum !!MCGPU-PET!!
// Report --
long int sumTru=0;
long int sumSc=0;
long int sumImg_T=0;
long int sumImg_SC=0;
for (int i=0;i<NBINS;i++) {sumTru += True[i]; sumSc += Scatter[i];}
for (int i=0;i<NVOX_SIM;i++) {sumImg_T += Imagen_T[i]; sumImg_SC += Imagen_SC[i];}
if (detector_data.tally_PSF_SINOGRAM==0||detector_data.tally_PSF_SINOGRAM==2) {
if (argc==2) {
// -Modification of the output sinogram to write into compressed gzip
gzFile file1 = gzopen("sinogram_Trues.raw.gz","wb");
gzwrite(file1,True,NBINS*sizeof(int));
gzclose(file1);
gzFile file2 = gzopen("sinogram_Scatter.raw.gz","wb");
gzwrite(file2,Scatter,NBINS*sizeof(int)); // Corrected bug: file2 instead of file1 FEB2022
gzclose(file2);
gzFile file3 = gzopen("image_Trues.raw.gz","wb");
gzwrite(file3,Imagen_T,NVOX_SIM*sizeof(int));
gzclose(file3);
gzFile file4 = gzopen("image_Scatter.raw.gz","wb");
gzwrite(file4,Imagen_SC,NVOX_SIM*sizeof(int));
gzclose(file4);
// -Write uncompressed binary:
// FILE* file1 = fopen("Trues.raw", "wb");
// fwrite(True, sizeof(int), NBINS, file1);
// fclose(file1);
// FILE* file2 = fopen("Scatter.raw", "wb");
// fwrite(Scatter, sizeof(int), NBINS, file2);
// fclose(file2);
// FILE* file3 = fopen("Image_T.raw", "wb");
// fwrite(Imagen_T, sizeof(int), NVOX_SIM, file3);
// fclose(file3);
// FILE* file4 = fopen("Image_SC.raw", "wb");
// fwrite(Imagen_SC, sizeof(int), NVOX_SIM, file4);
// fclose(file3);
}else{
gzFile file1 = gzopen("sinogram_BG_Trues.raw.gz","wb");
gzwrite(file1,True,NBINS*sizeof(int));
gzclose(file1);
gzFile file2 = gzopen("sinogram_BG_Scatter.raw.gz","wb");
gzwrite(file2,Scatter,NBINS*sizeof(int));
gzclose(file2);
gzFile file3 = gzopen("BG_image_True.raw.gz","wb");
gzwrite(file3,Imagen_T,NVOX_SIM*sizeof(int));
gzclose(file3);
gzFile file4 = gzopen("BG_image_Scatter.raw.gz","wb");
gzwrite(file4,Imagen_SC,NVOX_SIM*sizeof(int));
gzclose(file4);
// FILE* file1 = fopen("BG_Trues.raw", "wb");
// fwrite(True, sizeof(int), NBINS, file1);
// fclose(file1);
// FILE* file2 = fopen("BG_Scatter.raw", "wb");
// fwrite(Scatter, sizeof(int), NBINS, file2);
// fclose(file2);
// FILE* file3 = fopen("BG_Image_T.raw", "wb");
// fwrite(Imagen_T, sizeof(int), NVOX_SIM, file3);
// fclose(file3);
// FILE* file4 = fopen("BG_Image_SC.raw", "wb");
// fwrite(Imagen_SC, sizeof(int), NVOX_SIM, file4);
// fclose(file3);
}
} //write only if tally_PSF_SINOGRAM=0 or 2
FILE* file5 = fopen("Energy_Sinogram_Spectrum.dat", "w");
fprintf (file5, "# HISTOGRAM OF ENERGIES (keV) OF DETECTED COINCIDENCE PHOTONS (before energy window selection)\n");
fprintf (file5, "# Energy bin [keV] Counts\n");
for (int i=0 ; i<NE ; i++){ fprintf (file5, "%10d %10d \n",i,Energy_Spectrum[i]); }
fclose (file5);
/* !!TO DO!! //!!MCGPU-PET!!
MASTER_THREAD report_PSF_file(...); // Report PSF computed by master thread
#ifdef USING_MPI
int i;
for(i=1;i<numprocs;i++)
{
// -- Send results from thread i to master, then report following previous results:
return_reduce = MPI_Reduce(PSF_file,..., 0, MPI_COMM_WORLD);
MASTER_THREAD report_PSF_file(...); // Report PSF computed by thread 'i'
...keep track of total number of particles simulated, and total number elements in complete PSF...
}
#endif
...report total number of particles simulated, and total number elements in complete PSF...
*/
// -- Copy the simulated voxel and material doses to CPU:
if (dose_ROI_x_max > -1)
{
MASTER_THREAD clock_kernel = clock();
checkCudaErrors( cudaMemcpy( voxels_Edep, voxels_Edep_device, voxels_Edep_bytes, cudaMemcpyDeviceToHost) ); // Copy final dose results to host (for every MPI threads)
MASTER_THREAD printf(" ==> CUDA: Time copying dose results from device to host: %.6f s\n", float(clock()-clock_kernel)/CLOCKS_PER_SEC);
}
if (flag_material_dose==1)
checkCudaErrors( cudaMemcpy( materials_dose, materials_dose_device, MAX_MATERIALS*sizeof(ulonglong2), cudaMemcpyDeviceToHost) ); // Copy materials dose results to host, if tally enabled in input file. !!tally_materials_dose!!
// -- Clean up GPU device memory:
clock_kernel = clock();
cudaFree(voxel_mat_dens_device);
cudaFree(PSF_device);
cudaFree(mfp_Woodcock_table_device);
cudaFree(mfp_table_a_device);
cudaFree(mfp_table_b_device);
cudaFree(voxels_Edep_device);
//SINOGRAM
cudaFree(True_dev);
cudaFree(Scatter_dev);
cudaFree(Imagen_T_dev);
cudaFree(Imagen_SC_dev);
cudaFree(Energy_Spectrum_dev);
checkCudaErrors( cudaDeviceReset() );
MASTER_THREAD printf(" ==> CUDA: Time freeing the device memory and ending the GPU threads: %.6f s\n", float(clock()-clock_kernel)/CLOCKS_PER_SEC);
#ifdef USING_MPI
if (numprocs>1)
{
current_time=time(NULL); // Get current time (in seconds)
char_time = ctime(¤t_time); char_time[19] = '\0'; // The time is located betwen the characters 11 and 19.
printf(" >> MPI thread %d in \"%s\" done! (local time: %s)\n", myID, MPI_processor_name, &char_time[11]);
fflush(stdout); // Clear the screen output buffer
}
#endif
// *** Report the total dose for all the projections, if the tally is not disabled (must be done after MPI_Barrier to have all the MPI threads synchronized):
MASTER_THREAD clock_start = clock();
if (dose_ROI_x_max > -1)
{
#ifdef USING_MPI
if (numprocs>1)
{
// -- Use MPI_Reduce to accumulate the dose from all projections:
// Allocate memory in the root node to combine the dose results with MPI_REDUCE:
int num_voxels_ROI = voxels_Edep_bytes/((int)sizeof(ulonglong2)); // Number of elements allocated in the "dose" array.
ulonglong2 *voxels_Edep_total = (ulonglong2*) malloc(voxels_Edep_bytes);
if (voxels_Edep_total==NULL)
{
printf("\n\n !!malloc ERROR!! Not enough memory to allocate %d voxels by the MPI root node for the total deposited dose (and uncertainty) array (%f Mbytes)!!\n\n", num_voxels_ROI, voxels_Edep_bytes/(1024.f*1024.f));
exit(-2);
}
else
{
MASTER_THREAD
{
printf("\n >> Array for the total deposited dose correctly allocated by the MPI root node (%f Mbytes).\n", voxels_Edep_bytes/(1024.f*1024.f));
printf( " Waiting at MPI_Barrier for thread synchronization.\n");
}
}
MASTER_THREAD printf("\n >> Calling MPI_Reduce to accumulate the dose from all projections...\n\n");
return_reduce = MPI_Reduce(voxels_Edep, voxels_Edep_total, 2*num_voxels_ROI, MPI_UNSIGNED_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); // Sum all the doses in "voxels_Edep_total" at thread 0.
if (MPI_SUCCESS != return_reduce)
{
printf("\n\n !!ERROR!! Possible error reducing (MPI_SUM) the dose results??? return_reduce = %d for thread %d\n\n\n", return_reduce, myID);
}
// -- Exchange the dose simulated in thread 0 for the final dose from all threads
MASTER_THREAD
{
free(voxels_Edep);
voxels_Edep = voxels_Edep_total; // point the voxels_Edep pointer to the final voxels_Edep array in host memory
voxels_Edep_total = NULL; // This pointer is not needed by now
}
}
#endif
// -- Report the total dose for all the projections:
MASTER_THREAD report_voxels_dose(file_dose_output, num_projections, &voxel_data, voxel_mat_dens, voxels_Edep, time_elapsed_MC_loop, total_histories, dose_ROI_x_min, dose_ROI_x_max, dose_ROI_y_min, dose_ROI_y_max, dose_ROI_z_min, dose_ROI_z_max, &source_data);
}
// -- Report "tally_materials_dose" with data from all MPI threads, if tally enabled:
if (flag_material_dose==1)
{
#ifdef USING_MPI
ulonglong2 materials_dose_total[MAX_MATERIALS];
return_reduce = MPI_Reduce(materials_dose, materials_dose_total, 2*MAX_MATERIALS, MPI_UNSIGNED_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD); // !!tally_materials_dose!!
#else
ulonglong2 *materials_dose_total = materials_dose; // Create a dummy pointer to the materials_dose data
#endif
MASTER_THREAD report_materials_dose(num_projections, total_histories, density_nominal, materials_dose_total, mass_materials, file_name_materials); // Report the material dose !!tally_materials_dose!!
}
MASTER_THREAD clock_end = clock();
MASTER_THREAD printf("\n\n ==> CUDA: Time reporting the dose data: %.6f s\n", ((double)(clock_end-clock_start))/CLOCKS_PER_SEC);
// *** Report the total dose for all the projections, if the tally is not disabled (must be done after MPI_Barrier to have all the MPI threads synchronized):
// *** Clean up RAM memory. If CUDA was used, the geometry and table data were already cleaned for MPI threads other than root after copying data to the GPU:
free(voxels_Edep);
free(PSF);
//SINOGRAM
free(True);
free(Scatter);
free(Imagen_T);
free(Imagen_SC);
free(Energy_Spectrum);
#ifdef USING_CUDA
MASTER_THREAD free(voxel_mat_dens);
#else
free(voxel_mat_dens);
free(mfp_Woodcock_table);
free(mfp_table_a);
free(mfp_table_b);
#endif
#ifdef USING_MPI
MPI_Finalize(); // Finalize MPI library: no more MPI calls allowed below.
#endif
// -- Report output size and aditional information:
MASTER_THREAD
{
printf("\n\n ****** MCGPU-PET SINOGRAM INFORMATION ******\n\n");
printf(" >>> Number of radial bins = %i\n", NRAD);
printf(" >>> Number of angular bins = %i\n", NANGLES);
printf(" >>> Total number of sinograms = %i\n", NSINOS);
printf(" >>> Maximum Ring Difference = %i\n", MRD);
printf(" >>> SPAN = %i\n", SPAN);
printf(" >>> Number segments = %i\n", NSEG);
printf(" >>> Number z slices = %i\n\n", NZS);
printf(" >>> Number of true coincidences in the sinogram = %ld \n",sumTru);
printf(" >>> Number of scatter coincidences in the sinogram = %ld \n",sumSc);
printf(" >>> Size of the 3D sinogram = %i X %i X %i\n", NRAD,NANGLES,NSINOS);
printf("\n\n ****** MCGPU-PET IMAGES INFORMATION ******\n\n");
printf(" >>> Two binary 3D matrix (32-bit integer per voxel) with same size as the input voxelized geometry.\n");
printf(" >>> Report how many detected coincidences, Trues and Scatter separately, were emitted from each voxel.\n\n");
printf(" >>> Number of voxels = (%i,%i,%i)\n", voxel_data.num_voxels.x, voxel_data.num_voxels.y, voxel_data.num_voxels.z);
//FEB2022 Wrong reporting before?
// printf(" >>> Number of X,Y voxels = %i\n", RES);
// printf(" >>> Number of Z voxels = %i\n", NZS);
// printf(" >>> Size of the images = %i X %i X %i\n\n", RES,RES,NZS);
printf(" >>> Number of true counts in the image of trues = %ld \n",sumImg_T);
printf(" >>> Number of scatter counts in the image of scatter = %ld \n",sumImg_SC);
}
MASTER_THREAD
{
printf("\n\n\n -- SIMULATION FINISHED!\n");
time_total_MC_init_report = ((double)(clock()-clock_start_beginning))/CLOCKS_PER_SEC;
// -- Report total performance:
printf("\n\n ****** TOTAL SIMULATION PERFORMANCE (including initialization and reporting) ******\n\n");
printf( " >>> Execution time including initialization, transport and report: %.3f s.\n", time_total_MC_init_report);
printf( " >>> Time spent in the Monte Carlo transport only: %.3f s.\n", time_elapsed_MC_loop);
printf( " >>> Time spent in initialization, reporting and clean up: %.3f s.\n", (time_total_MC_init_report-time_elapsed_MC_loop));
printf( " >>> Total number of simulated histories (x-rays): %lld (note: each positron annihilation produces 2 histories)\n", total_histories);
if (time_total_MC_init_report>0.0000001)
printf( " >>> Total speed (using %d thread, including transport, initialization and report times) [x-rays/s]: %.2f\n", numprocs, (double)(total_histories/time_total_MC_init_report));
if (time_elapsed_MC_loop>0.0000001)
printf( " >>> Total speed Monte Carlo transport only (using %d thread) [x-rays/s]: %.2f\n\n", numprocs, (double)(total_histories/time_elapsed_MC_loop));
current_time=time(NULL); // Get current time (in seconds)
printf("\n****** Code execution finished on: %s\n\n", ctime(¤t_time));
}
#ifdef USING_CUDA
cudaDeviceReset(); // Destroy the CUDA context before ending program (flush visual debugger data).
#endif
return 0;
}
////////////////////////////////////////////////////////////////////////////////
//! Read the input file given in the command line and return the significant data.
//! Example input file:
//!
//! 1000000 [Total number of histories to simulate]
//! geometry.vox [Voxelized geometry file name]
//! material.mat [Material data file name]
//!
//! @param[in] argc Command line parameters
//! @param[in] argv Command line parameters: name of input file
//! @param[out] total_histories Total number of particles to simulate
//! @param[out] seed_input Input random number generator seed
//! @param[out] num_threads_per_block Number of CUDA threads for each GPU block
//! @param[out] detector_data
//! @param[out] PSF
//! @param[out] source_data
//! @param[out] file_name_voxels
//! @param[out] file_name_materials
//! @param[out] file_name_output
////////////////////////////////////////////////////////////////////////////////
void read_input(int argc, char** argv, int myID, unsigned long long int* total_histories, int* seed_input, int* gpu_id, int* num_threads_per_block, int* histories_per_thread, struct detector_struct* detector_data, PSF_element_struct** PSF_ptr, unsigned long long int* PSF_bytes, struct source_struct* source_data, struct source_energy_struct* source_energy_data, char* file_name_voxels, char file_name_materials[MAX_MATERIALS][250] , char* file_name_output, char* file_name_espc, int* num_projections, double* D_angle, double* angularROI_0, double* angularROI_1, double* initial_angle, ulonglong2** voxels_Edep_ptr, int* voxels_Edep_bytes, char* file_dose_output, short int* dose_ROI_x_min, short int* dose_ROI_x_max, short int* dose_ROI_y_min, short int* dose_ROI_y_max, short int* dose_ROI_z_min, short int* dose_ROI_z_max, double* SRotAxisD, double* vertical_translation_per_projection, int* flag_material_dose, float* fact_object, float* E_resol, float* E_low, float* E_high, float* FOVZ, int* NROWS, int* NCRYSTALS, int* NANGLES, int* NRAD, int* NZS, int* NBINS, int* RES, int* NVOXS, int* NE, int* MRD, int* SPAN, int* NSEG, int*NSINOS)
{
FILE* file_ptr = NULL;
char new_line[250];
char *new_line_ptr = NULL;
double dummy;
// -- Read the input file name from command line, if given (otherwise keep default value):
if (argc>=2)
{
file_ptr = fopen(argv[1], "r");
if (NULL==file_ptr)
{
printf("\n\n !!read_input ERROR!! Input file not found or not readable. Input file name: \'%s\'\n\n", argv[1]);
// Not finalizing MPI here because we want the execution to fail if there is a problem with any MPI thread!!! MPI_Finalize(); // Finalize MPI library: no more MPI calls allowed below.
exit(-1);
}
}
// else if (argc>2)
// {
//
// MASTER_THREAD printf("\n\n !!read_input ERROR!! Too many input parameter (argc=%d)!! Provide only the input file name.\n\n", argc);
// // Finalizing MPI because all threads will detect the same problem and fail together.
// #ifdef USING_MPI
// MPI_Finalize();
// #endif
// exit(-1);
// }
else
{
MASTER_THREAD printf("\n\n !!read_input ERROR!! Input file name not given as an execution parameter!! Try again...\n\n");
#ifdef USING_MPI
MPI_Finalize();
#endif
exit(-1);
}
MASTER_THREAD printf("\n -- Reading the input file \'%s\':\n", argv[1]);
// -- Init. [SECTION SIMULATION CONFIG v.2016-07-05]: // !!MCGPU-PET!!
do
{
new_line_ptr = fgets(new_line, 250, file_ptr); // Read full line (max. 250 characters).
if (new_line_ptr==NULL)
{
printf("\n\n !!read_input ERROR!! Input file is not readable or does not contain the string \'SECTION SIMULATION CONFIG v.2016-07-05\'!!\n");
exit(-2);
}
}
while(strstr(new_line,"SECTION SIMULATION CONFIG v.2016-07-05")==NULL); // Skip comments and empty lines until the section begins
// new_line_ptr = fgets_trimmed(new_line, 250, file_ptr);
// sscanf(new_line, "%lf", &dummy_double);
// *total_histories = (unsigned long long int) (dummy_double+0.0001); // Maximum unsigned long long value: 18446744073709551615
new_line_ptr = fgets_trimmed(new_line, 250, file_ptr);
sscanf(new_line, "%d", seed_input); // Set the RANECU PRNG seed (the same seed will be used to init the 2 MLCGs in RANECU)
if (*seed_input==0) // Generating random seed
{
struct timeval tv;
gettimeofday(&tv,NULL);
//seed = (int)tv.tv_usec;
*seed_input = (int)((tv.tv_sec / 1000) + (tv.tv_usec * 1000));
}
MASTER_THREAD printf("\n -- Seed used in the execution \'%d\':\n", *seed_input);
// END 03/11/2020
new_line_ptr = fgets_trimmed(new_line, 250, file_ptr);
sscanf(new_line, "%d", gpu_id); // GPU NUMBER WHERE SIMULATION WILL RUN
new_line_ptr = fgets_trimmed(new_line, 250, file_ptr);
sscanf(new_line, "%d", num_threads_per_block); // GPU THREADS PER CUDA BLOCK
new_line_ptr = fgets_trimmed(new_line, 250, file_ptr);
sscanf(new_line, "%f", fact_object); // OBJECT PRESENT? (IF NO --> DENSITY = 0.001)
// printf("----- Factor OBJECT = %f \n",*fact_object);
// printf("----- Factor NUMBER OF THREADS = %i \n",*num_threads_per_block);
#ifdef USING_CUDA
if ((*num_threads_per_block%32)!=0)
{
// --ELIMINTE NEED TO USE NUMBER OF THREADS MULTIPLE OF 32 !!PET!!
MASTER_THREAD printf("\n !!read_input!! The input number of GPU threads per CUDA block, %d, is NOT a multiple of 32 (warp size). Some GPU resources will not be fully utilized.\n\n", *num_threads_per_block);
/*
MASTER_THREAD printf("\n\n !!read_input ERROR!! The input number of GPU threads per CUDA block must be a multiple of 32 (warp size). Input value: %d !!\n\n", *num_threads_per_block);
#ifdef USING_MPI
MPI_Finalize();
#endif
exit(-2);
*/
}
#endif
// !!MCGPU-PET!!
// new_line_ptr = fgets_trimmed(new_line, 250, file_ptr);
// sscanf(new_line, "%d", histories_per_thread); // HISTORIES PER GPU THREAD
// -- Init. [SECTION SOURCE PET SCAN v.2017-03-14]:
do
{
new_line_ptr = fgets(new_line, 250, file_ptr);
if (new_line_ptr==NULL)
{
printf("\n\n !!read_input ERROR!! Input file is not readable or does not contain the string \'SECTION SOURCE PET SCAN v.2017-03-14\'!!\n");
exit(-2);
}
}
while(strstr(new_line,"SECTION SOURCE PET SCAN v.2017-03-14")==NULL); // Skip comments and empty lines until the section begins // !!MCGPU-PET!!
new_line_ptr = fgets_trimmed(new_line, 250, file_ptr);
sscanf(new_line, "%lf", &dummy); // TOTAL PET SCAN ACQUISITION TIME [seconds]
source_data->acquisition_time_ps = (unsigned long long int)(dummy*1.0e12 + 0.5); // Store acquistion time in picoseconds
new_line_ptr = fgets_trimmed(new_line, 250, file_ptr);
sscanf(new_line, "%f", &source_data->mean_life); // ISOTOPE MEAN LIFE
int ii=0, mat=0;
float act=0.0f;
for (ii=0; ii<MAX_MATERIALS; ii++)
source_data->activity[ii] = 0.0f; // Init table to 0 Bq
for (ii=0; ii<MAX_MATERIALS; ii++) // Read input material activity
{
new_line_ptr = fgets_trimmed(new_line, 250, file_ptr);
sscanf(new_line, "%d %f", &mat, &act); // TABLE MATERIAL NUMBER AND VOXEL ACTIVITY [Bq]: 1==1st_material ; 0==end_of_list
if (mat<1)
break;
else
source_data->activity[mat-1] = act; // (The first material is read as number 1 but stored as number 0)
}
// -- Init. [SECTION PHASE SPACE FILE v.2016-07-05]: