-
Notifications
You must be signed in to change notification settings - Fork 3
/
MCGPU-PET_kernel.cu
1836 lines (1576 loc) · 97.8 KB
/
MCGPU-PET_kernel.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//!!MCGPU-PET!! Kernel called with a 3D grid of blocks with x,y,z sizes equal to the voxel sizes: each block simulates emission from a different voxel.
//!!MCGPU-PET!! Changes for PET marked with !!MCGPU-PET!! (ABS, 2017-02-03)
// *****************************************************************************
// *** MCGPU-PET_v0.1 (based on MC-GPU_v1.3) ***
// *** ***
// *** Distribution: https://github.com/DIDSR/MCGPU-PET ***
// *** ***
// *** Authors: ***
// *** ***
// *** MCGPU code foundation and PET source sampling implemented by: ***
// *** ***
// *** - Andreu Badal (Andreu.Badal-Soler[at]fda.hhs.gov) ***
// *** Division of Imaging and Applied Mathematics ***
// *** Office of Science and Engineering Laboratories ***
// *** Center for Devices and Radiological Health ***
// *** U.S. Food and Drug Administration ***
// *** ***
// *** ***
// *** PET detector model and sinogram reporting implemented by: ***
// *** ***
// *** - Joaquin L. Herraiz and Alejandro López-Montes ***
// *** Complutense University of Madrid, EMFTEL, Grupo Fisica Nuclear***
// *** and IPARCOS; Instituto de Investigacion Sanitaria Hospital ***
// *** Clinico San Carlos (IdiSSC), Madrid, Spain ***
// *** ***
// *** Code presented at the IEEE NSS MIC 2021 conference: ***
// *** ***
// *** M-07-01 – GPU-accelerated Monte Carlo-Based Scatter and ***
// *** Prompt-Gamma Corrections in PET, A. López-Montes, J. Cabello, ***
// *** M. Conti, A. Badal, J. L. Herraiz ***
// *** ***
// *** ***
// *** Last update: 2022/02/02 ***
// *****************************************************************************
////////////////////////////////////////////////////////////////////////////////
//
// ****************************
// *** MC-GPU , version 1.3 ***
// ****************************
//
//! Definition of the CUDA GPU kernel for the simulation of x ray tracks in a voxelized geometry.
//! This kernel has been optimized to yield a good performance in the GPU but can still be
//! compiled in the CPU without problems. All the CUDA especific commands are enclosed in
//! pre-processor directives that are skipped if the parameter "USING_CUDA" is not defined
//! at compilation time.
//
// ** DISCLAIMER **
//
// This software and documentation (the "Software") were developed at the Food and
// Drug Administration (FDA) by employees of the Federal Government in the course
// of their official duties. Pursuant to Title 17, Section 105 of the United States
// Code, this work is not subject to copyright protection and is in the public
// domain. Permission is hereby granted, free of charge, to any person obtaining a
// copy of the Software, to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish, distribute,
// sublicense, or sell copies of the Software or derivatives, and to permit persons
// to whom the Software is furnished to do so. FDA assumes no responsibility
// whatsoever for use by other parties of the Software, its source code,
// documentation or compiled executables, and makes no guarantees, expressed or
// implied, about its quality, reliability, or any other characteristic. Further,
// use of this code in no way implies endorsement by the FDA or confers any
// advantage in regulatory decisions. Although this software can be redistributed
// and/or modified freely, we ask that any derivative works bear some notice that
// they are derived from it, and any modified versions bear some notice that they
// have been modified.
//
//
//! @file MC-GPU_kernel_v1.3.cu
//! @author Andreu Badal (Andreu.Badal-Soler@fda.hhs.gov)
//! @date 2012/12/12
// -- Original code started on: 2009/04/14
//
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//! Initialize the image array, ie, set all pixels to zero
//! Essentially, this function has the same effect as the command:
//! "cutilSafeCall(cudaMemcpy(image_device, image, image_bytes, cudaMemcpyHostToDevice))";
//!
//! CUDA performs some initialization work the first time a GPU kernel is called.
//! Therefore, calling a short kernel before the real particle tracking is performed
//! may improve the accuracy of the timing measurements in the relevant kernel.
//!
//! @param[in,out] image Pointer to the image array.
//! @param[in] pixels_per_image Number of pixels in the image (ie, elements in the array).
////////////////////////////////////////////////////////////////////////////////
__global__
void init_image_array_GPU(unsigned long long int* image, int pixels_per_image)
{
int my_pixel = threadIdx.x + blockIdx.x*blockDim.x;
if (my_pixel < pixels_per_image)
{
// -- Set the current pixel to 0 and return, avoiding overflow when more threads than pixels are used:
image[my_pixel] = (unsigned long long int)(0); // Initialize non-scatter image
my_pixel += pixels_per_image; // (advance to next image)
image[my_pixel] = (unsigned long long int)(0); // Initialize Compton image
my_pixel += pixels_per_image; // (advance to next image)
image[my_pixel] = (unsigned long long int)(0); // Initialize Rayleigh image
my_pixel += pixels_per_image; // (advance to next image)
image[my_pixel] = (unsigned long long int)(0); // Initialize multi-scatter image
}
}
// ////////////////////////////////////////////////////////////////////////////////
// //! Initialize the dose deposition array, ie, set all voxel doses to zero
// //!
// //! @param[in,out] dose Pointer to the dose mean and sigma arrays.
// //! @param[in] num_voxels_dose Number of voxels in the dose ROI (ie, elements in the arrays).
// ////////////////////////////////////////////////////////////////////////////////
// __global__
// void init_dose_array_GPU(ulonglong2* voxels_Edep, int num_voxels_dose)
// {
// int my_voxel = threadIdx.x + blockIdx.x*blockDim.x;
// register ulonglong2 ulonglong2_zero;
// ulonglong2_zero.x = ulonglong2_zero.y = (unsigned long long int) 0;
// if (my_voxel < num_voxels_dose)
// {
// dose[my_voxel] = ulonglong2_zero; // Set the current voxel to (0,0) and return, avoiding overflow
// }
// }
////////////////////////////////////////////////////////////////////////////////
//! Main function to simulate x-ray tracks inside a voxelized geometry.
//! Secondary electrons are not simulated (in photoelectric and Compton
//! events the energy is locally deposited).
//!
//! The following global variables, in the GPU __constant__ memory are used:
//! voxel_data_CONST,
//! source_energy_data_CONST
//! mfp_table_data_CONST.
//!
//! @param[in] history_batch Particle batch number (only used in the CPU version when CUDA is disabled!, the GPU uses the built-in variable threadIdx)
//! @param[in] num_p Projection number in the CT simulation. This variable defines a specific angle and the corresponding source and detector will be used.
//! @param[in] histories_per_thread Number of histories to simulate for each call to this function (ie, for GPU thread).
//! @param[in] seed_input Random number generator seed (the same seed is used to initialize the two MLCGs of RANECU).
//! @param[in] voxel_mat_dens Pointer to the voxel densities and material vector (the voxelized geometry), stored in GPU glbal memory.
//! @param[in] mfp_Woodcock_table Two parameter table for the linear interpolation of the Woodcock mean free path (MFP) (stored in GPU global memory).
//! @param[in] mfp_table_a First element for the linear interpolation of the interaction mean free paths (stored in GPU global memory).
//! @param[in] mfp_table_b Second element for the linear interpolation of the interaction mean free paths (stored in GPU global memory).
//! @param[in] rayleigh_table Pointer to the table with the data required by the Rayleigh interaction sampling, stored in GPU global memory.
//! @param[in] compton_table Pointer to the table with the data required by the Compton interaction sampling, stored in GPU global memory.
//! @param[in,out] image Pointer to the image vector in the GPU global memory.
//! @param[in,out] dose Pointer to the array containing the 3D voxel dose (and its uncertainty) in the GPU global memory.
////////////////////////////////////////////////////////////////////////////////
__global__ void track_particles(unsigned long long int* total_histories,
int* seed_input_device,
PSF_element_struct* PSF,
int* index_PSF,
ulonglong2* voxels_Edep,
float3* voxel_mat_dens,
float2* mfp_Woodcock_table,
float3* mfp_table_a,
float3* mfp_table_b,
struct rayleigh_struct* rayleigh_table,
struct compton_struct* compton_table,
struct detector_struct* detector_data,
struct source_struct* source_data,
ulonglong2* materials_dose,
int* True_dev,
int* Scatter_dev,
int* Imagen_T_dev,
int* Imagen_SC_dev,
int* Energy_Spectrum_dev,
float E_resol,
float E_low,
float E_high,
float FOVZ,
int NROWS,
int NCRYSTALS,
int NANGLES,
int NRAD,
int NZS,
int NBINS,
int RES,
int NVOXS, //FEB2022 !!DeBuG!! Is this input necessary? And should it be NVOX_SIM?
int NE,
int MRD,
int SPAN,
int NSINOS) //JLH (PETA=1-->Trues y Scatter. PETA=0 -->Bg)
{
// -- Declare the track state variables:
float3 position, direction, position1, direction1; // !!MCGPU-PET!! Store sampled values for 1st photon, to use in 2nd
float energy, step, prob, randno, mfp_density, mfp_Woodcock;
float3 mfp_table_read_a, mfp_table_read_b;
int2 seed;
int index;
int material0; // Current material, starting at 0 for 1st material
int material_old; // Flag to mark a material or energy change
signed char scatter_state; // Flag for scatter images: scatter_state=0 for non-scattered, =1 for Compton, =2 for Rayleigh, and =3 for multiple scatter.
// Variables for the PSF of the first photon, tallied after a coinicdence with its second is confirmed !!COINCIDENCE!!
float energyFirst=-99.9f, travel_distanceFirst=-99.9f;
float3 positionFirst, directionFirst;
signed char scatter_stateFirst;
// -- Store the Compton table in shared memory from global memory:
// For Compton and Rayleigh the access to memory is not coherent and the caching capability do not speeds up the accesses, they actually slows down the acces to other data.
__shared__ struct compton_struct cgco_SHARED;
// __shared__ struct detector_struct detector_data_SHARED;
// __shared__ struct source_struct source_data_SHARED;
// Use volatile to prevent storing in registers the shared variable updated by different threads
volatile __shared__ unsigned long long int acquisition_time_ps_SHARED; // All threads will generate histories in parallel and increase the shared total acquisition time until total time finished. !!MCGPU-PET!!
volatile __shared__ unsigned long long int total_histories_block_SHARED; // Count all histories generated in this block (ie, this emission voxel)
//FEB2022 volatile __shared__ double inv_activity_SHARED; // Never used?
double inv_activity_thread; // Inverse of the remaining activity in the voxel distributed among threads; gets reduced at each time interval -- JLH
__shared__ double inv_mean_life_SHARED;
__shared__ int tally_TYPE_SHARED; // Flag to report only True coincidences (=1), Scatter (=2) or both (=0, default)
__shared__ int tally_PSF_SINOGRAM_SHARED; // Flag to report only PSF (1), SINOGRAM (2) or BOTH (0)
//FEB2022 int Nvox = gridDim.x*gridDim.y*gridDim.z; // Number of voxels
//FEB2022 if (threadIdx.x>=blockDim.x) return;
//FEB2022 if (blockIdx.x>=gridDim.x || blockIdx.y>=gridDim.y || blockIdx.z>=gridDim.z) return;
int absvox = blockIdx.x + blockIdx.y*gridDim.x + blockIdx.z*gridDim.x*gridDim.y; // Assuming that the grid size is equal to the voxel geomety size! !!MCGPU-PET!!
//FEB2022 if (absvox<0 || absvox>=Nvox) return;
// float voxel_activity = source_data->activity[voxel_mat_dens[absvox].x - 1]; //JLH OLD model: get activity associated to material
float voxel_activity = voxel_mat_dens[absvox].z; //JLH NEW model: get activity for each voxel
unsigned long long int acquisition_time_ps_thread = source_data->acquisition_time_ps; // PET acquisition time in picoseconds (1e-12s)
volatile unsigned long long int histories_thread = 0;
if (voxel_activity>1.0e-7f) {
inv_activity_thread = (blockDim.x/(double)voxel_activity); //JLH (Voxel Activity distributed among threads);
//FEB2022 if (0==threadIdx.x) inv_activity_SHARED = (1.0/(double)voxel_activity);
} else {
return; // ****** Finish kernel right away if the voxel material does not have any activity (negative input activities also disregarded).
}
if (0==threadIdx.x){ // First GPU thread copies the variables to shared memory
// Copy the compton data to shared memory:
cgco_SHARED = *compton_table;
total_histories_block_SHARED = (unsigned long long int)0;
acquisition_time_ps_SHARED = source_data->acquisition_time_ps; // Init PET acquisition time in ps (1e-12s), for all threads !!MCGPU-PET!!
inv_mean_life_SHARED = 1.0/(double)source_data->mean_life;
tally_TYPE_SHARED = detector_data->tally_TYPE; // Keep this constant in shared memory for faster access
tally_PSF_SINOGRAM_SHARED = detector_data->tally_PSF_SINOGRAM; // Keep this constant in shared memory for faster access
}
__syncthreads(); // Make sure all threads will see the initialized shared variable
// -- Initialize the RANECU generator in a position far away from the previous history:
int thread_id = (blockIdx.x + blockIdx.y*gridDim.x + blockIdx.z*gridDim.x*gridDim.y) * blockDim.x + threadIdx.x; // Using a 1D block and a 3D grid. Assuming grid size = voxel geomety size!
int max_histories_per_thread = 555555; //!!MCGPU-PET!! !!DeBuG!! Assuming that no thread will simulate more than this number of histories!!!
init_PRNG(thread_id, max_histories_per_thread, *seed_input_device, &seed);
//char flag_time_finished = (char)0; // Flag to identify threads that don't have any more particle to run
direction1.x = 11.0f; // Mark that a new photon pair must be sampled !!MCGPU-PET!!
// ---------- Sample and simulate positron annihilation histories until acquisition time finishes: ----------//
for(;;){ // -------------Infinite Loop to sample Time (MAIN LOOP) --> HISTORIES
float travel_distance = 0.0f; // Counter to track the distance traveled by each photon: used to estimate time of flight at detector !!MCGPU-PET!!
if (direction1.x>10.0f) { // We need to sample a new pair of photons, and time will be spent:
// If (acquisition_time_ps_thread<0.) break; // ****Break infinite loop when time has run out for this block! !!MCGPU-PET!!
// Sample the time to the next annihilation in picoseconds. DELTA_T = LOG(1.-RAND(0.0D0)*0.99999)/ACTIV
// Each thread in the block will simulate a successive annihilation, and the activity will be reduced accordingly after each event. // ACTIV = ACTIV * EXP(-MEAN_LIFE * DELTA_T)
double Dt = -log(ranecu_double(&seed))*inv_activity_thread; //Time between decays
//for (index=0; index<blockDim.x; index++) { // Update shared block (=voxel) values secuentially for each thread in the block
// if(threadIdx.x==index) {
// if ((unsigned long long int)0==acquisition_time_ps_SHARED) // Check if a previous thread has finished the time
// { // Time finished
// flag_time_finished = (char)1;
// }
// else // Time remaining
// {
// Dt = inv_activity_thread * Dt;
unsigned long long int Dt_ps = __double2ull_rn(Dt*1e12); // Converted into ps (truncation errors may accumulate --> possible precision loss)
if (Dt_ps<acquisition_time_ps_thread) {
acquisition_time_ps_thread = acquisition_time_ps_thread - Dt_ps;
histories_thread = histories_thread + 2; //JLH
//acquisition_time_ps_SHARED = my_acquisition_time; // Update the current acquisition time for all the block, in picoseconds
//total_histories_block_SHARED = total_histories_block_SHARED + (unsigned long long int)2; // Update the number of histories simulated (in pairs)
// JLH
inv_activity_thread = inv_activity_thread * exp(Dt*inv_mean_life_SHARED); // Update the value of the activity, one thread at a time
} else { // Time finished // Sampled time is beyond allowed acquisition time: do not simulate any more
break; // ****Break infinite loop when time has run out for this thread!
//acquisition_time_ps_SHARED = (unsigned long long int)0; // Sampled time is beyond allowed acquisition time: do not simulate any more annihilation for this block
//flag_time_finished = (char)1;
} //Check if Time<ACQ_time
} //direction1.x
//if (flag_time_finished!=0) continue; // No particle to track for this thread, skip for loop !!MCGPU-PET!!
//if (absvox==0) printf("**** NEW HISTORY (%d, %d): %lld ps [seeds: %d, %d]\n",threadIdx.x, thread_id, my_acquisition_time, seed.x, seed.y); // !!Verbose!!
// -- Call the source function to get a primary x ray:
source(&energy, &position, &direction, &position1, &direction1, &seed, source_data, &energyFirst);
scatter_state = (signed char)0; // Reset previous scatter state: new non-scattered particle loaded
// -- Find the current energy bin by truncation:
index = __float2int_rd((energy-mfp_table_data_CONST.e0)*mfp_table_data_CONST.ide); // Using CUDA to convert float to integer rounding down
// -- Get the minimum mfp at the current energy using linear interpolation (Woodcock tracking):
float2 mfp_Woodcock_read = mfp_Woodcock_table[index]; // Read the 2 parameters for the linear interpolation in a single read from global memory
mfp_Woodcock = mfp_Woodcock_read.x + energy * mfp_Woodcock_read.y; // Interpolated minimum MFP
// -- Reset previous material to force a recalculation of the MFPs (negative materials are not allowed in the voxels):
material_old = -1;
//break; //JLH
// *** X-ray interaction loop:
for(;;) { // MAIN INTERACTION LOOP
float3 matdens;
prob = 0.;
absvox = locate_voxel(&position); // (Returns negative value if particle located outside teh voxelized object bounding box)
if (absvox<0) break; // -- Primary particle was not pointing to the voxel region! (but may still be detected after moving in vacuum in a straight line).
do { // *** Virtual interaction loop: // New loop structure in MC-GPU_v1.3: simulate all virtual events before sampling Compton & Rayleigh:
step = -(mfp_Woodcock)*logf(ranecu(&seed)); // Using the minimum MFP in the geometry for the input energy (Woodcock tracking)
travel_distance += step;
position.x += step*direction.x;
position.y += step*direction.y;
position.z += step*direction.z;
// -- Locate the new particle in the voxel geometry:
absvox = locate_voxel(&position); // Get the voxel number at the current position and the voxel coordinates
// Used to check if inside the dose ROI in DOSE TALLY.
if (absvox<0) break; // Particle escaped the voxel region! ("index" is still >0 at this moment)
matdens = voxel_mat_dens[absvox]; // Get the voxel material and density in a single read from global memory
material0 = (int)(matdens.x - 1); // Set the current material by truncation, and set 1st material to value '0'.
// -- Get the data for the linear interpolation of the interaction MFPs, in case the energy or material have changed:
if (material0 != material_old) {
mfp_table_read_a = mfp_table_a[index*(MAX_MATERIALS)+material0];
mfp_table_read_b = mfp_table_b[index*(MAX_MATERIALS)+material0];
material_old = material0; // Store the new material
}
// *** Apply Woodcock tracking:
mfp_density = mfp_Woodcock * matdens.y;
// -- Calculate probability of delta scattering, using the total mean free path for the current material and energy (linear interpolation):
prob = 1.0f - mfp_density * (mfp_table_read_a.x + energy * mfp_table_read_b.x);
randno = ranecu(&seed); // Sample uniform PRN
} while (randno<prob); // [Iterate Do-While Virtual Interaction Loop if there is a delta scattering event]
if (absvox<0) break; // -- Particle escaped the voxel region! Break the interaction loop to call tally image.
// The GPU threads will be stopped and waiting here until ALL threads have a REAL event:
// -- Real event takes place! Check the kind of event and sample the effects of the interaction:
prob += mfp_density * (mfp_table_read_a.y + energy * mfp_table_read_b.y); // Interpolate total Compton MFP ('y' component)
if (randno<prob) { // [Checking Compton scattering]
// *** Compton interaction:
// -- Sample new direction and energy:
double costh_Compton;
randno = energy; // Save temporal copy of the particle energy (variable randno not necessary until next sampling). DOSE TALLY
GCOa(&energy, &costh_Compton, &material0, &seed, &cgco_SHARED);
rotate_double(&direction, costh_Compton, 6.28318530717958647693*ranecu_double(&seed));
randno = energy - randno; // Save temporal copy of the negative of the energy lost in the interaction. DOSE TALLY
// -- Find the new energy interval:
index = __float2int_rd((energy-mfp_table_data_CONST.e0)*mfp_table_data_CONST.ide);
if (index>-1) { // 'index' will be negative only when the energy is below the tabulated minimum energy
// particle will be then absorbed (rejected) after tallying the dose.
// -- Get the Woodcock MFP for the new energy (energy above minimum cutoff):
float2 mfp_Woodcock_read = mfp_Woodcock_table[index]; // Read the 2 parameters for the linear interpolation in a single read from global memory
mfp_Woodcock = mfp_Woodcock_read.x + energy * mfp_Woodcock_read.y; // Interpolated minimum MFP
material_old = -2; // Set an impossible material to force an update of the MFPs data for the nex energy interval
// -- Update scatter state:
if (scatter_state==(signed char)0){
scatter_state = (signed char)1; // Set scatter_state == 1: Compton scattered particle
}else{
scatter_state = (signed char)3; // Set scatter_state == 3: Multi-scattered particle
}
} //Index
} else {
prob += mfp_density * (mfp_table_read_a.z + energy * mfp_table_read_b.z); // Interpolate total Rayleigh MFP ('z' component)
if (randno<prob) { // [Checking Rayleigh scattering]
// *** Rayleigh interaction: -- Sample angular deflection:
double costh_Rayleigh;
float pmax_current = rayleigh_table->pmax[(index+1)*MAX_MATERIALS+material0]; // Get max (ie, value for next bin?)
//cumul prob square form factor for Rayleigh sampling
GRAa(&energy, &costh_Rayleigh, &material0, &pmax_current, &seed, rayleigh_table);
rotate_double(&direction, costh_Rayleigh, 6.28318530717958647693*ranecu_double(&seed));
// -- Update scatter state:
if (scatter_state==(signed char)0) {
scatter_state = (signed char)2; // Set scatter_state == 1: Rayleigh scattered particle
} else {
scatter_state = (signed char)3; // Set scatter_state == 3: Multi-scattered particle
}
} else {
// *** Photoelectric interaction (or pair production): mark particle for absorption after dose tally (ie, index<0)!
randno = -energy; // Save temporal copy of the (negative) energy deposited in the interaction (variable randno not necessary anymore).
index = -11; // A negative "index" marks that the particle was absorbed and that it will never arrive at the detector.
} //randno vs prob
} // index
// -- Tally the dose deposited in Compton and photoelectric interactions:
if (randno<-0.001f) {
float Edep = -1.0f*randno; // If any energy was deposited, this variable will temporarily store the negative value of Edep.
// -- Tally the dose deposited in the current material, if enabled (ie, array allocated and not null):
if (materials_dose!=NULL) tally_materials_dose(&Edep, &material0, materials_dose); // !!tally_materials_dose!!
// -- Tally the energy deposited in the current voxel, if enabled (tally disabled when dose_ROI_x_max_CONST is negative). DOSE TALLY
if (dose_ROI_x_max_CONST > -1) {
short3 voxel_coord;
voxel_coord.x = __float2int_rd(position.x * voxel_data_CONST.inv_voxel_size.x); // !!MCGPU-PET!!
voxel_coord.y = __float2int_rd(position.y * voxel_data_CONST.inv_voxel_size.y);
voxel_coord.z = __float2int_rd(position.z * voxel_data_CONST.inv_voxel_size.z);
// CODE TO MAP EMISSION VOXELS INSTEAD OF REAL DOSE DEPOSITION LOCATIONS: v
//voxel_coord.x=blockIdx.x; voxel_coord.y=blockIdx.y; voxel_coord.z=blockIdx.z;
tally_voxel_energy_deposition(&Edep, &voxel_coord, voxels_Edep);
}
}
// -- Break interaction loop for particles absorbed or with energy below the tabulated cutoff: particle is "absorbed" (ie, track discontinued).
if (index<0) break;
} // [Cycle the X-ray interaction loop]
//!!COINCIDENCE!!
if (index>-1) {
// Particle escaped geometry and not absorbed: chance of coincidence detection
if (direction1.x>10.0f) { // Simulating the second particle of a pair (set by source routine). !!COINCIDENCE!!
if (energyFirst>-0.5f) { // True if the first particle was not absorbed
// Both pair particle escaped the voxels but were not absorbed, check if it will arrive at the detector and tally PSF:
// Report only PSF (tally_PSF_SINOGRAM==1), SINOGRAM (tally_PSF_SINOGRAM==2) or both (tally_PSF_SINOGRAM==0)
// Report only True coincidences (tally_TYPE==1), Scatter (tally_TYPE==2) or both (tally_TYPE==0)
if (tally_TYPE_SHARED==0 || (tally_TYPE_SHARED==1 && scatter_state==0 && scatter_stateFirst==0) || (tally_TYPE_SHARED==2 && (scatter_state!=0 || scatter_stateFirst!=0))) {
if ((tally_PSF_SINOGRAM_SHARED==0)||(tally_PSF_SINOGRAM_SHARED==2)) {
tally_Sinogram(&energy, &energyFirst, &position, &positionFirst, &direction, &directionFirst, &scatter_state, &scatter_stateFirst, detector_data, source_data, &acquisition_time_ps_thread, &travel_distance, &travel_distanceFirst, True_dev, Scatter_dev, Imagen_T_dev, Imagen_SC_dev, Energy_Spectrum_dev, &seed, &E_resol, &E_low, &E_high, &FOVZ, &NROWS, &NCRYSTALS, &NANGLES, &NRAD, &NZS, &NBINS, &RES, &NVOXS, &NE, &MRD, &SPAN, &NSINOS);
}
if ((tally_PSF_SINOGRAM_SHARED==0)||(tally_PSF_SINOGRAM_SHARED==1)) {
tally_PSF_coincidences(&energy, &energyFirst, &position, &positionFirst, &direction, &directionFirst, &scatter_state, &scatter_stateFirst, detector_data, source_data, &acquisition_time_ps_thread, &travel_distance, &travel_distanceFirst, PSF, index_PSF, &seed, &E_resol, &E_low, &E_high); // !!MCGPU-PET!! // !!COINCIDENCE!!
} // PSF/Sinogram output
} // True/Scatter type
} //Energyfirst
} else {
// Store state of first particle of the coinicidence event for reporting later to PSF // !!COINCIDENCE!!
energyFirst = energy;
positionFirst = position;
directionFirst = direction;
scatter_stateFirst = scatter_state;
travel_distanceFirst = travel_distance;
} //direction
} // Index
// OLD CODE NOT REPORTING COINCIDENCES ONLY:
// -- Particle escaped the voxels but was not absorbed, check if it will arrive at the detector and tally its energy:
// tally_PSF(&energy, &position, &direction, &scatter_state, detector_data, source_data, &my_acquisition_time, &travel_distance, PSF, index_PSF); // !!MCGPU-PET!!
} // [Continue with a new history]
// -- Store the final random seed used by the last thread in the grid to global memory in order to continue the random secuence in successive kernel executions in the same GPU.
// Since I am only storing the 'x' component and using it to init both parts of the ranecu generator, the secuence will actually diverge, but I warranty that at least one MLCG will stay uncorrelated.
if ( (blockIdx.x == (gridDim.x-1)) && (blockIdx.y == (gridDim.y-1)) && (blockIdx.z == (gridDim.z-1)) && (threadIdx.x == (blockDim.x-1)))
{ *seed_input_device = seed.x; } // Store last seed used in last kernel thread
atomicAdd(total_histories,histories_thread);
__syncthreads(); // Make sure all threads are done simulating histories
//if (0==threadIdx.x) {atomicAdd(total_histories, total_histories_block_SHARED); }
// Safely add the histories generated by all threads in this block to the complete grid total in global memory (only 1 thread per block will call atomicAdd)
} // [All tracks simulated for this kernel call: return to CPU]
////////////////////////////////////////////////////////////////////////////////
//! Tally the dose deposited in the voxels.
//! This function is called whenever a particle suffers a Compton or photoelectric
//! interaction. It is not necessary to call this function if the dose tally
//! was disabled in the input file (ie, dose_ROI_x_max_CONST < 0).
//! Electrons are not transported in MC-GPU and therefore we are approximating
//! that the dose is equal to the KERMA (energy released by the photons alone).
//! This approximation is acceptable when there is electronic equilibrium and when
//! the range of the secondary electrons is shorter than the voxel size. Usually the
//! doses will be acceptable for photon energies below 1 MeV. The dose estimates may
//! not be accurate at the interface of low density volumes.
//!
//! We need to use atomicAdd() in the GPU to prevent that multiple threads update the
//! same voxel at the same time, which would result in a lose of information.
//! This is very improbable when using a large number of voxels but gives troubles
//! with a simple geometries with few voxels (in this case the atomicAdd will slow
//! down the code because threads will update the voxel dose secuentially).
//!
//!
//! @param[in] Edep Energy deposited in the interaction
//! @param[in] voxel_coord Voxel coordinates, needed to check if particle located inside the input region of interest (ROI)
//! @param[out] voxels_Edep ulonglong2 array containing the 3D voxel dose and dose^2 (ie, uncertainty) as unsigned integers scaled by SCALE_eV.
////////////////////////////////////////////////////////////////////////////////
__device__ void tally_voxel_energy_deposition(float* Edep, short3* voxel_coord, ulonglong2* voxels_Edep) {
// !!DeBuG!! Maybe it would be faster to store a 6 element struct and save temp copy?? struct_short_int_x6_align16 dose_ROI_size = dose_ROI_size_CONST; // Get ROI coordinates from GPU constant memory and store temporal copy
if((voxel_coord->x < dose_ROI_x_min_CONST) || (voxel_coord->x > dose_ROI_x_max_CONST) ||
(voxel_coord->y < dose_ROI_y_min_CONST) || (voxel_coord->y > dose_ROI_y_max_CONST) ||
(voxel_coord->z < dose_ROI_z_min_CONST) || (voxel_coord->z > dose_ROI_z_max_CONST))
{
return; // -- Particle outside the ROI: return without tallying anything.
}
// -- Particle inside the ROI: tally Edep.
register int DX = 1 + (int)(dose_ROI_x_max_CONST - dose_ROI_x_min_CONST);
register int num_voxel = (int)(voxel_coord->x-dose_ROI_x_min_CONST) + ((int)(voxel_coord->y-dose_ROI_y_min_CONST))*DX + ((int)(voxel_coord->z-dose_ROI_z_min_CONST))*DX*(1 + (int)(dose_ROI_y_max_CONST-dose_ROI_y_min_CONST));
#ifdef USING_CUDA
atomicAdd(&voxels_Edep[num_voxel].x, __float2ull_rn((*Edep)*SCALE_eV) ); // Energy deposited at the voxel, scaled by the factor SCALE_eV and rounded.
atomicAdd(&voxels_Edep[num_voxel].y, __float2ull_rn((*Edep)*(*Edep)) ); // (not using SCALE_eV for std_dev to prevent overflow)
#else
voxels_Edep[num_voxel].x += (unsigned long long int)((*Edep)*SCALE_eV + 0.5f);
voxels_Edep[num_voxel].y += (unsigned long long int)((*Edep)*(*Edep) + 0.5f);
#endif
return;
}
// // !!COINCIDENCE!! [December 13, 2017]
////////////////////////////////////////////////////////////////////////////////
//! Tally a Phase Space File (PSF) of the COINCIDENCE photons escaping the voxelized volume.
//! If one of the photons is absorbed, none is reported in PSF. If no absorption happens, both
//! particles are reported in consecutive PSF elements, sorted by shortest arrival time.
//!
//! @param[in] energy X-ray energy
//! @param[in] position Particle position
//! @param[in] direction Particle direction (cosine vectors)
//! @param[in] scatter_state Flag marking primaries, single Compton, single Rayleigh or multiple scattered radiation
//! @param[in] detector_data Variables defining the detector geometry, stored in shared memory for fast access.
//! @param[out] image Pointer to the global memory array storing the PSF
////////////////////////////////////////////////////////////////////////////////
__device__
inline void tally_PSF_coincidences(float* energy, float* energyFirst, float3* position, float3* positionFirst, float3* direction, float3* directionFirst, signed char* scatter_state, signed char* scatter_stateFirst, struct detector_struct* detector_data, struct source_struct* source_data, unsigned long long int* acquisition_time_ps_thread, float* travel_distance, float* travel_distanceFirst, PSF_element_struct* PSF, int* index_PSF, int2 *seed, float* E_resol, float* E_low, float* E_high) // !!MCGPU-PET!! // !!COINCIDENCE!!
{
//! Assuming a cylindrical detector with central axis in the Z direction. The cylinder must be larger than the voxel volume (negative distances not computed) // !!MCGPU-PET!!
// -- Move photon to the surface of the cylinder (radius in cylindrical coordinates equals PSF_radius):
float x0 = position->x - detector_data->PSF_center.x; // Move to coordinate system with cylinder center at origin
float y0 = position->y - detector_data->PSF_center.y;
float A = direction->x*direction->x + direction->y*direction->y;
float B = 2.0f*(x0*direction->x + y0*direction->y);
float C = x0*x0 + y0*y0 - detector_data->PSF_radius*detector_data->PSF_radius;
float dist;
if (A==0.0f) {
dist = 99999999.9f; // Photon moving along Z axis -> it will intersect the cylinder only at infinite distance
} else {
dist = (-B + sqrtf(B*B-4.0f*A*C)) / (2.0f*A); // There will always be a real, positive dist as long as the cylinder is larger than the voxels (no particle is ever outside the cylinder)
}
*travel_distance += dist;
position->x = x0 + dist*direction->x;
position->y = y0 + dist*direction->y;
position->z = (position->z - detector_data->PSF_center.z) + dist*direction->z;
float phi = atan2f(position->y, position->x);
// -- Report only particles that intersect the cylinder inside the PSF detector height:
if (fabsf(position->z)<0.5f*detector_data->PSF_height) {
// Repeat same calculations for second photon:
x0 = positionFirst->x - detector_data->PSF_center.x; // Move to coordinate system with cylinder center at origin
y0 = positionFirst->y - detector_data->PSF_center.y;
A = directionFirst->x*directionFirst->x + directionFirst->y*directionFirst->y;
B = 2.0f*(x0*directionFirst->x + y0*directionFirst->y);
C = x0*x0 + y0*y0 - detector_data->PSF_radius*detector_data->PSF_radius;
if (A==0.0f) {
dist = 99999999.9f; // Photon moving along Z axis -> it will intersect the cylinder only at infinite distance
} else {
dist = (-B + sqrtf(B*B-4.0f*A*C)) / (2.0f*A); // There will always be a real, positive dist as long as the cylinder is larger than the voxels (no particle is ever outside the cylinder)
}
*travel_distanceFirst += dist;
positionFirst->x = x0 + dist*directionFirst->x;
positionFirst->y = y0 + dist*directionFirst->y;
positionFirst->z = (positionFirst->z - detector_data->PSF_center.z) + dist*directionFirst->z;
float phi1 = atan2f(positionFirst->y, positionFirst->x);
// Energy response of the detector (energy resolution) // JLH
float randno1 = ranecu(seed);
float randno2 = ranecu(seed);
float gaussian_var1 = sqrtf(-2.0f*logf(randno1+1e-8f))*cosf(2.0f*PI*randno2);
float randno3 = ranecu(seed);
float randno4 = ranecu(seed);
float gaussian_var2 = sqrtf(-2.0f*logf(randno3+1.0e-8f))*cosf(2.0f*PI*randno4);
float Energia1 = (*energy) + ((*E_resol)*(1.0f/2.35f))*(*energy)*gaussian_var1;
float Energia2 = (*energyFirst) + ((*E_resol)*(1.0f/2.35f))*(*energyFirst)*gaussian_var2;
// -- Report only particles that intersect the cylinder inside the PSF detector height:
if (fabsf(positionFirst->z)<0.5f*detector_data->PSF_height && (Energia1 > *E_low) && (Energia1 < *E_high) && (Energia2 > *E_low) && (Energia2 < *E_high)){
// Safely get two slots in the PSF array in global memory:
int index = atomicAdd(index_PSF, 2);
if (index>2000000000)
*index_PSF = 2000000000; // Prevent overflow of integer counter (max value 2^31-1~2.14e9) //!!DeBuG!!
if (index<detector_data->PSF_size) { // Avoid overflow!
// Store particles in order of arrival (shortest travel first):
int i0, i1;
if (*travel_distance<*travel_distanceFirst)
{
i0=index;
i1=index+1;
}
else
{
i0=index+1;
i1=index;
}
// Store first particle data in global memory:
PSF[i0].emission_time_ps = source_data->acquisition_time_ps - *acquisition_time_ps_thread; // Report emission time starting from time 0 s
PSF[i0].travel_time_ps = (*travel_distance)*inv_SPEEDOFLIGHT; // Convert the distance to picoseconds (assuming speed of light in vacuum for every material)
PSF[i0].emission_absvox = blockIdx.x + blockIdx.y*gridDim.x + blockIdx.z*gridDim.x*gridDim.y; // Assuming that the grid size is equal to the voxel geomety size! !!MCGPU-PET!!
PSF[i0].energy = Energia1;
PSF[i0].z = position->z;
PSF[i0].phi = phi;
PSF[i0].vx = direction->x;
PSF[i0].vy = direction->y;
PSF[i0].vz = direction->z;
PSF[i0].index1 = (short int)(*scatter_state); // Flag for scatter: =0 for non-scattered, =1 for Compton, =2 for Rayleigh, and =3 for multiple scatter.
PSF[i0].index2 = (short int)0; // use not defined yet (decay, prompt...)
// Store second particle data in global memory:
PSF[i1].emission_time_ps = source_data->acquisition_time_ps - *acquisition_time_ps_thread; // Report emission time starting from time 0 s
PSF[i1].travel_time_ps = (*travel_distanceFirst)*inv_SPEEDOFLIGHT; // Convert the distance to picoseconds (assuming speed of light in vacuum for every material)
PSF[i1].emission_absvox = blockIdx.x + blockIdx.y*gridDim.x + blockIdx.z*gridDim.x*gridDim.y; // Assuming that the grid size is equal to the voxel geomety size! !!MCGPU-PET!!
PSF[i1].energy = Energia2;
PSF[i1].z = positionFirst->z;
PSF[i1].phi = phi1;
PSF[i1].vx = directionFirst->x;
PSF[i1].vy = directionFirst->y;
PSF[i1].vz = directionFirst->z;
PSF[i1].index1 = (short int)(*scatter_stateFirst); // Flag for scatter: =0 for non-scattered, =1 for Compton, =2 for Rayleigh, and =3 for multiple scatter.
PSF[i1].index2 = (short int)0; // use not defined yet (decay, prompt...)
}
}
}
}
////////////////////////////////////////////////////////////////////////////////
//! Source that creates primary x rays, according to the defined source model.
//! The particles are automatically moved to the surface of the voxel bounding box,
//! to start the tracking inside a real material. If the sampled particle do not
//! enter the voxels, it is init in the focal spot and the main program will check
//! if it arrives at the detector or not.
//!
//! @param[in] source_data Structure describing the source.
//! @param[in] source_energy_data_CONST Global variable in constant memory space describing the source energy spectrum.
//! @param[out] position Initial particle position (particle transported inside the voxel bbox).
//! @param[out] direction Sampled particle direction (cosine vectors).
//! @param[out] energy Sampled energy of the new x ray.
//! @param[in] seed Current seed of the random number generator, requiered to sample the movement direction.
//! @param[out] absvox Set to <0 if primary particle will not cross the voxels, not changed otherwise (>0).
////////////////////////////////////////////////////////////////////////////////
__device__
inline void source(float* energy, float3* position, float3* direction, float3* position1, float3* direction1, int2* seed, struct source_struct* source_data, float* energyFirst)
{
// *** Assign annihilation photon energy:
*energy = ANNIHILATION_PHOTON_ENERGY; // Set initial energy of 510998.95 eV
if (direction1->x>10.0f) // First photon of the pair: we need to sample
{
// *** Sample the initial direction isotropically:
direction->z = 1.0f - 2.0f*ranecu(seed); //--cosine of theta (spherical coordinates phi,theta)
register float phi_sampled = 6.28318530716f*ranecu(seed); // Angle in (0, 2*pi)
register float theta_sampled = acosf(direction->z);
register float sin_theta_sampled = sqrtf(1.0f - direction->z*direction->z); //--sine of theta
float sinphi_sampled, cosphi_sampled;
sincosf(phi_sampled, &sinphi_sampled,&cosphi_sampled); // Calculate the SIN and COS at the same time.
direction->y = sin_theta_sampled * sinphi_sampled; //spherical coordinates
direction->x = sin_theta_sampled * cosphi_sampled; //spherical coordinates
// *** Sample x ray emission position uniformly inside the voxel:
// Making sure that the particle is not exactly on the surface of the voxel to prevent floating point errors to cause the emission from the wrong absvox:
float r1 = ranecu(seed), r2 = ranecu(seed), r3 = ranecu(seed);
r1 = max_value(EPS_SOURCE, r1-EPS_SOURCE), r2 = max_value(EPS_SOURCE, r2-EPS_SOURCE), r3 = max_value(EPS_SOURCE, r3-EPS_SOURCE);
position->x = ((float)blockIdx.x + r1)/voxel_data_CONST.inv_voxel_size.x; // Assuming that grid size equal to voxel geomety size! !!MCGPU-PET!!
position->y = ((float)blockIdx.y + r2)/voxel_data_CONST.inv_voxel_size.y;
position->z = ((float)blockIdx.z + r3)/voxel_data_CONST.inv_voxel_size.z;
direction1->x = direction->x; // Store sampled position and direction
direction1->y = direction->y;
direction1->z = direction->z;
position1->x = position->x;
position1->y = position->y;
position1->z = position->z;
*energyFirst = -10.0f; // By default, mark that first particle in pair was absorbed -> state changed later if absorption does not happen. !!COINCIDENCE!!
return; // Exit function
}
//else // Second photon of the pair:
// JLH // Instead of using previously sampled info, we generate it again
//{
if (PETA_DEV[0]==0) {
//JLH--- random emission of photon 2 ---for bg simulation
direction->z = 1.0f - 2.0f*ranecu(seed);
register float phi_sampled = 6.28318530716f*ranecu(seed); // Angle in (0, 2*pi)
register float sin_theta_sampled = sqrtf(1.0f - direction->z*direction->z);
float sinphi_sampled, cosphi_sampled;
sincosf(phi_sampled, &sinphi_sampled,&cosphi_sampled); // Calculate the SIN and COS at the same time.
direction->y = sin_theta_sampled * sinphi_sampled;
direction->x = sin_theta_sampled * cosphi_sampled;
}
else{ //180 deg emission // Standard PET
//---------NO-COLLINEARITY--------------
register float phi_sampled2 = atan2f(direction1->y,direction1->x)+PI; //collinearity
register float theta_sampled2 = PI - acos(direction1->z); //collinearity
float V1 = ranecu(seed);
//float V2 = ranecu(seed)*2-1;
float V2 = ranecu(seed);
//float V_2 = V1*V1+V2*V2;
//float THETA_NC = sqrtf(-2*SIGMA_NC*SIGMA_NC*2*logf(V2*V2)/V_2)*V1;
float THETA_NC = (2.0*0.21276*PI/180.0)*sqrtf(-2.0f*logf(V2+1e-8f))*cosf((2.0*PI)*V1); // float SIGMA_NC = 2.0*0.21276*PI/180.0;
float PHI_NC = (2.0*PI)*ranecu(seed);
float ST,CT,CP,SP, ST1,CT1,CP1,SP1;
sincosf(THETA_NC, &ST, &CT); // Calculate the SIN and COS at the same time.
sincosf(PHI_NC, &SP, &CP);
sincosf(theta_sampled2,&ST1,&CT1);
sincosf(phi_sampled2, &SP1,&CP1);
//FEB2022 float ST = sinf(THETA_NC), CT = cosf(THETA_NC), CP = cosf(PHI_NC), SP = sinf(PHI_NC), ST1 = sinf(theta_sampled2), CT1 = cosf(theta_sampled2), SP1 = sinf(phi_sampled2), CP1 = cosf(phi_sampled2);
float GX = CP*ST;
float GY = SP*ST;
float GZ = CT;
float GX1 = GX*CT1*CP1-GY*SP1+GZ*ST1*CP1;
float GY1 = GX*CT1*SP1+GY*CP1+GZ*ST1*SP1;
float GZ1 = -GX*ST1+GZ*CT1;
direction->x = GX1;
direction->y = GY1;
direction->z = GZ1;
//theta_sampled2 = acos(GZ1);
//phi_sampled2 = atan2f(GY1,GX1);
//direction->y = sin(theta_sampled2) * sin(phi_sampled2);
//direction->x = sin(theta_sampled2) * cos(phi_sampled2);
//direction->z = cos(theta_sampled2);
//----END OF NO-COLLINEARITY-----------
//direction->x = -direction1->x;
//direction->y = -direction1->y;
//direction->z = -direction1->z;
// --- end of random JLH ------
}
position->x = position1->x;
position->y = position1->y;
position->z = position1->z;
direction1->x = 11.1f; // Mark that a new photon must be sampled next time
// }
}
////////////////////////////////////////////////////////////////////////////////
//! Functions that moves a particle inside the voxelized geometry bounding box.
//! An EPSILON distance is added to make sure the particles will be clearly inside the bbox,
//! not exactly on the surface.
//!
//! This algorithm makes the following assumtions:
//! - The back lower vertex of the voxel bounding box is always located at the origin: (x0,y0,z0)=(0,0,0).
//! - The initial value of "position" corresponds to the focal spot location.
//! - When a ray is not pointing towards the bbox plane that it should cross according to the sign of the direction,
//! I assign a distance to the intersection =0 instead of the real negative distance. The wall that will be
//! crossed to enter the bbox is always the furthest and therefore a 0 distance will never be used except
//! in the case of a ray starting inside the bbox or outside the bbox and not pointing to any of the 3 planes.
//! In this situation the ray will be transported a 0 distance, meaning that it will stay at the focal spot.
//!
//! (Interesting information on ray-box intersection: http://tog.acm.org/resources/GraphicsGems/gems/RayBox.c)
//!
//! @param[in,out] position Particle position: initially set to the focal spot, returned transported inside the voxel bbox.
//! @param[out] direction Sampled particle direction (cosine vectors).
//! @param[out] intersection_flag Set to <0 if particle outside bbox and will not cross the voxels, not changed otherwise.
//! @param[out] size_bbox Size of the bounding box.
////////////////////////////////////////////////////////////////////////////////
/* [FUNCTION NOT USED IN MCGPU-PET BECAUSE THE PHOTONS START INSIDE THE VOXELIZED GEOMETRY BY DESIGN]
__device__
inline void move_to_bbox(float3* position, float3* direction, float3 size_bbox, int* intersection_flag)
{
float dist_y, dist_x, dist_z;
// -Distance to the nearest Y plane:
if ((direction->y) > EPS_SOURCE) // Moving to +Y: check distance to y=0 plane
{
// Check Y=0 (bbox wall):
if (position->y > 0.0f) // The input position must correspond to the focal spot => position->y == source_data_CONST.position[*num_p].y
dist_y = 0.0f; // No intersection with this plane: particle inside or past the box
// The actual distance would be negative but we set it to 0 bc we will not move the particle if no intersection exist.
else
dist_y = EPS_SOURCE + (-position->y)/(direction->y); // dist_y > 0 for sure in this case
}
else if ((direction->y) < NEG_EPS_SOURCE)
{
// Check Y=voxel_data_CONST.size_bbox.y:
if (position->y < size_bbox.y)
dist_y = 0.0f; // No intersection with this plane
else
dist_y = EPS_SOURCE + (size_bbox.y - position->y)/(direction->y); // dist_y > 0 for sure in this case
}
else // (direction->y)~0
dist_y = NEG_INF; // Particle moving parallel to the plane: no interaction possible (set impossible negative dist = -INFINITE)
// -Distance to the nearest X plane:
if ((direction->x) > EPS_SOURCE)
{
// Check X=0:
if (position->x > 0.0f)
dist_x = 0.0f;
else
dist_x = EPS_SOURCE + (-position->x)/(direction->x); // dist_x > 0 for sure in this case
}
else if ((direction->x) < NEG_EPS_SOURCE)
{
// Check X=voxel_data_CONST.size_bbox.x:
if (position->x < size_bbox.x)
dist_x = 0.0f;
else
dist_x = EPS_SOURCE + (size_bbox.x - position->x)/(direction->x); // dist_x > 0 for sure in this case
}
else
dist_x = NEG_INF;
// -Distance to the nearest Z plane:
if ((direction->z) > EPS_SOURCE)
{
// Check Z=0:
if (position->z > 0.0f)
dist_z = 0.0f;
else
dist_z = EPS_SOURCE + (-position->z)/(direction->z); // dist_z > 0 for sure in this case
}
else if ((direction->z) < NEG_EPS_SOURCE)
{
// Check Z=voxel_data_CONST.size_bbox.z:
if (position->z < size_bbox.z)
dist_z = 0.0f;
else
dist_z = EPS_SOURCE + (size_bbox.z - position->z)/(direction->z); // dist_z > 0 for sure in this case
}
else
dist_z = NEG_INF;
// -- Find the longest distance plane, which is the one that has to be crossed to enter the bbox.
// Storing the maximum distance in variable "dist_z". Distance will be =0 if no intersection exists or
// if the x ray is already inside the bbox.
if ( (dist_y>dist_x) && (dist_y>dist_z) )
dist_z = dist_y; // dist_z == dist_max
else if (dist_x>dist_z)
dist_z = dist_x;
// else
// dist_max = dist_z;
// -- Move particle from the focal spot (current location) to the bbox wall surface (slightly inside):
position->x += dist_z * direction->x;
position->y += dist_z * direction->y;
position->z += dist_z * direction->z;
// Check if the new position is outside the bbox. If true, the particle must be moved back to the focal spot location:
if ( (position->x < 0.0f) || (position->x > size_bbox.x) ||
(position->y < 0.0f) || (position->y > size_bbox.y) ||
(position->z < 0.0f) || (position->z > size_bbox.z) )
{
position->x -= dist_z * direction->x; // Reject new position undoing the previous translation
position->y -= dist_z * direction->y;
position->z -= dist_z * direction->z;
(*intersection_flag) = -111; // Particle outside the bbox AND not pointing to the bbox: set absvox<0 to skip interaction sampling.
}
}
*/
////////////////////////////////////////////////////////////////////////////////
//! Upper limit of the number of random values sampled in a single track.
#define LEAP_DISTANCE 256
//! Multipliers and moduli for the two MLCG in RANECU.
#define a1_RANECU 40014
#define m1_RANECU 2147483563
#define a2_RANECU 40692
#define m2_RANECU 2147483399
////////////////////////////////////////////////////////////////////////////////
//! Initialize the pseudo-random number generator (PRNG) RANECU to a position
//! far away from the previous history (leap frog technique).
//!
//! Each calculated seed initiates a consecutive and disjoint sequence of
//! pseudo-random numbers with length LEAP_DISTANCE, that can be used to
//! in a parallel simulation (Sequence Splitting parallelization method).
//! The basic equation behind the algorithm is:
//! S(i+j) = (a**j * S(i)) MOD m = [(a**j MOD m)*S(i)] MOD m ,
//! which is described in:
//! P L'Ecuyer, Commun. ACM 31 (1988) p.742
//!
//! This function has been adapted from "seedsMLCG.f", see:
//! A Badal and J Sempau, Computer Physics Communications 175 (2006) p. 440-450
//!
//! @param[in] history Particle bach number.
//! @param[in] seed_input_device Initial PRNG seed input (used to initiate both MLCGs in RANECU).
//! @param[out] seed Initial PRNG seeds for the present history.
//!
////////////////////////////////////////////////////////////////////////////////
__device__
inline void init_PRNG(int history_batch, int histories_per_thread, int seed_input, int2* seed)
{
// -- Move the RANECU generator to a unique position for the current batch of histories:
// I have to use an "unsigned long long int" value to represent all the simulated histories in all previous batches
// The maximum unsigned long long int value is ~1.8e19: if history >1.8e16 and LEAP_DISTANCE==1000, 'leap' will overflow.
// **** 1st MLCG:
unsigned long long int leap = ((unsigned long long int)(history_batch+1))*(histories_per_thread*LEAP_DISTANCE);
int y = 1;
int z = a1_RANECU;
// -- Calculate the modulo power '(a^leap)MOD(m)' using a divide-and-conquer algorithm adapted to modulo arithmetic
for(;;)
{
// (A2) Halve n, and store the integer part and the residue
if (0!=(leap&01)) // (bit-wise operation for MOD(leap,2), or leap%2 ==> proceed if leap is an odd number) Equivalent: t=(short)(leap%2);
{
leap >>= 1; // Halve n moving the bits 1 position right. Equivalent to: leap=(leap/2);
y = abMODm(m1_RANECU,z,y); // (A3) Multiply y by z: y = [z*y] MOD m
if (0==leap) break; // (A4) leap==0? ==> finish
}
else // (leap is even)
{
leap>>= 1; // Halve leap moving the bits 1 position right. Equivalent to: leap=(leap/2);
}
z = abMODm(m1_RANECU,z,z); // (A5) Square z: z = [z*z] MOD m
}
// AjMODm1 = y; // Exponentiation finished: AjMODm = expMOD = y = a^j
// -- Compute and display the seeds S(i+j), from the present seed S(i), using the previously calculated value of (a^j)MOD(m):
// S(i+j) = [(a**j MOD m)*S(i)] MOD m
// S_i = abMODm(m,S_i,AjMODm)
seed->x = abMODm(m1_RANECU, seed_input, y); // Using the input seed as the starting seed
// **** 2nd MLCG (repeating the previous calculation for the 2nd MLCG parameters):
leap = ((unsigned long long int)(history_batch+1))*(histories_per_thread*LEAP_DISTANCE);
y = 1;
z = a2_RANECU;
for(;;)
{
// (A2) Halve n, and store the integer part and the residue
if (0!=(leap&01)) // (bit-wise operation for MOD(leap,2), or leap%2 ==> proceed if leap is an odd number) Equivalent: t=(short)(leap%2);
{
leap >>= 1; // Halve n moving the bits 1 position right. Equivalent to: leap=(leap/2);
y = abMODm(m2_RANECU,z,y); // (A3) Multiply y by z: y = [z*y] MOD m
if (0==leap) break; // (A4) leap==0? ==> finish
}
else // (leap is even)
{
leap>>= 1; // Halve leap moving the bits 1 position right. Equivalent to: leap=(leap/2);
}
z = abMODm(m2_RANECU,z,z); // (A5) Square z: z = [z*z] MOD m
}
// AjMODm2 = y;
seed->y = abMODm(m2_RANECU, seed_input, y); // Using the input seed as the starting seed
}
/////////////////////////////////////////////////////////////////////
//! Calculate "(a1*a2) MOD m" with 32-bit integers and avoiding
//! the possible overflow, using the Russian Peasant approach
//! modulo m and the approximate factoring method, as described
//! in: L'Ecuyer and Cote, ACM Trans. Math. Soft. 17 (1991).
//!
//! This function has been adapted from "seedsMLCG.f", see:
//! Badal and Sempau, Computer Physics Communications 175 (2006)
//!
//! @param[in] m,a,s MLCG parameters
//! @return (a1*a2) MOD m
//
// Input: 0 < a1 < m
// 0 < a2 < m
//
// Return value: (a1*a2) MOD m
//
/////////////////////////////////////////////////////////////////////
__device__ __host__ // Function will be callable from host and also from device