From d0c4637b7b8aec0fedd1d9f209a1eafcb904a64a Mon Sep 17 00:00:00 2001 From: jesmigel Date: Wed, 17 Jul 2019 12:48:24 +1000 Subject: [PATCH 01/71] Replaced loop line num with encapsulation ID --- src/search.cxx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index fc2a42bb..fd7a47b9 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2474,7 +2474,7 @@ void MergeSubstructuresPhase(Options &opt, const Int_t nsubset, Particle *&Parts it might be possible to lower the cuts imposed. \todo ADACS optimisation request. Here the function could be altered to employ better parallelisation. Specifically, the loop over - substructures at a given level could be parallelized (see for loop line 2561). Currently, at a given level in the substructure hierarchy + substructures at a given level could be parallelized (see for loop commented with ENCAPSULATION-01). Currently, at a given level in the substructure hierarchy each object is searched sequentially but this does not need to be the case. It would require restructureing the loop and some of calls within the loop so that the available pool of threads over which to run in parallel for the callled subroutines is adaptive. (Or it might be simply more useful to not have the functions called within this loop parallelised. 
This loop invokes a few routines that have OpenMP @@ -2561,6 +2561,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse numcores=new Int_t[nsubsearch+1]; subpfofold=new Int_t[nsubsearch+1]; ns=0; + // START: ENCAPSULATION-01 //here loop over all sublevel groups that need to be searched for substructure for (Int_t i=1;i<=oldnsubsearch;i++) { subpfofold[i]=pfof[subpglist[i][0]]; From 3d87e5f519107a2ccee64d95626ea56f89775eb2 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Wed, 17 Jul 2019 13:39:50 +1000 Subject: [PATCH 02/71] COMMENTS: Parametisation --- src/search.cxx | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/search.cxx b/src/search.cxx index fd7a47b9..32692925 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2562,7 +2562,17 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse subpfofold=new Int_t[nsubsearch+1]; ns=0; // START: ENCAPSULATION-01 + /* - If the whole for loop is to be encapsulated, + ** the following variables are in consideration to be parametised: + ** oldnsubsearch + ** subpglist + ** pfof + ** subnumingroup + ** opt.icmrefadjust + ** ompsearchnum + */ //here loop over all sublevel groups that need to be searched for substructure + // ADACS: this loop proceses halos (that are independent) sequentially. This is unecessary. 
for (Int_t i=1;i<=oldnsubsearch;i++) { subpfofold[i]=pfof[subpglist[i][0]]; subPart=new Particle[subnumingroup[i]]; From c636718b1a314e5f20265b748af3a6900039c272 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Wed, 17 Jul 2019 13:43:14 +1000 Subject: [PATCH 03/71] COMMENTS: returned initial //ADACS --- src/search.cxx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/search.cxx b/src/search.cxx index 32692925..2dc9f7ea 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2579,6 +2579,8 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse for (Int_t j=0;j &Partsubse cm[0]=cmx;cm[1]=cmy;cm[2]=cmz; cmvel[0]=cmvelx;cmvel[1]=cmvely;cmvel[2]=cmvelz; for (int k=0;k<3;k++) {cm[k]/=mtotregion;cmvel[k]/=mtotregion;} + //ADACS: once phase-space CM calculated reset reference but again maybe unecessary parallelisation #ifdef USEOPENMP if (subnumingroup[i]>ompsearchnum) { #pragma omp parallel default(shared) @@ -2637,6 +2640,9 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse } #endif } + //ADACS: for large objects, extra processing steps are requried + //ADACS: Some of these subroutines make use of OpenMP. For this to continue + //ADACS: the pool of threads would have to be changed if (subnumingroup[i]>=MINSUBSIZE&&opt.foftype!=FOF6DCORE) { //now if object is large enough for phase-space decomposition and search, compare local field to bg field opt.Ncell=opt.Ncellfac*subnumingroup[i]; @@ -2670,6 +2676,8 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse CalcVelSigmaTensor(subnumingroup[i], subPart, sigma2x, sigma2y, sigma2z, eigvec, I); opt.HaloLocalSigmaV=opt.HaloSigmaV=pow(sigma2x*sigma2y*sigma2z,1.0/3.0); } + //ADACS: Here the object is searched. Not much of this uses OpenMP but there are + // one or two subroutines called within SearchSubset that do make use of OpenMP. 
subpfof=SearchSubset(opt,subnumingroup[i],subnumingroup[i],subPart,subngroup[i],sublevel,&numcores[i]); //now if subngroup>0 change the pfof ids of these particles in question and see if there are any substrucures that can be searched again. //the group ids must be stored along with the number of groups in this substructure that will be searched at next level. @@ -2710,6 +2718,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse delete[] subpfof; delete[] subPart; //increase tot num of objects at sublevel + //ADACS: this would need a reduction at the end. ns+=subngroup[i]; } //if objects have been found adjust the StrucLevelData From ea799d8d18b4cb1d3b24b1cf1d73c7fa152fa506 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Wed, 17 Jul 2019 17:22:57 +1000 Subject: [PATCH 04/71] COMMENT: added end to ENCAPSULATION-01 --- src/search.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/search.cxx b/src/search.cxx index 2dc9f7ea..81e8e958 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2721,6 +2721,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse //ADACS: this would need a reduction at the end. ns+=subngroup[i]; } + // END: ENCAPSULATION-01 //if objects have been found adjust the StrucLevelData //this stores the address of the parent particle and pfof along with child substructure particle and pfof if (ns>0) { From add29954765de58f4f0917156e4a0e0fd976fada Mon Sep 17 00:00:00 2001 From: jesmigel Date: Wed, 17 Jul 2019 17:55:18 +1000 Subject: [PATCH 05/71] Updated comment tags --- src/search.cxx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 81e8e958..e66d0a7e 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2474,7 +2474,7 @@ void MergeSubstructuresPhase(Options &opt, const Int_t nsubset, Particle *&Parts it might be possible to lower the cuts imposed. \todo ADACS optimisation request. 
Here the function could be altered to employ better parallelisation. Specifically, the loop over - substructures at a given level could be parallelized (see for loop commented with ENCAPSULATION-01). Currently, at a given level in the substructure hierarchy + substructures at a given level could be parallelized (see for loop commented with ENCAPSULATE-01). Currently, at a given level in the substructure hierarchy each object is searched sequentially but this does not need to be the case. It would require restructureing the loop and some of calls within the loop so that the available pool of threads over which to run in parallel for the callled subroutines is adaptive. (Or it might be simply more useful to not have the functions called within this loop parallelised. This loop invokes a few routines that have OpenMP @@ -2561,7 +2561,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse numcores=new Int_t[nsubsearch+1]; subpfofold=new Int_t[nsubsearch+1]; ns=0; - // START: ENCAPSULATION-01 + // START: ENCAPSULATE-01 /* - If the whole for loop is to be encapsulated, ** the following variables are in consideration to be parametised: ** oldnsubsearch @@ -2721,7 +2721,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse //ADACS: this would need a reduction at the end. 
ns+=subngroup[i]; } - // END: ENCAPSULATION-01 + // END: ENCAPSULATE-01 //if objects have been found adjust the StrucLevelData //this stores the address of the parent particle and pfof along with child substructure particle and pfof if (ns>0) { From 85c8f0bf71a19580e6a84d09892f93f46b2b0e6a Mon Sep 17 00:00:00 2001 From: jesmigel Date: Tue, 20 Aug 2019 12:40:55 +1000 Subject: [PATCH 06/71] TEST ENCAPSULATION: ENCAPSULATE-01 --- src/search.cxx | 99 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 18 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index e66d0a7e..f8b92dba 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2141,6 +2141,30 @@ private(i,tid,Pval,x1,D2,dval,mval,pid,pidcore) } } +// ENCAPSULATED: ECAPSULATION-01 +Double_t * GetMass_SubPart(Double_t cmx, Double_t cmy, Double_t cmz, Double_t cmvelx,Double_t cmvely, Double_t cmvelz, Double_t mtotregion, + Int_t *subnumingroup, Particle *subPart, Int_t i) +{ + Int_t j; + Double_t array_output[7]; + for (j=0;j &Partsubse Double_t cmx=0.,cmy=0.,cmz=0.,cmvelx=0.,cmvely=0.,cmvelz=0.; Double_t mtotregion=0.0; Int_t j; + Double_t * test_encapsulate; if (opt.icmrefadjust) { #ifdef USEOPENMP if (subnumingroup[i]>ompsearchnum) { #pragma omp parallel default(shared) { #pragma omp for private(j) reduction(+:mtotregion,cmx,cmy,cmz,cmvelx,cmvely,cmvelz) - for (j=0;j Date: Tue, 20 Aug 2019 12:55:34 +1000 Subject: [PATCH 07/71] Encapsulation failed. 
Reverted back to old variable assignment logic but printing out values yielded by the new function --- src/search.cxx | 58 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index f8b92dba..fadfec83 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2636,26 +2636,28 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse subPart, i ); - cmx=test_encapsulate[0]; - cmy=test_encapsulate[1]; - cmz=test_encapsulate[2]; - cmvelx=test_encapsulate[3]; - cmvely=test_encapsulate[4]; - cmvelz=test_encapsulate[5]; - mtotregion=test_encapsulate[6]; + + for (j=0;j &Partsubse subPart, i ); - cmx=test_encapsulate[0]; - cmy=test_encapsulate[1]; - cmz=test_encapsulate[2]; - cmvelx=test_encapsulate[3]; - cmvely=test_encapsulate[4]; - cmvelz=test_encapsulate[5]; - mtotregion=test_encapsulate[6]; + for (j=0;j Date: Tue, 20 Aug 2019 13:07:57 +1000 Subject: [PATCH 08/71] Added cout against cmx variable --- src/search.cxx | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/search.cxx b/src/search.cxx index e66d0a7e..c16b2c3b 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2599,6 +2599,14 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse cmvelz+=subPart[j].Vz()*subPart[j].GetMass(); mtotregion+=subPart[j].GetMass(); } + cout<<"ORIGINAL[cmx]: "< &Partsubse cmvelz+=subPart[j].Vz()*subPart[j].GetMass(); mtotregion+=subPart[j].GetMass(); } + cout<<"ORIGINAL[cmx]: "< Date: Tue, 20 Aug 2019 13:19:33 +1000 Subject: [PATCH 09/71] TEST: encapsulation --- src/search.cxx | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 5deceaf8..fa9f6bd2 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2607,6 +2607,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse // ADACS: (save for very high res zooms of individual objects containing 
billions of particles Double_t cmx=0.,cmy=0.,cmz=0.,cmvelx=0.,cmvely=0.,cmvelz=0.; Double_t mtotregion=0.0; + Double_t * test_encapsulate; Int_t j; if (opt.icmrefadjust) { #ifdef USEOPENMP @@ -2623,7 +2624,19 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse cmvelz+=subPart[j].Vz()*subPart[j].GetMass(); mtotregion+=subPart[j].GetMass(); } - cout<<"ORIGINAL[cmx]: "< &Partsubse cmvelz+=subPart[j].Vz()*subPart[j].GetMass(); mtotregion+=subPart[j].GetMass(); } - cout<<"ORIGINAL[cmx]: "< Date: Tue, 20 Aug 2019 13:36:53 +1000 Subject: [PATCH 10/71] ENCAPSULATION-01: function implementation --- src/search.cxx | 66 ++++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index fa9f6bd2..7993b0bb 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2615,15 +2615,15 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse #pragma omp parallel default(shared) { #pragma omp for private(j) reduction(+:mtotregion,cmx,cmy,cmz,cmvelx,cmvely,cmvelz) - for (j=0;j &Partsubse subPart, i ); - cout<<"ORIGINAL[cmx]: "< &Partsubse subPart, i ); - cout<<"ORIGINAL[cmx]: "< Date: Tue, 20 Aug 2019 13:47:01 +1000 Subject: [PATCH 11/71] ENCAPSULATION-01: encapsulation N/A due to pragma directive --- src/search.cxx | 58 ++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 7993b0bb..49b90f8e 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2615,34 +2615,36 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse #pragma omp parallel default(shared) { #pragma omp for private(j) reduction(+:mtotregion,cmx,cmy,cmz,cmvelx,cmvely,cmvelz) - // for (j=0;j Date: Tue, 20 Aug 2019 15:14:32 +1000 Subject: [PATCH 12/71] Segmentation fault during runtime --- src/search.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cxx 
b/src/search.cxx index 49b90f8e..e2672f4e 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2607,7 +2607,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse // ADACS: (save for very high res zooms of individual objects containing billions of particles Double_t cmx=0.,cmy=0.,cmz=0.,cmvelx=0.,cmvely=0.,cmvelz=0.; Double_t mtotregion=0.0; - Double_t * test_encapsulate; + Double_t * test_encapsulate = NULL; Int_t j; if (opt.icmrefadjust) { #ifdef USEOPENMP From 42a3d3b093ca4a44974460af0e89afd3b63e9754 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Tue, 20 Aug 2019 15:29:24 +1000 Subject: [PATCH 13/71] ENCAPSULATION-01: used pointer to array for segfault --- src/search.cxx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index e2672f4e..6f8f1f8c 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2607,7 +2607,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse // ADACS: (save for very high res zooms of individual objects containing billions of particles Double_t cmx=0.,cmy=0.,cmz=0.,cmvelx=0.,cmvely=0.,cmvelz=0.; Double_t mtotregion=0.0; - Double_t * test_encapsulate = NULL; + Double_t * test_encapsulate; Int_t j; if (opt.icmrefadjust) { #ifdef USEOPENMP @@ -2670,13 +2670,13 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse subPart, i ); - cmx=test_encapsulate[0]; - cmy=test_encapsulate[1]; - cmz=test_encapsulate[2]; - cmvelx=test_encapsulate[3]; - cmvely=test_encapsulate[4]; - cmvelz=test_encapsulate[5]; - mtotregion=test_encapsulate[6]; + cmx=*(test_encapsulate + 0); + cmy=*(test_encapsulate + 1); + cmz=*(test_encapsulate + 2); + cmvelx=*(test_encapsulate + 3); + cmvely=*(test_encapsulate + 4); + cmvelz=*(test_encapsulate + 5); + mtotregion=*(test_encapsulate + 6); #ifdef USEOPENMP } #endif From 68add6bc069f9f9c26eea6922f6f028935955e87 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Wed, 21 Aug 2019 09:28:09 +1000 Subject: [PATCH 
14/71] ENCAPSULATION-01: Segfault fix by static variables --- src/search.cxx | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index 6f8f1f8c..7a23ff64 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2145,8 +2145,15 @@ private(i,tid,Pval,x1,D2,dval,mval,pid,pidcore) Double_t * GetMass_SubPart(Double_t cmx, Double_t cmy, Double_t cmz, Double_t cmvelx,Double_t cmvely, Double_t cmvelz, Double_t mtotregion, Int_t *subnumingroup, Particle *subPart, Int_t i) { + static Double_t array_output[7]; + array_output[0]=cmx; + array_output[1]=cmy; + array_output[2]=cmz; + array_output[3]=cmvelx; + array_output[4]=cmvely; + array_output[5]=cmvelz; + array_output[6]=mtotregion; Int_t j; - Double_t array_output[7]; for (j=0;j Date: Thu, 22 Aug 2019 02:29:43 +0800 Subject: [PATCH 15/71] Python script to compare catalogs Added a script that compares two VR catalogs. The idea is to compare a reference output to a comparison one produced with a newer version of VR. Useful for testing changes. --- examples/catalocomparison_VRinputonly.py | 118 +++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 examples/catalocomparison_VRinputonly.py diff --git a/examples/catalocomparison_VRinputonly.py b/examples/catalocomparison_VRinputonly.py new file mode 100644 index 00000000..85a564d7 --- /dev/null +++ b/examples/catalocomparison_VRinputonly.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python2.7 +# -*- coding: utf-8 -*- + +""" + + This python script reads two input VR particle catalog files and quickly compares them. It determines + if the catalogs match. If they do not, further tests are run. For a perfect match, particles are in the + same order. 
Information is passed to the script via a simple text file that has the following format + VRrefbasefilename VRrefinputformat + VRcompbasefilename VRcompinputformat + +""" + + +import sys,os,string,time,re,struct +from subprocess import call +import numpy as np + +#load VR python routines +pathtovelociraptor=sys.argv[0].split('examples')[0] +sys.path.append(pathtovelociraptor+'/tools/') +import velociraptor_python_tools as vpt + +def PerfectCrossMatch(VRdata): + iflag1 = (VRdata['ref']['properties']['num'] != VRdata['comp']['properties']['num']) + iflag2 = (VRdata['ref']['particles']['Npart'].size != VRdata['comp']['particles']['Npart'].size) + if (iflag1): + print('Catalog contains different number of objects ... Not perfect match') + if (iflag2): + print('Particle catalog contains different number of particles ... Not perfect match') + if (iflag1 or iflag2): + return 0 + num = VRdata['ref']['particles']['Npart'].size + ref = np.concatenate(VRdata['ref']['particles']['Particle_IDs']) + comp = np.concatenate(VRdata['comp']['particles']['Particle_IDs']) + if (np.array_equal(ref,comp)): + if (np.where(np.isin(ref,comp))[0].size == num): + print('Particle catalog contains same number of particles but IDs in different order ... Not perfect match but close') + return 1 + else: + print('Particle catalog contains same number of particles but IDs differ ... 
Not perfect match') + return 0 + return 2 + +def CheckProperties(VRdata): + iflag1 = (VRdata['ref']['properties']['num'] != VRdata['comp']['properties']['num']) + iflag2 = (VRdata['ref']['particles']['Npart'].size != VRdata['comp']['particles']['Npart'].size) + proplist = ['Mass_tot', 'Vmax'] + if (iflag1 == True): + return 0 + partdiff = np.zeros(VRdata['ref']['properties']['num'], dtype = np.int32) + propdiff = np.zeros(VRdata['ref']['properties']['num'], dtype = np.int32) + num = VRdata['ref']['properties']['num'] + #number of objects the same but particle list ordered differently + time1 = time.clock() + for i in range(num): + if not np.array_equal(VRdata['ref']['particles']['Particle_IDs'][i], VRdata['comp']['particles']['Particle_IDs'][i]: + partdiff[i] = 1 + for prop in proplist: + if (VRdata['ref']['properties'][prop][i] != VRdata['comp']['properties'][prop][i]): + propdiff[i] = 1 + numpartdiff = np.sum(partdiff) + numpropdiff = np.sum(propdiff) + print('Finished processing individual objects in ', time.clock()-time1) + if (numpartdiff > 0): + print('Difference in particles', numpartdiff, ' of', num) + if (numpropdiff > 0): + print('Difference in properties', numpropdiff, ' of', num) + if (numpropdiff == 0 and numpartdiff > 0): + print('Difference in order of particles but not resulting properties, nor number of particles in each object') + return 1 + return 0 + +#if __name__ == '__main__': + +print('Running', sys.argv[0]) +print('Input is file name of config file') +print('Config file should contain the following') +print('VRrefbasefilename VRrefinputformat') +print('VRcompbasefilename VRcompinputformat') + +if (os.path.isfile(sys.argv[1])==False): + print("Missing input info file",sys.argv[1]) + exit(1) + +#load the plot info file, +print("Reading reference VR file", sys.argv[1]) +infofile=open(sys.argv[1], 'r') +VRdata = {'label': None} + +time1=time.clock() +for label in ['ref', 'comp']: + data = infofile.strip().split(' ') + VRdata[label]= {'filename': 
None, 'inputformat': None, 'particles': None, 'properties': None, 'num': 0} + VRdata[label]['filename'], VRdata[label]['inputformat'] = data[0], np.int32(data[1]) + print('Reading ',label,' stored in ',VRdata[label]['filename']) + VRdata[label]['particles'] = ReadParticleDataFile(VRdata[label]['filename'], VRdata[label]['inputformat']) + VRdata[label]['properties'], numhalos = ReadPropertyFile(VRdata[label]['filename'], VRdata[label]['inputformat']) + VRdata[label]['num'] = numhalos + +print('Finished reading information', time.clock()-time1) +print('Checking for perfect match') +iflag = PerfectCrossMatch(VRdata) +if (iflag == 1): + CheckProperties(VRdata) + +# Return an overall PASS or FAIL +if iflag == 0: + print('\n*********************') + print('* Comparison FAILED *') + print('*********************\n') + exit(1) +else: + print('\n*********************') + print('* Comparison PASSED *') + print('*********************\n') + exit(0) + From 4622c9f4d0ebe78d27d1b0681b3084d9a18eefdc Mon Sep 17 00:00:00 2001 From: Pascal Jahan Elahi Date: Thu, 22 Aug 2019 02:56:00 +0800 Subject: [PATCH 16/71] Update to suggested encapsulation for search I have suggested an encapsulation, making use of existing functions. Will need alterations to make use of a maximum thread pool. 
--- src/search.cxx | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 7a23ff64..784ff6e2 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2173,6 +2173,23 @@ Double_t * GetMass_SubPart(Double_t cmx, Double_t cmy, Double_t cmz, Double_t cm return array_output; } +void AdjustSubPartToPhaseCM(Int_t num, Particle *subPart, GMatrix &cmphase) +{ + int nthreads = 1; +#ifdef USEOPENMP + nthreads = max(1, (int)(num/(float)ompsearchnum)); + nthreads = min(nthreads,omp_get_max_threads()); +#pragma omp parallel for \ +default(shared) \ +num_threads(nthreads) +#endif + for (auto j=0;j &Partsubset, Int_t *&pfof, Int_t &ngroup, Int_t &nhalos, PropData *pdata) { @@ -2608,10 +2625,19 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse subpfofold[i]=pfof[subpglist[i][0]]; subPart=new Particle[subnumingroup[i]]; for (Int_t j=0;j &Partsubse } #endif } + */ //ADACS: for large objects, extra processing steps are requried //ADACS: Some of these subroutines make use of OpenMP. For this to continue //ADACS: the pool of threads would have to be changed @@ -2748,8 +2775,8 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse CalcVelSigmaTensor(subnumingroup[i], subPart, sigma2x, sigma2y, sigma2z, eigvec, I); opt.HaloLocalSigmaV=opt.HaloSigmaV=pow(sigma2x*sigma2y*sigma2z,1.0/3.0); } - //ADACS: Here the object is searched. Not much of this uses OpenMP but there are - // one or two subroutines called within SearchSubset that do make use of OpenMP. + //ADACS: Here the object is searched. Not much of this uses OpenMP but there are + // one or two subroutines called within SearchSubset that do make use of OpenMP. subpfof=SearchSubset(opt,subnumingroup[i],subnumingroup[i],subPart,subngroup[i],sublevel,&numcores[i]); //now if subngroup>0 change the pfof ids of these particles in question and see if there are any substrucures that can be searched again. 
//the group ids must be stored along with the number of groups in this substructure that will be searched at next level. @@ -2782,10 +2809,10 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse } } } - for (j=0;j0) pfof[subpglist[i][j]]=ngroup+ngroupidoffset+subpfof[j]; + for (Int_t j=0;j0) pfof[subpglist[i][j]]=ngroup+ngroupidoffset+subpfof[j]; ngroupidoffset+=subngroup[i]; //now alter subsubpglist so that index pointed is global subset index as global subset is used to get the particles to be searched for subsubstructure - for (j=1;j<=subngroup[i];j++) for (Int_t k=0;k Date: Mon, 26 Aug 2019 10:28:01 +1000 Subject: [PATCH 17/71] Returned original code in preparation for encapsulating entire ENCAPSULATE-01 block --- src/search.cxx | 62 +++++++++----------------------------------------- 1 file changed, 11 insertions(+), 51 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 784ff6e2..01e49d20 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2633,7 +2633,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse //this routine is within this file, also has internal parallelisation AdjustSubPartToPhaseCM(subnumingroup[i], subPart, cmphase); } - /* + //now if low statistics, then possible that very central regions of subhalo will be higher due to cell size used and Nv search //so first determine centre of subregion // ADACS: here is an example of unecessary parallelisation in most cases @@ -2657,59 +2657,19 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse cmvelz+=subPart[j].Vz()*subPart[j].GetMass(); mtotregion+=subPart[j].GetMass(); } - // ADACS: encapsulation forbidden - // pragma directive expects for loop function - // test_encapsulate = GetMass_SubPart( - // cmx, - // cmy, - // cmz, - // cmvelx, - // cmvely, - // cmvelz, - // mtotregion, - // subnumingroup, - // subPart, - // i - // ); - // cmx=test_encapsulate[0]; - // cmy=test_encapsulate[1]; - // cmz=test_encapsulate[2]; - // 
cmvelx=test_encapsulate[3]; - // cmvely=test_encapsulate[4]; - // cmvelz=test_encapsulate[5]; - // mtotregion=test_encapsulate[6]; } } else { #endif - // for (j=0;j &Partsubse } #endif } - */ + //ADACS: for large objects, extra processing steps are requried //ADACS: Some of these subroutines make use of OpenMP. For this to continue //ADACS: the pool of threads would have to be changed From 78431c428ba3ae37f1f6209422ddb999459c1a99 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 26 Aug 2019 11:38:42 +1000 Subject: [PATCH 18/71] Syntax error. Added closing ) --- examples/catalocomparison_VRinputonly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/catalocomparison_VRinputonly.py b/examples/catalocomparison_VRinputonly.py index 85a564d7..8afdc9f8 100644 --- a/examples/catalocomparison_VRinputonly.py +++ b/examples/catalocomparison_VRinputonly.py @@ -54,7 +54,7 @@ def CheckProperties(VRdata): #number of objects the same but particle list ordered differently time1 = time.clock() for i in range(num): - if not np.array_equal(VRdata['ref']['particles']['Particle_IDs'][i], VRdata['comp']['particles']['Particle_IDs'][i]: + if not np.array_equal(VRdata['ref']['particles']['Particle_IDs'][i], VRdata['comp']['particles']['Particle_IDs'][i]): partdiff[i] = 1 for prop in proplist: if (VRdata['ref']['properties'][prop][i] != VRdata['comp']['properties'][prop][i]): From aa9ea61a31237bae134e828718646c879e4c24cd Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 26 Aug 2019 13:01:11 +1000 Subject: [PATCH 19/71] Removed proof of concept encapsulation --- src/search.cxx | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 01e49d20..cd70c580 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2142,37 +2142,6 @@ private(i,tid,Pval,x1,D2,dval,mval,pid,pidcore) } // ENCAPSULATED: ECAPSULATION-01 -Double_t * GetMass_SubPart(Double_t cmx, Double_t cmy, Double_t cmz, Double_t cmvelx,Double_t 
cmvely, Double_t cmvelz, Double_t mtotregion, - Int_t *subnumingroup, Particle *subPart, Int_t i) -{ - static Double_t array_output[7]; - array_output[0]=cmx; - array_output[1]=cmy; - array_output[2]=cmz; - array_output[3]=cmvelx; - array_output[4]=cmvely; - array_output[5]=cmvelz; - array_output[6]=mtotregion; - Int_t j; - for (j=0;j Date: Mon, 26 Aug 2019 13:04:05 +1000 Subject: [PATCH 20/71] Deleted unused test array --- src/search.cxx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index cd70c580..781c30bf 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2609,7 +2609,6 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse // ADACS: (save for very high res zooms of individual objects containing billions of particles Double_t cmx=0.,cmy=0.,cmz=0.,cmvelx=0.,cmvely=0.,cmvelz=0.; Double_t mtotregion=0.0; - Double_t * test_encapsulate; Int_t j; if (opt.icmrefadjust) { #ifdef USEOPENMP From b9ae800896a1cc49348a1dd508eb7e7ff8f51021 Mon Sep 17 00:00:00 2001 From: pelahi Date: Wed, 28 Aug 2019 10:34:03 +0800 Subject: [PATCH 21/71] Bug fix, missing readline call. --- examples/catalocomparison_VRinputonly.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/examples/catalocomparison_VRinputonly.py b/examples/catalocomparison_VRinputonly.py index 8afdc9f8..f7ad9a50 100644 --- a/examples/catalocomparison_VRinputonly.py +++ b/examples/catalocomparison_VRinputonly.py @@ -8,7 +8,7 @@ same order. 
Information is passed to the script via a simple text file that has the following format VRrefbasefilename VRrefinputformat VRcompbasefilename VRcompinputformat - + """ @@ -23,10 +23,10 @@ def PerfectCrossMatch(VRdata): iflag1 = (VRdata['ref']['properties']['num'] != VRdata['comp']['properties']['num']) - iflag2 = (VRdata['ref']['particles']['Npart'].size != VRdata['comp']['particles']['Npart'].size) + iflag2 = (VRdata['ref']['particles']['Npart'].size != VRdata['comp']['particles']['Npart'].size) if (iflag1): print('Catalog contains different number of objects ... Not perfect match') - if (iflag2): + if (iflag2): print('Particle catalog contains different number of particles ... Not perfect match') if (iflag1 or iflag2): return 0 @@ -41,11 +41,11 @@ def PerfectCrossMatch(VRdata): print('Particle catalog contains same number of particles but IDs differ ... Not perfect match') return 0 return 2 - + def CheckProperties(VRdata): iflag1 = (VRdata['ref']['properties']['num'] != VRdata['comp']['properties']['num']) iflag2 = (VRdata['ref']['particles']['Npart'].size != VRdata['comp']['particles']['Npart'].size) - proplist = ['Mass_tot', 'Vmax'] + proplist = ['Mass_tot', 'Vmax'] if (iflag1 == True): return 0 partdiff = np.zeros(VRdata['ref']['properties']['num'], dtype = np.int32) @@ -56,11 +56,11 @@ def CheckProperties(VRdata): for i in range(num): if not np.array_equal(VRdata['ref']['particles']['Particle_IDs'][i], VRdata['comp']['particles']['Particle_IDs'][i]): partdiff[i] = 1 - for prop in proplist: + for prop in proplist: if (VRdata['ref']['properties'][prop][i] != VRdata['comp']['properties'][prop][i]): propdiff[i] = 1 numpartdiff = np.sum(partdiff) - numpropdiff = np.sum(propdiff) + numpropdiff = np.sum(propdiff) print('Finished processing individual objects in ', time.clock()-time1) if (numpartdiff > 0): print('Difference in particles', numpartdiff, ' of', num) @@ -90,7 +90,7 @@ def CheckProperties(VRdata): time1=time.clock() for label in ['ref', 'comp']: - data = 
infofile.strip().split(' ') + data = infofile.readline().strip().split(' ') VRdata[label]= {'filename': None, 'inputformat': None, 'particles': None, 'properties': None, 'num': 0} VRdata[label]['filename'], VRdata[label]['inputformat'] = data[0], np.int32(data[1]) print('Reading ',label,' stored in ',VRdata[label]['filename']) @@ -115,4 +115,3 @@ def CheckProperties(VRdata): print('* Comparison PASSED *') print('*********************\n') exit(0) - From e7a036570817ba14c7dc91cdebef6a805b6d126e Mon Sep 17 00:00:00 2001 From: pelahi Date: Wed, 28 Aug 2019 11:47:05 +0800 Subject: [PATCH 22/71] Bug fix in python comparison script --- examples/catalocomparison_VRinputonly.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/catalocomparison_VRinputonly.py b/examples/catalocomparison_VRinputonly.py index f7ad9a50..77adef18 100644 --- a/examples/catalocomparison_VRinputonly.py +++ b/examples/catalocomparison_VRinputonly.py @@ -94,8 +94,8 @@ def CheckProperties(VRdata): VRdata[label]= {'filename': None, 'inputformat': None, 'particles': None, 'properties': None, 'num': 0} VRdata[label]['filename'], VRdata[label]['inputformat'] = data[0], np.int32(data[1]) print('Reading ',label,' stored in ',VRdata[label]['filename']) - VRdata[label]['particles'] = ReadParticleDataFile(VRdata[label]['filename'], VRdata[label]['inputformat']) - VRdata[label]['properties'], numhalos = ReadPropertyFile(VRdata[label]['filename'], VRdata[label]['inputformat']) + VRdata[label]['particles'] = vpt.ReadParticleDataFile(VRdata[label]['filename'], VRdata[label]['inputformat']) + VRdata[label]['properties'], numhalos = vpt.ReadPropertyFile(VRdata[label]['filename'], VRdata[label]['inputformat']) VRdata[label]['num'] = numhalos print('Finished reading information', time.clock()-time1) From 4a74f32ff12f230a9e438b11db017c083a55df84 Mon Sep 17 00:00:00 2001 From: pelahi Date: Wed, 28 Aug 2019 12:22:48 +0800 Subject: [PATCH 23/71] bug fix --- 
examples/catalocomparison_VRinputonly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/catalocomparison_VRinputonly.py b/examples/catalocomparison_VRinputonly.py index 77adef18..579c8178 100644 --- a/examples/catalocomparison_VRinputonly.py +++ b/examples/catalocomparison_VRinputonly.py @@ -96,7 +96,7 @@ def CheckProperties(VRdata): print('Reading ',label,' stored in ',VRdata[label]['filename']) VRdata[label]['particles'] = vpt.ReadParticleDataFile(VRdata[label]['filename'], VRdata[label]['inputformat']) VRdata[label]['properties'], numhalos = vpt.ReadPropertyFile(VRdata[label]['filename'], VRdata[label]['inputformat']) - VRdata[label]['num'] = numhalos + VRdata[label]['properties']['num'] = numhalos print('Finished reading information', time.clock()-time1) print('Checking for perfect match') From e8d8ef6f4e2ce057eb259e44bbdfe69583432393 Mon Sep 17 00:00:00 2001 From: pelahi Date: Wed, 28 Aug 2019 13:51:08 +0800 Subject: [PATCH 24/71] added script to profile VR --- examples/profile_build.sh | 48 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100755 examples/profile_build.sh diff --git a/examples/profile_build.sh b/examples/profile_build.sh new file mode 100755 index 00000000..e4625c0a --- /dev/null +++ b/examples/profile_build.sh @@ -0,0 +1,48 @@ +#!/bin/bash -l +# This script profiles stf using perf and hotspot to produce flame graphs +# It produces a build directory with a certian labels +# runs cmake with the desired options, runs the code on the desired input +# using the desired config and running the hotspot stuff +# it assumes the existence of perf that is in linux-tools-common +# also makes use of wget + +#script that produces lots of qsub scripts to run velociraptor on simulation output +if [ $# -eq 0 ] || [ "$1" == "--help" ] +then + echo "This script profiles a VR run." 
+ echo "The interface is as follows:" + echo "buildlabel buildoptions VRargs VRconfig" + echo "buildlabel: string, label for the build" + echo "buildoptions: string, cmake options for build. Ex: \" -DVR_USE_GAS=ON \"" + echo "VRargs: string, options for VR. Ex: \"-i inputfile -I 2 -s 1 -o outputfile \"" + echo "VRconfig: string, path and file name of the config file" + exit +fi + + +#initial and final snapshot numbers +buildlabel=$1 +buildoptions=$2 +VRargs=$3 +VRconfig=$4 + +workingdir=`pwd` +scriptdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +#build exe +cd ${scriptdir}../ +mkdir build-${buildlabel} +cd build-${buildlabel} +rm -rf * +cmake ${buildoptions} -DCMAKE_BUILD_TYPE=RelWithDebugInfo ../ +make -j + +wget https://github.com/KDAB/hotspot/releases/download/v1.1.0/hotspot-v1.1.0-x86_64.AppImage +chmod +x hotspot-v1.1.0-x86_64.AppImage + +#run code +#this will have produced a perf.data +perf record ./stf ${VRargs} -C ${VRconfig} + +#run hotspot +./hotspot-v1.1.0-x86_64.AppImage perf.data From c5d3bf6fd52b7ea858b911181eb0c2d893227acb Mon Sep 17 00:00:00 2001 From: pelahi Date: Wed, 28 Aug 2019 13:55:50 +0800 Subject: [PATCH 25/71] bug fix in script. 
--- examples/profile_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/profile_build.sh b/examples/profile_build.sh index e4625c0a..bd6111b0 100755 --- a/examples/profile_build.sh +++ b/examples/profile_build.sh @@ -30,7 +30,7 @@ workingdir=`pwd` scriptdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" #build exe -cd ${scriptdir}../ +cd ${scriptdir}/../ mkdir build-${buildlabel} cd build-${buildlabel} rm -rf * From 2535c574b4112879618f74390fa45a558a095ded Mon Sep 17 00:00:00 2001 From: pelahi Date: Thu, 29 Aug 2019 14:21:40 +0800 Subject: [PATCH 26/71] Quick update to encapsulation --- src/search.cxx | 89 +++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 44 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 781c30bf..4ecd36ce 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2596,13 +2596,13 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse for (Int_t j=0;j &Partsubse Int_t j; if (opt.icmrefadjust) { #ifdef USEOPENMP - if (subnumingroup[i]>ompsearchnum) { + if (subnumingroup[i]>ompsearchnum) { #pragma omp parallel default(shared) { #pragma omp for private(j) reduction(+:mtotregion,cmx,cmy,cmz,cmvelx,cmvely,cmvelz) - for (j=0;jompsearchnum) { + if (subnumingroup[i]>ompsearchnum) { #pragma omp parallel default(shared) { #pragma omp for private(j) - for (j=0;j Date: Thu, 29 Aug 2019 16:29:41 +1000 Subject: [PATCH 27/71] Rollback to baseline code --- src/search.cxx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 781c30bf..9f459d5d 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2596,12 +2596,12 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse for (Int_t j=0;j Date: Thu, 29 Aug 2019 14:31:26 +0800 Subject: [PATCH 28/71] Bug fix and code format update in encapsulation region. 
--- src/search.cxx | 81 ++++++++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 4ecd36ce..61188ec2 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2607,65 +2607,74 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse //so first determine centre of subregion // ADACS: here is an example of unecessary parallelisation in most cases // ADACS: (save for very high res zooms of individual objects containing billions of particles - Double_t cmx=0.,cmy=0.,cmz=0.,cmvelx=0.,cmvely=0.,cmvelz=0.; - Double_t mtotregion=0.0; - Int_t j; if (opt.icmrefadjust) { + Double_t cmx=0.,cmy=0.,cmz=0.,cmvelx=0.,cmvely=0.,cmvelz=0.; + Double_t mtotregion=0.0; #ifdef USEOPENMP - if (subnumingroup[i]>ompsearchnum) { + if (subnumingroup[i]>ompsearchnum) + { #pragma omp parallel default(shared) { -#pragma omp for private(j) reduction(+:mtotregion,cmx,cmy,cmz,cmvelx,cmvely,cmvelz) - for (j=0;jompsearchnum) { + if (subnumingroup[i]>ompsearchnum) + { #pragma omp parallel default(shared) { -#pragma omp for private(j) - for (j=0;j Date: Thu, 29 Aug 2019 18:04:58 +1000 Subject: [PATCH 29/71] Commented suggested block --- src/search.cxx | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index ca0ebfc0..650d54bf 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2602,11 +2602,6 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse // //this routine is within this file, also has internal parallelisation // AdjustSubPartToPhaseCM(subnumingroup[i], subPart, cmphase); // } -<<<<<<< HEAD - -======= - ->>>>>>> 96dd40b78674085d929c4c4a2a8f79abce31819e //now if low statistics, then possible that very central regions of subhalo will be higher due to cell size used and Nv search //so first determine centre of subregion // ADACS: here is an example of unecessary parallelisation in most cases From efe064ba937424b05dcdd9810fe0924d0d2c651d 
Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 9 Sep 2019 09:22:41 +1000 Subject: [PATCH 30/71] ENCAPSULATE-01: Remiplemented PJE suggestion and updated statemets using cmvel by accessing contents of returned cm by CalcPhaseCM --- src/search.cxx | 84 ++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 650d54bf..d2d0cc9e 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2609,41 +2609,45 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse if (opt.icmrefadjust) { Double_t cmx=0.,cmy=0.,cmz=0.,cmvelx=0.,cmvely=0.,cmvelz=0.; Double_t mtotregion=0.0; -#ifdef USEOPENMP - if (subnumingroup[i]>ompsearchnum) - { -#pragma omp parallel default(shared) -{ -#pragma omp for reduction(+:mtotregion,cmx,cmy,cmz,cmvelx,cmvely,cmvelz) - for (Int_t j=0;jompsearchnum) +// { +//#pragma omp parallel default(shared) +//{ +//#pragma omp for reduction(+:mtotregion,cmx,cmy,cmz,cmvelx,cmvely,cmvelz) +// for (Int_t j=0;j &Partsubse { for (int k=0;k<3;k++) { - subPart[j].SetPosition(k,subPart[j].GetPosition(k)-cm[k]);subPart[j].SetVelocity(k,subPart[j].GetVelocity(k)-cmvel[k]); + // subPart[j].SetPosition(k,subPart[j].GetPosition(k)-cm[k]); + //subPart[j].SetVelocity(k,subPart[j].GetVelocity(k)-cmvel[k]); + subPart[j].SetPosition(k,subPart[j].GetPosition(k)-cm[k,0]); + subPart[j].SetVelocity(k,subPart[j].GetVelocity(k)-cm[k+3,0]); } } } @@ -2668,7 +2675,10 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse { for (int k=0;k<3;k++) { - subPart[j].SetPosition(k,subPart[j].GetPosition(k)-cm[k]);subPart[j].SetVelocity(k,subPart[j].GetVelocity(k)-cmvel[k]); + //subPart[j].SetPosition(k,subPart[j].GetPosition(k)-cm[k]); + //subPart[j].SetVelocity(k,subPart[j].GetVelocity(k)-cmvel[k]); + subPart[j].SetPosition(k,subPart[j].GetPosition(k)-cm[k,0]); + subPart[j].SetVelocity(k,subPart[j].GetVelocity(k)-cm[k+3,0]); } } #ifdef USEOPENMP From 
2a7a988e803ada49d6e9637e6200c33d25f0959b Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 9 Sep 2019 15:27:44 +1000 Subject: [PATCH 31/71] Print variables to be compared --- examples/catalocomparison_VRinputonly.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/examples/catalocomparison_VRinputonly.py b/examples/catalocomparison_VRinputonly.py index 9387a72b..47997d60 100644 --- a/examples/catalocomparison_VRinputonly.py +++ b/examples/catalocomparison_VRinputonly.py @@ -22,8 +22,21 @@ import velociraptor_python_tools as vpt def PerfectCrossMatch(VRdata): - iflag1 = (VRdata['ref']['properties']['num'] != VRdata['comp']['properties']['num']) - iflag2 = (VRdata['ref']['particles']['Npart'].size != VRdata['comp']['particles']['Npart'].size) + ref_properties_num = VRdata['ref']['properties']['num'] + cmp_particles_npart = VRdata['comp']['properties']['num'] + ref_properties_num = VRdata['ref']['particles']['Npart'] + ref_particles_npart = VRdata['comp']['particles']['Npart'] + print("") + print("==========================") + print("VRdata['ref']['properties']['num']", ref_properties_num) + print("VRdata['comp']['properties']['num']", cmp_particles_npart) + print("VRdata['ref']['particles']['Npart']", ref_properties_num) + print("VRdata['ref']['particles']['Npart']", ref_particles_npart) + print("VRdata['ref']['particles']['Npart'].size", ref_properties_num.size) + print("VRdata['ref']['particles']['Npart'].size", ref_particles_npart.size) + + iflag1 = (ref_properties_num != cmp_particles_npart) + iflag2 = (ref_properties_num != ref_particles_npart.size) if (iflag1): print('Catalog contains different number of objects ... 
Not perfect match') if (iflag2): From 4f69eac918e34e0a349ac53bd49f660327b3177c Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 9 Sep 2019 15:30:04 +1000 Subject: [PATCH 32/71] Reverted back to baseline seach but included suggested encapsulation function --- src/search.cxx | 174 +++++++++++++++++++++---------------------------- 1 file changed, 73 insertions(+), 101 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index d2d0cc9e..b37a1c1a 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2141,23 +2141,6 @@ private(i,tid,Pval,x1,D2,dval,mval,pid,pidcore) } } -// ENCAPSULATED: ECAPSULATION-01 -void AdjustSubPartToPhaseCM(Int_t num, Particle *subPart, GMatrix &cmphase) -{ - int nthreads = 1; -#ifdef USEOPENMP - nthreads = max(1, (int)(num/(float)ompsearchnum)); - nthreads = min(nthreads,omp_get_max_threads()); -#pragma omp parallel for \ -default(shared) \ -num_threads(nthreads) -#endif - for (auto j=0;j &Partsubset, Int_t *&pfof, Int_t &ngroup, Int_t &nhalos, PropData *pdata) { @@ -2594,98 +2595,69 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse subpfofold[i]=pfof[subpglist[i][0]]; subPart=new Particle[subnumingroup[i]]; for (Int_t j=0;jompsearchnum) -// { -//#pragma omp parallel default(shared) -//{ -//#pragma omp for reduction(+:mtotregion,cmx,cmy,cmz,cmvelx,cmvely,cmvelz) -// for (Int_t j=0;jompsearchnum) - { + if (subnumingroup[i]>ompsearchnum) { #pragma omp parallel default(shared) { -#pragma omp for - for (Int_t j=0;jompsearchnum) { +#pragma omp parallel default(shared) +{ +#pragma omp for private(j) + for (j=0;j &Partsubse CalcVelSigmaTensor(subnumingroup[i], subPart, sigma2x, sigma2y, sigma2z, eigvec, I); opt.HaloLocalSigmaV=opt.HaloSigmaV=pow(sigma2x*sigma2y*sigma2z,1.0/3.0); } - //ADACS: Here the object is searched. Not much of this uses OpenMP but there are - // one or two subroutines called within SearchSubset that do make use of OpenMP. + //ADACS: Here the object is searched. 
Not much of this uses OpenMP but there are + // one or two subroutines called within SearchSubset that do make use of OpenMP. subpfof=SearchSubset(opt,subnumingroup[i],subnumingroup[i],subPart,subngroup[i],sublevel,&numcores[i]); //now if subngroup>0 change the pfof ids of these particles in question and see if there are any substrucures that can be searched again. //the group ids must be stored along with the number of groups in this substructure that will be searched at next level. @@ -2756,10 +2728,10 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse } } } - for (Int_t j=0;j0) pfof[subpglist[i][j]]=ngroup+ngroupidoffset+subpfof[j]; + for (j=0;j0) pfof[subpglist[i][j]]=ngroup+ngroupidoffset+subpfof[j]; ngroupidoffset+=subngroup[i]; //now alter subsubpglist so that index pointed is global subset index as global subset is used to get the particles to be searched for subsubstructure - for (Int_t j=1;j<=subngroup[i];j++) for (Int_t k=0;k Date: Tue, 10 Sep 2019 06:36:44 +1000 Subject: [PATCH 33/71] suggested encapsulation by PJE --- src/search.cxx | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index b37a1c1a..28e5d85f 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2602,7 +2602,14 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse Double_t cmx=0.,cmy=0.,cmz=0.,cmvelx=0.,cmvely=0.,cmvelz=0.; Double_t mtotregion=0.0; Int_t j; + //suggested encapsulation by PJE if (opt.icmrefadjust) { + //this routine is in substructureproperties.cxx. 
Has internal parallelisation + GMatrix cmphase = CalcPhaseCM(subnumingroup[i], subPart); + //this routine is within this file, also has internal parallelisation + AdjustSubPartToPhaseCM(subnumingroup[i], subPart, cmphase); + } + /* if (opt.icmrefadjust) { #ifdef USEOPENMP if (subnumingroup[i]>ompsearchnum) { #pragma omp parallel default(shared) @@ -2657,7 +2664,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse #ifdef USEOPENMP } #endif - } + } //*/ //ADACS: for large objects, extra processing steps are requried //ADACS: Some of these subroutines make use of OpenMP. For this to continue //ADACS: the pool of threads would have to be changed From 995cb2c3868dd66732fd2c01463eacac5eec0426 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Tue, 10 Sep 2019 06:38:01 +1000 Subject: [PATCH 34/71] Added print statements for comparison --- examples/catalocomparison_VRinputonly.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/catalocomparison_VRinputonly.py b/examples/catalocomparison_VRinputonly.py index 47997d60..7223da83 100644 --- a/examples/catalocomparison_VRinputonly.py +++ b/examples/catalocomparison_VRinputonly.py @@ -23,20 +23,20 @@ def PerfectCrossMatch(VRdata): ref_properties_num = VRdata['ref']['properties']['num'] - cmp_particles_npart = VRdata['comp']['properties']['num'] - ref_properties_num = VRdata['ref']['particles']['Npart'] - ref_particles_npart = VRdata['comp']['particles']['Npart'] + cmp_properties_num = VRdata['comp']['properties']['num'] + ref_particles_npart = VRdata['ref']['particles']['Npart'] + cmp_particles_npart = VRdata['comp']['particles']['Npart'] print("") print("==========================") print("VRdata['ref']['properties']['num']", ref_properties_num) - print("VRdata['comp']['properties']['num']", cmp_particles_npart) - print("VRdata['ref']['particles']['Npart']", ref_properties_num) + print("VRdata['comp']['properties']['num']", cmp_properties_num) 
print("VRdata['ref']['particles']['Npart']", ref_particles_npart) - print("VRdata['ref']['particles']['Npart'].size", ref_properties_num.size) + print("VRdata['comp']['particles']['Npart']", cmp_particles_npart) print("VRdata['ref']['particles']['Npart'].size", ref_particles_npart.size) + print("VRdata['comp']['particles']['Npart'].size", cmp_particles_npart.size) - iflag1 = (ref_properties_num != cmp_particles_npart) - iflag2 = (ref_properties_num != ref_particles_npart.size) + iflag1 = (ref_properties_num != cmp_properties_num) + iflag2 = (ref_particles_npart != ref_particles_npart.size) if (iflag1): print('Catalog contains different number of objects ... Not perfect match') if (iflag2): From 6fc6a46e5d8d1d97d520e8e55be032fbb45c1252 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Tue, 10 Sep 2019 08:25:12 +1000 Subject: [PATCH 35/71] updated comparison variables --- examples/catalocomparison_VRinputonly.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/catalocomparison_VRinputonly.py b/examples/catalocomparison_VRinputonly.py index 7223da83..baf36158 100644 --- a/examples/catalocomparison_VRinputonly.py +++ b/examples/catalocomparison_VRinputonly.py @@ -36,13 +36,14 @@ def PerfectCrossMatch(VRdata): print("VRdata['comp']['particles']['Npart'].size", cmp_particles_npart.size) iflag1 = (ref_properties_num != cmp_properties_num) - iflag2 = (ref_particles_npart != ref_particles_npart.size) + iflag2 = (ref_particles_npart.size != cmp_particles_npart.size) if (iflag1): print('Catalog contains different number of objects ... Not perfect match') if (iflag2): print('Particle catalog contains different number of particles ... 
Not perfect match') if (iflag1 or iflag2): return 0 + num = VRdata['ref']['particles']['Npart'].size ref = np.concatenate(VRdata['ref']['particles']['Particle_IDs']) comp = np.concatenate(VRdata['comp']['particles']['Particle_IDs']) From ac15a805b68e9311fb62df72a157b5b24621fd98 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Tue, 10 Sep 2019 08:42:10 +1000 Subject: [PATCH 36/71] Added print statement for unique particle ids present in reference result missing from output being compared to --- examples/catalocomparison_VRinputonly.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/examples/catalocomparison_VRinputonly.py b/examples/catalocomparison_VRinputonly.py index baf36158..24640755 100644 --- a/examples/catalocomparison_VRinputonly.py +++ b/examples/catalocomparison_VRinputonly.py @@ -38,8 +38,12 @@ def PerfectCrossMatch(VRdata): iflag1 = (ref_properties_num != cmp_properties_num) iflag2 = (ref_particles_npart.size != cmp_particles_npart.size) if (iflag1): + print("") + print("==========================") print('Catalog contains different number of objects ... Not perfect match') if (iflag2): + print("") + print("==========================") print('Particle catalog contains different number of particles ... Not perfect match') if (iflag1 or iflag2): return 0 @@ -47,12 +51,24 @@ def PerfectCrossMatch(VRdata): num = VRdata['ref']['particles']['Npart'].size ref = np.concatenate(VRdata['ref']['particles']['Particle_IDs']) comp = np.concatenate(VRdata['comp']['particles']['Particle_IDs']) + print("") + print("==========================") + print("Particle ID comparison") + print("Size: ", num) + print("ref", ref) + print("comp", comp) if (np.array_equal(ref,comp) == False): if (np.where(np.isin(ref,comp))[0].size == num): + print("") + print("==========================") print('Particle catalog contains same number of particles but IDs in different order ... 
Not perfect match but close') return 1 else: + print("") + print("==========================") print('Particle catalog contains same number of particles but IDs differ ... Not perfect match') + print("Unique values in ref not present in comp") + print(np.setdiff1d(ref,comp)) return 0 return 2 From 491f6ed243a109d7021a6ada271240c776ae6578 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Tue, 10 Sep 2019 10:55:05 +1000 Subject: [PATCH 37/71] Marked block for particle id comparison --- examples/catalocomparison_VRinputonly.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/catalocomparison_VRinputonly.py b/examples/catalocomparison_VRinputonly.py index 24640755..f7dfbbe6 100644 --- a/examples/catalocomparison_VRinputonly.py +++ b/examples/catalocomparison_VRinputonly.py @@ -48,6 +48,7 @@ def PerfectCrossMatch(VRdata): if (iflag1 or iflag2): return 0 + # TODO: Encapsulate block to ParticleComparison num = VRdata['ref']['particles']['Npart'].size ref = np.concatenate(VRdata['ref']['particles']['Particle_IDs']) comp = np.concatenate(VRdata['comp']['particles']['Particle_IDs']) @@ -64,11 +65,12 @@ def PerfectCrossMatch(VRdata): print('Particle catalog contains same number of particles but IDs in different order ... Not perfect match but close') return 1 else: + list_ref_missing_particles = np.setdiff1d(ref,comp) print("") print("==========================") print('Particle catalog contains same number of particles but IDs differ ... Not perfect match') print("Unique values in ref not present in comp") - print(np.setdiff1d(ref,comp)) + print(list_ref_missing_particles) return 0 return 2 From 78d617ddfadcbacea13f41ebc6b53b7bbf979c59 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 23 Sep 2019 11:27:24 +1000 Subject: [PATCH 38/71] Successful encapsulation comparison. 
Removed commented block --- src/search.cxx | 57 +------------------------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 28e5d85f..6f9f2d29 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2609,62 +2609,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse //this routine is within this file, also has internal parallelisation AdjustSubPartToPhaseCM(subnumingroup[i], subPart, cmphase); } - /* if (opt.icmrefadjust) { -#ifdef USEOPENMP - if (subnumingroup[i]>ompsearchnum) { -#pragma omp parallel default(shared) -{ -#pragma omp for private(j) reduction(+:mtotregion,cmx,cmy,cmz,cmvelx,cmvely,cmvelz) - for (j=0;jompsearchnum) { -#pragma omp parallel default(shared) -{ -#pragma omp for private(j) - for (j=0;j Date: Mon, 14 Oct 2019 10:09:57 +1100 Subject: [PATCH 39/71] ENCAPSULATION: PreCalcSearchSubSet --- src/search.cxx | 75 ++++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 6f9f2d29..6a594627 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2476,6 +2476,45 @@ num_threads(nthreads) } } +void PreCalcSearchSubSet(Int_t *subnumingroup, Int_t i, Options &opt, Particle *subPart, + Int_t sublevel, KDTree *tree, Int_t ngrid, int ThisTask, GridCell *grid, Coordinate *gvel, Matrix *gveldisp) +{ + if (subnumingroup[i]>=MINSUBSIZE&&opt.foftype!=FOF6DCORE) { + //now if object is large enough for phase-space decomposition and search, compare local field to bg field + opt.Ncell=opt.Ncellfac*subnumingroup[i]; + //if ncell is such that uncertainty would be greater than 0.5% based on Poisson noise, increase ncell till above unless cell would contain >25% + while (opt.Ncellopt.Ncell) opt.Ncell*=2; + tree=InitializeTreeGrid(opt,subnumingroup[i],subPart); + ngrid=tree->GetNumLeafNodes(); + if (opt.iverbose) cout<opt.HaloVelDispScale) opt.HaloVelDispScale=opt.HaloSigmaV; +#ifdef 
HALOONLYDEN + GetVelocityDensity(opt,subnumingroup[i],subPart); +#endif + GetDenVRatio(opt,subnumingroup[i],subPart,ngrid,grid,gvel,gveldisp); + GetOutliersValues(opt,subnumingroup[i],subPart,sublevel); + opt.idenvflag++;//largest field halo used to deteremine statistics of ratio + } + //otherwise only need to calculate a velocity scale for merger separation + else { + Matrix eigvec(0.),I(0.); + Double_t sigma2x,sigma2y,sigma2z; + CalcVelSigmaTensor(subnumingroup[i], subPart, sigma2x, sigma2y, sigma2z, eigvec, I); + opt.HaloLocalSigmaV=opt.HaloSigmaV=pow(sigma2x*sigma2y*sigma2z,1.0/3.0); + } +} + /*! Given a initial ordered candidate list of substructures, find all substructures that are large enough to be searched. These substructures are used as a mean background velocity field and a new outlier list is found and searched. @@ -2613,39 +2652,9 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse //ADACS: for large objects, extra processing steps are requried //ADACS: Some of these subroutines make use of OpenMP. 
For this to continue //ADACS: the pool of threads would have to be changed - if (subnumingroup[i]>=MINSUBSIZE&&opt.foftype!=FOF6DCORE) { - //now if object is large enough for phase-space decomposition and search, compare local field to bg field - opt.Ncell=opt.Ncellfac*subnumingroup[i]; - //if ncell is such that uncertainty would be greater than 0.5% based on Poisson noise, increase ncell till above unless cell would contain >25% - while (opt.Ncellopt.Ncell) opt.Ncell*=2; - tree=InitializeTreeGrid(opt,subnumingroup[i],subPart); - ngrid=tree->GetNumLeafNodes(); - if (opt.iverbose) cout<opt.HaloVelDispScale) opt.HaloVelDispScale=opt.HaloSigmaV; -#ifdef HALOONLYDEN - GetVelocityDensity(opt,subnumingroup[i],subPart); -#endif - GetDenVRatio(opt,subnumingroup[i],subPart,ngrid,grid,gvel,gveldisp); - GetOutliersValues(opt,subnumingroup[i],subPart,sublevel); - opt.idenvflag++;//largest field halo used to deteremine statistics of ratio - } - //otherwise only need to calculate a velocity scale for merger separation - else { - Matrix eigvec(0.),I(0.); - Double_t sigma2x,sigma2y,sigma2z; - CalcVelSigmaTensor(subnumingroup[i], subPart, sigma2x, sigma2y, sigma2z, eigvec, I); - opt.HaloLocalSigmaV=opt.HaloSigmaV=pow(sigma2x*sigma2y*sigma2z,1.0/3.0); - } + // PreCalcSearchSubSet() + // subnumingroup, opt, subpart, sublevel, tree, ngrid, thistask, grid, gvel, gveldisp + PreCalcSearchSubSet(subnumingroup, i, opt, subPart, sublevel, tree, ngrid, ThisTask, grid, gvel, gveldisp); //ADACS: Here the object is searched. Not much of this uses OpenMP but there are // one or two subroutines called within SearchSubset that do make use of OpenMP. 
subpfof=SearchSubset(opt,subnumingroup[i],subnumingroup[i],subPart,subngroup[i],sublevel,&numcores[i]); From e5583d1ec1a4a0bdbac520198790a3b8aaf5b175 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 28 Oct 2019 12:00:29 +1100 Subject: [PATCH 40/71] Final encapsulation --- src/search.cxx | 54 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 6a594627..5bbd8330 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2458,6 +2458,9 @@ void MergeSubstructuresPhase(Options &opt, const Int_t nsubset, Particle *&Parts // cout<0) { + //if also keeping track of cores then must allocate coreflag + if (numcores[i]>0 && opt.iHaloCoreSearch>=1) { + coreflag=new Int_t[ng+1]; + for (int icore=1;icore<=ng;icore++) coreflag[icore]=1+(icore>ng-numcores[i]); + } + else {coreflag=NULL;} + iunbindflag=CheckUnboundGroups(opt,subnumingroup[i],subPart,subngroup[i],subpfof,subsubnumingroup[i],subsubpglist[i],1, coreflag); + if (iunbindflag) { + for (int j=1;j<=ng;j++) delete[] subsubpglist[i][j]; + delete[] subsubnumingroup[i]; + delete[] subsubpglist[i]; + if (subngroup[i]>0) { + subsubnumingroup[i]=BuildNumInGroup(subnumingroup[i], subngroup[i], subpfof); + subsubpglist[i]=BuildPGList(subnumingroup[i], subngroup[i], subsubnumingroup[i], subpfof); + } + //if need to update number of cores, + if (numcores[i]>0 && opt.iHaloCoreSearch>=1) { + numcores[i]=0; + for (int icore=1;icore<=subngroup[i];icore++)numcores[i]+=(coreflag[icore]==2); + delete[] coreflag; + } + } + } + for (j=0;j0) pfof[subpglist[i][j]]=ngroup+ngroupidoffset+subpfof[j]; + ngroupidoffset+=subngroup[i]; + //now alter subsubpglist so that index pointed is global subset index as global subset is used to get the particles to be searched for subsubstructure + for (j=1;j<=subngroup[i];j++) for (Int_t k=0;k &Partsubse */ //here loop over all sublevel groups that need to be searched for substructure // ADACS: this loop proceses 
halos (that are independent) sequentially. This is unecessary. + // #pragma omp parallel for... + // parallelise loop, collection, and/or calculations per collection + // TODO: give a collection a pool of threads thread_pool for (Int_t i=1;i<=oldnsubsearch;i++) { subpfofold[i]=pfof[subpglist[i][0]]; subPart=new Particle[subnumingroup[i]]; @@ -2657,10 +2701,13 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse PreCalcSearchSubSet(subnumingroup, i, opt, subPart, sublevel, tree, ngrid, ThisTask, grid, gvel, gveldisp); //ADACS: Here the object is searched. Not much of this uses OpenMP but there are // one or two subroutines called within SearchSubset that do make use of OpenMP. + subpfof=SearchSubset(opt,subnumingroup[i],subnumingroup[i],subPart,subngroup[i],sublevel,&numcores[i]); //now if subngroup>0 change the pfof ids of these particles in question and see if there are any substrucures that can be searched again. //the group ids must be stored along with the number of groups in this substructure that will be searched at next level. 
//now check if self bound and if not, id doesn't change from original subhalo,ie: subpfof[j]=0 + // SearchSubStruct(subngroup, i, ng, subsubnumingroup, subnumingroup, subpfof, subsubpglist, opt, + // numcores, coreflag, iunbindflag, subPart, pfof, subpglist, ngroup, ngroupidoffset); if (subngroup[i]) { ng=subngroup[i]; subsubnumingroup[i]=BuildNumInGroup(subnumingroup[i], subngroup[i], subpfof); @@ -2694,11 +2741,12 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse //now alter subsubpglist so that index pointed is global subset index as global subset is used to get the particles to be searched for subsubstructure for (j=1;j<=subngroup[i];j++) for (Int_t k=0;k Date: Mon, 28 Oct 2019 12:01:05 +1100 Subject: [PATCH 41/71] Added stdout thread count --- src/search.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/search.cxx b/src/search.cxx index 5bbd8330..88b09dff 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2469,6 +2469,7 @@ void AdjustSubPartToPhaseCM(Int_t num, Particle *subPart, GMatrix &cmphase) #ifdef USEOPENMP nthreads = max(1, (int)(num/(float)ompsearchnum)); nthreads = min(nthreads,omp_get_max_threads()); + cout << "OPTIMISATION-01: Total allocated threads: " << nthreads; #pragma omp parallel for \ default(shared) \ num_threads(nthreads) From 4269513a71a69e849fdefc6c770779d84bca476c Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 28 Oct 2019 12:02:50 +1100 Subject: [PATCH 42/71] newline to threadcount --- src/search.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index 88b09dff..46a33569 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2469,7 +2469,7 @@ void AdjustSubPartToPhaseCM(Int_t num, Particle *subPart, GMatrix &cmphase) #ifdef USEOPENMP nthreads = max(1, (int)(num/(float)ompsearchnum)); nthreads = min(nthreads,omp_get_max_threads()); - cout << "OPTIMISATION-01: Total allocated threads: " << nthreads; + cout << "OPTIMISATION-01: Total allocated threads: 
" << nthreads << endl; #pragma omp parallel for \ default(shared) \ num_threads(nthreads) From 2159f7eb2bf009efb21012983d35aeaf454ef565 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Tue, 29 Oct 2019 20:02:52 +1100 Subject: [PATCH 43/71] OPTIMISATION-01: Print statements --- src/search.cxx | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index 46a33569..e325592b 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2467,9 +2467,15 @@ void AdjustSubPartToPhaseCM(Int_t num, Particle *subPart, GMatrix &cmphase) { int nthreads = 1; #ifdef USEOPENMP + cout << "OPTIMISATION-01: BEGIN PRINT" << endl; + cout << "OPTIMISATION-01: num[" << num << "]" << endl; + cout << "OPTIMISATION-01: omp_get_max_threads[" << omp_get_max_threads() << "]" << endl; + cout << "OPTIMISATION-01: ompsearchnum[" << ompsearchnum << "]" << endl; nthreads = max(1, (int)(num/(float)ompsearchnum)); + cout << "OPTIMISATION-01: nthreads.1[" << nthreads << "]" << endl; nthreads = min(nthreads,omp_get_max_threads()); - cout << "OPTIMISATION-01: Total allocated threads: " << nthreads << endl; + cout << "OPTIMISATION-01: nthreads.2[" << nthreads << "]" << endl; + cout << "OPTIMISATION-01: END PRINT" << endl; #pragma omp parallel for \ default(shared) \ num_threads(nthreads) From d14efe2c77db8d9f86e2c36195774cdc693ec4f2 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 4 Nov 2019 12:04:18 +1100 Subject: [PATCH 44/71] Applied Encapsulation-3 --- src/search.cxx | 97 ++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 63 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index e325592b..6dd6b9f2 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2527,41 +2527,40 @@ void PreCalcSearchSubSet(Int_t *subnumingroup, Int_t i, Options &opt, Particle * } // ENCAPSULATED: ENCAPSULATION-03 -void SearchSubStruct(Int_t *subngroup, Int_t i, Int_t ng, Int_t **subsubnumingroup, Int_t *subnumingroup, - Int_t *subpfof, Int_t 
***subsubpglist, Options &opt, Int_t *numcores, Int_t *coreflag, bool iunbindflag, - Particle *subPart, Int_t *&pfof, Int_t **subpglist, Int_t &ngroup, Int_t ngroupidoffset) { - Int_t j; - ng=subngroup[i]; - subsubnumingroup[i]=BuildNumInGroup(subnumingroup[i], subngroup[i], subpfof); - subsubpglist[i]=BuildPGList(subnumingroup[i], subngroup[i], subsubnumingroup[i], subpfof); - if (opt.uinfo.unbindflag&&subngroup[i]>0) { - //if also keeping track of cores then must allocate coreflag - if (numcores[i]>0 && opt.iHaloCoreSearch>=1) { - coreflag=new Int_t[ng+1]; - for (int icore=1;icore<=ng;icore++) coreflag[icore]=1+(icore>ng-numcores[i]); - } - else {coreflag=NULL;} - iunbindflag=CheckUnboundGroups(opt,subnumingroup[i],subPart,subngroup[i],subpfof,subsubnumingroup[i],subsubpglist[i],1, coreflag); - if (iunbindflag) { - for (int j=1;j<=ng;j++) delete[] subsubpglist[i][j]; - delete[] subsubnumingroup[i]; - delete[] subsubpglist[i]; - if (subngroup[i]>0) { +void SearchSubStruct(Int_t *numcores, Int_t i, Options &opt, Int_t ng, bool iunbindflag, Int_t *subnumingroup, + Particle *subPart, Int_t *subngroup, Int_t *subpfof, Int_t **subsubnumingroup, Int_t ***subsubpglist, + Int_t *&pfof, Int_t **subpglist, Int_t &ngroup, Int_t ngroupidoffset) { + Int_t *coreflag; + ng=subngroup[i]; subsubnumingroup[i]=BuildNumInGroup(subnumingroup[i], subngroup[i], subpfof); subsubpglist[i]=BuildPGList(subnumingroup[i], subngroup[i], subsubnumingroup[i], subpfof); - } - //if need to update number of cores, - if (numcores[i]>0 && opt.iHaloCoreSearch>=1) { - numcores[i]=0; - for (int icore=1;icore<=subngroup[i];icore++)numcores[i]+=(coreflag[icore]==2); - delete[] coreflag; - } + if (opt.uinfo.unbindflag&&subngroup[i]>0) { + //if also keeping track of cores then must allocate coreflag + if (numcores[i]>0 && opt.iHaloCoreSearch>=1) { + coreflag=new Int_t[ng+1]; + for (int icore=1;icore<=ng;icore++) coreflag[icore]=1+(icore>ng-numcores[i]); + } + else {coreflag=NULL;} + 
iunbindflag=CheckUnboundGroups(opt,subnumingroup[i],subPart,subngroup[i],subpfof,subsubnumingroup[i],subsubpglist[i],1, coreflag); + if (iunbindflag) { + for (int j=1;j<=ng;j++) delete[] subsubpglist[i][j]; + delete[] subsubnumingroup[i]; + delete[] subsubpglist[i]; + if (subngroup[i]>0) { + subsubnumingroup[i]=BuildNumInGroup(subnumingroup[i], subngroup[i], subpfof); + subsubpglist[i]=BuildPGList(subnumingroup[i], subngroup[i], subsubnumingroup[i], subpfof); + } + //if need to update number of cores, + if (numcores[i]>0 && opt.iHaloCoreSearch>=1) { + numcores[i]=0; + for (int icore=1;icore<=subngroup[i];icore++)numcores[i]+=(coreflag[icore]==2); + delete[] coreflag; } - } - for (j=0;j0) pfof[subpglist[i][j]]=ngroup+ngroupidoffset+subpfof[j]; - ngroupidoffset+=subngroup[i]; + }} + for (Int_t j=0;j0) pfof[subpglist[i][j]]=ngroup+ngroupidoffset+subpfof[j]; + //now alter subsubpglist so that index pointed is global subset index as global subset is used to get the particles to be searched for subsubstructure - for (j=1;j<=subngroup[i];j++) for (Int_t k=0;k &Partsubse //now if subngroup>0 change the pfof ids of these particles in question and see if there are any substrucures that can be searched again. //the group ids must be stored along with the number of groups in this substructure that will be searched at next level. 
//now check if self bound and if not, id doesn't change from original subhalo,ie: subpfof[j]=0 - // SearchSubStruct(subngroup, i, ng, subsubnumingroup, subnumingroup, subpfof, subsubpglist, opt, - // numcores, coreflag, iunbindflag, subPart, pfof, subpglist, ngroup, ngroupidoffset); if (subngroup[i]) { - ng=subngroup[i]; - subsubnumingroup[i]=BuildNumInGroup(subnumingroup[i], subngroup[i], subpfof); - subsubpglist[i]=BuildPGList(subnumingroup[i], subngroup[i], subsubnumingroup[i], subpfof); - if (opt.uinfo.unbindflag&&subngroup[i]>0) { - //if also keeping track of cores then must allocate coreflag - if (numcores[i]>0 && opt.iHaloCoreSearch>=1) { - coreflag=new Int_t[ng+1]; - for (int icore=1;icore<=ng;icore++) coreflag[icore]=1+(icore>ng-numcores[i]); - } - else {coreflag=NULL;} - iunbindflag=CheckUnboundGroups(opt,subnumingroup[i],subPart,subngroup[i],subpfof,subsubnumingroup[i],subsubpglist[i],1, coreflag); - if (iunbindflag) { - for (int j=1;j<=ng;j++) delete[] subsubpglist[i][j]; - delete[] subsubnumingroup[i]; - delete[] subsubpglist[i]; - if (subngroup[i]>0) { - subsubnumingroup[i]=BuildNumInGroup(subnumingroup[i], subngroup[i], subpfof); - subsubpglist[i]=BuildPGList(subnumingroup[i], subngroup[i], subsubnumingroup[i], subpfof); - } - //if need to update number of cores, - if (numcores[i]>0 && opt.iHaloCoreSearch>=1) { - numcores[i]=0; - for (int icore=1;icore<=subngroup[i];icore++)numcores[i]+=(coreflag[icore]==2); - delete[] coreflag; - } - } - } - for (j=0;j0) pfof[subpglist[i][j]]=ngroup+ngroupidoffset+subpfof[j]; + SearchSubStruct(numcores, i, opt, ng, iunbindflag, subnumingroup, + subPart, subngroup, subpfof, subsubnumingroup, subsubpglist, + pfof, subpglist, ngroup, ngroupidoffset); + ngroupidoffset+=subngroup[i]; - //now alter subsubpglist so that index pointed is global subset index as global subset is used to get the particles to be searched for subsubstructure - for (j=1;j<=subngroup[i];j++) for (Int_t k=0;k Date: Mon, 4 Nov 2019 12:09:33 +1100 
Subject: [PATCH 45/71] Removed outedated encapsulate comments --- src/search.cxx | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 6dd6b9f2..c6460770 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2665,16 +2665,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse numcores=new Int_t[nsubsearch+1]; subpfofold=new Int_t[nsubsearch+1]; ns=0; - // START: ENCAPSULATE-01 - /* - If the whole for loop is to be encapsulated, - ** the following variables are in consideration to be parametised: - ** oldnsubsearch - ** subpglist - ** pfof - ** subnumingroup - ** opt.icmrefadjust - ** ompsearchnum - */ + // START: ENCAPSULATE //here loop over all sublevel groups that need to be searched for substructure // ADACS: this loop proceses halos (that are independent) sequentially. This is unecessary. // #pragma omp parallel for... @@ -2726,7 +2717,8 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse //ADACS: this would need a reduction at the end. 
ns+=subngroup[i]; // reduction } - // END: ENCAPSULATE-01 + // END: ENCAPSULATE + //if objects have been found adjust the StrucLevelData //this stores the address of the parent particle and pfof along with child substructure particle and pfof if (ns>0) { From 3b24fa9652a25d1ff14b5cb6c700ab649aa2ecac Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 4 Nov 2019 12:18:00 +1100 Subject: [PATCH 46/71] Removed test function setNthread --- src/search.cxx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index c6460770..48288b58 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2458,10 +2458,6 @@ void MergeSubstructuresPhase(Options &opt, const Int_t nsubset, Particle *&Parts // cout< Date: Mon, 11 Nov 2019 13:00:34 +1100 Subject: [PATCH 47/71] Updated threadpool object skeleton --- src/ompvar.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/src/ompvar.h b/src/ompvar.h index e61fb7ae..16c7ae21 100644 --- a/src/ompvar.h +++ b/src/ompvar.h @@ -43,3 +43,75 @@ struct OMP_ImportInfo { int task; }; #endif + +/* + * OMP_Threadpool_datastructure + * - coupled to OMP_Threadpool + * - structure to store threadpool information + * - captures the following: + * thread state active(True)/inactive(false) + * total number of particles + */ +struct OMP_Threadpool_datastructure { + bool active; + Int_t particle_total; +}; + +/* + * OMP_Threadpool + * - contains routines used for optimised parallel computing/processing + * throttle_up - increase number of active threads + * throttle_down - decrease number of active threads + * - captures the following: + * specific threadpool information stored to OMP_Threadpool_datastructure + * total thread size + * number of active threads + */ +struct OMP_Threadpool { + // Total thread available + // Active threads + // Unsigned long long for number of particles + // omp thread class - + // omp retrieve active threads + // specify offset of thread + + /* 
Private block */ + private: + map details; + int thread_count, active_threads; + + /* Public block */ + public: + void throttle_up(){ + // TODO: add the following logic: + // OMP_Threadpool_datastructure update state of thread from inactive to active + active_threads += 1; + } + + void throttle_down(){ + // TODO: add the following logic: + // OMP_Threadpool_datastructure update state of thread from active to inactive + active_threads -= 1; + } + + void set_total_threads(int thread_count){ + thread_count = thread_count; + } + + int get_total_threads(){ + return thread_count; + } + + int get_active_threads(){ + return active_threads; + } + + /* Confirmation Functions */ + void print_total_threads(){ + printf("Total Threads: %d\n", thread_count); + } + + void print_active_threads(){ + printf("Active Threads: %d\n", active_threads); + } +}; From 9bde1559e56a5d3b18553e7a9d3c410c7547b696 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 11 Nov 2019 13:01:04 +1100 Subject: [PATCH 48/71] Updated comments --- src/search.cxx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 83433721..92536f91 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2458,6 +2458,7 @@ void MergeSubstructuresPhase(Options &opt, const Int_t nsubset, Particle *&Parts // cout< &Partsubse //now if subngroup>0 change the pfof ids of these particles in question and see if there are any substrucures that can be searched again. //the group ids must be stored along with the number of groups in this substructure that will be searched at next level. 
//now check if self bound and if not, id doesn't change from original subhalo,ie: subpfof[j]=0 - // SearchSubStruct(subngroup, i, ng, subsubnumingroup, subnumingroup, subpfof, subsubpglist, opt, - // numcores, coreflag, iunbindflag, subPart, pfof, subpglist, ngroup, ngroupidoffset); if (subngroup[i]) { SearchSubStruct(numcores, i, opt, ng, iunbindflag, subnumingroup, subPart, subngroup, subpfof, subsubnumingroup, subsubpglist, @@ -3551,7 +3550,7 @@ private(i,tid,p1,pindex,x1,D2,dval,rval,icheck,nnID,dist2,baryonfofold) map remap; Int_t newng=0, oldpid, newpid; remap[0]=0; - for (i=1;i<=ng;i++) { + for (auto i=1;i<=ng;i++) { if (ningall[i]>0) { newng++; remap[i]=newng; From 3a8a61f61daefff8b238ab497cfd47159af942f4 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 11 Nov 2019 16:39:57 +1100 Subject: [PATCH 49/71] Reverted bug --- src/search.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index 92536f91..15d60354 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -373,7 +373,7 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, numingroup=BuildNumInGroupTyped(Nlocal,numgroups,pfof,Part.data(),DARKTYPE); for (i=0;i=MINSUBSIZE); - else Part[i].SetType(-1); + else Part[i].SetType(0); numlocalden += (Part[i].GetType()>0); } } From 10bd93f4472eb0264eb567a176a3b37437f2f967 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 11 Nov 2019 18:16:23 +1100 Subject: [PATCH 50/71] bug is false positive --- src/search.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index 15d60354..92536f91 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -373,7 +373,7 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, numingroup=BuildNumInGroupTyped(Nlocal,numgroups,pfof,Part.data(),DARKTYPE); for (i=0;i=MINSUBSIZE); - else Part[i].SetType(0); + else Part[i].SetType(-1); numlocalden += (Part[i].GetType()>0); } } From 
89257d205e283b931a75909a5bc5665e54f1ddab Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 11 Nov 2019 19:23:16 +1100 Subject: [PATCH 51/71] Reverted to settyp(0) --- src/search.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index 83433721..8a96ba89 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -373,7 +373,7 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, numingroup=BuildNumInGroupTyped(Nlocal,numgroups,pfof,Part.data(),DARKTYPE); for (i=0;i=MINSUBSIZE); - else Part[i].SetType(-1); + else Part[i].SetType(0); numlocalden += (Part[i].GetType()>0); } } From 4cab92a5c6814b83f72bd6efb56be5de258cc2c6 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Thu, 2 Jan 2020 10:15:06 +1100 Subject: [PATCH 52/71] Removed unused function --- src/search.cxx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index c5d8b77b..6e9384c1 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2619,10 +2619,6 @@ void RemoveSpuriousDynamicalSubstructures(Options &opt, const Int_t nsubset, Int } } -int setNthreads(){ - return 0; -} - ///adjust to phase centre inline void AdjustSubPartToPhaseCM(Int_t num, Particle *subPart, GMatrix &cmphase) { From 3be589de06f631ffacf9ee945b8ede0550a7a8e3 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 6 Jan 2020 11:30:13 +1100 Subject: [PATCH 53/71] Added for compilation test --- Dockerfile | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..df7b5e7f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +FROM ubuntu:18.04 + +# DYNAMIC BRANCH CAN BE OVERWRITTEN DURING RUNTIME +ENV BRANCH=feature/threadpool-struct + +# NEEDS TO BE UPDATED FOR SPECIFIC VERSIONS +RUN apt update && \ + apt install -y g++ libomp-dev libgsl-dev libhdf5-serial-dev git cmake + +WORKDIR /home/ubuntu/ + +# INITIALISE PROJECT DIRECTORY +RUN git clone 
https://github.com/pelahi/VELOCIraptor-STF.git && \ + cd VELOCIraptor-STF && git checkout ${BRANCH} && git submodule update --init --recursive + +WORKDIR /home/ubuntu/VELOCIraptor-STF + +# BUILD BINARY +RUN mkdir build && cd build && cmake .. && make all From 396a79d800fbddfe1e464f1a0b64a5a5b35fda62 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Mon, 6 Jan 2020 11:31:31 +1100 Subject: [PATCH 54/71] Added TODO --- src/search.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/search.cxx b/src/search.cxx index 6e9384c1..0810045f 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2896,6 +2896,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse //this routine is within this file, also has internal parallelisation AdjustSubPartToPhaseCM(subnumingroup[i], subPart, cmphase); } + // TODO: Optimise bottleneck PreCalcSearchSubSet(opt, subnumingroup[i], subPart, sublevel); subpfof = SearchSubset(opt, subnumingroup[i], subnumingroup[i], subPart, subngroup[i], sublevel, &numcores[i]); From 8fd9ee719c3268c0087fab0f84670992a4c6262c Mon Sep 17 00:00:00 2001 From: Jesmigel Cantos Date: Wed, 22 Jan 2020 13:06:44 +1100 Subject: [PATCH 55/71] Record duration CleanAndUpdateGroupsFromSubSearch --- src/search.cxx | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/search.cxx b/src/search.cxx index 0810045f..7b2e5950 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2695,12 +2695,16 @@ inline void CleanAndUpdateGroupsFromSubSearch(Options &opt, bool iunbindflag; Int_t ng=subngroup; Int_t *coreflag; + Double_t time_temp, time_1; if (subngroup == 0) return; subsubnumingroup = BuildNumInGroup(subnumingroup, subngroup, subpfof); subsubpglist = BuildPGList(subnumingroup, subngroup, subsubnumingroup, subpfof); if (opt.uinfo.unbindflag&&subngroup>0) { + time_temp = MyGetTime(); + cout<<"TIME[CleanAndUpdateGroupsFromSubSearch] - opt.uinfo.unbindflag&&subngroup>0"<0 && opt.iHaloCoreSearch>=1) { coreflag=new Int_t[subngroup+1]; @@ -2727,11 
+2731,23 @@ inline void CleanAndUpdateGroupsFromSubSearch(Options &opt, } } } + cout<<"END: TIME[CleanAndUpdateGroupsFromSubSearch] - opt.uinfo.unbindflag&&subngroup>0"<0) pfof[subpglist[j]]=ngroup+ngroupidoffset+subpfof[j]; } + cout<<"END: TIME[CleanAndUpdateGroupsFromSubSearch] - loop1"< Date: Wed, 22 Jan 2020 13:35:28 +1100 Subject: [PATCH 56/71] Record duration CleanAndUpdateGroupsFromSubSearch --- src/search.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 7b2e5950..7d5d5312 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2701,8 +2701,8 @@ inline void CleanAndUpdateGroupsFromSubSearch(Options &opt, subsubnumingroup = BuildNumInGroup(subnumingroup, subngroup, subpfof); subsubpglist = BuildPGList(subnumingroup, subngroup, subsubnumingroup, subpfof); + time_temp = MyGetTime(); if (opt.uinfo.unbindflag&&subngroup>0) { - time_temp = MyGetTime(); cout<<"TIME[CleanAndUpdateGroupsFromSubSearch] - opt.uinfo.unbindflag&&subngroup>0"<0"< Date: Thu, 23 Jan 2020 11:10:07 +1100 Subject: [PATCH 57/71] Record duration CleanAndUpdateGroupsFromSubSearch --- src/search.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index 7d5d5312..9aa0de58 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2701,7 +2701,7 @@ inline void CleanAndUpdateGroupsFromSubSearch(Options &opt, subsubnumingroup = BuildNumInGroup(subnumingroup, subngroup, subpfof); subsubpglist = BuildPGList(subnumingroup, subngroup, subsubnumingroup, subpfof); - time_temp = MyGetTime(); + time_temp = new MyGetTime(); if (opt.uinfo.unbindflag&&subngroup>0) { cout<<"TIME[CleanAndUpdateGroupsFromSubSearch] - opt.uinfo.unbindflag&&subngroup>0"< Date: Thu, 23 Jan 2020 12:49:43 +1100 Subject: [PATCH 58/71] Record duration CleanAndUpdateGroupsFromSubSearch --- src/search.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index 9aa0de58..7d5d5312 100644 --- 
a/src/search.cxx +++ b/src/search.cxx @@ -2701,7 +2701,7 @@ inline void CleanAndUpdateGroupsFromSubSearch(Options &opt, subsubnumingroup = BuildNumInGroup(subnumingroup, subngroup, subpfof); subsubpglist = BuildPGList(subnumingroup, subngroup, subsubnumingroup, subpfof); - time_temp = new MyGetTime(); + time_temp = MyGetTime(); if (opt.uinfo.unbindflag&&subngroup>0) { cout<<"TIME[CleanAndUpdateGroupsFromSubSearch] - opt.uinfo.unbindflag&&subngroup>0"< Date: Thu, 23 Jan 2020 13:05:08 +1100 Subject: [PATCH 59/71] Record duration CleanAndUpdateGroupsFromSubSearch --- src/search.cxx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 7d5d5312..9da2924e 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2701,7 +2701,7 @@ inline void CleanAndUpdateGroupsFromSubSearch(Options &opt, subsubnumingroup = BuildNumInGroup(subnumingroup, subngroup, subpfof); subsubpglist = BuildPGList(subnumingroup, subngroup, subsubnumingroup, subpfof); - time_temp = MyGetTime(); + time_temp = MyGetTime()/1000000; if (opt.uinfo.unbindflag&&subngroup>0) { cout<<"TIME[CleanAndUpdateGroupsFromSubSearch] - opt.uinfo.unbindflag&&subngroup>0"<0"<0) pfof[subpglist[j]]=ngroup+ngroupidoffset+subpfof[j]; } cout<<"END: TIME[CleanAndUpdateGroupsFromSubSearch] - loop1"< Date: Thu, 23 Jan 2020 13:23:04 +1100 Subject: [PATCH 60/71] Record duration involved functions --- src/search.cxx | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 9da2924e..4419aad9 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2695,16 +2695,12 @@ inline void CleanAndUpdateGroupsFromSubSearch(Options &opt, bool iunbindflag; Int_t ng=subngroup; Int_t *coreflag; - Double_t time_temp, time_1; if (subngroup == 0) return; subsubnumingroup = BuildNumInGroup(subnumingroup, subngroup, subpfof); subsubpglist = BuildPGList(subnumingroup, subngroup, subsubnumingroup, subpfof); - time_temp = 
MyGetTime()/1000000; if (opt.uinfo.unbindflag&&subngroup>0) { - cout<<"TIME[CleanAndUpdateGroupsFromSubSearch] - opt.uinfo.unbindflag&&subngroup>0"<0 && opt.iHaloCoreSearch>=1) { coreflag=new Int_t[subngroup+1]; @@ -2730,24 +2726,13 @@ inline void CleanAndUpdateGroupsFromSubSearch(Options &opt, delete[] coreflag; } } - cout<<"END: TIME[CleanAndUpdateGroupsFromSubSearch] - opt.uinfo.unbindflag&&subngroup>0"<0) pfof[subpglist[j]]=ngroup+ngroupidoffset+subpfof[j]; } - cout<<"END: TIME[CleanAndUpdateGroupsFromSubSearch] - loop1"< &Partsubse AdjustSubPartToPhaseCM(subnumingroup[i], subPart, cmphase); } // TODO: Optimise bottleneck + Double_t time_temp; + cout<<"TIME[PreCalcSearchSubSet]"< Date: Thu, 23 Jan 2020 13:39:02 +1100 Subject: [PATCH 61/71] Record duration involved functions --- src/search.cxx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 4419aad9..14b9ea4c 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2900,23 +2900,23 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse // TODO: Optimise bottleneck Double_t time_temp; cout<<"TIME[PreCalcSearchSubSet]"< Date: Thu, 23 Jan 2020 13:55:50 +1100 Subject: [PATCH 62/71] Record duration involved functions --- src/search.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cxx b/src/search.cxx index 14b9ea4c..5edeb884 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2898,7 +2898,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse AdjustSubPartToPhaseCM(subnumingroup[i], subPart, cmphase); } // TODO: Optimise bottleneck - Double_t time_temp; + double time_temp; cout<<"TIME[PreCalcSearchSubSet]"< Date: Thu, 23 Jan 2020 14:06:53 +1100 Subject: [PATCH 63/71] Record duration involved functions --- src/search.cxx | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/search.cxx b/src/search.cxx index 5edeb884..59072117 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ 
-2901,18 +2901,15 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse double time_temp; cout<<"TIME[PreCalcSearchSubSet]"< Date: Wed, 29 Jan 2020 11:36:38 +1100 Subject: [PATCH 64/71] Updated profiler --- examples/profile_build.sh | 48 ---------------------------- examples/testing/flamegraph.Makefile | 47 +++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 48 deletions(-) delete mode 100755 examples/profile_build.sh create mode 100644 examples/testing/flamegraph.Makefile diff --git a/examples/profile_build.sh b/examples/profile_build.sh deleted file mode 100755 index bd6111b0..00000000 --- a/examples/profile_build.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -l -# This script profiles stf using perf and hotspot to produce flame graphs -# It produces a build directory with a certian labels -# runs cmake with the desired options, runs the code on the desired input -# using the desired config and running the hotspot stuff -# it assumes the existence of perf that is in linux-tools-common -# also makes use of wget - -#script that produces lots of qsub scripts to run velociraptor on simulation output -if [ $# -eq 0 ] || [ "$1" == "--help" ] -then - echo "This script profiles a VR run." - echo "The interface is as follows:" - echo "buildlabel buildoptions VRargs VRconfig" - echo "buildlabel: string, label for the build" - echo "buildoptions: string, cmake options for build. Ex: \" -DVR_USE_GAS=ON \"" - echo "VRargs: string, options for VR. 
Ex: \"-i inputfile -I 2 -s 1 -o outputfile \"" - echo "VRconfig: string, path and file name of the config file" - exit -fi - - -#initial and final snapshot numbers -buildlabel=$1 -buildoptions=$2 -VRargs=$3 -VRconfig=$4 - -workingdir=`pwd` -scriptdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -#build exe -cd ${scriptdir}/../ -mkdir build-${buildlabel} -cd build-${buildlabel} -rm -rf * -cmake ${buildoptions} -DCMAKE_BUILD_TYPE=RelWithDebugInfo ../ -make -j - -wget https://github.com/KDAB/hotspot/releases/download/v1.1.0/hotspot-v1.1.0-x86_64.AppImage -chmod +x hotspot-v1.1.0-x86_64.AppImage - -#run code -#this will have produced a perf.data -perf record ./stf ${VRargs} -C ${VRconfig} - -#run hotspot -./hotspot-v1.1.0-x86_64.AppImage perf.data diff --git a/examples/testing/flamegraph.Makefile b/examples/testing/flamegraph.Makefile new file mode 100644 index 00000000..41b79f8a --- /dev/null +++ b/examples/testing/flamegraph.Makefile @@ -0,0 +1,47 @@ +# This Makefile profiles stf using perf to produce flame graphs +.PHONEY: info record_prereq fold flamegraph run display + +#script that produces lots of qsub scripts to run velociraptor on simulation output +info: + @echo "This script profiles a VR run." + +# PERF +_PERF_OPT_RECORD=--call-graph lbr -g -s -o +_PERF_PATH_ROOT=`pwd`/fg +_PERF_FILE_RAW=$(_PERF_PATH_ROOT)/perf.raw.`date +%Y%m%d`.`hostname`.data + +# FLAMEGRAPH +_FLAMEGRAPH_GIT_URL=https://github.com/brendangregg/FlameGraph.git +_FLAMEGRAPH_PATH=$(_PERF_PATH_ROOT)/FlameGraph +_FLAMEGRAPH_FILE_FOLDED=$(_PERF_PATH_FOLDED)/perf.processed.`date +%Y%m%d`.`hostname`.folded +_FLAMEGRAPH_SCRIPT_STACKCOLLAPSE=$(_FLAMEGRAPH_PATH)/stackcollapse-perf.pl +_FLAMEGRAPH_SCRIPT_MAIN=$(_FLAMEGRAPH_PATH)/flamegraph.pl +_FLAMEGRAPH_FILE_SVG=$(_PERF_PATH_ROOT) + +# STF +_STF_PARAMETERS="DEFAULT" +include ./payloadname.env + +record_prereq: + @mkdir -p $(_PERF_PATH_ROOT) + @cd $(_PERF_PATH_ROOT) && git clone $(_FLAMEGRAPH_GIT_URL) && cd.. 
+ +record: record_prereq + perf record $(_PERF_OPT_RECORD) -o $(_PERF_FILE_RAW) ./stf $(_STF_PARAMETERS) + @echo "To manually test recorded data, execute: " + @echo "perf report -i $(_PERF_FILE_RAW)" + +fold: + perf script -i $(_PERF_FILE_RAW) | $(_FLAMEGRAPH_SCRIPT_STACKCOLLAPSE) > $(_FLAMEGRAPH_FILE_FOLDED) + +flamegraph: + cat $(_FLAMEGRAPH_FILE_FOLDED) | $(_FLAMEGRAPH_SCRIPT_MAIN) > $(_FLAMEGRAPH_FILE_SVG) + +run: record fold flamegraph + +# Display output for proofreading +# It can be appended to job scripts or test run in CLI +display: + @echo "perf record $(_PERF_OPT_RECORD) -o $(_PERF_FILE_RAW) ./stf $(_STF_PARAMETERS)" + @echo "perf script -i $(_PERF_FILE_RAW) | $(_FLAMEGRAPH_SCRIPT_STACKCOLLAPSE) > $(_FLAMEGRAPH_FILE_FOLDED)" + @echo "cat $(_FLAMEGRAPH_FILE_FOLDED) | $(_FLAMEGRAPH_SCRIPT_MAIN) > $(_FLAMEGRAPH_FILE_SVG)" \ No newline at end of file From 2df7719e4ecc8f85199fd47b7aa3dac6c3360a3a Mon Sep 17 00:00:00 2001 From: Jesmigel Cantos Date: Wed, 29 Jan 2020 11:51:58 +1100 Subject: [PATCH 65/71] Added payload description --- examples/testing/flamegraph.Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/testing/flamegraph.Makefile b/examples/testing/flamegraph.Makefile index 41b79f8a..e300654e 100644 --- a/examples/testing/flamegraph.Makefile +++ b/examples/testing/flamegraph.Makefile @@ -19,8 +19,9 @@ _FLAMEGRAPH_SCRIPT_MAIN=$(_FLAMEGRAPH_PATH)/flamegraph.pl _FLAMEGRAPH_FILE_SVG=$(_PERF_PATH_ROOT) # STF +# Dynamic stf input parameters through environment variables to be set in an input payload "./payloadname.env " _STF_PARAMETERS="DEFAULT" -include ./payloadname.env +include ./payloadname.env record_prereq: @mkdir -p $(_PERF_PATH_ROOT) From 3edb3a4067e0c75ff47634fc2db2c302d912f4d6 Mon Sep 17 00:00:00 2001 From: Jesmigel Cantos Date: Wed, 29 Jan 2020 14:55:59 +1100 Subject: [PATCH 66/71] Timestamp on against functions --- src/search.cxx | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git 
a/src/search.cxx b/src/search.cxx index 59072117..9fc75fe0 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -2899,21 +2899,17 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse } // TODO: Optimise bottleneck double time_temp; - cout<<"TIME[PreCalcSearchSubSet]"< &Partsubse //this routine is within this file, also has internal parallelisation AdjustSubPartToPhaseCM(subnumingroup[i], subPart, cmphase); } + // TODO: Optimise bottleneck + double time_temp; + time_temp = MyGetTime(); PreCalcSearchSubSet(opt2, subnumingroup[i], subPart, sublevel); subpfof = SearchSubset(opt2, subnumingroup[i], subnumingroup[i], subPart, subngroup[i], sublevel, &numcores[i]); From aabe5ac2da3d8e83b7f55641ca29a0c145817bc0 Mon Sep 17 00:00:00 2001 From: jesmigel <51466333+jesmigel@users.noreply.github.com> Date: Thu, 30 Jan 2020 14:22:14 +1100 Subject: [PATCH 67/71] Feature/threadpool struct (#62) * Update to submodule * Update to extra hydro/star/bh properties * Update to MPI to handle extra hydro/star/bh properties Not full functioning with Isend/recv when sending information in chunks. * bug fix * Update to MPI sending hydro/star/bh information in chunks Previous code sending information after FOF in chunks failed when sending extra hydro information. Code updated to handle sends in chunks. Old code should work but uncertain why it was failing. * Added a new example configuration. * Sample config file showing loading extra properties from HDF file. * bug fix. * bug fix. * Add functions indicating compilation options Add functions that are defined if compilation option turned on. Allows for simple checks to see if library compiled with said option. * Update to have functions defining compilation state. * Allow for substructures to be merged back with background Substructures can now also merge with background if not distant enough in phase-space. 
* Significant update to swift interface to handle other particle types * bug fix * bug fix * Update submodule * Update to submodule * Update to merging of subhalos if overlapping in phase * Only consider SFR > 0 when reading an EAGLE or SWIFT snapshot. Negative values encode different information. (#45) * Update to phase-merging of substructures * Update to merging of substructures and background if overlap in phase * Minor bug fixes to memory usage. * HDF5 bug fix * Minor memory leak bug fixes * Code cleaned up Commented code blocks removed. Update to using strings instead of character arrays to store file names. * bug fix missing ifdef * Memory leak bug fixes. * Update to sample config, incorrect option * A few more minor memory leaks fixed. * A few more memory leak bug fixes. * Few more memory leak bug fixes * Bug fix to reading extra info with HDF5 Issue with improper init of unique pointers * Code clean up and memory footprint reduction * Update to merging of structures with background * Update to config to allow VR to not produce particle id files * Allow VR to be invoked multiple times from swift with different configs * Not require VR to be compiled with gas for swift invocation * Added some if statements around some computations Reduce required computations if certain criteria are met. * bug fix * Bug fixes * cmake checks if parallel hdf present * update to cmake to handle parallel hdf5 * Update to parallel HDF5 * Update to parallel HDF5 * Bug fixes to parallel HDF5 * Bug fixes to parallel hdf5 * some formatting * Bug fixes for parallel HDF5 * Bug fix * Revision of parallel HDF5 addressing bugs * Bug fixes to parallel HDF5 * Added metallicity in apertures as output * New weighting scheme for core growth from hydro sims * New weighting scheme for core growth from hydro sims * Update to parallel HDF5 * Update to parallel HDF5 * Bug fixes for parallel HDF5 * bug fix * bug fix * bug fix * bug fix * Bug fixes to paralle HDF5 Also some commented code removed. 
* Bug fix * Check HDF5 version for compression capabilities * Update for parallel HDF5 * Bug fix * Minor updates Updated config option keywords and also added functions defined by compile time options. * Minor updates to examples and documentation. * Interface added for Omega_k * Update to rst documentation. * Update to documentation * Bug fix * Update to submodule * Update for parallel HDF5 Now have specific communicator for writing which can be used to group mpi processe into collective parallel writes. * Minor updates Updated config option keywords and also added functions defined by compile time options. * Minor updates to examples and documentation. * Interface added for Omega_k * Update to rst documentation. * Update to documentation * Update to doc and examples * Remove unnecessary cout * Quick update on readme to have link to read the docs * Update README.md * Update README.md * Added gas internal energy to kinetic energy calculations * Bug fix * Fixed missing for loop to allow for compilation with stars (#42) * update to submodule * Update to parallel hdf5 to allow multiple write communicators * Update to parallel HDF5 Write Communicator * Update to handle extra DM properties Update to VR to handle extra DM properties for modified dark matter simulations. Still requires updates to calculate properties and load data from HDF file. * Update and bug fix. * Update to CI tests * Bug fix * Added HDF5 interface for extra dm info * Update to extra properties * Update to config and intel mpi bug fix. * MPI bug fix * bug fixes to extra dm info and parallel HDF5 * Bug fix when writing empty data sets * Fix for extra DM properties. * Bug fix for extra dm * Bug fix * Bug fix * Update to parallel HDF5 Now correctly writing the number of items in an individual file when in parallel hdf5. * Update for parallel HDF5 and bug fix for extra DM * Bug fix * Bug fix. 
* Bug fix for parallel HDF5 io * Bug fix for parallel HDF5 * Update to documentation * Update to documentation Also minor update to names used internally in CMake * Update to documentation * Bug fix for extra DM * Update to meta info written * quick clean up of bound fractions Clean up code for no mass * Minor to openmp binding energy loop Clean up code to track down bug * bug fix for binding fraction * Tidy up code, small openmp bug fix, property bug fix Potential energy for large groups had double counting. Unbinding processes small groups first, then large groups. Baryon search is skipped if asked for but no baryons present. * bug fix * Update to encapsulation for subsequent parallelisation * bug fix, tidying up code * nomass bug fix * Update to kinetic rotational energies of gas * Bug fix for NOMASS * Fix uninitialized Total_num_of_halos in .profiles output (#52) * Correct parameters in sample_swiftdm_3dfof_subhalo.cfg example (#53) * Clean up unbinding routines, adding encapsulation * Added encapsulation * minor zoom simulation baryon fraction correction * minor zoom simulation baryon fraction correction * Update to potential calculations * Bug fix in binding energy if using minimum potential reference position If using minimum potential as reference position then a few properties were not initialised. Now fixed. 
* Remove debugging statement * Update to encapsulation and OpenMP implementation * Improve load balancing of openmp loop * bug fix for MPI * Tidy up code * Tidy up code * Bug fix in writing attributes * Update to submodule * Update to submodule * Updated threadpool object skeleton * Updated comments * Possible memory leak bug fix * Reverted bug * bug is false positive * Fix several memory leaks in the Swift interface (#55) * Delete pfof[] before returning from InvokeVelociraptorHydro * Avoid memory leak in HaloCoreGrowth by using std::vectors * Fix really tiny memory leak in GetSOMasses * Deallocate numingroup in swiftinterface.cxx * Update to OMP split * Update to CMakeList.txt so the correct compilers are found * Bug fix for rmax=0 (to be tested). * Update configuration check for inline swift Now invoke the standard configuration check after check swift specific checks. * Remove inlining of a function * Update to config check for on the fly finding. * Set iontheflyfinding flag before calling ConfigCheck() (#56) * Update to python tools submodule * Update to submodule * Minor bug fixes for memory leaks * 6DFOF memory leak fixed. 
* Update to submodule * Optimised OpenMP FOF search * Update to OpenMP split of potential energy calculations * Some further opemmp optimisations * bug fix * Bug fix to metadata of output when running mpi with single task * OpenMP code cleaned up * Bug fix * Update to submodule * Update to submodule * Update to Fitting * Update to fitting for GSL * Update to allow for GSL Fitting * Submodule update * Removed unused function * Added for compilation test * Added TODO * Record duration CleanAndUpdateGroupsFromSubSearch * Record duration CleanAndUpdateGroupsFromSubSearch * Record duration CleanAndUpdateGroupsFromSubSearch * Record duration CleanAndUpdateGroupsFromSubSearch * Record duration CleanAndUpdateGroupsFromSubSearch * Record duration involved functions * Record duration involved functions * Record duration involved functions * Record duration involved functions * Updated profiler * Added payload description * Timestamp on against functions Co-authored-by: Pascal Jahan Elahi Co-authored-by: Matthieu Schaller <42518815+MatthieuSchaller@users.noreply.github.com> Co-authored-by: Josh Borrow Co-authored-by: John Helly Co-authored-by: Rhys Poulton --- .gitignore | 4 + .travis.yml | 18 + .travis/install.sh | 11 +- CMakeLists.txt | 52 +- Dockerfile | 19 + NBodylib | 2 +- README.md | 111 +- VERSION | 2 +- doc/Makefile | 20 + doc/_static/theme_overrides.css | 14 + doc/conf.py | 19 +- doc/dev.rst | 12 + doc/getting.rst | 18 +- doc/index.rst | 103 +- doc/output.rst | 305 ++- doc/requirements.txt | 2 +- doc/usage.rst | 374 ++- doc/velociraptoricon.png | 1 + examples/profile_build.sh | 48 - examples/sample_dmcosmological_run.cfg | 9 + examples/sample_eaglehydro_3dfof_subhalo.cfg | 9 + examples/sample_eaglehydro_6dfof_subhalo.cfg | 11 +- examples/sample_hydrocosmological_run.cfg | 8 +- examples/sample_swiftdm_3dfof_subhalo.cfg | 213 ++ examples/sample_swifthydro_3dfof_subhalo.cfg | 22 +- ...fthydro_3dfof_subhalo_extra_properties.cfg | 235 ++ 
examples/sample_swifthydro_6dfof_subhalo.cfg | 9 + examples/sample_zoomdmcosmological_run.cfg | 9 + examples/sample_zoomhydrocosmological_run.cfg | 8 +- examples/samplestfrun.sh | 18 +- examples/test.out.cfg | 4 - examples/testing/flamegraph.Makefile | 48 + src/allvars.h | 518 +++- src/bgfield.cxx | 20 +- src/buildandsortarrays.cxx | 5 + src/gadgetio.cxx | 18 +- src/hdfio.cxx | 992 +++++++- src/hdfitems.h | 1491 +++++++---- src/io.cxx | 946 +++++-- src/localbgcomp.cxx | 233 +- src/localfield.cxx | 6 +- src/main.cxx | 63 +- src/mpihdfio.cxx | 24 +- src/mpiroutines.cxx | 2210 +++++++++++++++-- src/mpivar.cxx | 2 + src/mpivar.h | 20 +- src/nchiladaio.cxx | 7 +- src/omproutines.cxx | 86 +- src/ompvar.h | 77 +- src/proto.h | 144 +- src/ramsesio.cxx | 18 +- src/search.cxx | 993 +++++--- src/stf-fitting.h | 121 + src/stf.h | 277 +-- src/substructureproperties.cxx | 766 ++++-- src/swiftinterface.cxx | 222 +- src/swiftinterface.h | 40 +- src/tipsyio.cxx | 2 + src/ui.cxx | 144 +- src/unbind.cxx | 1525 ++++-------- src/utilities.cxx | 30 +- tools | 2 +- 62 files changed, 9073 insertions(+), 3667 deletions(-) create mode 100644 Dockerfile create mode 100644 doc/Makefile create mode 100644 doc/_static/theme_overrides.css create mode 100644 doc/dev.rst create mode 120000 doc/velociraptoricon.png delete mode 100755 examples/profile_build.sh create mode 100644 examples/sample_swiftdm_3dfof_subhalo.cfg create mode 100644 examples/sample_swifthydro_3dfof_subhalo_extra_properties.cfg delete mode 100644 examples/test.out.cfg create mode 100644 examples/testing/flamegraph.Makefile diff --git a/.gitignore b/.gitignore index 51d39fe8..9027e504 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,7 @@ stf/analysis/treefrog/doc/xml/* stf/Makefile.config *~ *.tag + +build/ + +doc/_build/ diff --git a/.travis.yml b/.travis.yml index ae3c2e67..b0ac2de3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -49,12 +49,30 @@ matrix: sources: ubuntu-toolchain-r-test packages: [g++-5, libhdf5-dev, 
hdf5-tools, libgsl0-dev, libopenmpi-dev] env: COMPILER=g++-5 USEGAS=1 USESTARS=1 + - compiler: gcc + addons: + apt: + sources: ubuntu-toolchain-r-test + packages: [g++-5, libhdf5-dev, hdf5-tools, libgsl0-dev, libopenmpi-dev] + env: COMPILER=g++-5 USEGAS=1 USESTARS=1 USEBH=1 + - compiler: gcc + addons: + apt: + sources: ubuntu-toolchain-r-test + packages: [g++-5, libhdf5-dev, hdf5-tools, libgsl0-dev, libopenmpi-dev] + env: COMPILER=g++-5 USEEXTRADM=1 - compiler: gcc addons: apt: sources: ubuntu-toolchain-r-test packages: [g++-5, libhdf5-dev, hdf5-tools, libgsl0-dev, libopenmpi-dev] env: COMPILER=g++-5 USESWIFT=1 + - compiler: gcc + addons: + apt: + sources: ubuntu-toolchain-r-test + packages: [g++-5, libhdf5-dev, hdf5-tools, libgsl0-dev, libopenmpi-dev] + env: COMPILER=g++-5 NOMASS=1 - compiler: gcc addons: apt: diff --git a/.travis/install.sh b/.travis/install.sh index 71a1be44..e4b9dcef 100644 --- a/.travis/install.sh +++ b/.travis/install.sh @@ -41,8 +41,17 @@ fi if [ "$USESTARS" = 1 ]; then VR_CMAKE_OPTIONS+=" -DVR_USE_STAR=ON " fi +if [ "$USEBH" = 1 ]; then + VR_CMAKE_OPTIONS+=" -DVR_USE_BH=ON " +fi +if [ "$USEEXTRADM" = 1 ]; then + VR_CMAKE_OPTIONS+=" -DVR_USE_EXTRA_DM_PROPERTIES=ON " +fi if [ "$USESWIFT" = 1 ]; then - VR_CMAKE_OPTIONS+=" -DVR_USE_SWIFT_INTERFACE=ON " + VR_CMAKE_OPTIONS+=" -DVR_USE_SWIFT_INTERFACE=ON -DCMAKE_CXX_FLAGS=-fPIC " +fi +if [ "$NOMASS" = 1 ]; then + VR_CMAKE_OPTIONS+=" -DVR_NO_MASS=ON " fi # Go, go, go! 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c5da68c..1a8571c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,7 @@ list(GET VERSION_LIST 1 VR_VERSION_MINOR) if( ${CMAKE_MAJOR_VERSION} EQUAL 3 AND ${CMAKE_MINOR_VERSION} LESS 4 ) set(VR_LANGS C CXX) else() - set(VR_LANGS C CXX) + set(VR_LANGS CXX C) endif() project(VELOCIraptor VERSION ${VR_VERSION} LANGUAGES ${VR_LANGS}) @@ -37,6 +37,9 @@ endmacro() # Input formats vr_option(HDF5 "Attempt to include HDF5 support in VELOCIraptor" ON) +vr_option(ALLOWCOMPRESSIONHDF5 "Attempt to include HDF5 compression support in VELOCIraptor" ON) +vr_option(ALLOWPARALLELHDF5 "Attempt to include parallel HDF5 support in VELOCIraptor" ON) +vr_option(ALLOWCOMPRESSIONPARALLELHDF5 "Attempt to include parallel HDF5 compression support in VELOCIraptor" OFF) vr_option(XDR "XDR input support (used by nchilada)" OFF) # Precision options @@ -66,6 +69,7 @@ vr_option(GADGET_BH_INFO "Support for extra black hole information in Gadget" vr_option(USE_STAR "Use star particles" OFF) vr_option(USE_GAS "Use gas particles" OFF) vr_option(USE_BH "Use black holes" OFF) +vr_option(USE_EXTRA_DM_PROPERTIES "Store extra dark matter properties" OFF) vr_option(USE_HYDRO "Use all particle types, (gas, star, bh, etc)" OFF) vr_option(NO_MASS "Particles do not store mass (useful for pure N-body sims and reducing memory footprint)" OFF) vr_option(USE_EXTRA_INPUT_INFO "Store where particles are located in the input file" OFF) @@ -100,6 +104,9 @@ endif() if (VR_USE_BH) set(NBODY_USE_BH ON) endif() +if (VR_USE_EXTRA_DM_PROPERTIES) + set(NBODY_USE_EXTRA_DM_PROPERTIES ON) +endif() if (VR_USE_EXTRA_INPUT_INFO) set(NBODY_USE_EXTRA_INPUT_INFO ON) endif() @@ -159,6 +166,7 @@ vr_option_defines(GADGET_BH_INFO EXTRABHINFO) vr_option_defines(USE_GAS GASON) vr_option_defines(USE_STAR STARON) vr_option_defines(USE_BH BHON) +vr_option_defines(USE_EXTRA_DM_PROPERTIES EXTRADMON) vr_option_defines(NO_MASS NOMASS) vr_option_defines(USE_EXTRA_INPUT_INFO 
EXTRAINPUTINFO) vr_option_defines(USE_EXTRA_FOF_INFO EXTRAFOFINFO) @@ -189,16 +197,33 @@ macro(find_hdf5) if (HDF5_ROOT) set(ENV{HDF5_ROOT} ${HDF5_ROOT}) endif() - #find_package(HDF5 COMPONENTS CXX) find_package(HDF5 COMPONENTS C) if (HDF5_FOUND) + # list(APPEND VR_INCLUDE_DIRS ${HDF5_INCLUDE_DIRS}) list(APPEND VR_LIBS ${HDF5_LIBRARIES}) list(APPEND VR_DEFINES USEHDF) set(VR_HAS_HDF5 Yes) - endif() + #check if parallel hdf present + if (HDF5_IS_PARALLEL AND VR_HAS_MPI AND VR_ALLOWPARALLELHDF5) + set (ENV{HDF5_PREFER_PARALLEL} true) + set(VR_HAS_PARALLEL_HDF5 Yes) + list(APPEND VR_DEFINES USEPARALLELHDF) + if (HDF5_VERSION VERSION_GREATER "1.10.0" AND VR_ALLOWCOMPRESSIONPARALLELHDF5) + set(VR_HAS_COMPRESSED_HDF5 Yes) + list(APPEND VR_DEFINES USEHDFCOMPRESSION) + list(APPEND VR_DEFINES PARALLELCOMPRESSIONACTIVE) + endif() + else() + if (VR_ALLOWCOMPRESSIONHDF5) + set(VR_HAS_COMPRESSED_HDF5 Yes) + list(APPEND VR_DEFINES USEHDFCOMPRESSION) + endif() + endif() + endif() endmacro() + # # How we find MPI and set it up # @@ -248,24 +273,27 @@ endmacro() ensure_git_submodules() find_gsl() +set(VR_HAS_MPI No) +if (VR_MPI) + find_mpi() +endif() + set(VR_HAS_HDF5 No) +set(VR_HAS_COMPRESSED_HDF5 No) +set(VR_HAS_PARALLEL_HDF5 No) if (VR_HDF5) find_hdf5() endif() -set(VR_HAS_MPI No) -if (VR_MPI) - find_mpi() -endif() # This provides us with the nbodylib library # We need to add it unless it was already added by somebody else if (NOT TARGET nbodylib) add_subdirectory(NBodylib) - if (NBODYLIB_VERSION VERSION_LESS "1.16") + if (NBODYLIB_VERSION VERSION_LESS "1.23") message(FATAL_ERROR "NBodyLib version ${NBODYLIB_VERSION} unsupported, - VELOCIraptor requires >= 1.16, try running git submodule update --recursive --remote") + VELOCIraptor requires >= 1.23, try running git submodule update --recursive --remote") endif() list(INSERT VR_DOC_DIRS 0 ${NBODYLIB_DOC_DIRS}) endif() @@ -309,7 +337,12 @@ endmacro() message("\nVELOCIraptor successfully configured with the following settings:") 
vr_report("File formats" "HDF5" HDF5 + "Compressed HDF5" COMPRESSED_HDF5 + "Parallel HDF5" PARALLEL_HDF5 "nchilada" XDR) +if (VR_HAS_COMPRESSED_HDF5 AND VR_HAS_PARALLEL_HDF5) + message("\n WARNING: Parallel Compression HDF5 active, use with caution as it is unstable!\n") +endif() vr_report("Precision-specifics" "Long Integers" LONG_INT "Single precision floats" SINGLE_PRECISION) @@ -328,6 +361,7 @@ vr_report("Particle-specifics" "Activate gas (& associated physics, properties calculated)" USE_GAS "Activate stars (& associated physics, properties calculated)" USE_STAR "Activate black holes (& associated physics, properties calculated)" USE_BH + "Activate extra dark matter properties (& associated properties)" USE_EXTRA_DM_PROPERTIES "Mass not stored (for uniform N-Body sims, reduce mem footprint)" NO_MASS "Large memory KDTree to handle > max 32-bit integer entries per tree" USE_LARGE_KDTREE ) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..df7b5e7f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +FROM ubuntu:18.04 + +# DYNAMIC BRANCH CAN BE OVERWRITTEN DURING RUNTIME +ENV BRANCH=feature/threadpool-struct + +# NEEDS TO BE UPDATED FOR SPECIFIC VERSIONS +RUN apt update && \ + apt install -y g++ libomp-dev libgsl-dev libhdf5-serial-dev git cmake + +WORKDIR /home/ubuntu/ + +# INITIALISE PROJECT DIRECTORY +RUN git clone https://github.com/pelahi/VELOCIraptor-STF.git && \ + cd VELOCIraptor-STF && git checkout ${BRANCH} && git submodule update --init --recursive + +WORKDIR /home/ubuntu/VELOCIraptor-STF + +# BUILD BINARY +RUN mkdir build && cd build && cmake .. 
&& make all diff --git a/NBodylib b/NBodylib index 6e9d2095..cfaf0bc8 160000 --- a/NBodylib +++ b/NBodylib @@ -1 +1 @@ -Subproject commit 6e9d209510f344165896e6662bed40174fa68977 +Subproject commit cfaf0bc8f351d8de4a481a5a2e6296602c04c739 diff --git a/README.md b/README.md index b955dbba..1e3afb68 100644 --- a/README.md +++ b/README.md @@ -46,13 +46,16 @@ ___________.__ .___ ================================================================================================ ## Content - (for more information type make doc in main dir and in NBodylib dir and - see documents in the doc directory) + +This is brief description of the package. For details please see online documentation at +[readthedocs](https://velociraptor-stf.readthedocs.io/) + +The repo contains the following directories src/ contains main source code for the algorithm doc/ contains Doxygen generated latex and html file of code examples/ contains examples of configuration files and how to run the code - NBodylib/ submodule:contains library of objects and routines used by algorithm + NBodylib/ submodule: contains library of objects and routines used by algorithm tools/ submodule: contains python tools of manipulating/reading output @@ -60,70 +63,74 @@ ___________.__ .___ ## Compiling (see documentation for more information) - VELOCIraptor uses CMake as its build tool. cmake is used to perform system-level checks, - like looking for libraries and setting up the rules for the build, and then generates the - actual build scripts in one of the supported build systems. Among other things, cmake supports - out-of-tree builds (useful to keep more than one build with different settings, and to avoid - cluttering the original source code directories) and several build system, like make and - ninja files. +VELOCIraptor uses CMake as its build tool. 
cmake is used to perform system-level checks, +like looking for libraries and setting up the rules for the build, and then generates the +actual build scripts in one of the supported build systems. Among other things, cmake supports +out-of-tree builds (useful to keep more than one build with different settings, and to avoid +cluttering the original source code directories) and several build system, like make and +ninja files. - VELOCIraptor uses submodules so if you have a fresh clone you can use +VELOCIraptor uses submodules so if you have a fresh clone you can use git submodule update --init --recursive - to update the submodules use +to update the submodules use git submodule update --recursive --remote - Note that cmake will init the submodules by default +Note that cmake will init the submodules by default - The simplest way of building is, standing on the root your repository, run cmake to produce - Makefiles and then compile with these steps: +The simplest way of building is, standing on the root your repository, run cmake to produce +Makefiles and then compile with these steps: mkdir build cd build cmake .. # By default will generate Makefiles make all - There are a variety of options that can be invoked - and these can be viewed using +There are a variety of options that can be invoked and these can be viewed using + cmake -LH - (though this only works after having run cmake at least once) - Although documentation is present on the readthedocs site, extra documentation can be produced - by typing +(though this only works after having run cmake at least once) + +Although documentation is present on the readthedocs site, extra documentation can be produced +by typing + make doc - which will produce html and latex documents using Doxygen. This will be located in - doc/html/index.html - and - doc/latex/refman.tex - Note that VELOCIraptor and all variants do not support non-Unix environments. (Mac OS X is fine; Windows is not). 
+which will produce html and latex documents using Doxygen. This will be located in +doc/html/index.html and doc/latex/refman.tex + +Note that VELOCIraptor and all variants do not support non-Unix environments. (Mac OS X is fine; Windows is not). ================================================================================================ ## Running (see documentation for more information) - Running is as simple as +Running is as simple as + ./bin/stf -i input -s nsnaportype -C configfile - a sample of a configuation file is in examples - for mpi enabled executable + +a sample of a configuration file is in examples. +For mpi enabled executable + mpirun -np mutipleoftwo ./bin/stf - Note that at the moment, mpirun assumes that a single structure can fit onto the shared - memory local to the mpi thread. If larger haloes are to be analyzed, it is suggested that - the iSingleHalo option be set to 1, and the analysis is done on a shared memory machine - with enough memory. A more complete version capable of handling large structures across - mpi domains that are then searched for substructures is in the works. + +Note that at the moment, mpirun assumes that a single structure can fit onto the shared +memory local to the mpi thread. If larger haloes are to be analyzed, it is suggested that +the iSingleHalo option be set to 1, and the analysis is done on a shared memory machine +with enough memory. A more complete version capable of handling large structures across +mpi domains that are then searched for substructures is in the works. ## Outputs - The code will produce several files related to the configuration options (.configuration) - input data (.siminfo), units information based on the input and the configuration options (.units) - and several files containing information about the structures identified. These files can be split - into separate files containing field objects (halos) and internal structures (subhalos) if desired - (set by configuration option).
The files can be in several formats - (ascii, binary [not recommended], HDF, ADIOS [in works, alpha]). - These files are +The code will produce several files related to the configuration options (.configuration) +input data (.siminfo), units information based on the input and the configuration options (.units) +and several files containing information about the structures identified. These files can be split +into separate files containing field objects (halos) and internal structures (subhalos) if desired +(set by configuration option). The files can be in several formats: ascii, binary (not recommended), HDF5. +These files are i) Properties File (.properties) Contains a variety of properties calculate for each structure identified and also contains @@ -143,34 +150,32 @@ ___________.__ .___ Contains the substructure hierarchy information, such as the hostID (which is -1 if it is a field structure) an objects ID, number of direct substructures. - The code can also output a simple list which is particle id ordered that simply has the (sub)halo of a paritcle - (and is zero if particle doesn't belong to a list.) These outputs are outname.fof.grp. Note that the fof.grp - format is collected from all MPI threads and is only ascii output. +The code can also output a simple list which is particle id ordered that simply has the (sub)halo of a particle +(and is zero if particle doesn't belong to a list.) These outputs are outname.fof.grp. Note that the fof.grp +format is collected from all MPI threads and is only ascii output. ================================================================================================ ## Altering IO for other file types (see documentation for more information) - Naturally, not all simulations will be in the io formats already written. An example of - several implemented io routines are in the src directory. The routine needs to load all - the appropriate date into a Particle array.
+Naturally, not all simulations will be in the io formats already written. An example of +several implemented io routines are in the src directory. The routine needs to load all +the appropriate date into a Particle array. - Currently VELOCIraptor can read Gadget (1,2), HDF, RAMSES, TIPSY, and Nchilada (alpha) +Currently VELOCIraptor can read Gadget (1,2), HDF, RAMSES, TIPSY, and Nchilada (alpha) ================================================================================================ ## Associated analysis: - TreeFrog (Fomerly Halotree): - - This code is a separate repo +TreeFrog (Fomerly Halotree): This code is a separate repo git clone https://github.com/pelahi/TreeFrog.git - but is useful for checking halo catalogs, particularly the - examples/catalogcomparisontolerancecheck.py code that runs TreeFrog to compare two catalogs. - The goal of this code is as a test to see if a new catalog produced by velociraptor matches - a reference catalog. +but is useful for checking halo catalogs, particularly the +examples/catalogcomparisontolerancecheck.py code that runs TreeFrog to compare two catalogs. +The goal of this code is as a test to see if a new catalog produced by velociraptor matches +a reference catalog. ================================================================================================ diff --git a/VERSION b/VERSION index f42aadee..ec8bede2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.44 +1.50 diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 00000000..cbece05e --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SPHINXPROJ = VELOCIraptor +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/_static/theme_overrides.css b/doc/_static/theme_overrides.css new file mode 100644 index 00000000..bd8163fb --- /dev/null +++ b/doc/_static/theme_overrides.css @@ -0,0 +1,14 @@ +/* override table width restrictions */ +@media screen and (min-width: 1024px) { + + .wy-table-responsive table td { + /* !important prevents the common CSS stylesheets from overriding + this as on RTD they are loaded after this stylesheet */ + white-space: normal !important; + } + + .wy-table-responsive { + overflow: visible !important; + } +} + diff --git a/doc/conf.py b/doc/conf.py index f559b022..e12a33d7 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -17,7 +17,9 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # import os, sys, subprocess -# sys.path.insert(0, os.path.abspath('.')) +rst_prolog = ''' +.. |vr| replace:: **VELOCIraptor** +''' # Run doxygen if we're in RTD to generate the XML documentation from C++ read_the_docs_build = os.environ.get('READTHEDOCS', None) == 'True' @@ -28,7 +30,7 @@ # If your documentation needs a minimal Sphinx version, state it here. # -# needs_sphinx = '1.0' +needs_sphinx = '1.8' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -101,7 +103,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. 
For a list of options available for each theme, see the @@ -112,7 +114,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +# html_static_path = ['_static'] # -- Options for HTMLHelp output ------------------------------------------ @@ -170,3 +172,12 @@ author, 'VELOCIraptor', 'One line description of project.', 'Miscellaneous'), ] + +#for html fix table sizes +html_static_path = ['_static'] + +html_context = { + 'css_files': [ + '_static/theme_overrides.css', # override wide tables in RTD theme + ], + } diff --git a/doc/dev.rst b/doc/dev.rst new file mode 100644 index 00000000..0b4bfaec --- /dev/null +++ b/doc/dev.rst @@ -0,0 +1,12 @@ +.. _dev: + +Developing |vr| +################################################### + +|vr| is freely available from `github `_. +It is in active development, with additions for hydrodynamical inputs, extra inputs and functionality being implemented. +The code can also be called as a library for on-the-fly halo finding integration +into any code. Currently there are hooks for `SWIFTSIM `_. + +Integration into N-Body/Hydro +============================= diff --git a/doc/getting.rst b/doc/getting.rst index 249c873b..6e244205 100644 --- a/doc/getting.rst +++ b/doc/getting.rst @@ -1,15 +1,15 @@ .. _getting: -Getting **VELOCIraptor** +Getting |vr| ######################## **VELOCIraptor** is currently hosted in `GitHub `_. -To get a copy you can clone the repository -:: - git clone https://github.com/pelahi/VELOCIraptor-STF +To get a copy you can clone the repository:: + + git clone https://github.com/pelahi/VELOCIraptor-STF **VELOCIraptor**'s compilation system is based on `cmake `_.
``cmake`` will -check that you have a proper compiler (anything supporting C++14 or later should do), +check that you have a proper compiler (anything supporting C++11 or later should do), and scan the system for all required dependencies. To compile **VELOCIraptor** run (assuming you are inside the ``VELOCIraptor-STF/`` directory already):: @@ -84,6 +84,8 @@ These can be passed to ``cmake`` * Enable input/output formats * For HDF | ``VR_HDF5``: boolean on whether to include HDF support + | ``VR_ALLOWPARALLELHDF5``: boolean on whether to allow for parallel HDF support (if available) + | ``VR_ALLOWCOMPRESSIONPARALLELHDF5``: boolean on whether to allow for compression parallel HDF support (THIS IS UNSTABLE, USE WITH CAUTION) | ``HDF5_ROOT``: specify a local directory containing HDF library. * for XDR (nchilada) input | ``VR_XDR``: boolean on whether to include XDR support @@ -131,13 +133,15 @@ These can be passed to ``cmake`` * only calculate local density distribution for particles residing in field objects (but using all particles to estimate quantity). Default. ``VR_STRUCTURE_DEN=ON`` - * or just use particles inside field objects, reducing cpu cycles but will bias estimates for particle in outer region of field structures, overrides STRUCTUREDEN | + * or just use particles inside field objects, reducing cpu cycles but will bias estimates for particle in outer region of field structures, overrides STRUCTUREDEN ``VR_HALO_DEN=ON`` * flag useful for zoom simulations with a high resolution region ``VR_ZOOM_SIM=ON`` .. topic:: Executable flags + * Produce SWIFTSIM compatible library (executable still produced but does simply returns warning) - ``VR_USE_SWIFT_INTERFACE=ON`` + | ``VR_USE_SWIFT_INTERFACE=ON`` + | ``CMAKE_CXX_FLAGS=-fPIC`` * Enable debugging ``DEBUG=ON`` diff --git a/doc/index.rst b/doc/index.rst index ebeb81b3..65e317c9 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -2,37 +2,90 @@ sphinx-quickstart on Mon Jul 31 10:13:40 2017.
You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. - **VELOCIraptor** is a C++ halo finder using MPI and OpenMP APIs. - It comes with a halo merger tree builder, **TreeFrog** (also C++ MPI+OpenMP) - and several associated analysis tools in python, example configuration files - and analysis python scripts (and sample jupyter notebooks). -Welcome to **VELOCIraptor**'s documentation! -############################################# -**VELOCIraptor** is a C++ halo finder using MPI and OpenMP APIs. -The repository also contains several associated analysis tools in python, example configuration files -and analysis python scripts (and sample jupyter notebooks). +VELOCIraptor +============ +.. image:: velociraptoricon.png + :scale: 100 % + :align: left + +|vr| is a C++ halo finder using MPI and OpenMP APIs. +The repository also contains several associated analysis tools in python, +example configuration files and analysis python scripts (and sample jupyter notebooks). +The code can also be compiled as a library for on-the-fly halo finding within an +N-body/hydrodynamical code. Currently integration is limited to `SWIFTSIM `_ +but extensions are in the works for other codes. + +There is an associated halo merger tree code `TreeFrog `_ (also C++ MPI+OpenMP). + +If you are using |vr| please cite the following paper, which describes the code in full:: + + @ARTICLE{doi:10.1017/pasa.2019.12, + author = {{Elahi}, Pascal J. and {Ca{\~n}as}, Rodrigo and {Poulton}, Rhys J.~J. and {Tobar}, Rodrigo J. and {Willis}, James S. and {Lagos}, Claudia del P. 
and {Power}, Chris and {Robotham}, Aaron S.~G.}, + title = {Hunting for galaxies and halos in simulations with VELOCIraptor}, + journal = {\pasa}, + keywords = {dark matter, galaxies: evolution, galaxies: halos, methods: numerical, Astrophysics -Cosmology and Nongalactic Astrophysics}, + year = {2019}, + month = {Jan}, + volume = {36}, + eid = {e021}, + pages = {e021}, + doi = {10.1017/pasa.2019.12}, + archivePrefix = {arXiv}, + eprint = {1902.01010}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2019PASA...36...21E}, + } + +If using |vr| for galaxy finding, please also cite:: + + @ARTICLE{doi:10.1093/mnras/sty2725, + author = {{Ca{\~n}as}, Rodrigo and {Elahi}, Pascal J. and {Welker}, Charlotte and {del P Lagos}, Claudia and {Power}, Chris and {Dubois}, Yohan and {Pichon}, Christophe}, + title = {Introducing a new, robust galaxy-finder algorithm for simulations}, + journal = {\mnras}, + keywords = {methods: numerical, galaxies: evolution, dark matter, cosmology: theory, Astrophysics - Astrophysics of Galaxies}, + year = {2019}, + month = {Jan}, + volume = {482}, + number = {2}, + pages = {2039-2064}, + doi = {10.1093/mnras/sty2725}, + archivePrefix = {arXiv}, + eprint = {1806.11417}, + primaryClass = {astro-ph.GA}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2019MNRAS.482.2039C}, + } + +The original idea, which also discusses the identification of tidal debris is in:: + + @ARTICLE{doi:10.1111/j.1365-2966.2011.19485.x, + author = {{Elahi}, Pascal J. and {Thacker}, Robert J. 
and {Widrow}, Lawrence M.}, + title = {Peaks above the Maxwellian Sea: a new approach to finding substructures in N-body haloes}, + journal = {\mnras}, + keywords = {methods: data analysis, methods: numerical, galaxies: haloes, galaxies: structure, dark matter, Astrophysics - Cosmology and Extragalactic Astrophysics}, + year = {2011}, + month = {Nov}, + volume = {418}, + number = {1}, + pages = {320-335}, + doi = {10.1111/j.1365-2966.2011.19485.x}, + archivePrefix = {arXiv}, + eprint = {1107.4289}, + primaryClass = {astro-ph.CO}, + adsurl = {https://ui.adsabs.harvard.edu/abs/2011MNRAS.418..320E}, + } + +An online entry can also be found +at `NASA's ADS service `_. .. toctree:: :maxdepth: 2 - :numbered: - :titlesonly: - :glob: - :hidden: :caption: Contents: - getting.rst - usage.rst - output.rst - + getting + usage + output + dev -Sections -======== -* :ref:`getting` : How to compile the code -* :ref:`usage` : How to run the code -* :ref:`output` : How to use output data -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` +.. _pascaljelahi@gmail.com: mailto:pascaljelahi@gmail.com diff --git a/doc/output.rst b/doc/output.rst index 9a652e99..18bfdfd0 100644 --- a/doc/output.rst +++ b/doc/output.rst @@ -1,11 +1,11 @@ .. _output: -Understanding and Analysing **VELOCIraptor** Output +Understanding and Analysing |vr| Output ################################################### -**VELOCIraptor** produces several different types of output files. +|vr| produces several different types of output files. -(with the mpi threads appending their rank to the end of the file name): +(with the mpi threads appending their rank to the end of the file name unless not compiled with MPI or if Parallel HDF5 is used.): .. topic:: Standard files @@ -14,21 +14,51 @@ Understanding and Analysing **VELOCIraptor** Output * ``.catalog_particles``: a file containing a list of particle IDs of those in structures. Information contained in ``.catalog_groups`` is used to parse this data. 
* ``.catalog_particles.unbound``: similar to ``catalog_particles`` but lists particles in structures but are formally unbound. Information contained in ``.catalog_groups`` is used to parse this data. +.. topic:: Extra files + + * ``.catalog_parttypes``: a file similar to ``.catalog_particles`` but containing a list of particle types of those in structures. Information contained in ``.catalog_groups`` is used to parse this data. Produced if multiple particle types are processed by |vr|. + * ``.catalog_parttypes.unbound``: similar to ``catalog_parttypes`` but lists particles in structures but are formally unbound. + * ``.profiles`` : a file containing the radial profiles of groups. Produced if radial profiles are requested. + * ``.catalog_SOlist`` : a file containing a list of particle IDs of particles found within a large Spherical region around Field halos. Produced if a list of particles within SO regions is requested. + Properties ========== -There are a variety of properties calculated for each object found. Some are typical of all halo finders -such as the mass of an object (which can be a halo, subhalo, tidal debris), along with more complex properties -such as the eigenvectors and eigenvalues of the mass distribution defined by the reduced inertia tensor. -The number of properties also varies with the type of run. For hydrodynamic simulations where **VELOCIraptor** -has been compiled to use gas properties and star properties, gas masses, temperatures, etc are also calculated. +There are a variety of properties calculated for each object found. Some are +typical of all halo finders such as the mass of an object (which can be a halo, +subhalo, tidal debris), along with more complex properties such as the +eigenvectors and eigenvalues of the mass distribution defined by the reduced +inertia tensor. The number of properties also varies with the type of run. 
For +hydrodynamic simulations where |vr| has been compiled to use gas, star and black hole +properties, such as masses, temperatures, etc are also calculated. The code +will also calculate properties based on loading specific extra fields associated +with particle types (this interface requires HDF5 input or on the fly invocation +and outputs properties with the same name as the loaded property, see :ref:`usage`). + +We give an almost complete list of properties and the keyword associated with the property (in ASCII and HDF5). +For clarity we list properties in several tables corresponding to + + - :ref:`standardhaloprops`, + - :ref:`gasprops`, + - :ref:`starprops`, + - :ref:`bhprops`, + - :ref:`interloperprops`, + - :ref:`extradmprops`, + +.. _standardhaloprops: + +Standard Properties +------------------- + +This is a list of standard properties that are always calculated unless indicated +otherwise (some require an extra configuration option). -We give an almost complete list of properties and the keyword associate with the property (in ASCII, HDF5 and ADIOS outputs). +.. _standardhalopropstable: +-------------------+-------------------------------------------------------------------------------------------------------+ | Name | Comments | +===================+=======================================================================================================+ -| `ID and Type information` | +| **ID and Type information** | +-------------------+-------------------------------------------------------------------------------------------------------+ | ID | Halo ID. ID = index of halo + 1 + TEMPORALHALOIDVAL * Snapshot_value, | | | giving a temporally unique halo id that can be quickly parsed for an | @@ -37,6 +67,7 @@ We give an almost complete list of properties and the keyword associate with the | ID_mbp | Particle ID of the most bound particle in the group. 
| +-------------------+-------------------------------------------------------------------------------------------------------+ | hostHaloID | ID of the host field halo. If an object is a field halo, this is -1. | ++-------------------+-------------------------------------------------------------------------------------------------------+ | Structuretype | Structure types contain information on how the object was found and at | | | what level in the subhalo hierarchy. Field halos are 10. Substructures | | | identified using the local velocity field are type 10+10=20, | @@ -46,7 +77,7 @@ We give an almost complete list of properties and the keyword associate with the +-------------------+-------------------------------------------------------------------------------------------------------+ | numSubStruct | Number of substructures. Subhalos can have subsubhalos. | +-------------------+-------------------------------------------------------------------------------------------------------+ -| `Mass and radius properties: All properties are in output units.` | +| **Mass and radius properties**: `All properties are in output units.` | +-------------------+-------------------------------------------------------------------------------------------------------+ | npart | Number of particles belonging exclusively to the object. | +-------------------+-------------------------------------------------------------------------------------------------------+ @@ -87,8 +118,30 @@ We give an almost complete list of properties and the keyword associate with the +-------------------+-------------------------------------------------------------------------------------------------------+ | R_HalfMass | Half mass radius based on the Mass_tot. 
| +-------------------+-------------------------------------------------------------------------------------------------------+ -| `Position and Velocity : All properties are in output units.` | -| `Objects need not have positions periodically wrapped.` | +| **Angular Momentum in Spherical Overdensity**: `Calculate if extra halo properties are requested` | +| `by setting the config option ` **Extensive_halo_properties_output=1** | +| `Also calculates inclusive spherical overdensity and also exclusive to halo as _exclusive.` | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Lx_200c | :math:`x` component of the total angular momentum all the mass within :math:`R_{200\rho_c}`. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Ly_200c | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Lz_200c | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Lx_200m | :math:`x` component of the total angular momentum all the mass within :math:`R_{200\rho_m}`. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Ly_200m | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Lz_200m | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Lx_BN98 | :math:`x` component of the total angular momentum all the mass within :math:`R_{BN98}`. 
| ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Ly_BN98 | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Lz_BN98 | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Position and Velocity**: `All properties are in output units.` | +| `Objects have positions periodically wrapped.` | +-------------------+-------------------------------------------------------------------------------------------------------+ | Xc | :math:`x` coordinate of centre-of-mass. | +-------------------+-------------------------------------------------------------------------------------------------------+ @@ -102,6 +155,12 @@ We give an almost complete list of properties and the keyword associate with the +-------------------+-------------------------------------------------------------------------------------------------------+ | Zcmbp | |ditto| | +-------------------+-------------------------------------------------------------------------------------------------------+ +| Xcminpot | :math:`x` coordinate of the minimum potential. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Ycminpot | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Zcminpot | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ | VXc | :math:`v_x` velocity of centre-of-mass. 
| +-------------------+-------------------------------------------------------------------------------------------------------+ | VYc | |ditto| | @@ -114,7 +173,13 @@ We give an almost complete list of properties and the keyword associate with the +-------------------+-------------------------------------------------------------------------------------------------------+ | VZcmbp | |ditto| | +-------------------+-------------------------------------------------------------------------------------------------------+ -| `Velocity and Angular Momentum : All properties are in output units.` | +| VXcminpot | :math:`v_x` velocity of the particle with the minimum potential. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| VYcminpot | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| VZcminpot | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Velocity and Angular Momentum**: `All properties are in output units.` | +-------------------+-------------------------------------------------------------------------------------------------------+ | Vmax | Maximum circular velocity based on particles belonging exclusively to | | | the object, where circular velocities are defined by | @@ -161,7 +226,7 @@ We give an almost complete list of properties and the keyword associate with the | | momentum axis and the second sum is over kinetic energies | | | (see Sales et al (2010)). 
| +-------------------+-------------------------------------------------------------------------------------------------------+ -| `Morphology : All properties are in output units.` | +| **Morphology**: `All properties are in output units.` | +-------------------+-------------------------------------------------------------------------------------------------------+ | | following Prada et al, (2012a) where we solve | | cNFW | Calculated assuming an NFW profile (Navarro, Frenk, & White 1997) | @@ -197,7 +262,7 @@ We give an almost complete list of properties and the keyword associate with the +-------------------+-------------------------------------------------------------------------------------------------------+ | eig_zz | |ditto| | +-------------------+-------------------------------------------------------------------------------------------------------+ -| `Energy: All properties are in output units.` | +| **Energy**: `All properties are in output units.` | +-------------------+-------------------------------------------------------------------------------------------------------+ | Ekin | The total kinetic energy, :math:`\sum T_i`. | +-------------------+-------------------------------------------------------------------------------------------------------+ @@ -205,7 +270,7 @@ We give an almost complete list of properties and the keyword associate with the +-------------------+-------------------------------------------------------------------------------------------------------+ | Efrac | The fraction of particles that are formally bound (i.e., have :math:`W_i+T_i<0`). | +-------------------+-------------------------------------------------------------------------------------------------------+ -| Quantities within :math:`R(V_{\rm max})`. Variety of properties based on particles within :math:`r\leq R(V_{\rm max})`. | +| **Quantities within** :math:`R(V_{\rm max})`: Properties based on particles within :math:`r\leq R(V_{\rm max})`. 
| +-------------------+-------------------------------------------------------------------------------------------------------+ | RVmax_sigV | Dispersion, like sigV for :math:`r\leq R(V_{\rm max})`. | +-------------------+-------------------------------------------------------------------------------------------------------+ @@ -257,8 +322,63 @@ We give an almost complete list of properties and the keyword associate with the +-------------------+-------------------------------------------------------------------------------------------------------+ | RVmax_eig_zz | |ditto| | +-------------------+-------------------------------------------------------------------------------------------------------+ -| `Gas quantities: Bulk properties of gas particles/tracers when compiled to process gas properties. Properties unique to` | -| `gas are T_gas and SFR_gas.` | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Additional Spherical Overdensity Mass/radius**: `If extra spherical overdensity values are requested via` | +| Overdensity_values_in_critical_density `config option, code calculates masses/radii/angular momentum following` | +| `a naming convention of` SO_property_rhocrivalue_rhocrit `where rhocritvalue is the overdensity value in units of the` | +| `critical density, e.g.,` SO_mass_100_rhocrit. | +| `The code will also calculate quantities based on particle type: gas, star, interloper, following` | +| SO_property_parttype_rhocrivalue_rhocrit | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| mass | Mass enclosing a average density of the associated SO value. 
| ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Lx | Angular momentum of enclosed mass in x-direction | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Ly | |ditto| in y-direction | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Lz | |ditto| in z-direction | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Aperture quantities**: `If aperture quantities are requested code calculates a variety of properties` | +| `within spherical aperture in pkpc.` | +| `Naming convention is` Aperture_quantity_radiusvalue_kpc, `or for a specific` `particle type` | +| Aperture_quantity_parttype_radiusvalue_kpc, `e.g.` Aperture_mass_10_kpc. | +| `Particle types where individual quantities are calculated: gas, star, bh, interloper.` | +| `We list the property names here.` | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| mass | Total mass in aperture. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| npart | Total number of particles. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| rhalfmass | Radius enclosing half the mass within the aperture. 
| ++-------------------+-------------------------------------------------------------------------------------------------------+ +| veldisp | Velocity dispersion | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Projected aperture quantities**: `Similar to aperture quantities but for 3 different projections based on particles` | +| `within a projected radius in pkpc. Naming convention is` Projected_aperture_i_quantity_radiusvalue_kpc, `where` | +| `i is from 0, 1, 2 for a x,y,z projection.` | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| mass | Total mass in aperture. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| rhalfmass | Radius enclosing half the mass within the aperture. | ++-------------------+-------------------------------------------------------------------------------------------------------+ + +.. _gasprops: + +Gas Properties +-------------- + +This is a list of gas properties that are calculated if code is compiled with +**USE_GAS**. Some require an extra configuration option. Also, Spherical overdensity +masses + angular momentum, aperture properties, projected aperture properties are calculated +for gas particles along with some extra gas only properties. + +.. _gaspropstable: + ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Name | Comments | ++===================+=======================================================================================================+ +| **Gas quantities**: `Bulk properties of gas particles/tracers when compiled to process gas properties. 
Properties unique` | +| `to gas are T_gas and SFR_gas.` | +-------------------+-------------------------------------------------------------------------------------------------------+ | n_gas | Number of gas particles. | +-------------------+-------------------------------------------------------------------------------------------------------+ @@ -338,20 +458,137 @@ We give an almost complete list of properties and the keyword associate with the +-------------------+-------------------------------------------------------------------------------------------------------+ | Zmet_gas | Average metallicity of gas. | +-------------------+-------------------------------------------------------------------------------------------------------+ -| SFR_gas | Average star formation rate of gas. | +| SFR_gas | Total star formation rate of gas. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Star Forming (sf)/Non Star Forming (nsf) Gas quantities**: `Similar to gas properties but split by sf/nsf gas.` | +| `For brevity, we list only quantities unique to sf, as the nsf gas is similar but with _nsf naming convention.` | +| `Only calculated if` **USE_GAS** `and` **USE_STAR** `flags on.` | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| M_gas_sf | Total gas mass :math:`M_{\rm gas}`. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| R_HalfMass_gas_sf | Half mass radii. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| sigV_gas_sf | Velocity dispersion of the gas. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Lx_gas_sf | Like Lx_gas but for star forming gas. 
| ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Ly_gas_sf | |ditto| | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Lz_gas_sf | |ditto| | +-------------------+-------------------------------------------------------------------------------------------------------+ -| `Star quantities. Bulk properties of star particles when compiled to process star properties. Similar to gas properties` | +| Krot_gas_sf | Like Krot_gas but for star forming gas | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| T_gas_sf | Average temperature of star forming gas. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Zmet_gas_sf | Average metallicity of star forming gas. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Aperture quantities**: `If aperture quantities are requested code calculates a variety of properties` | +| `within spherical aperture in pkpc.` | +| `Naming convention is` Aperture_quantity_gas_radiusvalue_kpc. | +| `We list the additional properties calculated for gas here (which are in addition to mass,rhalfmass, etc).` | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Zmet | Average gas metallicity in aperture. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| SFR | Total star formation rate of gas in aperture. 
| ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Projected aperture quantities**: `Similar to aperture quantitites but for 3 different projections based on particles` | +| `within a projected radius in pkpc. Naming convention is` Projected_aperture_i_quantity_gas_radiusvalue_kpc, `where` | +| `i is from 0, 1, 2 for a x,y,z projection.` | +| `We list the additional properties calculated for gas here (which are in addition to mass,rhalfmass, etc).` | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Zmet | Average gas metallicity in projected aperture. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| SFR | Total star formation rate of gas in projected aperture. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Extra Gas Properties**: `If extra gas fields are loaded by listing them using` Gas_internal_property_names | +| Gas_chemistry_names `and/or` Gas_chemistry_production_names. `The output will follow the following naming convention:` | +| nameoffield_gas `e.g.``, AlphaElements_gas. | +| `Also requires that code is compiled with the` **USE_GAS** `flag` | +| `As an example we show the fields if` Gas_internal_property_names=Pressure,MetalFromSNIa | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Pressure_gas | Mass weighted average of this field. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| MetalFromSNIa_gas | Mass weighted average of this field. 
| ++-------------------+-------------------------------------------------------------------------------------------------------+ + +.. _starprops: + +Star Properties +--------------- + +This is a list of stellar properties that are calculated if code is compiled with +**USE_STAR**. Some require an extra configuration option. + +.. _starpropstable: + ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Name | Comments | ++===================+=======================================================================================================+ +| **Star quantities**: `Bulk stellar properties when compiled to process star properties. Similar to gas properties` | | `but has _star instead of _ gas. For brevity, we list only quantities unique to star particles.` | +-------------------+-------------------------------------------------------------------------------------------------------+ -| tage_gas | Average stellar age. | +| tage_star | Average stellar age. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Aperture quantities**: `If aperture quantities are requested code calculates a variety of properties` | +| `within spherical aperture in pkpc.` | +| `Naming convention is` Aperture_quantity_star_radiusvalue_kpc. | +| `We list the additional properties calculated for star here (which are in addition to mass,rhalfmass, etc).` | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Zmet | Average stellar metallicity in aperture. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Projected aperture quantities**: `Similar to aperture quantitites but for 3 different projections based on particles` | +| `within a projected radius in pkpc. 
Naming convention is` Projected_aperture_i_quantity_star_radiusvalue_kpc, `where` | +| `i is from 0, 1, 2 for a x,y,z projection.` | +| `We list the additional properties calculated for gas here (which are in addition to mass,rhalfmass, etc).` | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Zmet | Average stellar metallicity in projected aperture. | ++-------------------+-------------------------------------------------------------------------------------------------------+ +| **Extra Star Properties**: `Like the extra gas properties but calculated if ` Star_internal_property_names | +| Star_chemistry_names `and/or` Star_chemistry_production_names. | +| `Naming convention is nameoffield_star` | +| `Also requires that code is compiled with the` **USE_STAR** `flag` | ++-------------------+-------------------------------------------------------------------------------------------------------+ + +.. _bhprops: + +Black Hole Properties +--------------------- + +This is a list of black hole properties that are calculated if code is compiled with +**USE_BH**. Some require an extra configuration option. + +.. _bhpropstable: + +-------------------+-------------------------------------------------------------------------------------------------------+ -| `Black hole quantities. Bulk properties of black hole particles when compiled to process black hole properties.` | +| Name | Comments | ++===================+=======================================================================================================+ +| **Black hole quantities**: `Bulk properties of black hole particles when compiled to process black hole properties.` | +-------------------+-------------------------------------------------------------------------------------------------------+ | n_bh | Number of black hole particles. 
| +-------------------+-------------------------------------------------------------------------------------------------------+ | Mass_bh | Total mass of black hole particles. | +-------------------+-------------------------------------------------------------------------------------------------------+ -| `Interloper particles. If analysing multi-resolution simulations, low resolution particles are often treated as` | +| **Extra Black hole Properties**: `Like the extra gas properties but calculated if ` BH_internal_property_names | +| BH_chemistry_names `and/or` BH_chemistry_production_names. | +| `Naming convention is nameoffield_bh` | +| `Also requires that code is compiled with the` **USE_BH** `flag` | ++-------------------+-------------------------------------------------------------------------------------------------------+ + +.. _interloperprops: + +Interloper Properties +--------------------- + +This is a list of interloper DM properties that are calculated if code is compiled with +**ZOOM_SIM**. These properties are based on low resolution particles and can be +used to gauge the level of contamination + +.. _interloperpropstable: + ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Name | Comments | ++===================+=======================================================================================================+ +| **Interloper particles**: `If analysing multi-resolution simulations, low resolution particles are often treated as` | | `contaminants. These are bulk properties of low resolution contaminant particles.` | +-------------------+-------------------------------------------------------------------------------------------------------+ | n_interloper | Number of low resolution, interloper particles. 
| @@ -359,4 +596,24 @@ We give an almost complete list of properties and the keyword associate with the | Mass_interloper | Total mass of low resolution, interloper particles. | +-------------------+-------------------------------------------------------------------------------------------------------+ + +.. _extradmprops: + +Extra DM Properties +------------------- + +This is a list of Extra DM properties that are calculated if code is compiled with +**USE_EXTRADM**. These properties are useful if running on standard dark matter. + +.. _extradmpropstable: + ++-------------------+-------------------------------------------------------------------------------------------------------+ +| Name | Comments | ++===================+=======================================================================================================+ +| **Extra DM Properties**: `Like the extra gas properties but calculated if ` Extra_DM_internal_property_names | +| `has fields specified. Useful for nonstandard dark matter runs, such as annihilating or interacting dark matter.` | +| `Naming convention is nameoffield_extra_dm` | +| `Also requires that code is compiled with the` **USE_EXTRADM** `flag` | ++-------------------+-------------------------------------------------------------------------------------------------------+ + .. |ditto| unicode:: U+03003 .. ditto mark diff --git a/doc/requirements.txt b/doc/requirements.txt index bb74883b..b5b6d447 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,2 +1,2 @@ -Sphinx==1.4 +Sphinx>=1.8 breathe diff --git a/doc/usage.rst b/doc/usage.rst index 2ce7307d..87c519e3 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -32,12 +32,12 @@ The output produced by VELOCIraptor will typically consist of several files cont bulk properties of structures found; particles belonging to these structures; and several additional files containing configuration information. -When running in MPI, currently each mpi thread writes its own output. - -.. 
note:: At the moment, mpirun assumes that a single structure can fit onto the memory local to the mpi thread. If larger field objects (haloes) are to be analyzed such that they are unlikely to fit into local memory, it is suggested another machine be used. Revision is in the works to use the Singlehalo_search option after field halos have been identified. - -.. note:: Certain compilation options rename the executable to reflect compile time options (see :ref:`compileoptions` for a list). Examples are using gas or star particles, which appends `-gas`, `-star`, to the executable name. +When running in MPI, currently each mpi thread writes its own output unless +the code has been compiled with a parallel HDF5 library and HDF5 output is requested. +In that case, a single file is written containing data from all threads for each type +of output requested. +.. note:: At the moment, mpirun assumes that a single structure can fit onto the memory local to the mpi thread. If larger field objects (haloes) are to be analyzed such that they are unlikely to fit into local memory, it is suggested another machine be used. Revision is in the works to use the Singlehalo_search option after field halos have been identified. .. _cmdargs: @@ -79,16 +79,29 @@ A more typical command for a large cosmological simulation might be something li export OMP_NUM_THREADS=4 mpirun -np 64 ./stf -i somehdfbasename -s 128 -I 2 -Z 64 -o output -C configfile.txt > stf.log +.. _swiftintegration: + +Running within swiftsim +----------------------- + +|vr| is also able to be called from within an N-body/Hydrodynamical code as a library. +Currently the code has been integrated into **swiftsim**. Details can be found +in the **swiftsim** `documentation `_. +The key is that the **swiftsim** code's configuration file lists the |vr| configuration file +used to run |vr|. + .. _briefoutput: Output ------ -Here we provide a *brief* description of the standard data products provided by **VELOCIraptor**.
+Here we provide a *brief* description of the standard data products provided by |vr|. For a more detailed discussion and some sample analysis using these data products see :ref:`output`. -When operating in a typical configuration with typical compile time options, the executable (or each mpi thread) -will produce several files (with the mpi threads appending their rank to the end of the file name): +When operating in a typical configuration with typical compile time options, +the executable (or each mpi thread) will produce several files (with the mpi +threads appending their rank to the end of the file name, unless parallel HDF5 output is requested). +The files typically produced are : .. topic:: Output files @@ -97,59 +110,71 @@ will produce several files (with the mpi threads appending their rank to the end * ``.catalog_particles``: a file containing a list of particle IDs of those in structures. Information contained in ``.catalog_groups`` is used to parse this data. * ``.catalog_particles.unbound``: similar to ``catalog_particles`` but lists particles in structures but are formally unbound. Information contained in ``.catalog_groups`` is used to parse this data. +.. topic:: Extra output files + + * ``.profiles``: a file containing the radial mass profiles of (sub)halos + * ``.catalog_parttypes``: a file similar to ``.catalog_particles`` but stores particle type instead of particle id. + * ``.catalog_parttypes.unbound``: a file similar to ``.catalog_parttypes`` but for unbound particles. + * ``.extendedinfo``: a file containing extra information on where particles are located in the input file for quick extraction from said input file of particles within groups. Still in alpha + * ``.catalog_SOlist``: a file containing particle IDs within the spherical overdensity region of halos. + ..
_configoptions: Configuration File ------------------ An example configuration file can be found the examples directory within the repository -(see for instance :download:`sample <../examples/sample.cfg>`). This sample file lists -all the options. *Only the keywords listed here will be used, all other words/characters -are ignored*. One can check the options used by examining **foo.configuration**, where **foo** is -your base output filename. +(see for instance :download:`sample <../examples/sample_dmcosmological_run.cfg>`). +This sample file lists all the options. *Only the keywords listed here will be used, all other words/characters are ignored*. One can check the options used by examining **foo.configuration**, where **foo** is your base output filename. + +We suggest the following files as a basis: + * :download:`N-body simulations configuration <../examples/sample_dmcosmological_run.cfg>` + * :download:`Hydro simulations configuration <../examples/sample_hydrocosmological_run.cfg>` + * :download:`SWIFT N-body simulation configuration <../examples/sample_swiftdm_3dfof_subhalo.cfg>` + * :download:`SWIFT Hydro simulation configuration <../examples/sample_swifthydro_3dfof_subhalo_extra_properties.cfg>` .. warning:: Note that if misspell a keyword it will not be used. .. warning:: Since this file is always written **DO NOT** name your input configuration file **foo.configuration**. - There are numerous key words that can be passed. Here we list them, grouped into several categories: -:ref:`Outputs `, -:ref:`Inputs `, -:ref:`Parameters related to type of search `, -:ref:`Field search `, -:ref:`Substructure search `, -:ref:`Local Velocity Density `, -:ref:`Core search `, -:ref:`Unbinding `, -:ref:`Units `, -:ref:`Cosmology `, -:ref:`Miscellaneous `, -:ref:`MPI `. -.. _config_output: + - :ref:`IO ` -.. topic:: Output related + - :ref:`Inputs ` + - :ref:`Outputs ` - ``Output = filename`` - * Output base name.
Overrides the name passed with the command line argument **-o**. Only implemented for completeness. - ``Output_den = filename`` - * A filename for storing the intermediate step of calculating local densities. This is particularly useful if the code is not compiled with **STRUCDEN** & **HALOONLYDEN** (see :ref:`compileoptions`). - ``Separate_output_files = 1/0`` - * Flag indicating whether separate files are written for field and subhalo groups. - ``Write_group_array_file = 1/0`` - * Flag indicating whether to producing a file which lists for every particle the group they belong to. Can be used with **tipsy** format or to tag every particle. - ``Binary_output = 3/2/1/0`` - * Integer indicating whether output is hdf (2), binary (1), ascii (0) or adios (3). HDF and ADIOS formats require external libraries (see :ref:'compileoptions') - ``Extensive_halo_properties_output = 1/0`` - * Flag indicating whether to calculate/output even more halo properties. - ``Extended_output = 1/0`` - * Flag indicating whether produce extended output for quick particle extraction from input catalog of particles in structures - ``Comoving_units = 1/0`` - * Flag indicating whether the properties output is in physical or comoving little h units. + - :ref:`Parameters related to type of search ` + + - :ref:`Field search ` + - :ref:`Substructure search ` + - :ref:`Local Velocity Density ` + - :ref:`Core search ` + + - :ref:`Unbinding ` + - :ref:`Properties ` + - :ref:`Units/Cosmology ` + + - :ref:`Units ` + - :ref:`Cosmology ` + + - :ref:`Parallel ` + + - :ref:`MPI ` + - :ref:`OpenMP ` + + - :ref:`Miscellaneous ` + + +.. _config_io: + +I/O +^^^ + +Input and output related options .. _config_input: -.. topic:: Input related +.. topic:: Input related ``Cosmological_input = 1/0`` * Flag indicating that input simulation is cosmological or not. With cosmological input, a variety of length/velocity scales are set to determine such things as the virial overdensity, linking length.
@@ -157,6 +182,10 @@ There are numerous key words that can be passed. Here we list them, grouped into * Amount of information to read from input file in one go (100000). ``HDF_name_convention =`` * Integer describing HDF dataset naming convection. Currently implemented values can be found in :ref:`subsection_hdfnames`. + ``Input_includes_dm_particle = 1/0`` + * Flag indicating whether file contains dark matter/N-body particles in input file. + ``Input_includes_gas_particle = 1/0`` + * Flag indicating whether file contains gas particles in input file. ``Input_includes_star_particle = 1/0`` * Flag indicating whether file contains star particles in input file. ``Input_includes_bh_particle = 1/0`` @@ -165,16 +194,72 @@ There are numerous key words that can be passed. Here we list them, grouped into * Flag indicating whether file contains wind particles in input file. ``Input_includes_tracer_particle = 1/0`` * Flag indicating whether file contains tracer particles in input file. - ``NSPH_extra_blocks =`` - * Integer inticading the number of extra **SPH** blocks are read in the file if gadget input. - ``NStar_extra_blocks =`` - * Integer inticading the number of extra **star** blocks are read in the file if gadget input. - ``NBH_extra_blocks =`` - * Integer inticading the number of extra **BH** blocks are read in the file if gadget input. + ``Input_includes_extradm_particle = 1/0`` + * Flag indicating whether file contains extra (low resolution) N-body particles in input file from a zoom simulation. + Gas related input + ``Gas_internal_property_names = ,`` + * Comma separated list of strings listing extra gas properties to be read from HDF file for which bulk mean/total properties are calculated for objects. Useful way of passing properties like molecular H2 fraction, etc. + ``Gas_chemistry_names = ,`` + * Comma separated list of strings listing extra chemical properties to be read from HDF file for which bulk mean/total properties are calculated for objects. 
Useful way of passing properties like molecular H2 fraction, etc. + ``Gas_chemistry_production_names = ,`` + * Comma separated list of strings listing extra production channels for metals to be read from HDF file for which bulk mean/total properties are calculated for objects. Useful way of passing properties like molecular H2 fraction, etc. + Star related input + ``Star_internal_property_names = ,`` + * Comma separated list of strings listing extra star properties to be read from HDF file for which bulk mean/total properties are calculated for objects. Useful way of passing properties like molecular H2 fraction, etc. + ``Star_chemistry_names = ,`` + * Comma separated list of strings listing extra chemical properties to be read from HDF file for which bulk mean/total properties are calculated for objects. Useful way of passing properties like molecular H2 fraction, etc. + ``Star_chemistry_production_names = ,`` + * Comma separated list of strings listing extra production channels for metals to be read from HDF file for which bulk mean/total properties are calculated for objects. Useful way of passing properties like molecular H2 fraction, etc. + Black hole related input + ``BH_internal_property_names = ,`` + * Comma separated list of strings listing extra black properties to be read from HDF file for which bulk mean/total properties are calculated for objects. Useful way of passing properties like molecular H2 fraction, etc. + ``BH_chemistry_names = ,`` + * Comma separated list of strings listing extra chemical properties to be read from HDF file for which bulk mean/total properties are calculated for objects. Useful way of passing properties like molecular H2 fraction, etc. + ``BH_chemistry_production_names = ,`` + * Comma separated list of strings listing extra production channels for metals to be read from HDF file for which bulk mean/total properties are calculated for objects. Useful way of passing properties like molecular H2 fraction, etc. 
+ Extra DM related input + ``Extra_dm_internal_property_names = ,`` + * Comma separated list of strings listing extra dm properties to be read from HDF file for which bulk mean/total properties are calculated for objects. Useful for modified dark matter simulations, such as annihilating and self-interactive dark matter. + Gadget related input + ``NSPH_extra_blocks =`` + * Integer indicating the number of extra **SPH** blocks are read in the file if gadget input. + ``NStar_extra_blocks =`` + * Integer indicating the number of extra **star** blocks are read in the file if gadget input. + ``NBH_extra_blocks =`` + * Integer indicating the number of extra **BH** blocks are read in the file if gadget input. -.. _config_search_type: +.. _config_output: -.. topic:: Parameters related to type of search +.. topic:: Output related + + ``Output = filename`` + * Output base name. Overrides the name passed with the command line argument **-o**. Only implemented for completeness. + ``Output_den = filename`` + * A filename for storing the intermediate step of calculating local densities. This is particularly useful if the code is not compiled with **STRUCDEN** & **HALOONLYDEN** (see :ref:`compileoptions`). + ``Separate_output_files = 1/0`` + * Flag indicating whether separate files are written for field and subhalo groups. + ``Write_group_array_file = 1/0`` + * Flag indicating whether to produce a file which lists for every particle the group they belong to. Can be used with **tipsy** format or to tag every particle. + ``Binary_output = 2/1/0`` + * Integer flag indicating type of output. + - **2** self-describing binary format of HDF5. **Recommended**. + - **1** raw binary. + - **0** ASCII.
+ ``Extended_output = 1/0`` + * Flag indicating whether produce extended output for quick particle extraction from input catalog of particles in structures + ``Spherical_overdensity_halo_particle_list_output = 1/0`` + * Flag indicating whether particle IDs identified within the spherical overdensity of field halos is written (to a .catalog_SOlist). Useful if looking at evolution of particles within spherical overdensities. + ``Sort_by_binding_energy = 1/0`` + * Flag indicating whether particle IDs written in .catalog_particles are sorted by binding energy (1) or potential energy (0). + ``No_particle_ID_list_output = 1/0`` + * Flag indicating whether particle IDs written (i.e., write the .catalog_\* files). Default is 1. Particle ID files are necessary for constructing merger trees but if just properties of (sub)halos, then turn off. + +.. _config_search: + +Searching for Structures +^^^^^^^^^^^^^^^^^^^^^^^^ + +Options related to searching for (sub)halos. General search parameters set particles to be search and the overall type of search. ``Particle_search_type = 1/2/3/4`` * An integer describing what types of particles are searched. A full list of options is in :ref:`subsection_searchtypes`. Typical options are: @@ -184,8 +269,8 @@ There are numerous key words that can be passed. Here we list them, grouped into - **4** Gas particles (which are typically defined as type 0 for gadget) are searched ``Baryon_searchflag = 0/1/2`` * An integer indicating gas/stellar search done separately from DM search. + - **2** field search also altered to treat baryons differently, allowing only DM particles to be used as head links (ie link dm-dm, dm-baryon, but not baryon-baryon nor baryon-dm). Then DM substructure search with baryons associated to closest DM particle in phase-space. **Recommended**. - **1** field search run as normal and then substructure search for baryons run using baryons identified in field search. 
- - **2** field search also altered to treat baryons differently, allowing only DM particles to be used as head links (ie link dm-dm, dm-baryon, but not baryon-baryon nor baryon-dm). Then DM substructure search with baryons associated to closest DM particle in phase-space. - **0** do nothing special for baryon particles. ``Search_for_substructure = 1/0`` * Flag indicating whether field objects are searched for internal substructures. Default is 1 (on) @@ -201,9 +286,9 @@ There are numerous key words that can be passed. Here we list them, grouped into - **5** standard 3D FOF based algorithm - **4** standard 3D FOF based algorithm :strong:`FOLLOWED` by 6D FOF search using the velocity scale defined by the largest halo on particles in 3DFOF groups - **3** standard 3D FOF based algorithm :strong:`FOLLOWED` by 6D FOF search using :emphasis:`adaptive` velocity scale for each 3DFOF group on particles in these groups. - ``Halo_linking_length_factor = 2.0`` - * Multiplicative factor of order unity that allows one to use different physical linking lengths between field objects and substructures. :strong:`Note`: substructure search defines the base linking length via ```Physical_linking_length``. Typically for standard 3DFOF searches of dark matter haloes, set to 2.0, as typical base linking length is 0.1 times the interparticle spacing when examining cosmological simulations. - ``Halo_velocity_linking_length_factor =`` + ``Halo_3D_linking_length = 0.2`` + * Linking length used to find configuration space 3D FOF halos. If cosmological file then assumed to be in units of inter particle spacing, if loading in a single halo then can be based on average interparticle spacing calculated, otherwise in input units. Default is 0.2 in interparticle spacing units. + ``Halo_velocity_linking_length_factor = 1.0`` * Multiplicative factor of order unity for the dispersions used in 6D searches.
Typical values are order unity as velocity dispersions are used to define the velocity linking length scale. ``Halo_6D_linking_length_factor = 1.0`` * Multiplicative factor of order unity that allows one to use different configuration space linking lengths between 3DFOF and 6DFOF field search. Typically this is 1.0 @@ -228,10 +313,10 @@ There are numerous key words that can be passed. Here we list them, grouped into * Minimum significance level of a substructure which should be order unity (default is 1) ``Velocity_ratio = 2.0`` * Speed ratio used in linking particles which should be order unity and > 1 (default is 2) - ``Velocity_opening_angle = 0.10 `` + ``Velocity_opening_angle = 0.10`` * Angle between velocities when linking (in units of :math:`\pi`) (default is 0.10) - ``Physical_linking_length = 0.1`` - * Physical linking length used in FOF. If cosmological gadget file then assumed to be in units of inter particle spacing, if loading in a single halo then can be based on average interparticle spacing calculated, otherwise in input units. Default is 0.1 in interpaticle spacing units. + ``Substructure_physical_linking_length = 0.1`` + * Physical linking length used in phase-space substructure FOF. If cosmological file then assumed to be in units of inter particle spacing, if loading in a single halo then can be based on average interparticle spacing calculated, otherwise in input units. Default is 0.1 in interparticle spacing units. ``CMrefadjustsubsearch_flag = 1/0`` * Flag indicating whether particles are moved to the rough CM velocity frame of the background before substructures are searched for (default is on) ``Iterative_searchflag = 1/0`` @@ -249,10 +334,15 @@ There are numerous key words that can be passed. Here we list them, grouped into .. _config_local_vden: -.. topic:: Parameters related to local density estimator used to identify particles in substructures. +..
topic:: Configuration for local density calculation used to identify substructures **Note**: default values are fine and typically do not need to be set in the configuration file. + ``Local_velocity_density_approximate_calculation = 2/1/0`` + * Flag indicating how to calculate computationally expensive local velocity densities. + - **2** approximative search limited to particles in halos (requires no mpi communication). **Recommended**. + - **1** approximative search, group particles in leaf nodes of tree + - **0** full search per particle. ``Nsearch_velocity = 32`` * Number of velocity neighbours used to calculate velocity density (suggested value is 32) ``Nsearch_physical = 32`` @@ -261,7 +351,7 @@ There are numerous key words that can be passed. Here we list them, grouped into * Fraction of a halo contained in a subvolume used to characterize the background (suggested value is 0.01) ``Grid_type = 1`` * Integer describing type of grid used to decompose volume for substructure search (suggested value is 1) - - **1** standard physical shannon entropy, balanced KD tree volume decomposition into cells + - **1** standard physical shannon entropy, balanced KD tree volume decomposition into cells. **Recommended** - **2** phase phase-space shannon entropy, balanced KD tree volume decomposition into cells - **3** simple simple physical balanced KD tree decomposition of volume into cells @@ -273,6 +363,9 @@ There are numerous key words that can be passed. Here we list them, grouped into ``Halo_core_search = 0/1/2`` * Integer allows one to explicitly search for large 6D FOF cores that are indicative of a recent major merger. Since substructure is defined on the scale of the maximum cell size and major mergers typically result two or more phase-space dense regions that are *larger* than the cell size used in reasonable substructure searches, one can identify them using this search. The overall goal is to treat these objects differently than a substructure. 
However, if 2 is set, then smaller core is treated as substruture and all particles within the FOF envelop are assigned to the cores based on their phase-space distance to core particles. + - **2** search for cores and grow them. **Recommended**. + - **1** + - **0** do not search cores. ``Use_adaptive_core_search = 0/1`` * Flag allows one to run complex adaptive phase-space search for large 6D FOF cores and then use these linking lengths to separate mergers. 0 is simple high density dispersively cold cores with velocity scale adaptive, 1 is adaptive in both configuration & velocity. ``Use_phase_tensor_core_growth = 0/1`` @@ -296,11 +389,23 @@ There are numerous key words that can be passed. Here we list them, grouped into ``Halo_core_phase_significance = 2.0`` * Significance a core must be in terms of phase-space distance scaled by dispersions (sigma). Typical values are order unity & > 1. +.. topic:: Configuration for cleaning up substructures that overlap in phase-space. + + Substructures can be merged together if they overlap in phase space. + + ``Structure_phase_merge_dist = 0.25`` + * Phase-distance normalised by dispersions below which structures are merged together. Typical values are < 1. + ``Apply_phase_merge_to_host = 1`` + * Flag whether to also check substructures can be merged with the host background. 1 is on. + + .. _config_unbinding: -.. topic:: Unbinding Parameters +Unbinding +^^^^^^^^^ - Particles in strutures can be checked to see if they are bound relative to a kinetic reference frame (CM of the structure). +Particles in structures can be checked to see if they are bound relative to a kinetic reference frame (CM of the structure). +This cleans the (sub)structures of spurious objects and particles. ``Unbind_flag = 1/0`` * Flag indciating whether substructures passed through an unbinding routine. @@ -320,6 +425,72 @@ There are numerous key words that can be passed.
Here we list them, grouped into * The minimum number of particles used to calculate the velocity of the minimum of the potential (default is 10). ``Frac_pot_ref = 0.1`` * Fraction of particles used to calculate the velocity of the minimum of the potential (0.1). If smaller than ``Min_npot_ref``, that is used. + ``Unbinding_max_unbound_removal_fraction_per_iteration = 0.5`` + * Maximum fraction of unbound particles removed per iteration in unbinding process. + ``Unbinding_max_unbound_fraction = 0.95`` + * Maximum fraction of particles that can be considered unbound before the group is removed entirely and is not processed iteratively. + ``Unbinding_max_unbound_fraction_allowed = 0.005`` + * Maximum fraction of unbound particles allowed after unbinding. If set to zero, all unbound particles are removed. + + +.. _config_properties: + +Properties +^^^^^^^^^^ + +Configuration options related to the bulk properties calculated. + + ``Inclusive_halo_mass = 3/2/1/0`` + * Flag indicating whether inclusive masses are calculated for field objects. + - **3** indicates inclusive SO masses are calculated after substructure is found. + - **2** indicates inclusive SO masses are calculated before substructure is found. + - **1** indicates inclusive SO masses are calculated before substructure is found but limited to particles in the halo. + - **0** indicates masses are exclusive. + ``Iterate_cm_flag = 0`` + * Flag indicating whether to iteratively find the centre-of-mass of an object (1) or simply determine bulk centre of mass and centre of mass velocity (0). Calculation is based on all particles exclusively belonging to the object. + ``Reference_frame_for_properties = 2`` + * Flag indicating what reference position to use when calculating radially dependent properties. + - **2** use the position of the particle with the minimum potential. + - **1** use the position of the most bound particle. + - **0** use the centre-of-mass.
+ ``Extensive_halo_properties_output = 1`` + * Flag indicating that one should calculate more properties for objects, such as angular momentum in spherical overdensity apertures. + ``Extensive_gas_properties_output = 1`` + * Flag indicating that in addition to calculating extra halo properties also calculate gas content in spherical overdensity apertures as well as their angular momentum. Must be used in conjunction with ``Extensive_halo_properties_output = 1``. + ``Extensive_star_properties_output = 1`` + * Flag indicating that in addition to calculating extra halo properties also calculate stellar content in spherical overdensity apertures as well as their angular momentum. Must be used in conjunction with ``Extensive_halo_properties_output = 1``. + Aperture related config options + ``Calculate_aperture_quantities = 1`` + * Flag on whether to calculate aperture related masses, dispersions, metallicities + ``Number_of_apertures = 6`` + * Number of spherical apertures + ``Aperture_values_in_kpc = 3,5,10,30,50,100,`` + * Comma separated list of values in kpc + ``Number_of_projected_apertures = 3`` + * Number of projected apertures. Code calculates 3 projections per aperture: x, y, z. + ``Projected_aperture_values_in_kpc=10,50,100,`` + * Comma separated list of values in kpc + Spherical overdensity related config options + ``Number_of_overdensities = 5`` + * Number of spherical overdensities + ``Overdensity_values_in_critical_density=25,100,500,1000,2500,`` + * Comma separated list of spherical overdensity thresholds in units of the critical density in cosmological simulations + Radial profile related config options + ``Calculate_radial_profiles = 1`` + * Flag on whether to calculate radial profiles of masses + ``Radial_profile_norm = 0`` + * Flag setting the radial normalisation and scaling. Default is log rad bins, in proper kpc + ``Number_of_radial_profile_bin_edges = 9`` + * Number of bin edges listed. Assumes lowest bin edge is r=0. 
+ ``Radial_profile_bin_edges = -2.,-1.50,-1.00,-0.50,0.00,0.50,1.00,1.50,2.00`` + * Comma separated list of (log) r bin edges. Here example is for log r in proper kpc binning so values are log(r). + +.. _config_siminfo: + +Simulation Info +^^^^^^^^^^^^^^^ + +Options related to the input and output units and cosmology. .. _config_units: @@ -345,6 +516,8 @@ There are numerous key words that can be passed. Here we list them, grouped into * Specify the conversion factor from the output unit to km/s ``Mass_unit_to_solarmass =`` * Specify the conversion factor from the output unit to solar masses + ``Comoving_units = 1/0`` + * Flag indicating whether the properties output is in physical or comoving little h units. .. _config_cosmology: @@ -352,42 +525,41 @@ There are numerous key words that can be passed. Here we list them, grouped into If input is cosmological, then for some input formats (gadget, HDF), these quantites can be read from the input file. Tipsy formats require that these be set in the configuration file. - ``Period =`` - * Period of the box in input units. \n - ``Scale_factor =`` + ``Period = 0`` + * Period of the box in input units. + ``Scale_factor = 1.0`` * Scale factor time - ``h_val =`` + ``h_val = 1.0`` * The "little h" value often used in cosmological simulations. - ``Omega_m =`` + ``Omega_m = 1.0`` * Matter density in units of the critical density at z=0 used in cosmological simulations. - ``Omega_Lambda =`` + ``Omega_Lambda = 0.0`` * Energy density of the cosmological constant (or dark energy ) in units of the critical density at z=0 used in cosmological simulations. - ``Omega_cdm =`` + ``Omega_cdm = 1.0`` * Dark matter density in units of the critical density at z=0 used in cosmological simulations. For non-standard DM models (annihilating, decaying, coupled), may be useful to provide the current DM density. 
- ``Omega_b =`` - * Baryon density in units of the critical density at z=0 used in cosmological simulations - ``w_of_DE =`` + ``Omega_b = 0.0`` + * Baryon density in units of the critical density at z=0 used in cosmological simulations. + ``Omega_r = 0.0`` + * Radiation density in units of the critical density at z=0 used in cosmological simulations. Typically 0 (negligible). + ``Omega_nu = 0.0`` + * Neutrino density in units of the critical density at z=0 used in cosmological simulations. Typically 0 (negligible). + ``Omega_k = 0.0`` + * Curvature density in units of the critical density at z=0 used in cosmological simulations. Typically 0 (flat). + ``Omega_DE = 0.0`` + * Dark Energy density in units of the critical density at z=0 used in cosmological simulations. This is in addition to (or replacing) the energy density of the cosmological constant and has an associated equation of state, :math:`w_{DE}`. + ``w_of_DE = -1.0`` * Equation of state of the dark energy fluid, :math:`w=\frac{p}{\rho}`. This is not necessary unless one is using a cosmological simulation with :math:`w\neq -1`. Currently not fully implemented. - ``Virial_density =`` + ``Virial_density = 200.0`` * Virial overdensity in units of the background matter density used in cosmological simulations. If -1, then the Bryan & Norman 1998 virial density is calculated based on a LCDM cosmology, otherwise overrides the Bryan & Norman calculation. - ``Critical_density =`` + ``Critical_density = 1.0`` * Critical density in input units used in cosmological simulations. -.. _config_misc: - -.. topic:: Miscellaneous +.. _config_parallel: - Other configuration options - - ``Snapshot_value =`` - * If halo ids need to be offset to some starting value based on the snapshot of the output (say to make temporally unique halo ids that are useful for some halo merger tree codes), one can specific a snapshot number.
All halo ids will be listed as internal haloid + snapnum * :math:`10^{12}` (or if using 32 bit integers and 64 bit integers, then ids offset by :math:`10^{6}`). - ``Effective_Resolution =`` - * If running a multiple resolution zoom simulation, simple method of scaling the linking length by using the period and this effective resolution, ie: :math:`p/N_{\rm eff}` - ``Verbose = 0/1/2`` - * Integer indicating how talkative the code is (2 very verbose, 1 verbose, 0 quiet). - ``Inclusive_halo_mass = 1/0`` - * Flag indicating whether inclusive masses are calculated for field objects. +Parallel +^^^^^^^^ +Options related to MPI/OpenMP/Pthread parallelisation. .. _config_mpi: @@ -399,6 +571,32 @@ There are numerous key words that can be passed. Here we list them, grouped into * Factor used in memory allocated in mpi mode to store particles is (1+factor)* the memory need for the initial mpi decomposition. This factor should be >0 and is mean to allow a little room for particles to be exchanged between mpi threads withouth having to require new memory allocations and copying of data. ``MPI_particle_total_buf_size =`` * Total memory size in bytes used to store particles in temporary buffer such that particles are sent to non-reading mpi processes in chunks of size buffer_size/NProcs/sizeof(Particle). + ``MPI_number_of_tasks_per_write =`` + * Number of mpi tasks that are grouped for collective HDF5 writes if parallel HDF5 is enabled. Net result is that the total number of files written is ceiling((Number of MPI tasks)/(Number of tasks per write)) + +.. _config_openmp: + +.. topic:: OpenMP specific parallelisation options + + ``OMP_run_fof = 1`` + * Flag indicating whether to run FOF searches with OpenMP threads. + ``OMP_fof_region_size = 100000000`` + * Number of particles per OpenMP region. + +..
_config_misc: + +Miscellaneous +^^^^^^^^^^^^^ + + Other configuration options + + ``Snapshot_value =`` + * If halo ids need to be offset to some starting value based on the snapshot of the output (say to make temporally unique halo ids that are useful for some halo merger tree codes), one can specify a snapshot number. All halo ids will be listed as internal haloid + snapnum * :math:`10^{12}` (or if using 32 bit integers and 64 bit integers, then ids offset by :math:`10^{6}`). + ``Effective_Resolution =`` + * If running a multiple resolution zoom simulation, simple method of scaling the linking length by using the period and this effective resolution, ie: :math:`p/N_{\rm eff}` + ``Verbose = 0/1/2`` + * Integer indicating how talkative the code is (2 very verbose, 1 verbose, 0 quiet). + .. _subsection_searchtypes: diff --git a/doc/velociraptoricon.png b/doc/velociraptoricon.png new file mode 120000 index 00000000..e524557f --- /dev/null +++ b/doc/velociraptoricon.png @@ -0,0 +1 @@ +../velociraptoricon.png \ No newline at end of file diff --git a/examples/profile_build.sh b/examples/profile_build.sh deleted file mode 100755 index bd6111b0..00000000 --- a/examples/profile_build.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -l -# This script profiles stf using perf and hotspot to produce flame graphs -# It produces a build directory with a certian labels -# runs cmake with the desired options, runs the code on the desired input -# using the desired config and running the hotspot stuff -# it assumes the existence of perf that is in linux-tools-common -# also makes use of wget - -#script that produces lots of qsub scripts to run velociraptor on simulation output -if [ $# -eq 0 ] || [ "$1" == "--help" ] -then - echo "This script profiles a VR run." - echo "The interface is as follows:" - echo "buildlabel buildoptions VRargs VRconfig" - echo "buildlabel: string, label for the build" - echo "buildoptions: string, cmake options for build.
Ex: \" -DVR_USE_GAS=ON \"" - echo "VRargs: string, options for VR. Ex: \"-i inputfile -I 2 -s 1 -o outputfile \"" - echo "VRconfig: string, path and file name of the config file" - exit -fi - - -#initial and final snapshot numbers -buildlabel=$1 -buildoptions=$2 -VRargs=$3 -VRconfig=$4 - -workingdir=`pwd` -scriptdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -#build exe -cd ${scriptdir}/../ -mkdir build-${buildlabel} -cd build-${buildlabel} -rm -rf * -cmake ${buildoptions} -DCMAKE_BUILD_TYPE=RelWithDebugInfo ../ -make -j - -wget https://github.com/KDAB/hotspot/releases/download/v1.1.0/hotspot-v1.1.0-x86_64.AppImage -chmod +x hotspot-v1.1.0-x86_64.AppImage - -#run code -#this will have produced a perf.data -perf record ./stf ${VRargs} -C ${VRconfig} - -#run hotspot -./hotspot-v1.1.0-x86_64.AppImage perf.data diff --git a/examples/sample_dmcosmological_run.cfg b/examples/sample_dmcosmological_run.cfg index 3fa92231..fcc3637a 100644 --- a/examples/sample_dmcosmological_run.cfg +++ b/examples/sample_dmcosmological_run.cfg @@ -111,6 +111,12 @@ Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per i Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance +#merge substructures if the overlap in phase-space by some fraction of their dispersion +#here distance has to be less than 0.25 sigma +Structure_phase_merge_dist=0.25 +#also merge structures with background if overlap heavily in phase-space based on dispersions. 
+Apply_phase_merge_to_host=1 + ################################ #Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) ################################ @@ -131,6 +137,9 @@ Frac_pot_ref=1.0 Min_npot_ref=20 #reference frame only meaningful if calculating velocity frame using subset of particles in object. Can use radially sorted fraction of particles about minimum potential or centre of mass Kinetic_reference_frame_type=0 +Unbinding_max_unbound_removal_fraction_per_iteration=0.5 +Unbinding_max_unbound_fraction=0.95 +Unbinding_max_unbound_fraction_allowed=0.005 ################################ #Cosmological parameters diff --git a/examples/sample_eaglehydro_3dfof_subhalo.cfg b/examples/sample_eaglehydro_3dfof_subhalo.cfg index 2d5dda39..99344dfc 100644 --- a/examples/sample_eaglehydro_3dfof_subhalo.cfg +++ b/examples/sample_eaglehydro_3dfof_subhalo.cfg @@ -120,6 +120,12 @@ Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per i Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance +#merge substructures if the overlap in phase-space by some fraction of their dispersion +#here distance has to be less than 0.25 sigma +Structure_phase_merge_dist=0.25 +#also merge structures with background if overlap heavily in phase-space based on dispersions. +Apply_phase_merge_to_host=1 + ################################ #Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) ################################ @@ -140,6 +146,9 @@ Frac_pot_ref=1.0 Min_npot_ref=20 #reference frame only meaningful if calculating velocity frame using subset of particles in object. 
Can use radially sorted fraction of particles about minimum potential or centre of mass Kinetic_reference_frame_type=0 +Unbinding_max_unbound_removal_fraction_per_iteration=0.5 +Unbinding_max_unbound_fraction=0.95 +Unbinding_max_unbound_fraction_allowed=0.005 ################################ #Cosmological parameters diff --git a/examples/sample_eaglehydro_6dfof_subhalo.cfg b/examples/sample_eaglehydro_6dfof_subhalo.cfg index 0a34cc5f..dbda7c74 100644 --- a/examples/sample_eaglehydro_6dfof_subhalo.cfg +++ b/examples/sample_eaglehydro_6dfof_subhalo.cfg @@ -3,7 +3,7 @@ #runs 6DFOF algorithm, calculates many properties #Units currently set to take in as input, Mpc, 1e10 solar masses, km/s, output in same units #To set temporally unique halo ids, alter Snapshot_value=SNAP to appropriate value. Ie: for snapshot 12, change SNAP to 12 -#note that the code must be compiled with -DVR_USE_GAS=ON -DVR_USE_STAR=ON -DVR_USE_BH=ON +#note that the code must be compiled with -DVR_USE_GAS=ON -DVR_USE_STAR=ON -DVR_USE_BH=ON ################################ @@ -120,6 +120,12 @@ Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per i Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance +#merge substructures if the overlap in phase-space by some fraction of their dispersion +#here distance has to be less than 0.25 sigma +Structure_phase_merge_dist=0.25 +#also merge structures with background if overlap heavily in phase-space based on dispersions. +Apply_phase_merge_to_host=1 + ################################ #Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) ################################ @@ -140,6 +146,9 @@ Frac_pot_ref=1.0 Min_npot_ref=20 #reference frame only meaningful if calculating velocity frame using subset of particles in object. 
Can use radially sorted fraction of particles about minimum potential or centre of mass Kinetic_reference_frame_type=0 +Unbinding_max_unbound_removal_fraction_per_iteration=0.5 +Unbinding_max_unbound_fraction=0.95 +Unbinding_max_unbound_fraction_allowed=0.005 ################################ #Cosmological parameters diff --git a/examples/sample_hydrocosmological_run.cfg b/examples/sample_hydrocosmological_run.cfg index 0a34cc5f..407e7345 100644 --- a/examples/sample_hydrocosmological_run.cfg +++ b/examples/sample_hydrocosmological_run.cfg @@ -3,7 +3,7 @@ #runs 6DFOF algorithm, calculates many properties #Units currently set to take in as input, Mpc, 1e10 solar masses, km/s, output in same units #To set temporally unique halo ids, alter Snapshot_value=SNAP to appropriate value. Ie: for snapshot 12, change SNAP to 12 -#note that the code must be compiled with -DVR_USE_GAS=ON -DVR_USE_STAR=ON -DVR_USE_BH=ON +#note that the code must be compiled with -DVR_USE_GAS=ON -DVR_USE_STAR=ON -DVR_USE_BH=ON ################################ @@ -120,6 +120,12 @@ Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per i Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance +#merge substructures if the overlap in phase-space by some fraction of their dispersion +#here distance has to be less than 0.25 sigma +Structure_phase_merge_dist=0.25 +#also merge structures with background if overlap heavily in phase-space based on dispersions. 
+Apply_phase_merge_to_host=1 + ################################ #Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) ################################ diff --git a/examples/sample_swiftdm_3dfof_subhalo.cfg b/examples/sample_swiftdm_3dfof_subhalo.cfg new file mode 100644 index 00000000..ca3c92ec --- /dev/null +++ b/examples/sample_swiftdm_3dfof_subhalo.cfg @@ -0,0 +1,213 @@ +#suggested configuration file for cosmological dm and subhalo catalog +#Configuration file for analysing all particles +#runs 3DFOF algorithm, calculates many properties +#Units currently set to take in as input, Mpc, 1e10 solar masses, km/s, output in same units +#To set temporally unique halo ids, alter Snapshot_value=SNAP to appropriate value. Ie: for snapshot 12, change SNAP to 12 +#Script calculates several aperture quantities, and several spherical overdensities. Currently does NOT +#write all the particles within the lowest spherical density. + +################################ +#input options +#set up to use SWIFT HDF input, load dark matter only +################################ +HDF_name_convention=6 #HDF SWIFT naming convention +Input_includes_dm_particle=1 #include dark matter particles in hydro input +Input_includes_gas_particle=0 #include gas particles in hydro input +Input_includes_star_particle=0 #include star particles in hydro input +Input_includes_bh_particle=0 #include bh particles in hydro input +Input_includes_wind_particle=0 #include wind particles in hydro input (used by Illustris and moves particle type 0 to particle type 3 when decoupled from hydro forces). Here shown as example +Input_includes_tracer_particle=0 #include tracer particles in hydro input (used by Illustris).
Here shown as example +Input_includes_extradm_particle=0 #include extra dm particles stored in particle type 2 and type 3, useful for zooms + +#cosmological run +Cosmological_input=1 + +################################ +#unit options, should always be provided +################################ + +#units conversion from input input to desired internal unit +Length_input_unit_conversion_to_output_unit=1.0 #default code unit, +Velocity_input_unit_conversion_to_output_unit=1.0 #default velocity unit, +Mass_input_unit_conversion_to_output_unit=1.0 #default mass unit, +#assumes input is in 1e10 msun, Mpc and km/s and output units are the same +Gravity=43.0211349 #for 1e10 Msun, km/s and Mpc +Hubble_unit=100.0 # assuming units are km/s and Mpc, then value of Hubble in km/s/Mpc +#converting hydro quantities +Stellar_age_input_is_cosmological_scalefactor=1 +Metallicity_input_unit_conversion_to_output_unit=1.0 +Stellar_age_input_unit_conversion_to_output_unit=1.0 +Star_formation_rate_input_unit_conversion_to_output_unit=1.0 + +#set the units of the output by providing conversion to a defined unit +#conversion of output length units to kpc +Length_unit_to_kpc=1000.0 +#conversion of output velocity units to km/s +Velocity_to_kms=1.0 +#conversion of output mass units to solar masses +Mass_to_solarmass=1.0e10 +Metallicity_to_solarmetallicity=1.0 +Star_formation_rate_to_solarmassperyear=1.0 +Stellar_age_to_yr=1.0 +#ensures that output is physical and not comoving distances per little h +Comoving_units=0 + +#sets the total buffer size in bytes used to store temporary particle information +#of mpi read threads before they are broadcast to the appropriate waiting non-read threads +#if not set, default value is equivalent to 1e6 particles per mpi process, quite large +#but significantly minimises the number of send/receives +#in this example the buffer size is roughly that for a send/receive of 10000 particles +#for 100 mpi processes +MPI_particle_total_buf_size=100000000 + 
+################################ +#search related options +################################ + +#how to search a simulation +Particle_search_type=2 #search all particles, see allvars for other types +#for baryon search +Baryon_searchflag=0 #if 1 search for baryons separately using phase-space search when identifying substructures, 2 allows special treatment in field FOF linking and phase-space substructure search, 0 treat the same as dark matter particles +#for search for substructure +Search_for_substructure=1 #if 0, end search once field objects are found +#also useful for zoom simulations or simulations of individual objects, setting this flag means no field structure search is run +Singlehalo_search=0 #if file is single halo in which one wishes to search for substructure +#additional option for field haloes +Keep_FOF=0 #if field 6DFOF search is done, allows to keep structures found in 3DFOF (can be interpreted as the inter halo stellar mass when only stellar search is used).\n + +#minimum size for structures +Minimum_size=20 #min 20 particles +Minimum_halo_size=35 #if field halos have different minimum sizes, otherwise set to -1. + +#for field fof halo search +FoF_Field_search_type=5 #5 3DFOF search for field halos, 4 for 6DFOF clean up of field halos, 3 for 6DFOF with velocity scale distinct for each halo +Halo_3D_linking_length=0.20 #3DFOF linking length in interparticle spacing + +#for mean field estimates and local velocity density distribution function estimator related quantities, rarely need to change this +Cell_fraction = 0.01 #fraction of field fof halo used to determine mean velocity distribution function. Typical values are ~0.005-0.02 +Grid_type=1 #normal entropy based grid, shouldn't have to change +Nsearch_velocity=32 #number of velocity neighbours used to calculate local velocity distribution function. Typical values are ~32 +Nsearch_physical=256 #number of physical neighbours from which the nearest velocity neighbour set is based.
Typical values are 128-512 +Local_velocity_density_approximate_calculation=2 #approximative and mpi local calculation of density, less accurate much faster. + +#for substructure search, rarely ever need to change this +FoF_search_type=1 #default phase-space FOF search. Don't really need to change +Iterative_searchflag=1 #iterative substructure search, for substructure find initial candidate substructures with smaller linking lengths then expand search region +Outlier_threshold=2.5 #outlier threshold for a particle to be considered residing in substructure, that is how dynamically distinct a particle is. Typical values are >2 +Substructure_physical_linking_length=0.10 #physical linking length. IF reading periodic volumes in gadget/hdf/ramses, in units of the effective inter-particle spacing. Otherwise in user defined code units. Here set to 0.10 as iterative flag one, values of 0.1-0.3 are typical. +Velocity_ratio=2.0 #ratio of speeds used in phase-space FOF +Velocity_opening_angle=0.10 #angle between velocities. 18 degrees here, typical values are ~10-30 +Velocity_linking_length=0.20 #where scaled by structure dispersion +Significance_level=1.0 #how significant a substructure is relative to Poisson noise. Values >= 1 are fine. + +#for iterative substructure search, rarely ever need to change this +Iterative_threshold_factor=1.0 #change in threshold value when using iterative search. Here no increase in threshold if iterative or not +Iterative_linking_length_factor=2.0 #increase in final linking final iterative substructure search will be sqrt(2.25)*this factor +Iterative_Vratio_factor=1.0 #change in Vratio when using iterative search. no change in vratio +Iterative_ThetaOp_factor=1.0 #change in velocity opening angle. 
no change in velocity opening angle + +#for checking for halo merger remnants, which are defined as large, well separated phase-space density maxima +Halo_core_search=2 # searches for separate 6dfof cores in field haloes, and then more than just flags halo as merging, assigns particles to each merging "halo". 2 is full separation, 1 is flagging, 0 is off +#if searching for cores, linking lengths. likely does not need to change much +Use_adaptive_core_search=0 #calculate dispersions in configuration & vel space to determine linking lengths +Use_phase_tensor_core_growth=2 #use full stepped phase-space tensor assignment +Halo_core_ellx_fac=0.7 #how linking lengths are changed when searching for local 6DFOF cores, +Halo_core_ellv_fac=2.0 #how velocity lengths based on dispersions are changed when searching for local 6DFOF cores +Halo_core_ncellfac=0.005 #fraction of total halo particle number setting min size of a local 6DFOF core +Halo_core_num_loops=8 #number of loops to iteratively search for cores +Halo_core_loop_ellx_fac=0.75 #how much to change the configuration space linking per iteration +Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per iteration +Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration +Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance + +#merge substructures if the overlap in phase-space by some fraction of their dispersion +#here distance has to be less than 0.25 sigma +Structure_phase_merge_dist=0.25 +#also merge structures with background if overlap heavily in phase-space based on dispersions. 
+Apply_phase_merge_to_host=1 + +################################ +#Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) +################################ + +#unbinding related items +Unbind_flag=1 #run unbinding +#objects must have particles that meet the allowed kinetic to potential ratio AND also have some total fraction that are completely bound. +Unbinding_type=0 +#alpha factor used to determine whether particle is "bound" alpha*T+W<0. For standard subhalo catalogues use >0.9 but if interested in tidal debris 0.2-0.5 +Allowed_kinetic_potential_ratio=0.95 +Min_bound_mass_frac=0.65 #minimum bound mass fraction +#run unbinding of field structures, aka halos. This is useful for sams and 6DFOF halos but may not be useful if interested in 3DFOF mass functions. +Bound_halos=0 +#don't keep background potential when unbinding +Keep_background_potential=1 +#use all particles to determine velocity frame for unbinding +Frac_pot_ref=1.0 +Min_npot_ref=20 +#reference frame only meaningful if calculating velocity frame using subset of particles in object. Can use radially sorted fraction of particles about minimum potential or centre of mass +Kinetic_reference_frame_type=0 + +################################ +#Cosmological parameters +#this is typically overwritten by information in the gadget/hdf header if those input file types are read +################################ +h_val=1.0 +Omega_m=0.3 +Omega_Lambda=0.7 +Critical_density=1.0 +Virial_density=200 #so-called virial overdensity value +Omega_b=0. #no baryons + +################################ +#Calculation of properties related options +################################ +Inclusive_halo_masses=3 #calculate inclusive masses for halos using full Spherical overdensity apertures once all substructures have been found (if substructures are searched for).
+#when calculating properties, for field objects calculate inclusive masses +Iterate_cm_flag=0 #do not iteratively find the centre-of-mass, giving bulk centre of mass and centre of mass velocity. +Sort_by_binding_energy=1 #sort by binding energy +Reference_frame_for_properties=2 #use the position of the particle with the minimum potential as the point about which properties should be calculated. +#calculate more (sub)halo properties (like angular momentum in spherical overdensity apertures, both inclusive and exclusive) +Extensive_halo_properties_output=1 + +#aperture related (list must be in increasing order and terminates with , ie: 1,2,3, ) +#calculate aperture masses +Calculate_aperture_quantities=1 +Number_of_apertures=6 +Aperture_values_in_kpc=3,5,10,30,50,100, +Number_of_projected_apertures=3 +Projected_aperture_values_in_kpc=10,50,100, + +#spherical overdensity related quantities +Virial_density=500 #user defined virial overdensity. Note that 200 rho_c, 200 rho_m and BN98 are already calculated.
+#number of spherical overdensity thresholds +Number_of_overdensities=5 +Overdensity_values_in_critical_density=25,100,500,1000,2500, + +#calculate radial profiles +Calculate_radial_profiles=1 +Number_of_radial_profile_bin_edges=20 +#default radial normalisation log rad bins, in proper kpc +Radial_profile_norm=0 +Radial_profile_bin_edges=-2.,-1.87379263,-1.74758526,-1.62137789,-1.49517052,-1.36896316,-1.24275579,-1.11654842,-0.99034105,-0.86413368,-0.73792631,-0.61171894,-0.48551157,-0.3593042,-0.23309684,-0.10688947,0.0193179,0.14552527,0.27173264,0.39794001, + +################################ +#output related +################################ + +Write_group_array_file=0 #write a group array file +Separate_output_files=0 #separate output into field and substructure files similar to subfind +Binary_output=2 #binary output 1, ascii 0, and HDF 2 +#do not output particles residing in the spherical overdensity apertures of halos, only the particles exclusively belonging to halos +Spherical_overdensity_halo_particle_list_output=0 + +#halo ids are adjusted by this value * 1000000000000 (or 1000000 if code compiled with the LONGINTS option turned off) +#to ensure that halo ids are temporally unique. 
So if you had 100 snapshots, for snap 100 set this to 100 and 100*1000000000000 will +#be added to the halo id as set for this snapshot, so halo 1 becomes halo 100*1000000000000+1 and halo 1 of snap 0 would just have ID=1 + +#ALTER THIS as part of a script to get temporally unique ids +Snapshot_value=SNAP + +################################ +#other options +################################ +Verbose=0 #how talkative do you want the code to be, 0 not much, 1 a lot, 2 chatterbox diff --git a/examples/sample_swifthydro_3dfof_subhalo.cfg b/examples/sample_swifthydro_3dfof_subhalo.cfg index 4c48e992..309c7cb3 100644 --- a/examples/sample_swifthydro_3dfof_subhalo.cfg +++ b/examples/sample_swifthydro_3dfof_subhalo.cfg @@ -6,6 +6,7 @@ #note that the code must be compiled with -DVR_USE_GAS=ON -DVR_USE_STAR=ON -DVR_USE_BH=ON + ################################ #input options #set up to use EAGLE HDF input, load gas, star, bh and dark matter @@ -114,12 +115,18 @@ Use_phase_tensor_core_growth=2 #use full stepped phase-space tensor assignment Halo_core_ellx_fac=0.7 #how linking lengths are changed when searching for local 6DFOF cores, Halo_core_ellv_fac=2.0 #how velocity lengths based on dispersions are changed when searching for local 6DFOF cores Halo_core_ncellfac=0.005 #fraction of total halo particle number setting min size of a local 6DFOF core -Halo_core_num_loops=8 #number of loops to iteratively search for cores +Halo_core_num_loops=6 #number of loops to iteratively search for cores Halo_core_loop_ellx_fac=0.75 #how much to change the configuration space linking per iteration Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per iteration Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance +#merge substructures if the overlap in phase-space by some fraction of their dispersion +#here distance has to be 
less than 0.25 sigma +Structure_phase_merge_dist=0.25 +#also merge structures with background if overlap heavily in phase-space based on dispersions. +Apply_phase_merge_to_host=1 + ################################ #Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) ################################ @@ -140,17 +147,14 @@ Frac_pot_ref=1.0 Min_npot_ref=20 #reference frame only meaningful if calculating velocity frame using subset of particles in object. Can use radially sorted fraction of particles about minimum potential or centre of mass Kinetic_reference_frame_type=0 +Unbinding_max_unbound_removal_fraction_per_iteration=0.5 +Unbinding_max_unbound_fraction=0.95 +Unbinding_max_unbound_fraction_allowed=0.005 ################################ #Cosmological parameters -#this is typically overwritten by information in the gadget/hdf header if those input file types are read -################################ -h_val=1.0 -Omega_m=0.3 -Omega_Lambda=0.7 -Critical_density=1.0 -Virial_density=200 #so-called virial overdensity value -Omega_b=0. #no baryons +#This does not need to be set since it is read from the input file +################################ ################################ #Calculation of properties related options diff --git a/examples/sample_swifthydro_3dfof_subhalo_extra_properties.cfg b/examples/sample_swifthydro_3dfof_subhalo_extra_properties.cfg new file mode 100644 index 00000000..31db9f8c --- /dev/null +++ b/examples/sample_swifthydro_3dfof_subhalo_extra_properties.cfg @@ -0,0 +1,235 @@ +#suggested configuration file for cosmological hydro run and subhalo (and galaxy ie: associated baryons) catalog +#Configuration file for analysing all particles +#runs 3DFOF algorithm, calculates many properties +#Units currently set to take in as input, Mpc, 1e10 solar masses, km/s, output in same units +#To set temporally unique halo ids, alter Snapshot_value=SNAP to appropriate value. 
Ie: for snapshot 12, change SNAP to 12 +#note that the code must be compiled with -DVR_USE_GAS=ON -DVR_USE_STAR=ON -DVR_USE_BH=ON +#This code also tries to load from the input hdf file a variety of extra fields +#to calculate the average quantities of those fields per object + +################################ +#input options +#set up to use EAGLE HDF input, load gas, star, bh and dark matter +################################ +HDF_name_convention=6 #HDF SWIFT naming convention +Input_includes_dm_particle=1 #include dark matter particles in hydro input +Input_includes_gas_particle=1 #include gas particles in hydro input +Input_includes_star_particle=1 #include star particles in hydro input +Input_includes_bh_particle=1 #include bh particles in hydro input +Input_includes_wind_particle=0 #include wind particles in hydro input (used by Illustris and moves particle type 0 to particle type 3 when decoupled from hydro forces). Here shown as example +Input_includes_tracer_particle=0 #include tracer particles in hydro input (used by Illustris). Here shown as example +Input_includes_extradm_particle=0 #include extra dm particles stored in particle type 2 and type 3, useful for zooms + +#this is an example of how we use extra properties +#list of extra properties load as an internal property. 
This is a set of strings separated by , +Gas_internal_property_names=Pressures,Densities, +#here we list an explicit set of chemical elements +Gas_chemistry_names=MassFractionCarbon,MassFractionOxygen,MassFractionIron, +#here we list an explicit set of chemical elements +Gas_chemistry_production_names=MetalMassFractionFromAGB,MetalMassFractionFromSNII, + +#here we store stellar related properties +Star_internal_property_names=BirthDensity,InitialMass, +#here we list an explicit set of chemical elements +Star_chemistry_names=MassFractionCarbon,MassFractionOxygen,MassFractionIron, +#here we list an explicit set of chemical elements +Star_chemistry_production_names=MetalMassFractionFromAGB,MetalMassFractionFromSNII, + +#here we store Black hole related properties +BH_internal_property_names=BirthDensity,AccretionRate,MassFromAccretion,MassFromMergers,ScaleFatorOfLastMerger,LastMergeMassRatio +#here we list an explicit set of chemical elements +BH_chemistry_names=MassFractionCarbon,MassFractionOxygen,MassFractionIron, +#here we list an explicit set of chemical elements +BH_chemistry_production_names=MetalMassFractionFromAGB,MetalMassFractionFromSNII, + +#cosmological run +Cosmological_input=1 + +################################ +#unit options, should always be provided +################################ + +#units conversion from input input to desired internal unit +Length_input_unit_conversion_to_output_unit=1.0 #default code unit, +Velocity_input_unit_conversion_to_output_unit=1.0 #default velocity unit, +Mass_input_unit_conversion_to_output_unit=1.0 #default mass unit, +#assumes input is in 1e10 msun, Mpc and km/s and output units are the same +Gravity=43.0211349 #for 1e10 Msun, km/s and Mpc +Hubble_unit=100.0 # assuming units are km/s and Mpc, then value of Hubble in km/s/Mpc +#converting hydro quantities +Stellar_age_input_is_cosmological_scalefactor=1 +Metallicity_input_unit_conversion_to_output_unit=1.0 +Stellar_age_input_unit_conversion_to_output_unit=1.0 
+Star_formation_rate_input_unit_conversion_to_output_unit=1.0 + +#set the units of the output by providing conversion to a defined unit +#conversion of output length units to kpc +Length_unit_to_kpc=1000.0 +#conversion of output velocity units to km/s +Velocity_to_kms=1.0 +#conversion of output mass units to solar masses +Mass_to_solarmass=1.0e10 +Metallicity_to_solarmetallicity=1.0 +Star_formation_rate_to_solarmassperyear=1.0 +Stellar_age_to_yr=1.0 +#ensures that output is physical and not comoving distances per little h +Comoving_units=0 + +#sets the total buffer size in bytes used to store temporary particle information +#of mpi read threads before they are broadcast to the appropriate waiting non-read threads +#if not set, default value is equivalent to 1e6 particles per mpi process, quite large +#but significantly minimises the number of send/receives +#in this example the buffer size is roughly that for a send/receive of 10000 particles +#for 100 mpi processes +MPI_particle_total_buf_size=100000000 + +################################ +#search related options +################################ + +#how to search a simulation +Particle_search_type=1 #search all particles, see allvars for other types +#for baryon search +Baryon_searchflag=2 #if 1 search for baryons separately using phase-space search when identifying substructures, 2 allows special treatment in field FOF linking and phase-space substructure search, 0 treat the same as dark matter particles +#for search for substruture +Search_for_substructure=1 #if 0, end search once field objects are found +#also useful for zoom simulations or simulations of individual objects, setting this flag means no field structure search is run +Singlehalo_search=0 #if file is single halo in which one wishes to search for substructure +#additional option for field haloes +Keep_FOF=0 #if field 6DFOF search is done, allows to keep structures found in 3DFOF (can be interpreted as the inter halo stellar mass when only stellar 
search is used). + +#minimum size for structures +Minimum_size=20 #min 20 particles +Minimum_halo_size=35 #if field halos have different minimum sizes, otherwise set to -1. + +#for field fof halo search +FoF_Field_search_type=5 #5 3DFOF search for field halos, 4 for 6DFOF clean up of field halos, 3 for 6DFOF with velocity scale distinct for each halo +Halo_3D_linking_length=0.20 #3DFOF linking length in interparticle spacing + +#for mean field estimates and local velocity density distribution function estimator related quantities, rarely need to change this +Cell_fraction = 0.01 #fraction of field fof halo used to determine mean velocity distribution function. Typical values are ~0.005-0.02 +Grid_type=1 #normal entropy based grid, shouldn't have to change +Nsearch_velocity=32 #number of velocity neighbours used to calculate local velocity distribution function. Typical values are ~32 +Nsearch_physical=256 #number of physical neighbours from which the nearest velocity neighbour set is based. Typical values are 128-512 +Local_velocity_density_approximate_calculation=2 #approximate, mpi-local calculation of density: less accurate but much faster. + +#for substructure search, rarely ever need to change this +FoF_search_type=1 #default phase-space FOF search. Don't really need to change +Iterative_searchflag=1 #iterative substructure search, for substructure find initial candidate substructures with smaller linking lengths then expand search region +Outlier_threshold=2.5 #outlier threshold for a particle to be considered residing in substructure, that is how dynamically distinct a particle is. Typical values are >2 +Substructure_physical_linking_length=0.10 #physical linking length. If reading periodic volumes in gadget/hdf/ramses, in units of the effective inter-particle spacing. Otherwise in user defined code units. Here set to 0.10 as the iterative flag is on; values of 0.1-0.3 are typical.
+Velocity_ratio=2.0 #ratio of speeds used in phase-space FOF +Velocity_opening_angle=0.10 #angle between velocities. 18 degrees here, typical values are ~10-30 +Velocity_linking_length=0.20 #where scaled by structure dispersion +Significance_level=1.0 #how significant a substructure is relative to Poisson noise. Values >= 1 are fine. + +#for iterative substructure search, rarely ever need to change this +Iterative_threshold_factor=1.0 #change in threshold value when using iterative search. Here no increase in threshold if iterative or not +Iterative_linking_length_factor=2.0 #increase in final linking final iterative substructure search will be sqrt(2.25)*this factor +Iterative_Vratio_factor=1.0 #change in Vratio when using iterative search. no change in vratio +Iterative_ThetaOp_factor=1.0 #change in velocity opening angle. no change in velocity opening angle + +#for checking for halo merger remnants, which are defined as large, well separated phase-space density maxima +Halo_core_search=2 # searches for separate 6dfof cores in field haloes, and then more than just flags halo as merging, assigns particles to each merging "halo". 2 is full separation, 1 is flagging, 0 is off +#if searching for cores, linking lengths. 
likely does not need to change much +Use_adaptive_core_search=0 #calculate dispersions in configuration & vel space to determine linking lengths +Use_phase_tensor_core_growth=2 #use full stepped phase-space tensor assignment +Halo_core_ellx_fac=0.7 #how linking lengths are changed when searching for local 6DFOF cores, +Halo_core_ellv_fac=2.0 #how velocity lengths based on dispersions are changed when searching for local 6DFOF cores +Halo_core_ncellfac=0.005 #fraction of total halo particle number setting min size of a local 6DFOF core +Halo_core_num_loops=6 #number of loops to iteratively search for cores +Halo_core_loop_ellx_fac=0.75 #how much to change the configuration space linking per iteration +Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per iteration +Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration +Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance + +#merge substructures if the overlap in phase-space by some fraction of their dispersion +#here distance has to be less than 0.25 sigma +Structure_phase_merge_dist=0.25 +#also merge structures with background if overlap heavily in phase-space based on dispersions. +Apply_phase_merge_to_host=1 + +################################ +#Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) +################################ + +#unbinding related items +Unbind_flag=1 #run unbinding +#objects must have particles that meet the allowed kinetic to potential ratio AND also have some total fraction that are completely bound. +Unbinding_type=0 +#alpha factor used to determine whether particle is "bound" alaph*T+W<0. 
For standard subhalo catalogues use >0.9 but if interested in tidal debris 0.2-0.5 +Allowed_kinetic_potential_ratio=0.95 +Min_bound_mass_frac=0.65 #minimum bound mass fraction +#run unbinding of field structures, aka halos. This is useful for SAMs and 6DFOF halos but may not be useful if interested in 3DFOF mass functions. +Bound_halos=0 +#keep the background potential when unbinding (set to 0 to discard it) +Keep_background_potential=1 +#use all particles to determine velocity frame for unbinding +Frac_pot_ref=1.0 +Min_npot_ref=20 +#reference frame only meaningful if calculating velocity frame using subset of particles in object. Can use radially sorted fraction of particles about minimum potential or centre of mass +Kinetic_reference_frame_type=0 +Unbinding_max_unbound_removal_fraction_per_iteration=0.5 +Unbinding_max_unbound_fraction=0.95 +Unbinding_max_unbound_fraction_allowed=0.005 + +################################ +#Cosmological parameters +#This does not need to be set since it is read from the input file +################################ + +################################ +#Calculation of properties related options +################################ +Inclusive_halo_masses=3 #calculate inclusive masses for halos using full Spherical overdensity apertures once all substructures have been found (if substructures are searched for). +#when calculating properties, for field objects calculate inclusive masses +Iterate_cm_flag=0 #do not iteratively find the centre-of-mass, giving bulk centre of mass and centre of mass velocity. +Sort_by_binding_energy=1 #sort by binding energy +Reference_frame_for_properties=2 #use the position of the particle with the minimum potential as the point about which properties should be calculated.
+#calculate more (sub)halo properties (like angular momentum in spherical overdensity apertures, both inclusive and exclusive) +Extensive_halo_properties_output=1 +Extensive_gas_properties_output=1 +Extensive_star_properties_output=1 + +#aperture related (list must be in increasing order and terminates with , ie: 1,2,3, ) +#calculate aperture masses +Calculate_aperture_quantities=1 +Number_of_apertures=6 +Aperture_values_in_kpc=3,5,10,30,50,100, +Number_of_projected_apertures=3 +Projected_aperture_values_in_kpc=10,50,100, + +#spherical overdensity related quantities +Virial_density=500 #user defined virial overdensity. Note that 200 rho_c, 200 rho_m and BN98 are already calculated. +#number of spherical overdensity thresholds +Number_of_overdensities=5 +Overdensity_values_in_critical_density=25,100,500,1000,2500, + +#calculate radial profiles +Calculate_radial_profiles=1 +Number_of_radial_profile_bin_edges=20 +#default radial normalisation log rad bins, in proper kpc +Radial_profile_norm=0 +Radial_profile_bin_edges=-2.,-1.87379263,-1.74758526,-1.62137789,-1.49517052,-1.36896316,-1.24275579,-1.11654842,-0.99034105,-0.86413368,-0.73792631,-0.61171894,-0.48551157,-0.3593042,-0.23309684,-0.10688947,0.0193179,0.14552527,0.27173264,0.39794001, + +################################ +#output related +################################ + +Write_group_array_file=0 #write a group array file +Separate_output_files=0 #separate output into field and substructure files similar to subfind +Binary_output=2 #binary output 1, ascii 0, and HDF 2 +#do not output particles residing in the spherical overdensity apertures of halos, only the particles exclusively belonging to halos +Spherical_overdensity_halo_particle_list_output=0 + +#halo ids are adjusted by this value * 1000000000000 (or 1000000 if code compiled with the LONGINTS option turned off) +#to ensure that halo ids are temporally unique. 
So if you had 100 snapshots, for snap 100 set this to 100 and 100*1000000000000 will +#be added to the halo id as set for this snapshot, so halo 1 becomes halo 100*1000000000000+1 and halo 1 of snap 0 would just have ID=1 + +#ALTER THIS as part of a script to get temporally unique ids +Snapshot_value=SNAP + +################################ +#other options +################################ +Verbose=0 #how talkative do you want the code to be, 0 not much, 1 a lot, 2 chatterbox diff --git a/examples/sample_swifthydro_6dfof_subhalo.cfg b/examples/sample_swifthydro_6dfof_subhalo.cfg index e43c407c..2abca564 100644 --- a/examples/sample_swifthydro_6dfof_subhalo.cfg +++ b/examples/sample_swifthydro_6dfof_subhalo.cfg @@ -120,6 +120,12 @@ Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per i Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance +#merge substructures if the overlap in phase-space by some fraction of their dispersion +#here distance has to be less than 0.25 sigma +Structure_phase_merge_dist=0.25 +#also merge structures with background if overlap heavily in phase-space based on dispersions. +Apply_phase_merge_to_host=1 + ################################ #Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) ################################ @@ -140,6 +146,9 @@ Frac_pot_ref=1.0 Min_npot_ref=20 #reference frame only meaningful if calculating velocity frame using subset of particles in object. 
Can use radially sorted fraction of particles about minimum potential or centre of mass Kinetic_reference_frame_type=0 +Unbinding_max_unbound_removal_fraction_per_iteration=0.5 +Unbinding_max_unbound_fraction=0.95 +Unbinding_max_unbound_fraction_allowed=0.005 ################################ #Cosmological parameters diff --git a/examples/sample_zoomdmcosmological_run.cfg b/examples/sample_zoomdmcosmological_run.cfg index 1b9fd339..02178202 100644 --- a/examples/sample_zoomdmcosmological_run.cfg +++ b/examples/sample_zoomdmcosmological_run.cfg @@ -112,6 +112,12 @@ Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per i Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance +#merge substructures if the overlap in phase-space by some fraction of their dispersion +#here distance has to be less than 0.25 sigma +Structure_phase_merge_dist=0.25 +#also merge structures with background if overlap heavily in phase-space based on dispersions. +Apply_phase_merge_to_host=1 + ################################ #Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) ################################ @@ -132,6 +138,9 @@ Frac_pot_ref=1.0 Min_npot_ref=20 #reference frame only meaningful if calculating velocity frame using subset of particles in object. 
Can use radially sorted fraction of particles about minimum potential or centre of mass Kinetic_reference_frame_type=0 +Unbinding_max_unbound_removal_fraction_per_iteration=0.5 +Unbinding_max_unbound_fraction=0.95 +Unbinding_max_unbound_fraction_allowed=0.005 ################################ #Cosmological parameters diff --git a/examples/sample_zoomhydrocosmological_run.cfg b/examples/sample_zoomhydrocosmological_run.cfg index 4ad58fad..1577c32f 100644 --- a/examples/sample_zoomhydrocosmological_run.cfg +++ b/examples/sample_zoomhydrocosmological_run.cfg @@ -3,7 +3,7 @@ #runs 6DFOF algorithm, calculates many properties #Units currently set to take in as input, Mpc, 1e10 solar masses, km/s, output in same units #To set temporally unique halo ids, alter Snapshot_value=SNAP to appropriate value. Ie: for snapshot 12, change SNAP to 12 -#note that the code must be compiled with -DVR_ZOOM_SIM=ON -DVR_USE_GAS=ON -DVR_USE_STAR=ON -DVR_USE_BH=ON +#note that the code must be compiled with -DVR_ZOOM_SIM=ON -DVR_USE_GAS=ON -DVR_USE_STAR=ON -DVR_USE_BH=ON ################################ #input options @@ -119,6 +119,12 @@ Halo_core_loop_ellv_fac=1.0 #how much to change the velocity space linking per i Halo_core_loop_elln_fac=1.2 #how much to change the min number of particles per iteration Halo_core_phase_significance=2.0 #how significant a core must be in terms of dispersions (sigma) significance +#merge substructures if the overlap in phase-space by some fraction of their dispersion +#here distance has to be less than 0.25 sigma +Structure_phase_merge_dist=0.25 +#also merge structures with background if overlap heavily in phase-space based on dispersions. 
+Apply_phase_merge_to_host=1 + ################################ #Unbinding options (VELOCIraptor is able to accurately identify tidal debris so particles need not be bound to a structure) ################################ diff --git a/examples/samplestfrun.sh b/examples/samplestfrun.sh index ef6df582..bef7824b 100755 --- a/examples/samplestfrun.sh +++ b/examples/samplestfrun.sh @@ -18,23 +18,23 @@ outdir=./ #code directory codedir=./ #stf executable -stfexe=${codedir}/bin/stf +stfexe=${vrdir}/bin/stf #tree executable -treefrogexe=${codedir}/bin/treefrog +treefrogexe=${treefrogdir}/bin/treefrog echo $isnap,$fsnap,$nsnaps -for ((j=$isnap; j<=$fsnap; j++)) +for ((j=$isnap; j<=$fsnap; j++)) do jj=`printf "%03d" $j` - cp $paramfile $outdir/$simname.sn$jj.param; + cp $paramfile $outdir/$simname.sn$jj.param; sed -i .old 's/Output=OUTNAME/Output='"$outdir"'/'"$simname"'.c'"$i"'.sn'"$jj"'/g' $outdir/$simname.sn$jj.param; sed -i .old 's/Snapshot_value=SNVALUE/Snapshot_value='"$j"'/g' $outdir/$simname.sn$jj.param; ifile=`printf "%s/snapshot_%03d" $indir $j` - $stfexe -i $ifile -s $nfiles -C $outdir/$simname.sn$jj.param > $outdir/$simname.sn$jj.log; + $stfexe -i $ifile -s $nfiles -C $outdir/$simname.sn$jj.param > $outdir/$simname.sn$jj.log; done -#treefrog commands +#treefrog commands #largest particle ID value Neff=1024 @@ -42,7 +42,7 @@ Nid=`echo $Neff | awk '{print $1^3.0}'` #number of steps used when linking numsteps=4 siglimit=0.1 -#to make sure halo ids temporally unique, use this value times snapshot, +#to make sure halo ids temporally unique, use this value times snapshot, halotemporalidval=10000000000 #specify format, 0 ascii, 1 binary, 2 hdf5 ibinary=0 @@ -52,11 +52,9 @@ ifield=0 numfiles=1 rm $outdir/halolist.txt -for ((j=$isnap; j<=$fsnap; j++)) +for ((j=$isnap; j<=$fsnap; j++)) do jj=`printf "%03d" $j` echo $outdir/$simname.sn$jj >> $outdir/halolist.txt done $treefrogexe -i $outdir/halolist.txt -s $nsnaps -N $numfiles -n $Nid -t $numsteps -h $halotemporalidval -B 
$ibinary -F $ifield -o $outdir/$simname.tree $outdir/tree.log - - diff --git a/examples/test.out.cfg b/examples/test.out.cfg deleted file mode 100644 index 51835047..00000000 --- a/examples/test.out.cfg +++ /dev/null @@ -1,4 +0,0 @@ -Length_unit=10.0 -Velocity_unit=12000123.0 -Mass_unit=42.0 -Output=test.out diff --git a/examples/testing/flamegraph.Makefile b/examples/testing/flamegraph.Makefile new file mode 100644 index 00000000..e300654e --- /dev/null +++ b/examples/testing/flamegraph.Makefile @@ -0,0 +1,48 @@ +# This Makefile profiles stf using perf to produce flame graphs +.PHONY: info record_prereq record fold flamegraph run display + +# print a short description of what this Makefile does +info: + @echo "This script profiles a VR run." + +# PERF (record options only; the output file is passed separately with -o by the recipes) +_PERF_OPT_RECORD=--call-graph lbr -g -s +_PERF_PATH_ROOT=`pwd`/fg +_PERF_FILE_RAW=$(_PERF_PATH_ROOT)/perf.raw.`date +%Y%m%d`.`hostname`.data + +# FLAMEGRAPH (all generated files live under _PERF_PATH_ROOT) +_FLAMEGRAPH_GIT_URL=https://github.com/brendangregg/FlameGraph.git +_FLAMEGRAPH_PATH=$(_PERF_PATH_ROOT)/FlameGraph +_FLAMEGRAPH_FILE_FOLDED=$(_PERF_PATH_ROOT)/perf.processed.`date +%Y%m%d`.`hostname`.folded +_FLAMEGRAPH_SCRIPT_STACKCOLLAPSE=$(_FLAMEGRAPH_PATH)/stackcollapse-perf.pl +_FLAMEGRAPH_SCRIPT_MAIN=$(_FLAMEGRAPH_PATH)/flamegraph.pl +_FLAMEGRAPH_FILE_SVG=$(_PERF_PATH_ROOT)/perf.flamegraph.`date +%Y%m%d`.`hostname`.svg + +# STF +# Dynamic stf input parameters through environment variables to be set in an input payload "./payloadname.env " +_STF_PARAMETERS="DEFAULT" +include ./payloadname.env + +record_prereq: + @mkdir -p $(_PERF_PATH_ROOT) + @test -d $(_FLAMEGRAPH_PATH) || git clone $(_FLAMEGRAPH_GIT_URL) $(_FLAMEGRAPH_PATH)
+ +record: record_prereq + perf record $(_PERF_OPT_RECORD) -o $(_PERF_FILE_RAW) ./stf $(_STF_PARAMETERS) + @echo "To manually test recorded data, execute: " + @echo "perf report -i $(_PERF_FILE_RAW)" + +fold: + perf script -i $(_PERF_FILE_RAW) | $(_FLAMEGRAPH_SCRIPT_STACKCOLLAPSE) > $(_FLAMEGRAPH_FILE_FOLDED) + +flamegraph: + cat $(_FLAMEGRAPH_FILE_FOLDED) | $(_FLAMEGRAPH_SCRIPT_MAIN) > $(_FLAMEGRAPH_FILE_SVG) + +run: record fold flamegraph + +# Display output for proofreading +# It can be appended to job scripts or test run in CLI +display: + @echo "perf record $(_PERF_OPT_RECORD) -o $(_PERF_FILE_RAW) ./stf $(_STF_PARAMETERS)" + @echo "perf script -i $(_PERF_FILE_RAW) | $(_FLAMEGRAPH_SCRIPT_STACKCOLLAPSE) > $(_FLAMEGRAPH_FILE_FOLDED)" + @echo "cat $(_FLAMEGRAPH_FILE_FOLDED) | $(_FLAMEGRAPH_SCRIPT_MAIN) > $(_FLAMEGRAPH_FILE_SVG)" \ No newline at end of file diff --git a/src/allvars.h b/src/allvars.h index 2be75bf9..1158318a 100644 --- a/src/allvars.h +++ b/src/allvars.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -54,8 +55,8 @@ ///if using OpenMP API #ifdef USEOPENMP #include -#include "ompvar.h" #endif +#include "ompvar.h" ///if using HDF API #ifdef USEHDF @@ -205,6 +206,8 @@ using namespace NBody; ///number below which just use PP calculation for potential, which occurs roughly at when n~2*log(n) (from scaling of n^2 vs n ln(n) for PP vs tree and factor of 2 is ///for extra overhead in producing tree. For reasonable values of n (>100) this occurs at ~100. Here to account for extra memory need for tree, we use n=3*log(n) or 150 #define UNBINDNUM 150 +#define POTPPCALCNUM 150 +#define POTOMPCALCNUM 1000 ///when unbinding check to see if system is bound and least bound particle is also bound #define USYSANDPART 0 ///when unbinding check to see if least bound particle is also bound @@ -234,16 +237,6 @@ using namespace NBody; //@} -/// \defgroup OMPLIMS For determining whether loop contains enough for openm to be worthwhile. 
-//@{ -#ifndef USEOPENMP -#define ompsearchnum 50000 -#define ompunbindnum 1000 -#define ompperiodnum 50000 -#define omppropnum 50000 -#endif -//@} - /// \defgroup PROPLIMS Particle limits for calculating properties //@{ #define PROPNFWMINNUM 100 @@ -381,6 +374,8 @@ struct Options int isubfindproperties; ///for output, produce subfind like format int isubfindoutput; + ///flag indicating that VR is running on the fly + bool iontheflyfinding; ///disable particle id related output like fof.grp or catalog_group data. Useful if just want halo properties ///and not interested in tracking. Code writes halo properties catalog and exits. @@ -396,6 +391,8 @@ struct Options /// mpi factor by which to multiple the memory allocated, ie: buffer region /// to reduce likelihood of having to expand/allocate new memory Double_t mpipartfac; + /// if using parallel output, number of mpi threads to group together + int mpinprocswritesize; /// run FOF using OpenMP int iopenmpfof; @@ -404,7 +401,7 @@ struct Options ///\name length,m,v,grav conversion units //@{ - Double_t lengthinputconversion, massinputconversion, energyinputconversion, velocityinputconversion; + Double_t lengthinputconversion, massinputconversion, energyinputconversion, internalenergyinputconversion, velocityinputconversion; Double_t SFRinputconversion, metallicityinputconversion, stellarageinputconversion; int istellaragescalefactor, isfrisssfr; Double_t G; @@ -555,6 +552,12 @@ struct Options Double_t halocorephasedistsig; ///factor by which a substructure s must be closer than in phase-space to merger with another substructure in sigma units Double_t coresubmergemindist; + ///whether substructure phase-space distance merge check is applied to background host halo as well. 
+ int icoresubmergewithbg; + ///fraction of size a substructure must be of host to be considered a spurious dynamical substructure + Double_t minfracsubsizeforremoval; + ///Maximum allowed mean local velocity density ratio above which structure is considered highly unrelaxed. + Double_t maxmeanlocalvelratio; //@} ///for storing a snapshot value to make halo ids unique across snapshots long long snapshotvalue; @@ -654,6 +657,23 @@ struct Options Double_t gas_sfr_threshold; //@} + /// \name options related to calculating detailed hydro/star/bh properties related to chemistry/feedbac, etc + //@{ + vector gas_internalprop_names; + vector star_internalprop_names; + vector bh_internalprop_names; + + vector gas_chem_names; + vector star_chem_names; + vector bh_chem_names; + + vector gas_chemproduction_names; + vector star_chemproduction_names; + vector bh_chemproduction_names; + + vector extra_dm_internalprop_names; + //@} + Options() { lengthinputconversion = 1.0; @@ -709,7 +729,7 @@ struct Options iBaryonSearch=0; icmrefadjust=1; iIterateCM = 1; - iLocalVelDenApproxCalcFlag = 1 ; + iLocalVelDenApproxCalcFlag = 2 ; Neff=-1; @@ -764,6 +784,9 @@ struct Options halocorenumfaciter=1.0; halocorephasedistsig=2.0; coresubmergemindist=0.0; + icoresubmergewithbg=0; + minfracsubsizeforremoval=0.75; + maxmeanlocalvelratio=0.5; iverbose=0; iwritefof=0; @@ -805,6 +828,7 @@ struct Options mpiparticletotbufsize=-1; mpiparticlebufsize=-1; + mpinprocswritesize=1; lengthtokpc=-1.0; velocitytokms=-1.0; @@ -841,7 +865,12 @@ struct Options iopenmpfof = 1; openmpfofsize = ompfofsearchnum; #endif + + iontheflyfinding = false; } + Options(Options &opt) = default; + Options& operator=(const Options&) = default; + Options& operator=(Options&&) = default; }; struct ConfigInfo{ @@ -991,6 +1020,13 @@ struct ConfigInfo{ datainfo.push_back(to_string(opt.halocorephasedistsig)); datatype.push_back(python_type_string(opt.halocorephasedistsig)); + //for merging structures together + 
nameinfo.push_back("Structure_phase_merge_dist"); + datainfo.push_back(to_string(opt.coresubmergemindist)); + datatype.push_back(python_type_string(opt.coresubmergemindist)); + nameinfo.push_back("Apply_phase_merge_to_host"); + datainfo.push_back(to_string(opt.icoresubmergewithbg)); + datatype.push_back(python_type_string(opt.icoresubmergewithbg)); //for changing factors used in iterative search nameinfo.push_back("Iterative_threshold_factor"); @@ -1074,6 +1110,7 @@ struct ConfigInfo{ datainfo.push_back(to_string(opt.stellaragetoyrs)); datatype.push_back(python_type_string(opt.stellaragetoyrs)); + // simulation/cosmology info nameinfo.push_back("Period"); datainfo.push_back(to_string(opt.p)); datatype.push_back(python_type_string(opt.p)); @@ -1107,6 +1144,9 @@ struct ConfigInfo{ nameinfo.push_back("Omega_nu"); datainfo.push_back(to_string(opt.Omega_nu)); datatype.push_back(python_type_string(opt.Omega_nu)); + nameinfo.push_back("Omega_k"); + datainfo.push_back(to_string(opt.Omega_k)); + datatype.push_back(python_type_string(opt.Omega_k)); nameinfo.push_back("Omega_DE"); datainfo.push_back(to_string(opt.Omega_de)); datatype.push_back(python_type_string(opt.Omega_de)); @@ -1225,6 +1265,66 @@ struct ConfigInfo{ datainfo.push_back(datastring); datatype.push_back(python_type_string(opt.SOthresholds_values_crit[0])); } + if (opt.gas_internalprop_names.size()>0){ + nameinfo.push_back("Gas_internal_property_names"); + datastring=string("");for (auto &x:opt.gas_internalprop_names) {datastring+=x;datastring+=string(",");} + datainfo.push_back(datastring); + datatype.push_back("str"); + } + if (opt.gas_chem_names.size()>0){ + nameinfo.push_back("Gas_chemistry_names"); + datastring=string("");for (auto &x:opt.gas_chem_names) {datastring+=x;datastring+=string(",");} + datainfo.push_back(datastring); + datatype.push_back("str"); + } + if (opt.gas_chemproduction_names.size()>0){ + nameinfo.push_back("Gas_chemistry_production_names"); + datastring=string("");for (auto 
&x:opt.gas_chemproduction_names) {datastring+=x;datastring+=string(",");} + datainfo.push_back(datastring); + datatype.push_back("str"); + } + if (opt.star_internalprop_names.size()>0){ + nameinfo.push_back("Star_internal_property_names"); + datastring=string("");for (auto &x:opt.star_internalprop_names) {datastring+=x;datastring+=string(",");} + datainfo.push_back(datastring); + datatype.push_back("str"); + } + if (opt.star_chem_names.size()>0){ + nameinfo.push_back("Star_chemistry_names"); + datastring=string("");for (auto &x:opt.star_chem_names) {datastring+=x;datastring+=string(",");} + datainfo.push_back(datastring); + datatype.push_back("str"); + } + if (opt.star_chemproduction_names.size()>0){ + nameinfo.push_back("Star_chemistry_production_names"); + datastring=string("");for (auto &x:opt.star_chemproduction_names) {datastring+=x;datastring+=string(",");} + datainfo.push_back(datastring); + datatype.push_back("str"); + } + if (opt.bh_internalprop_names.size()>0){ + nameinfo.push_back("BH_internal_property_names"); + datastring=string("");for (auto &x:opt.bh_internalprop_names) {datastring+=x;datastring+=string(",");} + datainfo.push_back(datastring); + datatype.push_back("str"); + } + if (opt.bh_chem_names.size()>0){ + nameinfo.push_back("BH_chemistry_names"); + datastring=string("");for (auto &x:opt.bh_chem_names) {datastring+=x;datastring+=string(",");} + datainfo.push_back(datastring); + datatype.push_back("str"); + } + if (opt.bh_chemproduction_names.size()>0){ + nameinfo.push_back("BH_chemistry_production_names"); + datastring=string("");for (auto &x:opt.bh_chemproduction_names) {datastring+=x;datastring+=string(",");} + datainfo.push_back(datastring); + datatype.push_back("str"); + } + if (opt.extra_dm_internalprop_names.size()>0){ + nameinfo.push_back("Extra_DM_internal_property_names"); + datastring=string("");for (auto &x:opt.extra_dm_internalprop_names) {datastring+=x;datastring+=string(",");} + datainfo.push_back(datastring); +
datatype.push_back("str"); + } //other options nameinfo.push_back("Verbose"); @@ -1260,7 +1360,6 @@ struct ConfigInfo{ datainfo.push_back(to_string(opt.iextendedoutput)); datatype.push_back(python_type_string(opt.iextendedoutput)); - //HDF io related info nameinfo.push_back("HDF_name_convention"); datainfo.push_back(to_string(opt.ihdfnameconvention)); @@ -1331,6 +1430,11 @@ struct ConfigInfo{ datainfo.push_back(""); datatype.push_back(""); #endif + #ifdef EXTRADMON + nameinfo.push_back("#USEEXTRADMPROPERTIES"); + datainfo.push_back(""); + datatype.push_back(""); + #endif #ifdef HIGHRES nameinfo.push_back("#ZOOMSIM"); datainfo.push_back(""); @@ -1494,7 +1598,7 @@ struct GridCell den=0; } ~GridCell(){ - if (nparts>0)delete nindex; + if (nparts>0)delete[] nindex; } }; @@ -1638,11 +1742,13 @@ struct PropData vector aperture_veldisp_gas; vector aperture_vrdisp_gas; vector aperture_SFR_gas; + vector aperture_Z_gas; vector aperture_rhalfmass_gas; + vector aperture_L_gas; vector aperture_mass_proj_gas; vector aperture_rhalfmass_proj_gas; vector aperture_SFR_proj_gas; - vector aperture_L_gas; + vector aperture_Z_proj_gas; vector profile_npart_gas; vector profile_npart_inclusive_gas; vector profile_mass_gas; @@ -1687,9 +1793,11 @@ struct PropData vector aperture_veldisp_gas_sf; vector aperture_vrdisp_gas_sf; vector aperture_rhalfmass_gas_sf; + vector aperture_Z_gas_sf; + vector aperture_L_gas_sf; vector aperture_mass_proj_gas_sf; vector aperture_rhalfmass_proj_gas_sf; - vector aperture_L_gas_sf; + vector aperture_Z_proj_gas_sf; vector profile_npart_gas_sf; vector profile_npart_inclusive_gas_sf; vector profile_mass_gas_sf; @@ -1734,9 +1842,11 @@ struct PropData vector aperture_veldisp_gas_nsf; vector aperture_vrdisp_gas_nsf; vector aperture_rhalfmass_gas_nsf; + vector aperture_Z_gas_nsf; + vector aperture_L_gas_nsf; vector aperture_mass_proj_gas_nsf; vector aperture_rhalfmass_proj_gas_nsf; - vector aperture_L_gas_nsf; + vector aperture_Z_proj_gas_nsf; vector 
profile_npart_gas_nsf; vector profile_npart_inclusive_gas_nsf; vector profile_mass_gas_nsf; @@ -1788,9 +1898,11 @@ struct PropData vector aperture_veldisp_star; vector aperture_vrdisp_star; vector aperture_rhalfmass_star; + vector aperture_Z_star; + vector aperture_L_star; vector aperture_mass_proj_star; vector aperture_rhalfmass_proj_star; - vector aperture_L_star; + vector aperture_Z_proj_star; vector profile_npart_star; vector profile_npart_inclusive_star; vector profile_mass_star; @@ -1847,6 +1959,24 @@ struct PropData //@} #endif + /// \name extra hydro/star/bh properties such as chemistry/feedback/metal production + + //@{ +#if defined(GASON) + HydroProperties hydroprop; +#endif +#if defined(STARON) + StarProperties starprop; +#endif +#if defined(BHON) + BHProperties bhprop; +#endif +#if defined(EXTRADMON) + Int_t n_dm; + ExtraDMProperties extradmprop; +#endif + //@} + PropData() { num=gNFOF=gN6DFOF=0; @@ -1898,6 +2028,7 @@ struct PropData L_200mean_excl_gas[0]=L_200mean_excl_gas[1]=L_200mean_excl_gas[2]=0; L_BN98_excl_gas[0]=L_BN98_excl_gas[1]=L_BN98_excl_gas[2]=0; #ifdef STARON + n_gas_sf = n_gas_nsf = 0; M_gas_sf=M_gas_sf_rvmax=M_gas_sf_30kpc=M_gas_sf_50kpc=0; L_gas_sf[0]=L_gas_sf[1]=L_gas_sf[2]=0; q_gas_sf=s_gas_sf=1.0; @@ -1962,10 +2093,15 @@ struct PropData #endif #ifdef HIGHRES n_interloper=M_interloper=0; +#endif +#ifdef EXTRADMON + n_dm = 0; #endif } ///equals operator, useful if want inclusive information before substructure search - PropData& operator=(const PropData &p){ + PropData& operator=(const PropData &p) = default; + /* + PropData& operator=(const PropData &p) { num=p.num; gcm=p.gcm;gcmvel=p.gcmvel; gposmbp=p.gposmbp;gvelmbp=p.gvelmbp; @@ -2052,7 +2188,7 @@ struct PropData aperture_veldisp=p.aperture_veldisp; aperture_vrdisp=p.aperture_vrdisp; aperture_rhalfmass=p.aperture_rhalfmass; - #if defined(GASON) || defined(STARON) || defined(BHON) +#if defined(GASON) || defined(STARON) || defined(BHON) aperture_npart_dm=p.aperture_npart_dm; 
aperture_mass_dm=p.aperture_mass_dm; aperture_veldisp_dm=p.aperture_veldisp_dm; @@ -2066,6 +2202,7 @@ struct PropData aperture_rhalfmass_gas=p.aperture_rhalfmass_gas; #ifdef STARON aperture_SFR_gas=p.aperture_SFR_gas; + aperture_Z_gas=p.aperture_Z_gas; aperture_npart_gas_sf=p.aperture_npart_gas_sf; aperture_npart_gas_nsf=p.aperture_npart_gas_nsf; aperture_mass_gas_sf=p.aperture_mass_gas_sf; @@ -2076,6 +2213,8 @@ struct PropData aperture_vrdisp_gas_nsf=p.aperture_vrdisp_gas_nsf; aperture_rhalfmass_gas_sf=p.aperture_rhalfmass_gas_sf; aperture_rhalfmass_gas_nsf=p.aperture_rhalfmass_gas_nsf; + aperture_Z_gas_sf=p.aperture_Z_gas_sf; + aperture_Z_gas_nsf=p.aperture_Z_gas_nsf; #endif #endif #ifdef STARON @@ -2084,6 +2223,7 @@ struct PropData aperture_veldisp_star=p.aperture_veldisp_star; aperture_vrdisp_star=p.aperture_vrdisp_star; aperture_rhalfmass_star=p.aperture_rhalfmass_star; + aperture_Z_star=p.aperture_Z_star; #endif aperture_mass_proj=p.aperture_mass_proj; aperture_rhalfmass_proj=p.aperture_rhalfmass_proj; @@ -2091,13 +2231,20 @@ struct PropData aperture_mass_proj_gas=p.aperture_mass_proj_gas; aperture_rhalfmass_proj_gas=p.aperture_rhalfmass_proj_gas; #ifdef STARON + aperture_SFR_proj_gas=p.aperture_SFR_proj_gas; + aperture_Z_proj_gas=p.aperture_Z_proj_gas; aperture_mass_proj_gas_sf=p.aperture_mass_proj_gas_sf; + aperture_mass_proj_gas_nsf=p.aperture_mass_proj_gas_nsf; + aperture_rhalfmass_proj_gas_sf=p.aperture_rhalfmass_proj_gas_sf; aperture_rhalfmass_proj_gas_nsf=p.aperture_rhalfmass_proj_gas_nsf; + aperture_Z_proj_gas_sf=p.aperture_Z_proj_gas_sf; + aperture_Z_proj_gas_nsf=p.aperture_Z_proj_gas_nsf; #endif #endif #ifdef STARON aperture_mass_proj_star=p.aperture_mass_proj_star; aperture_rhalfmass_proj_star=p.aperture_rhalfmass_proj_star; + aperture_Z_proj_star=p.aperture_Z_proj_star; #endif profile_npart=p.profile_npart; profile_mass=p.profile_mass; @@ -2127,6 +2274,7 @@ struct PropData #endif return *this; } + */ //allocate memory for profiles void 
Allocate(Options &opt) { @@ -2150,6 +2298,7 @@ struct PropData aperture_rhalfmass_gas.resize(opt.aperturenum); #ifdef STARON aperture_SFR_gas.resize(opt.aperturenum); + aperture_Z_gas.resize(opt.aperturenum); aperture_npart_gas_sf.resize(opt.aperturenum); aperture_npart_gas_nsf.resize(opt.aperturenum); aperture_mass_gas_sf.resize(opt.aperturenum); @@ -2160,6 +2309,8 @@ struct PropData aperture_vrdisp_gas_nsf.resize(opt.aperturenum); aperture_rhalfmass_gas_sf.resize(opt.aperturenum); aperture_rhalfmass_gas_nsf.resize(opt.aperturenum); + aperture_Z_gas_sf.resize(opt.aperturenum); + aperture_Z_gas_nsf.resize(opt.aperturenum); #endif #endif #ifdef STARON @@ -2168,6 +2319,7 @@ struct PropData aperture_veldisp_star.resize(opt.aperturenum); aperture_vrdisp_star.resize(opt.aperturenum); aperture_rhalfmass_star.resize(opt.aperturenum); + aperture_Z_star.resize(opt.aperturenum); #endif #ifdef HIGHRES aperture_npart_interloper.resize(opt.aperturenum); @@ -2194,6 +2346,7 @@ struct PropData for (auto &x:aperture_rhalfmass_gas) x=-1; #ifdef STARON for (auto &x:aperture_SFR_gas) x=0; + for (auto &x:aperture_Z_gas) x=0; for (auto &x:aperture_npart_gas_sf) x=0; for (auto &x:aperture_mass_gas_sf) x=-1; for (auto &x:aperture_npart_gas_nsf) x=0; @@ -2202,6 +2355,8 @@ struct PropData for (auto &x:aperture_veldisp_gas_nsf) x=0; for (auto &x:aperture_rhalfmass_gas_sf) x=-1; for (auto &x:aperture_rhalfmass_gas_nsf) x=-1; + for (auto &x:aperture_Z_gas_sf) x=0; + for (auto &x:aperture_Z_gas_nsf) x=0; #endif #endif #ifdef STARON @@ -2209,6 +2364,7 @@ struct PropData for (auto &x:aperture_mass_star) x=-1; for (auto &x:aperture_veldisp_star) x=0; for (auto &x:aperture_rhalfmass_star) x=-1; + for (auto &x:aperture_Z_star) x=0; #endif #ifdef HIGHRES for (auto &x:aperture_npart_interloper) x=0; @@ -2233,15 +2389,19 @@ struct PropData aperture_rhalfmass_proj_gas.resize(opt.apertureprojnum); #ifdef STARON aperture_SFR_proj_gas.resize(opt.apertureprojnum); + 
aperture_Z_proj_gas.resize(opt.apertureprojnum); aperture_mass_proj_gas_sf.resize(opt.apertureprojnum); aperture_mass_proj_gas_nsf.resize(opt.apertureprojnum); aperture_rhalfmass_proj_gas_sf.resize(opt.apertureprojnum); aperture_rhalfmass_proj_gas_nsf.resize(opt.apertureprojnum); + aperture_Z_proj_gas_sf.resize(opt.apertureprojnum); + aperture_Z_proj_gas_nsf.resize(opt.apertureprojnum); #endif #endif #ifdef STARON aperture_mass_proj_star.resize(opt.apertureprojnum); aperture_rhalfmass_proj_star.resize(opt.apertureprojnum); + aperture_Z_proj_star.resize(opt.apertureprojnum); #endif for (auto &x:aperture_mass_proj) x[0]=x[1]=x[2]=-1; @@ -2251,15 +2411,19 @@ struct PropData for (auto &x:aperture_rhalfmass_proj_gas) x[0]=x[1]=x[2]=-1; #ifdef STARON for (auto &x:aperture_SFR_proj_gas) x[0]=x[1]=x[2]=0; + for (auto &x:aperture_Z_proj_gas) x[0]=x[1]=x[2]=0; for (auto &x:aperture_mass_proj_gas_sf) x[0]=x[1]=x[2]=-1; for (auto &x:aperture_rhalfmass_proj_gas_sf) x[0]=x[1]=x[2]=-1; for (auto &x:aperture_mass_proj_gas_nsf) x[0]=x[1]=x[2]=-1; for (auto &x:aperture_rhalfmass_proj_gas_nsf) x[0]=x[1]=x[2]=-1; + for (auto &x:aperture_Z_proj_gas_sf) x[0]=x[1]=x[2]=-1; + for (auto &x:aperture_Z_proj_gas_nsf) x[0]=x[1]=x[2]=-1; #endif #endif #ifdef STARON for (auto &x:aperture_mass_proj_star) x[0]=x[1]=x[2]=-1; for (auto &x:aperture_rhalfmass_proj_star) x[0]=x[1]=x[2]=-1; + for (auto &x:aperture_Z_proj_star) x[0]=x[1]=x[2]=-1; #endif } } @@ -2971,6 +3135,74 @@ struct PropData } } #endif + +#ifdef GASON + if (opt.gas_internalprop_names.size()+ opt.gas_chem_names.size()+opt.gas_chemproduction_names.size()>0) { + for (auto &extrafield:opt.gas_internalprop_names) + { + val = hydroprop.GetInternalProperties(extrafield); + Fout.write((char*)&val,sizeof(val)); + } + for (auto &extrafield:opt.gas_chem_names) + { + val = hydroprop.GetChemistry(extrafield); + Fout.write((char*)&val,sizeof(val)); + } + for (auto &extrafield:opt.gas_chemproduction_names) + { + val = 
hydroprop.GetChemistryProduction(extrafield); + Fout.write((char*)&val,sizeof(val)); + } + } +#endif +#ifdef STARON + if (opt.star_internalprop_names.size()+opt.star_chem_names.size()+opt.star_chemproduction_names.size()>0) { + for (auto &extrafield:opt.star_internalprop_names) + { + val = starprop.GetInternalProperties(extrafield); + Fout.write((char*)&val,sizeof(val)); + } + for (auto &extrafield:opt.star_chem_names) + { + val = starprop.GetChemistry(extrafield); + Fout.write((char*)&val,sizeof(val)); + } + for (auto &extrafield:opt.star_chemproduction_names) + { + val = starprop.GetChemistryProduction(extrafield); + Fout.write((char*)&val,sizeof(val)); + } + } +#endif +#ifdef BHON + if (opt.bh_internalprop_names.size()+opt.bh_chem_names.size()+opt.bh_chemproduction_names.size()>0) { + for (auto &extrafield:opt.bh_internalprop_names) + { + val = bhprop.GetInternalProperties(extrafield); + Fout.write((char*)&val,sizeof(val)); + } + for (auto &extrafield:opt.bh_chem_names) + { + val = bhprop.GetChemistry(extrafield); + Fout.write((char*)&val,sizeof(val)); + } + for (auto &extrafield:opt.bh_chemproduction_names) + { + val = bhprop.GetChemistryProduction(extrafield); + Fout.write((char*)&val,sizeof(val)); + } + } +#endif +#ifdef EXTRADMON + if (opt.extra_dm_internalprop_names.size()>0) { + for (auto &extrafield:opt.extra_dm_internalprop_names) + { + val = extradmprop.GetExtraProperties(extrafield); + Fout.write((char*)&val,sizeof(val)); + } + } +#endif + if (opt.iaperturecalc && opt.aperturenum>0){ for (auto j=0;j0){ @@ -3110,6 +3354,18 @@ struct PropData for (auto j=0;j0) { + for (auto &extrafield:opt.gas_internalprop_names) + Fout<0) { + for (auto &extrafield:opt.star_internalprop_names) + Fout<0) { + for (auto &extrafield:opt.bh_internalprop_names) + Fout<0) { + for (auto &extrafield:opt.extra_dm_internalprop_names) + Fout<0){ for (auto j=0;j0) { @@ -3522,6 +3826,18 @@ struct PropData for (auto j=0;j desiredproprealtype; vector hdfdesiredproprealtype; - // if 
(sizeof(Double_t)==sizeof(double)) desiredproprealtype.push_back(PredType::NATIVE_DOUBLE); - // else desiredproprealtype.push_back(PredType::NATIVE_FLOAT); if (sizeof(Double_t)==sizeof(double)) hdfdesiredproprealtype.push_back(H5T_NATIVE_DOUBLE); else hdfdesiredproprealtype.push_back(H5T_NATIVE_FLOAT); @@ -3625,17 +3939,6 @@ struct PropDataHeader{ //if using hdf, store the type #ifdef USEHDF - // predtypeinfo.push_back(PredType::STD_U64LE); - // predtypeinfo.push_back(PredType::STD_I64LE); - // predtypeinfo.push_back(PredType::STD_I64LE); - // predtypeinfo.push_back(PredType::STD_I64LE); - // predtypeinfo.push_back(PredType::STD_U64LE); - // predtypeinfo.push_back(PredType::STD_U64LE); - // predtypeinfo.push_back(PredType::STD_I32LE); - // if (opt.iKeepFOF==1){ - // predtypeinfo.push_back(PredType::STD_I64LE); - // predtypeinfo.push_back(PredType::STD_I64LE); - // } hdfpredtypeinfo.push_back(H5T_NATIVE_ULONG); hdfpredtypeinfo.push_back(H5T_NATIVE_LONG); hdfpredtypeinfo.push_back(H5T_NATIVE_LONG); @@ -4107,6 +4410,54 @@ struct PropDataHeader{ #endif #endif + //if extra hydro properties are calculated +#ifdef GASON + if (opt.gas_internalprop_names.size()+opt.gas_chem_names.size()+opt.gas_chemproduction_names.size() > 0) + { + for (auto x:opt.gas_internalprop_names) headerdatainfo.push_back(x+string("_gas")); + for (auto x:opt.gas_chem_names) headerdatainfo.push_back(x+string("_gas")); + for (auto x:opt.gas_chemproduction_names) headerdatainfo.push_back(x+string("_gas")); +#ifdef USEHDF + sizeval=hdfpredtypeinfo.size(); + for (int i=sizeval;i 0) + { + for (auto x:opt.star_internalprop_names) headerdatainfo.push_back(x+string("_star")); + for (auto x:opt.star_chem_names) headerdatainfo.push_back(x+string("_star")); + for (auto x:opt.star_chemproduction_names) headerdatainfo.push_back(x+string("_star")); +#ifdef USEHDF + sizeval=hdfpredtypeinfo.size(); + for (int i=sizeval;i 0) + { + for (auto x:opt.bh_internalprop_names) headerdatainfo.push_back(x+string("_bh")); + for 
(auto x:opt.bh_chem_names) headerdatainfo.push_back(x+string("_bh")); + for (auto x:opt.bh_chemproduction_names) headerdatainfo.push_back(x+string("_bh")); +#ifdef USEHDF + sizeval=hdfpredtypeinfo.size(); + for (int i=sizeval;i 0) + { + for (auto x:opt.extra_dm_internalprop_names) headerdatainfo.push_back(x+string("_extra_dm")); +#ifdef USEHDF + sizeval=hdfpredtypeinfo.size(); + for (int i=sizeval;i0 && opt.aperturenum>0) { for (auto i=0; i headerdatainfo; #ifdef USEHDF - // vector predtypeinfo; vector hdfpredtypeinfo; #endif #ifdef USEADIOS @@ -4406,9 +4772,6 @@ struct ProfileDataHeader{ ProfileDataHeader(Options&opt){ int sizeval; #ifdef USEHDF - // vector desiredproprealtype; - // if (sizeof(Double_t)==sizeof(double)) desiredproprealtype.push_back(PredType::NATIVE_DOUBLE); - // else desiredproprealtype.push_back(PredType::NATIVE_FLOAT); vector hdfdesiredproprealtype; if (sizeof(Double_t)==sizeof(double)) hdfdesiredproprealtype.push_back(H5T_NATIVE_DOUBLE); else hdfdesiredproprealtype.push_back(H5T_NATIVE_FLOAT); @@ -4422,7 +4785,6 @@ struct ProfileDataHeader{ offsetscalarentries=0; headerdatainfo.push_back("ID"); #ifdef USEHDF - // predtypeinfo.push_back(PredType::STD_U64LE); hdfpredtypeinfo.push_back(H5T_NATIVE_ULONG); #endif #ifdef USEADIOS @@ -4432,7 +4794,6 @@ struct ProfileDataHeader{ if (opt.iprofilenorm != PROFILERNORMPHYS) { headerdatainfo.push_back(opt.profileradnormstring); #ifdef USEHDF - // predtypeinfo.push_back(desiredproprealtype[0]); hdfpredtypeinfo.push_back(hdfdesiredproprealtype[0]); #endif #ifdef USEADIOS @@ -4576,10 +4937,11 @@ struct StrucLevelData void Allocate(Int_t numgroups){ nsinlevel=numgroups; Phead=new Particle*[numgroups+1]; + Pparenthead=new Particle*[numgroups+1]; gidhead=new Int_t*[numgroups+1]; - stypeinlevel=new Int_t[numgroups+1]; gidparenthead=new Int_t*[numgroups+1]; giduberparenthead=new Int_t*[numgroups+1]; + stypeinlevel=new Int_t[numgroups+1]; nextlevel=NULL; } ///initialize @@ -4591,10 +4953,11 @@ struct StrucLevelData 
nextlevel=NULL; if (nsinlevel>0) { delete[] Phead; + delete[] Pparenthead; delete[] gidhead; - delete[] stypeinlevel; delete[] gidparenthead; delete[] giduberparenthead; + delete[] stypeinlevel; } } }; @@ -4676,9 +5039,6 @@ struct DataGroupNames { DataGroupNames(){ #ifdef USEHDF - // vector desiredproprealtype; - // if (sizeof(Double_t)==sizeof(double)) desiredproprealtype.push_back(PredType::NATIVE_DOUBLE); - // else desiredproprealtype.push_back(PredType::NATIVE_FLOAT); vector hdfdesiredproprealtype; if (sizeof(Double_t)==sizeof(double)) hdfdesiredproprealtype.push_back(H5T_NATIVE_DOUBLE); else hdfdesiredproprealtype.push_back(H5T_NATIVE_FLOAT); @@ -4705,23 +5065,6 @@ struct DataGroupNames { prop.push_back("Stellar_age_unit_to_yr"); #endif #ifdef USEHDF -// propdatatype.push_back(PredType::STD_I32LE); -// propdatatype.push_back(PredType::STD_I32LE); -// propdatatype.push_back(PredType::STD_U64LE); -// propdatatype.push_back(PredType::STD_U64LE); -// propdatatype.push_back(PredType::STD_U32LE); -// propdatatype.push_back(PredType::STD_U32LE); -// propdatatype.push_back(desiredproprealtype[0]); -// propdatatype.push_back(desiredproprealtype[0]); -// propdatatype.push_back(desiredproprealtype[0]); -// propdatatype.push_back(desiredproprealtype[0]); -// propdatatype.push_back(desiredproprealtype[0]); -// #if defined(GASON) || defined(STARON) || defined(BHON) -// propdatatype.push_back(desiredproprealtype[0]); -// propdatatype.push_back(desiredproprealtype[0]); -// propdatatype.push_back(desiredproprealtype[0]); -// #endif - hdfpropdatatype.push_back(H5T_NATIVE_INT); hdfpropdatatype.push_back(H5T_NATIVE_INT); hdfpropdatatype.push_back(H5T_NATIVE_ULONG); @@ -4767,19 +5110,11 @@ struct DataGroupNames { group.push_back("Offset"); group.push_back("Offset_unbound"); #ifdef USEHDF - // groupdatatype.push_back(PredType::STD_I32LE); - // groupdatatype.push_back(PredType::STD_I32LE); - // groupdatatype.push_back(PredType::STD_U64LE); - // 
groupdatatype.push_back(PredType::STD_U64LE); - // groupdatatype.push_back(PredType::STD_U32LE); - // groupdatatype.push_back(PredType::STD_U64LE); - // groupdatatype.push_back(PredType::STD_U64LE); - hdfgroupdatatype.push_back(H5T_NATIVE_INT); hdfgroupdatatype.push_back(H5T_NATIVE_INT); hdfgroupdatatype.push_back(H5T_NATIVE_ULONG); hdfgroupdatatype.push_back(H5T_NATIVE_ULONG); - hdfgroupdatatype.push_back(H5T_NATIVE_UINT); + hdfgroupdatatype.push_back(H5T_NATIVE_ULONG); hdfgroupdatatype.push_back(H5T_NATIVE_ULONG); hdfgroupdatatype.push_back(H5T_NATIVE_ULONG); @@ -4800,12 +5135,6 @@ struct DataGroupNames { part.push_back("Total_num_of_particles_in_all_groups"); part.push_back("Particle_IDs"); #ifdef USEHDF - // partdatatype.push_back(PredType::STD_I32LE); - // partdatatype.push_back(PredType::STD_I32LE); - // partdatatype.push_back(PredType::STD_U64LE); - // partdatatype.push_back(PredType::STD_U64LE); - // partdatatype.push_back(PredType::STD_I64LE); - hdfpartdatatype.push_back(H5T_NATIVE_INT); hdfpartdatatype.push_back(H5T_NATIVE_INT); hdfpartdatatype.push_back(H5T_NATIVE_ULONG); @@ -4827,11 +5156,6 @@ struct DataGroupNames { types.push_back("Total_num_of_particles_in_all_groups"); types.push_back("Particle_types"); #ifdef USEHDF - // typesdatatype.push_back(PredType::STD_I32LE); - // typesdatatype.push_back(PredType::STD_I32LE); - // typesdatatype.push_back(PredType::STD_U64LE); - // typesdatatype.push_back(PredType::STD_U64LE); - // typesdatatype.push_back(PredType::STD_U16LE); hdftypesdatatype.push_back(H5T_NATIVE_INT); hdftypesdatatype.push_back(H5T_NATIVE_INT); @@ -4854,13 +5178,6 @@ struct DataGroupNames { hierarchy.push_back("Number_of_substructures_in_halo"); hierarchy.push_back("Parent_halo_ID"); #ifdef USEHDF - // hierarchydatatype.push_back(PredType::STD_I32LE); - // hierarchydatatype.push_back(PredType::STD_I32LE); - // hierarchydatatype.push_back(PredType::STD_U64LE); - // hierarchydatatype.push_back(PredType::STD_U64LE); - // 
hierarchydatatype.push_back(PredType::STD_U32LE); - // hierarchydatatype.push_back(PredType::STD_I64LE); - hdfhierarchydatatype.push_back(H5T_NATIVE_INT); hdfhierarchydatatype.push_back(H5T_NATIVE_INT); hdfhierarchydatatype.push_back(H5T_NATIVE_ULONG); @@ -4891,18 +5208,6 @@ struct DataGroupNames { #endif #ifdef USEHDF -// SOdatatype.push_back(PredType::STD_I32LE); -// SOdatatype.push_back(PredType::STD_I32LE); -// SOdatatype.push_back(PredType::STD_U64LE); -// SOdatatype.push_back(PredType::STD_U64LE); -// SOdatatype.push_back(PredType::STD_U64LE); -// SOdatatype.push_back(PredType::STD_U64LE); -// SOdatatype.push_back(PredType::STD_U32LE); -// SOdatatype.push_back(PredType::STD_U64LE); -// SOdatatype.push_back(PredType::STD_I64LE); -// #if defined(GASON) || defined(STARON) || defined(BHON) -// SOdatatype.push_back(PredType::STD_I32LE); -// #endif hdfSOdatatype.push_back(H5T_NATIVE_INT); hdfSOdatatype.push_back(H5T_NATIVE_INT); hdfSOdatatype.push_back(H5T_NATIVE_ULONG); @@ -4943,17 +5248,6 @@ struct DataGroupNames { profile.push_back("Num_of_bin_edges"); profile.push_back("Radial_bin_edges"); #ifdef USEHDF - // profiledatatype.push_back(PredType::STD_I32LE); - // profiledatatype.push_back(PredType::STD_I32LE); - // profiledatatype.push_back(PredType::STD_U64LE); - // profiledatatype.push_back(PredType::STD_U64LE); - // profiledatatype.push_back(PredType::STD_U64LE); - // profiledatatype.push_back(PredType::STD_U64LE); - // profiledatatype.push_back(PredType::C_S1); - // profiledatatype.push_back(PredType::STD_I32LE); - // profiledatatype.push_back(PredType::STD_I32LE); - // profiledatatype.push_back(desiredproprealtype[0]); - hdfprofiledatatype.push_back(H5T_NATIVE_INT); hdfprofiledatatype.push_back(H5T_NATIVE_INT); hdfprofiledatatype.push_back(H5T_NATIVE_ULONG); diff --git a/src/bgfield.cxx b/src/bgfield.cxx index 17d22ede..d603547b 100644 --- a/src/bgfield.cxx +++ b/src/bgfield.cxx @@ -30,7 +30,7 @@ KDTree* InitializeTreeGrid(Options &opt, const Int_t nbodies, 
Particle *Part){ //and rotate velocities to new frame #ifdef USEOPENMP #pragma omp parallel default(shared) \ -private(i,vel) +private(i,vel) if (nbodies > ompsubsearchnum) { #pragma omp for #endif @@ -48,22 +48,29 @@ private(i,vel) //then build tree KDTree *tree; + int itreetype = tree->TPHYS, ikerntype = tree->KEPAN, isplittingcriterion = 0, ianiso = 0 , iscale = 0; + bool runomp = (nbodies > ompsubsearchnum); if (opt.iverbose>=2) cout<<"Grid system using leaf nodes with maximum size of "<=2) cout<<"Building Physical Tree using simple spatial extend as splitting criterion"<TPHYS); + //tree=new KDTree(Part,nbodies,opt.Ncell,tree->TPHYS); } else if (opt.gridtype==PHYSENGRID) { if (opt.iverbose>=2) cout<<"Building physical Tree using minimum shannon entropy as splitting criterion"<TPHYS,tree->KEPAN,100,1); - tree=new KDTree(Part,nbodies,opt.Ncell,tree->TPHYS,tree->KEPAN,100,1); + //tree=new KDTree(Part,nbodies,opt.Ncell,tree->TPHYS,tree->KEPAN,100,1); } else if (opt.gridtype==PHASEENGRID) { if (opt.iverbose>=2) cout<<"Building Phase-space Tree using minimum shannon entropy as splitting criterion"<TPHS; + isplittingcriterion = 1; + ianiso = 1; //tree=new KDTree(*S,opt.Ncell,tree->TPHS,tree->KEPAN,100,1,1);//if phase tree, use entropy criterion with anisotropic kernel //if phase tree, use entropy criterion with anisotropic kernel - tree=new KDTree(Part,nbodies,opt.Ncell,tree->TPHS,tree->KEPAN,100,1,1); + //tree=new KDTree(Part,nbodies,opt.Ncell,tree->TPHS,tree->KEPAN,100,1,1); } + tree=new KDTree(Part,nbodies,opt.Ncell,itreetype,ikerntype,100,isplittingcriterion,ianiso,iscale,NULL,NULL,runomp); return tree; } @@ -128,6 +135,7 @@ void FillTreeGrid(Options &opt, const Int_t nbodies, const Int_t ngrid, KDTree * } //resets particle order delete tree; + delete[] ptemp; if (opt.iverbose>=2) cout<<"Done."<=2) cout<<"Calculating Grid Mean Velocity"< ompsubsearchnum) { #pragma omp for #endif @@ -175,7 +183,7 @@ Matrix* GetCellVelDisp(Options &opt, const Int_t nbodies, Particle 
*Part, Int_t if (opt.iverbose>=2) cout<<"Calculating Grid Velocity Dispersion"< ompsubsearchnum) { #pragma omp for #endif diff --git a/src/buildandsortarrays.cxx b/src/buildandsortarrays.cxx index fd94c142..204a1fd9 100644 --- a/src/buildandsortarrays.cxx +++ b/src/buildandsortarrays.cxx @@ -26,6 +26,7 @@ Int_t *BuildNumInGroupTyped(const Int_t nbodies, const Int_t numgroups, Int_t *p Int_t **BuildPGList(const Int_t nbodies, const Int_t numgroups, Int_t *numingroup, Int_t *pfof){ Int_t **pglist=new Int_t*[numgroups+1]; Int_t pid; + pglist[0]=NULL; for (Int_t i=1;i<=numgroups;i++) { pglist[i] = NULL; if (numingroup[i]<=0) continue; @@ -44,6 +45,7 @@ Int_t **BuildPGList(const Int_t nbodies, const Int_t numgroups, Int_t *numingrou Int_t **BuildPGListTyped(const Int_t nbodies, const Int_t numgroups, Int_t *numingroup, Int_t *pfof, Particle *P, int type){ Int_t **pglist=new Int_t*[numgroups+1]; Int_t pid; + pglist[0]=NULL; for (Int_t i=1;i<=numgroups;i++) { pglist[i] = NULL; if (numingroup[i]<=0) continue; @@ -62,6 +64,7 @@ Int_t **BuildPGListTyped(const Int_t nbodies, const Int_t numgroups, Int_t *numi Int_t **BuildPGList(const Int_t nbodies, const Int_t numgroups, Int_t *numingroup, Int_t *pfof, Particle *Part){ Int_t **pglist=new Int_t*[numgroups+1]; Int_t pid, index; + pglist[0]=NULL; for (Int_t i=1;i<=numgroups;i++) { pglist[i] = NULL; if (numingroup[i]<=0) continue; @@ -81,6 +84,7 @@ Int_t **BuildPGList(const Int_t nbodies, const Int_t numgroups, Int_t *numingrou Int_t **BuildPGList(const Int_t nbodies, const Int_t numgroups, Int_t *numingroup, Int_t *pfof, Int_t *ids){ Int_t **pglist=new Int_t*[numgroups+1]; Int_t pid; + pglist[0]=NULL; for (Int_t i=1;i<=numgroups;i++) { pglist[i] = NULL; if (numingroup[i]<=0) continue; @@ -133,6 +137,7 @@ Int_tree_t *BuildGroupTailArray(const Int_t nbodies, const Int_t numgroups, Int_ ///build the group particle arrays need for unbinding procedure Particle **BuildPartList(Int_t numgroups, Int_t *numingroup, Int_t **pglist, 
Particle* Part){ Particle **gPart=new Particle*[numgroups+1]; + gPart[0] = NULL; for (Int_t i=1;i<=numgroups;i++) { gPart[i] = NULL; if (numingroup[i]<=0) continue; diff --git a/src/gadgetio.cxx b/src/gadgetio.cxx index 82c177cd..0b94fb39 100644 --- a/src/gadgetio.cxx +++ b/src/gadgetio.cxx @@ -775,9 +775,6 @@ void ReadGadget(Options &opt, vector &Part, const Int_t nbodies,Partic Pbaryons[i].SetMass(Pbaryons[i].GetMass()*mscale); for (int j=0;j<3;j++) Pbaryons[i].SetVelocity(j,Pbaryons[i].GetVelocity(j)*opt.velocityinputconversion*sqrt(opt.a)+Hubbleflow*Pbaryons[i].GetPosition(j)); for (int j=0;j<3;j++) Pbaryons[i].SetPosition(j,Pbaryons[i].GetPosition(j)*lscale); -#ifdef GASON - Pbaryons[i].SetU(Pbaryons[i].GetU()*opt.velocityinputconversion*opt.velocityinputconversion); -#endif } } @@ -1190,7 +1187,7 @@ void ReadGadget(Options &opt, vector &Part, const Int_t nbodies,Partic } #endif Nbuf[ibuf]++; - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); count2++; } else if (opt.partsearchtype==PSTDARK) { @@ -1212,7 +1209,7 @@ void ReadGadget(Options &opt, vector &Part, const Int_t nbodies,Partic //when running hydro runs, need to reset particle buffer quantities //related to hydro info to zero Nbuf[ibuf]++; - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); count2++; } else if (opt.iBaryonSearch) { @@ -1250,7 +1247,7 @@ void ReadGadget(Options &opt, vector &Part, const Int_t nbodies,Partic else if (k==GSTARTYPE) Nlocalbaryon[2]++; else if (k==GBHTYPE) Nlocalbaryon[3]++; } - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, 
Nlocalbaryon[0], Pbaryons, Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocalbaryon[0], Pbaryons, Nreadbuf, Preadbuf); bcount2++; } } @@ -1276,7 +1273,7 @@ void ReadGadget(Options &opt, vector &Part, const Int_t nbodies,Partic } #endif Nbuf[ibuf]++; - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); count2++; } } @@ -1302,7 +1299,7 @@ void ReadGadget(Options &opt, vector &Part, const Int_t nbodies,Partic } #endif Nbuf[ibuf]++; - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); count2++; } } @@ -1333,6 +1330,9 @@ void ReadGadget(Options &opt, vector &Part, const Int_t nbodies,Partic MPI_Ssend(&Nbuf[ibuf],1,MPI_Int_t, ibuf, ibuf+NProcs, MPI_COMM_WORLD); if (Nbuf[ibuf]>0) { MPI_Ssend(&Pbuf[ibuf*BufSize], sizeof(Particle)*Nbuf[ibuf], MPI_BYTE, ibuf, ibuf, MPI_COMM_WORLD); + MPISendHydroInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); + MPISendStarInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); + MPISendBHInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); Nbuf[ibuf]=0; //last broadcast with Nbuf[ibuf]=0 so that receiver knows no more particles are to be broadcast MPI_Ssend(&Nbuf[ibuf],1,MPI_Int_t,ibuf,ibuf+NProcs,MPI_COMM_WORLD); @@ -1390,6 +1390,8 @@ void ReadGadget(Options &opt, vector &Part, const Int_t nbodies,Partic MPI_Bcast(&(Ntotal),sizeof(Ntotal),MPI_BYTE,0,MPI_COMM_WORLD); MPI_Bcast(&opt.zoomlowmassdm,sizeof(opt.zoomlowmassdm),MPI_BYTE,0,MPI_COMM_WORLD); #endif + //store how to convert input internal energies to physical output internal 
energies + opt.internalenergyinputconversion = opt.velocityinputconversion*opt.velocityinputconversion; ///If compiled with HIGHRES, the code assumes that the gadget data is a multi-resolution simulation ///with the lowest mass dark matter particle corresponding to the highest resolution and ///thus the physical linking length is assumed to be in fraction of interparticle spacing diff --git a/src/hdfio.cxx b/src/hdfio.cxx index 8ee4382e..881f759a 100644 --- a/src/hdfio.cxx +++ b/src/hdfio.cxx @@ -71,7 +71,7 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle char buf[2000]; HDF_Group_Names hdf_gnames (opt.ihdfnameconvention); //structures store names in groups - HDF_Header *hdf_header_info; + vector hdf_header_info; HDF_Part_Info hdf_gas_info(HDFGASTYPE,opt.ihdfnameconvention); HDF_Part_Info hdf_dm_info(HDFDMTYPE,opt.ihdfnameconvention); HDF_Part_Info hdf_extradm_info(HDFDM1TYPE,opt.ihdfnameconvention); @@ -93,19 +93,14 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle hdf_parts[5]=&hdf_bh_info; //to store the groups, data sets and their associated data spaces - // H5File *Fhdf; - // Group *partsgroup; - // Attribute *headerattribs; - // DataSpace *headerdataspace; - // DataSet *partsdataset; - // DataSpace *partsdataspace; - // DataSpace chunkspace; vector Fhdf; vector partsgroup; vector headerattribs; vector headerdataspace; vector partsdataset; vector partsdataspace; + vector partsdataset_extra; + vector partsdataspace_extra; hid_t chunkspace; int chunksize=opt.inputbufsize; //buffers to load data @@ -149,6 +144,13 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle int *ireadtask,*readtaskID; Int_t ninputoffset; + //for extra fields related to chemistry, feedback etc + int numextrafields = 0; + vector numextrafieldsvec(NHDFTYPE); + string extrafield; + int iextraoffset; + double *extrafieldbuff = NULL; + #ifdef USEMPI if (ThisTask == 0) #endif @@ -205,10 +207,10 @@ void ReadHDF(Options &opt, 
vector &Part, const Int_t nbodies,Particle } //used in mpi to load access to all the data blocks of interest - // DataSet *partsdatasetall; - // DataSpace *partsdataspaceall; vector partsdatasetall; vector partsdataspaceall; + vector partsdatasetall_extra; + vector partsdataspaceall_extra; //extra blocks to store info float *velfloatbuff=new float[chunksize*3]; @@ -271,14 +273,8 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle if (ireadtask[ThisTask]>=0) { #endif //read the header - hdf_header_info=new HDF_Header[opt.num_files]; + hdf_header_info.resize(opt.num_files); for (i=0; i &Part, const Int_t nbodies,Particle for (auto &x:partsgroup) x=-1; for (auto &x:partsdataset) x=-1; for (auto &x:partsdataspace) x=-1; + + //handle any extra fields that should be loaded related to chemistry + numextrafields = 0; + for (auto &nf:numextrafieldsvec) nf=0; + #if defined(GASON) + numextrafieldsvec[HDFGASTYPE] = opt.gas_internalprop_names.size() + opt.gas_chem_names.size() + opt.gas_chemproduction_names.size(); + numextrafields += numextrafieldsvec[HDFGASTYPE]; + #endif + #if defined(STARON) + numextrafieldsvec[HDFSTARTYPE] = opt.star_internalprop_names.size() + opt.star_chem_names.size() + opt.star_chemproduction_names.size(); + numextrafields += numextrafieldsvec[HDFSTARTYPE]; + #endif + #if defined(BHON) + numextrafieldsvec[HDFBHTYPE] = opt.bh_internalprop_names.size() + opt.bh_chem_names.size() + opt.bh_chemproduction_names.size(); + numextrafields += numextrafieldsvec[HDFBHTYPE]; + #endif + #if defined(EXTRADMON) + numextrafieldsvec[HDFDMTYPE] = opt.extra_dm_internalprop_names.size(); + numextrafields += numextrafieldsvec[HDFDMTYPE]; + #endif + if (numextrafields>0) { + partsdataset_extra.resize(opt.num_files*numextrafields); + partsdataspace_extra.resize(opt.num_files*numextrafields); + for (auto &x:partsdataset_extra) x=-1; + for (auto &x:partsdataspace_extra) x=-1; + } + #ifdef USEMPI partsdatasetall.resize(opt.num_files*NHDFTYPE*NHDFDATABLOCK); 
partsdataspaceall.resize(opt.num_files*NHDFTYPE*NHDFDATABLOCK); for (auto &x:partsdatasetall) x=-1; for (auto &x:partsdataspaceall) x=-1; + if (numextrafields>0) { + partsdatasetall_extra.resize(opt.num_files*numextrafields); + partsdataspaceall_extra.resize(opt.num_files*numextrafields); + for (auto &x:partsdatasetall_extra) x=-1; + for (auto &x:partsdataspaceall_extra) x=-1; + extrafieldbuff = new double[numextrafields*chunksize]; + } #endif for(i=0; i1) sprintf(buf,"%s.%d.hdf5",opt.fname,(int)i); @@ -318,7 +348,17 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle cout<<" Expecting "<(Fhdf[i], hdf_header_info[i].names[hdf_header_info[i].IBoxSize]); + + /* Read the BoxSize */ + if (opt.ihdfnameconvention == HDFSWIFTEAGLENAMES || opt.ihdfnameconvention == HDFOLDSWIFTEAGLENAMES) { + /* SWIFT can have non-cubic boxes; but for cosmological runs they will always be cubes. + * This makes the BoxSize a vector attribute, with it containing three values, but they + * will always be the same. */ + hdf_header_info[i].BoxSize = read_attribute_v(Fhdf[i], hdf_header_info[i].names[hdf_header_info[i].IBoxSize])[0]; + } else { + hdf_header_info[i].BoxSize = read_attribute(Fhdf[i], hdf_header_info[i].names[hdf_header_info[i].IBoxSize]); + } + vdoublebuff=read_attribute_v(Fhdf[i], hdf_header_info[i].names[hdf_header_info[i].IMass]); for (k=0;k &Part, const Int_t nbodies,Particle // SWIFT snapshots already include the 1/h factor factor, // so there is no need to include it. 
- if(opt.ihdfnameconvention == HDFSWIFTEAGLENAMES) { + if(opt.ihdfnameconvention == HDFSWIFTEAGLENAMES || opt.ihdfnameconvention == HDFOLDSWIFTEAGLENAMES) { mscale=opt.massinputconversion;lscale=opt.lengthinputconversion*aadjust;lvscale=opt.lengthinputconversion*opt.a; } else { @@ -519,19 +559,21 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle } } if (opt.partsearchtype==PSTDARK && opt.iBaryonSearch) { - for (j=1;j<=nbusetypes;j++) { - k=usetypes[j]; - //data loaded into memory in chunks - if (hdf_header_info[i].npart[k]0)nchunk=hdf_header_info[i].npart[k]-n; - // //setup hyperslab so that it is loaded into the buffer - HDF5ReadHyperSlabReal(doublebuff,partsdataset[i*NHDFTYPE+k], partsdataspace[i*NHDFTYPE+k], 1, 3, nchunk, n); - for (int nn=0;nn &Part, const Int_t nbodies,Particle for(n=0;n0)nchunk=hdf_header_info[i].npart[k]-n; - //setup hyperslab so that it is loaded into the buffer - // datarank=1; - // datadim[0]=nchunk; - // chunkspace=DataSpace(datarank,datadim); - // filespacecount[0]=nchunk;filespacecount[1]=1; - // filespaceoffset[0]=n;filespaceoffset[1]=0; - // partsdataspace[i*NHDFTYPE+k].selectHyperslab(H5S_SELECT_SET, filespacecount, filespaceoffset); - // partsdataset[i*NHDFTYPE+k].read(realbuff,HDFREALTYPE,chunkspace,partsdataspace[i*NHDFTYPE+k]); - // - // if (ifloat) for (int nn=0;nn &Part, const Int_t nbodies,Particle for(n=0;n0)nchunk=hdf_header_info[i].npart[k]-n; - //setup hyperslab so that it is loaded into the buffer - // datarank=1; - // datadim[0]=nchunk; - // chunkspace=DataSpace(datarank,datadim); - // filespacecount[0]=nchunk;filespacecount[1]=1; - // filespaceoffset[0]=n;filespaceoffset[1]=0; - // partsdataspace[i*NHDFTYPE+k].selectHyperslab(H5S_SELECT_SET, filespacecount, filespaceoffset); - // partsdataset[i*NHDFTYPE+k].read(realbuff,HDFREALTYPE,chunkspace,partsdataspace[i*NHDFTYPE+k]); - // - // if (ifloat) for (int nn=0;nn &Part, const Int_t nbodies,Particle if 
(hdf_header_info[i].npart[k]-n0)nchunk=hdf_header_info[i].npart[k]-n; //setup hyperslab so that it is loaded into the buffer HDF5ReadHyperSlabReal(doublebuff,partsdataset[i*NHDFTYPE+k], partsdataspace[i*NHDFTYPE+k], 1, 1, nchunk, n); - for (int nn=0;nn 0. ? doublebuff[nn] : 0.); } } else { @@ -923,7 +943,7 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle if (hdf_header_info[i].npart[k]-n0)nchunk=hdf_header_info[i].npart[k]-n; //setup hyperslab so that it is loaded into the buffer HDF5ReadHyperSlabReal(doublebuff,partsdataset[i*NHDFTYPE+k], partsdataspace[i*NHDFTYPE+k], 1, 1, nchunk, n); - Pbaryons[bcount++].SetZmet(doublebuff[nn]*zmetconversion); + for (int nn=0;nn &Part, const Int_t nbodies,Particle #endif #endif }//end of if not dark matter then baryon search + //now load extra fields if necessary. + if (numextrafields>0) + { +#if defined(GASON) + if (opt.gas_internalprop_names.size()>0) + { + count=count2; + bcount=bcount2; + for (j=0;j1) cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + HDF5ReadHyperSlabReal(doublebuff,partsdataset_extra[i*numextrafields+iextra], partsdataspace_extra[i*numextrafields+iextra], 1, 1, nchunk, n); + for (int nn=0;nn0) + { + count=count2; + bcount=bcount2; + for (j=0;j1) cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + HDF5ReadHyperSlabReal(doublebuff,partsdataset_extra[i*numextrafields+iextra], partsdataspace_extra[i*numextrafields+iextra], 1, 1, nchunk, n); + for (int nn=0;nn0) + { + count=count2; + bcount=bcount2; + for (j=0;j1) cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + HDF5ReadHyperSlabReal(doublebuff,partsdataset_extra[i*numextrafields+iextra], partsdataspace_extra[i*numextrafields+iextra], 1, 1, nchunk, n); + for (int nn=0;nn0) + { + count=count2; + bcount=bcount2; + for (j=0;j1) 
cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + HDF5ReadHyperSlabReal(doublebuff,partsdataset_extra[i*numextrafields+iextra], partsdataspace_extra[i*numextrafields+iextra], 1, 1, nchunk, n); + for (int nn=0;nn0) + { + count=count2; + bcount=bcount2; + for (j=0;j1) cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + HDF5ReadHyperSlabReal(doublebuff,partsdataset_extra[i*numextrafields+iextra], partsdataspace_extra[i*numextrafields+iextra], 1, 1, nchunk, n); + for (int nn=0;nn0) + { + count=count2; + bcount=bcount2; + for (j=0;j1) cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + HDF5ReadHyperSlabReal(doublebuff,partsdataset_extra[i*numextrafields+iextra], partsdataspace_extra[i*numextrafields+iextra], 1, 1, nchunk, n); + for (int nn=0;nn0) + { + count=count2; + bcount=bcount2; + for (j=0;j1) cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + HDF5ReadHyperSlabReal(doublebuff,partsdataset_extra[i*numextrafields+iextra], partsdataspace_extra[i*numextrafields+iextra], 1, 1, nchunk, n); + for (int nn=0;nn0) + { + count=count2; + bcount=bcount2; + for (j=0;j1) cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + HDF5ReadHyperSlabReal(doublebuff,partsdataset_extra[i*numextrafields+iextra], partsdataspace_extra[i*numextrafields+iextra], 1, 1, nchunk, n); + for (int nn=0;nn0) + { + count=count2; + bcount=bcount2; + for (j=0;j1) cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + HDF5ReadHyperSlabReal(doublebuff,partsdataset_extra[i*numextrafields+iextra], partsdataspace_extra[i*numextrafields+iextra], 1, 1, nchunk, n); + for (int nn=0;nn0) + { + 
count=count2; + bcount=bcount2; + for (j=0;j1) cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + HDF5ReadHyperSlabReal(doublebuff,partsdataset_extra[i*numextrafields+iextra], partsdataspace_extra[i*numextrafields+iextra], 1, 1, nchunk, n); + for (int nn=0;nn &Part, const Int_t nbodies,Particle // SWIFT snapshot velocities already contain the sqrt(a) factor, // so there is no need to include it. - if(opt.ihdfnameconvention == HDFSWIFTEAGLENAMES) vscale = opt.velocityinputconversion; + if(opt.ihdfnameconvention == HDFSWIFTEAGLENAMES || opt.ihdfnameconvention == HDFOLDSWIFTEAGLENAMES) vscale = opt.velocityinputconversion; else vscale = opt.velocityinputconversion*sqrt(opt.a); + if(opt.ihdfnameconvention == HDFSWIFTEAGLENAMES || opt.ihdfnameconvention == HDFOLDSWIFTEAGLENAMES) opt.internalenergyinputconversion = opt.a*opt.a*opt.velocityinputconversion*opt.velocityinputconversion; + else opt.internalenergyinputconversion = opt.velocityinputconversion*opt.velocityinputconversion; //finally adjust to appropriate units for (i=0;i &Part, const Int_t nbodies,Particle Part[i].SetMass(Part[i].GetMass()*mscale); for (int j=0;j<3;j++) Part[i].SetVelocity(j,Part[i].GetVelocity(j)*vscale+Hubbleflow*Part[i].GetPosition(j)); for (int j=0;j<3;j++) Part[i].SetPosition(j,Part[i].GetPosition(j)*lscale); -#ifdef GASON - if (Part[i].GetType()==GASTYPE) Part[i].SetU(Part[i].GetU()*opt.velocityinputconversion*opt.velocityinputconversion); -#endif } if (Pbaryons!=NULL && opt.iBaryonSearch==1) { for (i=0;i &Part, const Int_t nbodies,Particle Pbaryons[i].SetMass(Pbaryons[i].GetMass()*mscale); for (int j=0;j<3;j++) Pbaryons[i].SetVelocity(j,Pbaryons[i].GetVelocity(j)*vscale+Hubbleflow*Pbaryons[i].GetPosition(j)); for (int j=0;j<3;j++) Pbaryons[i].SetPosition(j,Pbaryons[i].GetPosition(j)*lscale); -#ifdef GASON - Pbaryons[i].SetU(Pbaryons[i].GetU()*opt.velocityinputconversion*opt.velocityinputconversion); -#endif } } @@ 
-1224,58 +1604,305 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle #endif } //end of baryon read if not running search dm then baryons - for (j=0;j0)nchunk=hdf_header_info[i].npart[k]-n; - //setup hyperslab so that it is loaded into the buffer - //load positions - itemp=0; - //set hyperslab - HDF5ReadHyperSlabReal(doublebuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 3, nchunk, n); - //velocities - itemp++; - HDF5ReadHyperSlabReal(veldoublebuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 3, nchunk, n); - //ids - itemp++; - HDF5ReadHyperSlabInteger(longbuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 1, nchunk, n); - - //masses - itemp++; - if (hdf_header_info[i].mass[k]==0) { - HDF5ReadHyperSlabReal(massdoublebuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 1, nchunk, n); + if (numextrafields>0) + { + iextraoffset = 0; +#if defined(GASON) + for (j=0;j0) + { + for (auto iextra=0;iextra1) cout<<"Opening group "<0) + { + for (auto iextra=0;iextra1) cout<<"Opening group "<0) + { + for (auto iextra=0;iextra1) cout<<"Opening group "<0) + { + for (auto iextra=0;iextra1) cout<<"Opening group "<0) + { + for (auto iextra=0;iextra1) cout<<"Opening group "<0) + { + for (auto iextra=0;iextra1) cout<<"Opening group "<0) + { + for (auto iextra=0;iextra1) cout<<"Opening group "<0) + { + for (auto iextra=0;iextra1) cout<<"Opening group "<0) + { + for (auto iextra=0;iextra1) cout<<"Opening group "<0) + { + for (auto iextra=0;iextra1) cout<<"Opening group "<0)nchunk=hdf_header_info[i].npart[k]-n; + //setup hyperslab so that it is loaded into the buffer + //load positions + itemp=0; + //set hyperslab + 
HDF5ReadHyperSlabReal(doublebuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 3, nchunk, n); + //velocities + itemp++; + HDF5ReadHyperSlabReal(veldoublebuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 3, nchunk, n); + //ids + itemp++; + HDF5ReadHyperSlabInteger(longbuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 1, nchunk, n); + + //masses + itemp++; + if (hdf_header_info[i].mass[k]==0) { + HDF5ReadHyperSlabReal(massdoublebuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 1, nchunk, n); + } +#ifdef GASON + //self-energy + itemp++; + if (k == HDFGASTYPE) { + HDF5ReadHyperSlabReal(udoublebuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 1, nchunk, n); + } +#ifdef STARON + //star formation rate + itemp++; + if (k == HDFGASTYPE) { + HDF5ReadHyperSlabReal(SFRdoublebuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 1, nchunk, n); + } + + //metallicity + itemp++; + if (k == HDFGASTYPE || k == HDFSTARTYPE) { + HDF5ReadHyperSlabReal(Zdoublebuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 1, nchunk, n); + } + + //stellar age + itemp++; + if (k == HDFSTARTYPE) { + HDF5ReadHyperSlabReal(Tagedoublebuff,partsdatasetall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], partsdataspaceall[i*NHDFTYPE*NHDFDATABLOCK+k*NHDFDATABLOCK+itemp], 1, 1, nchunk, n); + } +#endif +#endif + //load extra fields + if (numextrafields>0) + { + iextraoffset = 0; +#if defined(GASON) 
+ if (opt.gas_internalprop_names.size()>0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra &Part, const Int_t nbodies,Particle //reset hydro quantities of buffer #ifdef GASON Pbuf[ibufindex].SetU(0); + Pbuf[ibufindex].SetHydroProperties(); #ifdef STARON Pbuf[ibufindex].SetSFR(0); Pbuf[ibufindex].SetZmet(0); @@ -1291,9 +1919,15 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle #ifdef STARON Pbuf[ibufindex].SetZmet(0); Pbuf[ibufindex].SetTage(0); + Pbuf[ibufindex].SetStarProperties(); #endif #ifdef BHON + Pbuf[ibufindex].SetBHProperties(); #endif +#ifdef EXTRADMON + Pbuf[ibufindex].SetExtraDMProperties(); +#endif + Pbuf[ibufindex].SetPosition(doublebuff[nn*3],doublebuff[nn*3+1],doublebuff[nn*3+2]); Pbuf[ibufindex].SetVelocity(veldoublebuff[nn*3],veldoublebuff[nn*3+1],veldoublebuff[nn*3+2]); if (hdf_header_info[i].mass[k]==0)Pbuf[ibufindex].SetMass(massdoublebuff[nn]); @@ -1311,12 +1945,13 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle #ifdef HIGHRES if (k==HDFDMTYPE && MP_DM>Pbuf[ibufindex].GetMass()) MP_DM=Pbuf[ibufindex].GetMass(); + if (k==HDFGASTYPE && MP_B 0. ? 
SFRdoublebuff[nn]: 0.); Pbuf[ibufindex].SetZmet(Zdoublebuff[nn]); #endif } @@ -1328,6 +1963,111 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle Pbuf[ibufindex].SetTage(Tagedoublebuff[nn]); } #endif + + if (numextrafields>0) { + iextraoffset = 0; +#ifdef GASON + if (k==HDFGASTYPE && numextrafieldsvec[HDFGASTYPE]) { + if (!Pbuf[ibufindex].HasHydroProperties()) Pbuf[ibufindex].InitHydroProperties(); + if (opt.gas_internalprop_names.size()>0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra0) + { + for (auto iextra=0;iextra &Part, const Int_t nbodies,Particle } #endif Nbuf[ibuf]++; - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); } ninputoffset += nchunk; } @@ -1431,7 +2171,7 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle if (k==HDFGASTYPE) { Pbuf[ibufindex].SetU(udoublebuff[nn]); #ifdef STARON - Pbuf[ibufindex].SetSFR(SFRdoublebuff[nn]); + Pbuf[ibufindex].SetSFR(SFRdoublebuff[nn] > 0. ? 
SFRdoublebuff[nn] : 0.); Pbuf[ibufindex].SetZmet(Zdoublebuff[nn]); #endif } @@ -1451,7 +2191,7 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle } #endif Nbuf[ibuf]++; - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocalbaryon[0], Pbaryons, Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocalbaryon[0], Pbaryons, Nreadbuf, Preadbuf); } ninputoffset+=nchunk; }//end of chunk @@ -1521,7 +2261,12 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle MPI_Ssend(&Nbuf[ibuf],1,MPI_Int_t, ibuf, ibuf+NProcs, MPI_COMM_WORLD); if (Nbuf[ibuf]>0) { MPI_Ssend(&Pbuf[ibuf*BufSize], sizeof(Particle)*Nbuf[ibuf], MPI_BYTE, ibuf, ibuf, MPI_COMM_WORLD); + MPISendHydroInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); + MPISendStarInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); + MPISendBHInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); + MPISendExtraDMInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); Nbuf[ibuf]=0; + //last broadcast with Nbuf[ibuf]=0 so that receiver knows no more particles are to be broadcast MPI_Ssend(&Nbuf[ibuf],1,MPI_Int_t,ibuf,ibuf+NProcs,MPI_COMM_WORLD); } @@ -1547,14 +2292,15 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle ///if gas found and Omega_b not set correctly (ie: ==0), assumes that ///lowest mass gas particle found corresponds to Omega_b ///Note that if there is mass evolution this WILL NOT WORK! - if (opt.Omega_b==0 && MP_B==MAXVALUE){ + if (opt.Omega_b==0 && MP_B>0){ opt.Omega_b=MP_B/(MP_DM+MP_B)*opt.Omega_m; opt.Omega_cdm=opt.Omega_m-opt.Omega_b; } // SWIFT snapshots already include the 1/h factor factor, // so there is no need to include it. 
- if(opt.ihdfnameconvention == HDFSWIFTEAGLENAMES) { + if(opt.ihdfnameconvention == HDFSWIFTEAGLENAMES || opt.ihdfnameconvention == HDFOLDSWIFTEAGLENAMES) + { //adjust period if (opt.comove) opt.p*=opt.lengthinputconversion; else opt.p*=opt.lengthinputconversion*opt.a; @@ -1569,9 +2315,11 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle } #endif #ifdef USEMPI +#ifdef HIGHRES if (opt.nsnapread>1) { - MPI_Allreduce(&MP_DM,&MP_DM, 1, MPI_DOUBLE, MPI_MIN,mpi_comm_read); + MPI_Allreduce(MPI_IN_PLACE,&MP_DM, 1, MPI_DOUBLE, MPI_MIN,mpi_comm_read); } +#endif MPI_Barrier(MPI_COMM_WORLD); //update cosmological data and boundary in code units MPI_Bcast(&(opt.p),sizeof(opt.p),MPI_BYTE,0,MPI_COMM_WORLD); @@ -1652,8 +2400,11 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle // SWIFT snapshot velocities already contain the sqrt(a) factor, // so there is no need to include it. - if(opt.ihdfnameconvention == HDFSWIFTEAGLENAMES) vscale = opt.velocityinputconversion; + if(opt.ihdfnameconvention == HDFSWIFTEAGLENAMES || opt.ihdfnameconvention == HDFOLDSWIFTEAGLENAMES) vscale = opt.velocityinputconversion; else vscale = opt.velocityinputconversion*sqrt(opt.a); + if(opt.ihdfnameconvention == HDFSWIFTEAGLENAMES || opt.ihdfnameconvention == HDFOLDSWIFTEAGLENAMES) opt.internalenergyinputconversion = opt.a*opt.a*opt.velocityinputconversion*opt.velocityinputconversion; + else opt.internalenergyinputconversion = opt.velocityinputconversion*opt.velocityinputconversion; + //finally adjust to appropriate units for (i=0;i &Part, const Int_t nbodies,Particle Part[i].SetMass(Part[i].GetMass()*mscale); for (int j=0;j<3;j++) Part[i].SetVelocity(j,Part[i].GetVelocity(j)*vscale+Hubbleflow*Part[i].GetPosition(j)); for (int j=0;j<3;j++) Part[i].SetPosition(j,Part[i].GetPosition(j)*lscale); -#ifdef GASON - if (Part[i].GetType()==GASTYPE) Part[i].SetU(Part[i].GetU()*opt.velocityinputconversion*opt.velocityinputconversion); -#endif } if (Pbaryons!=NULL && 
opt.iBaryonSearch==1) { for (i=0;i &Part, const Int_t nbodies,Particle Pbaryons[i].SetMass(Pbaryons[i].GetMass()*mscale); for (int j=0;j<3;j++) Pbaryons[i].SetVelocity(j,Pbaryons[i].GetVelocity(j)*vscale+Hubbleflow*Pbaryons[i].GetPosition(j)); for (int j=0;j<3;j++) Pbaryons[i].SetPosition(j,Pbaryons[i].GetPosition(j)*lscale); -#ifdef GASON - Pbaryons[i].SetU(Pbaryons[i].GetU()*opt.velocityinputconversion*opt.velocityinputconversion); -#endif } } #endif - delete[] intbuff; delete[] longbuff; delete[] uintbuff; delete[] floatbuff; delete[] doublebuff; + delete[] extrafieldbuff; #ifdef USEMPI delete[] velfloatbuff; delete[] veldoublebuff; @@ -1704,7 +2449,6 @@ void ReadHDF(Options &opt, vector &Part, const Int_t nbodies,Particle delete[] Tagedoublebuff; #endif #endif - } #endif diff --git a/src/hdfitems.h b/src/hdfitems.h index a5d04c09..1834d6b0 100644 --- a/src/hdfitems.h +++ b/src/hdfitems.h @@ -7,11 +7,9 @@ #ifndef HDFITEMS_H #define HDFITEMS_H - -//#include "H5Cpp.h" -//using namespace H5; #include "hdf5.h" + ///\name ILLUSTRIS specific constants //@{ ///convert illustris metallicty to ratio to solar @@ -52,7 +50,9 @@ #define HDFSTARIMETAL 40 #define HDFSTARIAGE 41 -#define HDFBHIMDOT 50 +#define HDFBHIMETAL 50 +#define HDFBHIAGE 51 +#define HDFBHIMDOT 52 //@} ///number of luminosity bands for stars @@ -71,7 +71,7 @@ ///\defgroup HDFNAMES labels for HDF naming conventions //@{ -#define HDFNUMNAMETYPES 8 +#define HDFNUMNAMETYPES 9 #define HDFILLUSTISNAMES 0 #define HDFGADGETXNAMES 1 #define HDFEAGLENAMES 2 @@ -79,6 +79,7 @@ #define HDFSIMBANAMES 4 #define HDFMUFASANAMES 5 #define HDFSWIFTEAGLENAMES 6 +#define HDFOLDSWIFTEAGLENAMES 8 #define HDFEAGLEVERSION2NAMES 7 //@} @@ -93,6 +94,14 @@ #define HDF5_FILE_GROUP_COMMON_BASE H5::CommonFG #endif +// #ifdef USEPARALLELHDF +// // #if H5_VERSION_GE(1,10,2) +// // #define USEHDFCOMPRESSOIN +// // #endif +// // #else +// // #define USEHDFCOMPRESSOIN +// #endif + template ReturnT safe_hdf5(F function, Ts ... 
args) { @@ -109,487 +118,842 @@ ReturnT safe_hdf5(F function, Ts ... args) return status; } +// Overloaded function to return HDF5 type given a C type +static inline hid_t hdf5_type(float dummy) {return H5T_NATIVE_FLOAT;} +static inline hid_t hdf5_type(double dummy) {return H5T_NATIVE_DOUBLE;} +static inline hid_t hdf5_type(short dummy) {return H5T_NATIVE_SHORT;} +static inline hid_t hdf5_type(int dummy) {return H5T_NATIVE_INT;} +static inline hid_t hdf5_type(long dummy) {return H5T_NATIVE_LONG;} +static inline hid_t hdf5_type(long long dummy) {return H5T_NATIVE_LLONG;} +static inline hid_t hdf5_type(unsigned short dummy) {return H5T_NATIVE_USHORT;} +static inline hid_t hdf5_type(unsigned int dummy) {return H5T_NATIVE_UINT;} +static inline hid_t hdf5_type(unsigned long dummy) {return H5T_NATIVE_ULONG;} +static inline hid_t hdf5_type(unsigned long long dummy) {return H5T_NATIVE_ULLONG;} +static inline hid_t hdf5_type(std::string dummy) {return H5T_C_S1;} + //template //static inline H5::Attribute get_attribute(const AttributeHolder &l, const std::string attr_name) static inline void get_attribute(vector &ids, const std::string attr_name) { - //can use H5Aexists as it is the C interface but how to access it? - //auto exists = H5Aexists(l.getId(), attr_name.c_str()); - auto exists = H5Aexists(ids.back(), attr_name.c_str()); - if (exists == 0) { - throw invalid_argument(std::string("attribute not found ") + attr_name); - } - else if (exists < 0) { - throw std::runtime_error("Error on H5Aexists"); - } - auto attr = H5Aopen(ids.back(), attr_name.c_str(), H5P_DEFAULT); - ids.push_back(attr); + //can use H5Aexists as it is the C interface but how to access it? 
+ //auto exists = H5Aexists(l.getId(), attr_name.c_str()); + auto exists = H5Aexists(ids.back(), attr_name.c_str()); + if (exists == 0) { + throw invalid_argument(std::string("attribute not found ") + attr_name); + } + else if (exists < 0) { + throw std::runtime_error("Error on H5Aexists"); + } + auto attr = H5Aopen(ids.back(), attr_name.c_str(), H5P_DEFAULT); + ids.push_back(attr); } static inline void get_attribute(vector &ids, const std::vector &parts) { - // This is the attribute name, so open it and store the id - if (parts.size() == 1) { - get_attribute(ids, parts[0]); - } - else { - H5O_info_t object_info; - hid_t newid; - H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT); - if (object_info.type == H5O_TYPE_GROUP) { - newid = H5Gopen2(ids.back(),parts[0].c_str(),H5P_DEFAULT); - } - else if (object_info.type == H5O_TYPE_DATASET) { - newid = H5Dopen2(ids.back(),parts[0].c_str(),H5P_DEFAULT); - } - ids.push_back(newid); - //get the substring - vector subparts(parts.begin() + 1, parts.end()); - //call function again - get_attribute(ids, subparts); - - } - //throw invalid_argument("attribute name not found"); + // This is the attribute name, so open it and store the id + if (parts.size() == 1) { + get_attribute(ids, parts[0]); + } + else { + H5O_info_t object_info; + hid_t newid; + H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT); + if (object_info.type == H5O_TYPE_GROUP) { + newid = H5Gopen2(ids.back(),parts[0].c_str(),H5P_DEFAULT); + } + else if (object_info.type == H5O_TYPE_DATASET) { + newid = H5Dopen2(ids.back(),parts[0].c_str(),H5P_DEFAULT); + } + ids.push_back(newid); + //get the substring + vector subparts(parts.begin() + 1, parts.end()); + //call function again + get_attribute(ids, subparts); + } + //throw invalid_argument("attribute name not found"); } static inline vector tokenize(const string &s, const string &delims) { - string::size_type lastPos = s.find_first_not_of(delims, 0); - 
string::size_type pos = s.find_first_of(delims, lastPos); - - vector tokens; - while (string::npos != pos || string::npos != lastPos) { - tokens.push_back(s.substr(lastPos, pos - lastPos)); - lastPos = s.find_first_not_of(delims, pos); - pos = s.find_first_of(delims, lastPos); - } - return tokens; + string::size_type lastPos = s.find_first_not_of(delims, 0); + string::size_type pos = s.find_first_of(delims, lastPos); + + vector tokens; + while (string::npos != pos || string::npos != lastPos) { + tokens.push_back(s.substr(lastPos, pos - lastPos)); + lastPos = s.find_first_not_of(delims, pos); + pos = s.find_first_of(delims, lastPos); + } + return tokens; } static inline void get_attribute(const hid_t &file_id, vector &ids, const string &name) { - std::vector parts = tokenize(name, "/"); - ids.push_back(file_id); - get_attribute(ids, parts); + std::vector parts = tokenize(name, "/"); + ids.push_back(file_id); + get_attribute(ids, parts); } static inline void close_hdf_ids(vector &ids) { - H5O_info_t object_info; - for (auto &id:ids) - { - H5Oget_info(id, &object_info); - if (object_info.type == H5O_TYPE_GROUP) { - H5Gclose(id); - } - else if (object_info.type == H5O_TYPE_GROUP) { - H5Dclose(id); - } - } + H5O_info_t object_info; + for (auto &id:ids) + { + H5Oget_info(id, &object_info); + if (object_info.type == H5O_TYPE_GROUP) { + H5Gclose(id); + } + else if (object_info.type == H5O_TYPE_GROUP) { + H5Dclose(id); + } + } } template static inline void _do_read(const hid_t &attr, const hid_t &type, T &val) { - H5Aread(attr, type, &val); + H5Aread(attr, type, &val); } template<> void _do_read(const hid_t &attr, const hid_t &type, std::string &val) { - vector buf; - hid_t space = H5Aget_space (attr); - hsize_t ndims=1, dims[1], maxdims[1]; - //ndims = H5Sget_simple_extent_dims (space, dims, maxdims); - buf.resize(H5Tget_size (type)); - H5Aread(attr, type, buf.data()); - H5Sclose(space); - val=string(buf.data()); + vector buf; + hid_t type_in_file = H5Aget_type(attr); + 
hid_t type_in_memory = H5Tcopy(type); // copy memory type because we'll need to modify it + size_t length = H5Tget_size(type_in_file); // get length of the string in the file + buf.resize(length+1); // resize buffer in memory, allowing for null terminator + H5Tset_size(type_in_memory, length+1); // tell HDF5 the length of the buffer in memory + H5Tset_strpad(type_in_memory, H5T_STR_NULLTERM); // specify that we want a null terminated string + H5Aread(attr, type_in_memory, buf.data()); + H5Tclose(type_in_memory); + H5Tclose(type_in_file); + val=string(buf.data()); } template static inline void _do_read_v(const hid_t &attr, const hid_t &type, vector &val) { - hid_t space = H5Aget_space (attr); - int npoints = H5Sget_simple_extent_npoints(space); - val.resize(npoints); - H5Aread(attr, type, val.data()); - H5Sclose(space); + hid_t space = H5Aget_space (attr); + int npoints = H5Sget_simple_extent_npoints(space); + val.resize(npoints); + H5Aread(attr, type, val.data()); + H5Sclose(space); } template const T read_attribute(const hid_t &file_id, const std::string &name) { - std::string attr_name; - T val; - hid_t type; - H5O_info_t object_info; - vector ids; - //traverse the file to get to the attribute, storing the ids of the - //groups, data spaces, etc that have been opened. - get_attribute(file_id, ids, name); - //now reverse ids and load attribute - reverse(ids.begin(),ids.end()); - //read the appropriate type - type = H5Aget_type(ids[0]); - _do_read(ids[0], type, val); - H5Aclose(ids[0]); - //remove file id from id list - ids.pop_back(); - ids.erase(ids.begin()); - //now have hdf5 ids traversed to get to desired attribute so move along to close all - //based on their object type - close_hdf_ids(ids); - return val; + std::string attr_name; + T val; + hid_t type; + H5O_info_t object_info; + vector ids; + //traverse the file to get to the attribute, storing the ids of the + //groups, data spaces, etc that have been opened. 
+ get_attribute(file_id, ids, name); + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //determine hdf5 type of the array in memory + type = hdf5_type(T{}); + // read the data + _do_read(ids[0], type, val); + H5Aclose(ids[0]); + //remove file id from id list + ids.pop_back(); + ids.erase(ids.begin()); + //now have hdf5 ids traversed to get to desired attribute so move along to close all + //based on their object type + close_hdf_ids(ids); + return val; } //read vector attribute template const vector read_attribute_v(const hid_t &file_id, const std::string &name) { - std::string attr_name; - vector val; - hid_t type; - H5O_info_t object_info; - vector ids; - //traverse the file to get to the attribute, storing the ids of the - //groups, data spaces, etc that have been opened. - get_attribute(file_id, ids, name); - //now reverse ids and load attribute - reverse(ids.begin(),ids.end()); - //read the appropriate type - type = H5Aget_type(ids[0]); - _do_read_v(ids[0], type, val); - H5Aclose(ids[0]); - //remove file id from id list - ids.pop_back(); - ids.erase(ids.begin()); - //now have hdf5 ids traversed to get to desired attribute so move along to close all - //based on their object type - close_hdf_ids(ids); - return val; + std::string attr_name; + vector val; + hid_t type; + H5O_info_t object_info; + vector ids; + //traverse the file to get to the attribute, storing the ids of the + //groups, data spaces, etc that have been opened. 
+ get_attribute(file_id, ids, name); + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //determine hdf5 type of the array in memory + type = hdf5_type(T{}); + // read the data + _do_read_v(ids[0], type, val); + H5Aclose(ids[0]); + //remove file id from id list + ids.pop_back(); + ids.erase(ids.begin()); + //now have hdf5 ids traversed to get to desired attribute so move along to close all + //based on their object type + close_hdf_ids(ids); + return val; } template const T read_attribute(const std::string &filename, const std::string &name) { - safe_hdf5(H5Fopen, filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); - hid_t file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); - T attr = read_attribute(file_id, name); - safe_hdf5(H5Fclose,file_id); - return attr; + safe_hdf5(H5Fopen, filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + hid_t file_id = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + T attr = read_attribute(file_id, name); + safe_hdf5(H5Fclose,file_id); + return attr; } static inline hid_t HDF5OpenFile(string name, unsigned int flags){ - hid_t Fhdf; - return H5Fopen(name.c_str(),flags, H5P_DEFAULT); + hid_t Fhdf; + return H5Fopen(name.c_str(),flags, H5P_DEFAULT); } static inline hid_t HDF5OpenGroup(const hid_t &file, string name){ - return H5Gopen2(file,name.c_str(),H5P_DEFAULT); + return H5Gopen2(file,name.c_str(),H5P_DEFAULT); } static inline hid_t HDF5OpenDataSet(const hid_t &id, string name){ hid_t idval = H5Dopen2(id,name.c_str(),H5P_DEFAULT); - return idval; + return idval; } static inline hid_t HDF5OpenDataSpace(const hid_t &id){ hid_t idval=H5Dget_space(id); - return idval; + return idval; } static inline void HDF5CloseFile(hid_t &id){ - if (id>=0) H5Fclose(id); - id = -1; + if (id>=0) H5Fclose(id); + id = -1; } static inline void HDF5CloseGroup(hid_t &id){ - if (id>=0) H5Gclose(id); - id = -1; + if (id>=0) H5Gclose(id); + id = -1; } static inline void HDF5CloseDataSet(hid_t &id){ - if (id>=0) 
H5Dclose(id); - id = -1; + if (id>=0) H5Dclose(id); + id = -1; } static inline void HDF5CloseDataSpace(hid_t &id){ - if (id>=0) H5Sclose(id); - id = -1; + if (id>=0) H5Sclose(id); + id = -1; } static inline void HDF5ReadHyperSlabReal(double *buffer, - const hid_t &dataset, const hid_t &dataspace, - const hsize_t datarank, const hsize_t ndim, int nchunk, int noffset + const hid_t &dataset, const hid_t &dataspace, + const hsize_t datarank, const hsize_t ndim, int nchunk, int noffset ) { - //setup hyperslab so that it is loaded into the buffer - vector start, count, stride, block, memdims; - hid_t memspace; - start.push_back(noffset);start.push_back(0); - count.push_back(nchunk);count.push_back(ndim); - stride.push_back(1);stride.push_back(1); - block.push_back(1);block.push_back(1); - memdims.push_back(nchunk*ndim); - H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, start.data(), stride.data(), count.data(), block.data()); - memspace = H5Screate_simple (1, memdims.data(), NULL); - safe_hdf5(H5Dread, dataset, H5T_NATIVE_DOUBLE, memspace, dataspace, H5P_DEFAULT, buffer); + //setup hyperslab so that it is loaded into the buffer + vector start, count, stride, block, memdims; + hid_t memspace; + start.push_back(noffset);start.push_back(0); + count.push_back(nchunk);count.push_back(ndim); + stride.push_back(1);stride.push_back(1); + block.push_back(1);block.push_back(1); + memdims.push_back(nchunk*ndim); + H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, start.data(), stride.data(), count.data(), block.data()); + memspace = H5Screate_simple (1, memdims.data(), NULL); + safe_hdf5(H5Dread, dataset, H5T_NATIVE_DOUBLE, memspace, dataspace, H5P_DEFAULT, buffer); } static inline void HDF5ReadHyperSlabInteger(long long *buffer, - const hid_t &dataset, const hid_t &dataspace, - const hsize_t datarank, const hsize_t ndim, int nchunk, int noffset + const hid_t &dataset, const hid_t &dataspace, + const hsize_t datarank, const hsize_t ndim, int nchunk, int noffset ) { - //setup hyperslab 
so that it is loaded into the buffer - vector start, count, stride, block, memdims; - hid_t memspace; - start.push_back(noffset);start.push_back(0); - count.push_back(nchunk);count.push_back(ndim); - stride.push_back(1);stride.push_back(1); - block.push_back(1);block.push_back(1); - memdims.push_back(nchunk*ndim); - H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, start.data(), stride.data(), count.data(), block.data()); + //setup hyperslab so that it is loaded into the buffer + vector start, count, stride, block, memdims; + hid_t memspace; + start.push_back(noffset);start.push_back(0); + count.push_back(nchunk);count.push_back(ndim); + stride.push_back(1);stride.push_back(1); + block.push_back(1);block.push_back(1); + memdims.push_back(nchunk*ndim); + H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, start.data(), stride.data(), count.data(), block.data()); memspace = H5Screate_simple (1, memdims.data(), NULL); - safe_hdf5(H5Dread, dataset, H5T_NATIVE_LONG, memspace, dataspace, H5P_DEFAULT, buffer); + safe_hdf5(H5Dread, dataset, H5T_NATIVE_LONG, memspace, dataspace, H5P_DEFAULT, buffer); } ///\name HDF class to manage writing information class H5OutputFile { - protected: + protected: - hid_t file_id; + hid_t file_id; +#ifdef USEPARALLELHDF + hid_t parallel_access_id; +#endif - // Called if a HDF5 call fails (might need to MPI_Abort) - void io_error(std::string message) { - std::cerr << message << std::endl; + // Called if a HDF5 call fails (might need to MPI_Abort) + void io_error(std::string message) { + std::cerr << message << std::endl; #ifdef USEMPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(MPI_COMM_WORLD, 1); +#endif + abort(); + } + + public: + + // Constructor + H5OutputFile() { + file_id = -1; +#ifdef USEPARALLELHDF + parallel_access_id = -1; #endif - abort(); - } - - public: - - // Constructor - H5OutputFile() { - file_id = -1; - } - - // Create a new file - void create(std::string filename, unsigned int flag) - { - if(file_id >= 0)io_error("Attempted to 
create file when already open!"); - file_id = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); - if(file_id < 0)io_error(string("Failed to create output file: ")+filename); - } - - void append(std::string filename, unsigned int flag) - { - if(file_id >= 0)io_error("Attempted to create file when already open!"); - file_id = H5Fopen(filename.c_str(), H5F_ACC_RDWR, H5P_DEFAULT); - if(file_id < 0)io_error(string("Failed to create output file: ")+filename); - } - - // Close the file - void close() - { - if(file_id < 0)io_error("Attempted to close file which is not open!"); - H5Fclose(file_id); - file_id = -1; - } + } + + // Create a new file + void create(std::string filename, hid_t flag = H5F_ACC_TRUNC, + int taskID = -1, bool iparallelopen = true) + { + if(file_id >= 0)io_error("Attempted to create file when already open!"); +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; + if (iparallelopen && taskID ==-1) { + parallel_access_id = H5Pcreate (H5P_FILE_ACCESS); + if (parallel_access_id < 0) io_error("Parallel access creation failed"); + herr_t ret = H5Pset_fapl_mpio(parallel_access_id, comm, info); + if (ret < 0) io_error("Parallel access failed"); + // create the file collectively + file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, parallel_access_id); + if (file_id < 0) io_error(string("Failed to create output file: ")+filename); + ret = H5Pclose(parallel_access_id); + if (ret < 0) io_error("Parallel release failed"); + parallel_access_id = -1; + } + else { + if (taskID <0 || taskID > NProcsWrite) io_error(string("MPI Task ID asked to create file out of range. 
Task ID is ")+to_string(taskID)); + if (ThisWriteTask == taskID) { + file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, H5P_DEFAULT); + if (file_id < 0) io_error(string("Failed to create output file: ")+filename); + parallel_access_id = -1; + } + else { + parallel_access_id = -2; + } + MPI_Barrier(comm); + } +#else + file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, H5P_DEFAULT); + if(file_id < 0)io_error(string("Failed to create output file: ")+filename); +#endif + + } + + void append(std::string filename, hid_t flag = H5F_ACC_RDWR, + int taskID = -1, bool iparallelopen = true) + { + if(file_id >= 0)io_error("Attempted to open and append to file when already open!"); +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; + if (iparallelopen && taskID ==-1) { + parallel_access_id = H5Pcreate (H5P_FILE_ACCESS); + if (parallel_access_id < 0) io_error("Parallel access creation failed"); + herr_t ret = H5Pset_fapl_mpio(parallel_access_id, comm, info); + if (ret < 0) io_error("Parallel access failed"); + // create the file collectively + file_id = H5Fopen(filename.c_str(), flag, parallel_access_id); + if (file_id < 0) io_error(string("Failed to create output file: ")+filename); + ret = H5Pclose(parallel_access_id); + if (ret < 0) io_error("Parallel release failed"); + parallel_access_id = -1; + } + else { + if (taskID <0 || taskID > NProcsWrite) io_error(string("MPI Task ID asked to create file out of range. 
Task ID is ")+to_string(taskID)); + if (ThisWriteTask == taskID) { + file_id = H5Fopen(filename.c_str(),flag, H5P_DEFAULT); + if (file_id < 0) io_error(string("Failed to create output file: ")+filename); + parallel_access_id = -1; + } + else { + parallel_access_id = -2; + } + MPI_Barrier(comm); + } +#else + file_id = H5Fopen(filename.c_str(), flag, H5P_DEFAULT); + if (file_id < 0) io_error(string("Failed to create output file: ")+filename); +#endif + } + + // Close the file + void close() + { +#ifdef USEPARALLELHDF + if(file_id < 0 && parallel_access_id == -1) io_error("Attempted to close file which is not open!"); + if (parallel_access_id == -1) H5Fclose(file_id); +#else + if(file_id < 0) io_error("Attempted to close file which is not open!"); + H5Fclose(file_id); +#endif + file_id = -1; +#ifdef USEPARALLELHDF + parallel_access_id = -1; +#endif + } + + hid_t create_group(string groupname) { + hid_t group_id = H5Gcreate(file_id, groupname.c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + return group_id; + } + herr_t close_group(hid_t gid) { + herr_t status = H5Gclose(gid); + return status; + } // Destructor closes the file if it's open - ~H5OutputFile() - { - if(file_id >= 0) - close(); - } - - // Functions to return corresponding HDF5 type for C types - hid_t hdf5_type(float dummy) {return H5T_NATIVE_FLOAT;} - hid_t hdf5_type(double dummy) {return H5T_NATIVE_DOUBLE;} - hid_t hdf5_type(int dummy) {return H5T_NATIVE_INT;} - hid_t hdf5_type(long dummy) {return H5T_NATIVE_LONG;} - hid_t hdf5_type(long long dummy) {return H5T_NATIVE_LLONG;} - hid_t hdf5_type(unsigned int dummy) {return H5T_NATIVE_UINT;} - hid_t hdf5_type(unsigned long dummy) {return H5T_NATIVE_ULONG;} - hid_t hdf5_type(unsigned long long dummy) {return H5T_NATIVE_ULLONG;} - - - /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of - /// the input data if not explicitly specified with the filetype_id parameter. 
- template void write_dataset(std::string name, hsize_t len, T *data, - hid_t memtype_id = -1, hid_t filetype_id=-1) + ~H5OutputFile() + { + if(file_id >= 0) close(); + } + + /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + template void write_dataset(std::string name, hsize_t len, T *data, + hid_t memtype_id = -1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) { - int rank = 1; + int rank = 1; hsize_t dims[1] = {len}; - if (memtype_id == -1) { - memtype_id = hdf5_type(T{}); - } - write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id); + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); } - void write_dataset(string name, hsize_t len, string data) + void write_dataset(string name, hsize_t len, string data, bool flag_parallel = true, bool flag_collective = true) { - int rank = 1; +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + int rank = 1; hsize_t dims[1] = {len}; - hid_t memtype_id, filetype_id, dspace_id, dset_id; - herr_t status; - memtype_id = H5Tcopy (H5T_C_S1); - status = H5Tset_size (memtype_id, data.size()); - //status = H5Tset_size (memtype_id, H5T_VARIABLE); - filetype_id = H5Tcopy (H5T_C_S1); - //status = H5Tset_size (filetype_id, H5T_VARIABLE); - status = H5Tset_size (filetype_id, data.size()); - - // Create the dataspace - dspace_id = H5Screate_simple(rank, dims, NULL); - - // Create the dataset - dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - // Write the data - if(H5Dwrite(dset_id, memtype_id, dspace_id, H5S_ALL, H5P_DEFAULT, data.c_str()) < 0) - io_error(string("Failed to write dataset: ")+name); - - // Clean up (note that dtype_id is 
NOT a new object so don't need to close it) - H5Sclose(dspace_id); - H5Dclose(dset_id); + hid_t memtype_id, filetype_id, dspace_id, dset_id, xfer_plist; + herr_t status, ret; + memtype_id = H5Tcopy (H5T_C_S1); + status = H5Tset_size (memtype_id, data.size()); + filetype_id = H5Tcopy (H5T_C_S1); + status = H5Tset_size (filetype_id, data.size()); + + // Create the dataspace + dspace_id = H5Screate_simple(rank, dims, NULL); + + // Create the dataset + dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); +#ifdef USEPARALLELHDF + if (flag_parallel) { + // set up the collective transfer properties list + xfer_plist = H5Pcreate(H5P_DATASET_XFER); + if (xfer_plist < 0) io_error(string("Failed to set up parallel transfer: ")+name); + if (flag_collective) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + else ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_INDEPENDENT); + if (ret < 0) io_error(string("Failed to set up parallel transfer: ")+name); + // the result of above should be that all processors write to the same + // point of the hdf file. 
+ } +#endif + // Write the data + if(H5Dwrite(dset_id, memtype_id, dspace_id, H5S_ALL, H5P_DEFAULT, data.c_str()) < 0) + io_error(string("Failed to write dataset: ")+name); + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) +#ifdef USEPARALLELHDF + if (flag_parallel) H5Pclose(xfer_plist); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); } - void write_dataset(string name, hsize_t len, void *data, - hid_t memtype_id=-1, hid_t filetype_id=-1) + void write_dataset(string name, hsize_t len, void *data, + hid_t memtype_id=-1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) { - int rank = 1; + int rank = 1; hsize_t dims[1] = {len}; - if (memtype_id == -1) { - throw std::runtime_error("Write data set called with void pointer but no type info passed."); + if (memtype_id == -1) { + throw std::runtime_error("Write data set called with void pointer but no type info passed."); } - write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id); + write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective); } - /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of - /// the input data if not explicitly specified with the filetype_id parameter. - template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, - hid_t memtype_id = -1, hid_t filetype_id=-1) + /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. 
+ template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) { - // Get HDF5 data type of the array in memory - if (memtype_id == -1) { - memtype_id = hdf5_type(T{}); - } - - // Determine type of the dataset to create - if(filetype_id < 0)filetype_id = memtype_id; - - // Create the dataspace - hid_t dspace_id = H5Screate_simple(rank, dims, NULL); - - // Only chunk non-zero size datasets - int nonzero_size = 1; - for(int i=0; i1 chunk - int large_dataset = 0; - for(int i=0; i HDFOUTPUTCHUNKSIZE)large_dataset = 1; - - // Dataset creation properties - hid_t prop_id = H5Pcreate(H5P_DATASET_CREATE); - if(nonzero_size && large_dataset) - { - hsize_t *chunks = new hsize_t[rank]; - for(auto i=0; i chunks(rank); + + // Get HDF5 data type of the array in memory + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + + // Determine type of the dataset to create + if(filetype_id < 0) filetype_id = memtype_id; + +#ifdef USEPARALLELHDF + vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + if (flag_parallel) { + //if parallel hdf5 get the full extent of the data + //this bit of code communicating information can probably be done elsewhere + //minimize number of mpi communications + for (auto i=0;i 0) continue; + for (auto j=1;j<=ThisWriteTask;j++) { + dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1]; + } + } + if (flag_first_dim_parallel && rank > 1) { + for (auto i=1; i1 chunk + int large_dataset = 0; + for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } + else { + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } +#else + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; +#endif + } + if(nonzero_size && large_dataset) + { +#ifdef USEPARALLELHDF + if (flag_parallel) { + for(auto i=0; i 0) { + // Write the data + ret = 
H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(string("Failed to write dataset: ")+name); + } + } + else if (dims[0] > 0) + { + // Write the data + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(string("Failed to write dataset: ")+name); + } + +#else + // Write the data + if (dims[0] > 0) { + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(string("Failed to write dataset: ")+name); + } +#endif + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) + H5Pclose(prop_id); +#ifdef USEPARALLELHDF + if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); } - void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, - hid_t memtype_id = -1, hid_t filetype_id=-1) + void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) { - // Get HDF5 data type of the array in memory - if (memtype_id == -1) { - throw std::runtime_error("Write data set called with void pointer but no type info passed."); - } - - // Determine type of the dataset to create - if(filetype_id < 0)filetype_id = memtype_id; - - // Create the dataspace - hid_t dspace_id = H5Screate_simple(rank, dims, NULL); - - // Only chunk non-zero size datasets - int nonzero_size = 1; - for(int i=0; i1 chunk - int large_dataset = 0; - for(int i=0; i HDFOUTPUTCHUNKSIZE)large_dataset = 1; - - // Dataset creation properties - hid_t prop_id = H5Pcreate(H5P_DATASET_CREATE); - if(nonzero_size && large_dataset) - { - hsize_t *chunks = new hsize_t[rank]; - for(auto i=0; i chunks(rank); + // Get HDF5 data type of the array in memory + if (memtype_id == -1) { + throw std::runtime_error("Write data 
set called with void pointer but no type info passed."); + } + // Determine type of the dataset to create + if(filetype_id < 0) filetype_id = memtype_id; + +#ifdef USEPARALLELHDF + vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + //if parallel hdf5 get the full extent of the data + //this bit of code communicating information can probably be done elsewhere + //minimize number of mpi communications + if (flag_parallel) { + //if parallel hdf5 get the full extent of the data + //this bit of code communicating information can probably be done elsewhere + //minimize number of mpi communications + for (auto i=0;i 0) continue; + for (auto j=1;j<=ThisWriteTask;j++) { + dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1]; + } + } + if (flag_first_dim_parallel && rank > 1) { + for (auto i=1; i1 chunk + int large_dataset = 0; + for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } + else { + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } +#else + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; +#endif + } + if(nonzero_size && large_dataset) + { +#ifdef USEPARALLELHDF + if (flag_parallel) { + for(auto i=0; i 0) { + // Write the data + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(string("Failed to write dataset: ")+name); + } + } + else if (dims[0] > 0) + { + // Write the data + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(string("Failed to write dataset: ")+name); + } + +#else + // Write the data + if (dims[0] > 0) { + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(string("Failed to write dataset: ")+name); + } +#endif + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) + H5Pclose(prop_id); +#ifdef USEPARALLELHDF + if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +#endif + H5Sclose(dspace_id); + 
H5Dclose(dset_id); } - /// write an attribute + /// write an attribute template void write_attribute(std::string parent, std::string name, T data) { - // Get HDF5 data type of the value to write - hid_t dtype_id = hdf5_type(data); + // Get HDF5 data type of the value to write + hid_t dtype_id = hdf5_type(data); - // Open the parent object - hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); - if(parent_id < 0)io_error(string("Unable to open object to write attribute: ")+name); + // Open the parent object + hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); + if(parent_id < 0)io_error(string("Unable to open object to write attribute: ")+name); - // Create dataspace - hid_t dspace_id = H5Screate(H5S_SCALAR); + // Create dataspace + hid_t dspace_id = H5Screate(H5S_SCALAR); - // Create attribute - hid_t attr_id = H5Acreate(file_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); - if(attr_id < 0)io_error(string("Unable to create attribute ")+name+string(" on object ")+parent); + // Create attribute + hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); + if(attr_id < 0)io_error(string("Unable to create attribute ")+name+string(" on object ")+parent); - // Write the attribute - if(H5Awrite(attr_id, dtype_id, &data) < 0) - io_error(string("Unable to write attribute ")+name+string(" on object ")+parent); + // Write the attribute + if(H5Awrite(attr_id, dtype_id, &data) < 0) + io_error(string("Unable to write attribute ")+name+string(" on object ")+parent); - // Clean up - H5Aclose(attr_id); - H5Sclose(dspace_id); - H5Oclose(parent_id); + // Clean up + H5Aclose(attr_id); + H5Sclose(dspace_id); + H5Oclose(parent_id); } }; @@ -600,7 +964,7 @@ struct HDF_Group_Names { string Header_name; string GASpart_name; string DMpart_name; - string EXTRADMpart_name; + string EXTRADMpart_name; string EXTRApart_name; string TRACERpart_name; string STARpart_name; @@ -615,8 +979,8 @@ struct HDF_Group_Names 
{ Header_name=string("Header"); GASpart_name=string("PartType0"); DMpart_name=string("PartType1"); - EXTRADMpart_name=string("PartType2"); - EXTRApart_name=string("PartType2"); + EXTRADMpart_name=string("PartType2"); + EXTRApart_name=string("PartType2"); TRACERpart_name=string("PartType3"); STARpart_name=string("PartType4"); BHpart_name=string("PartType5"); @@ -626,7 +990,7 @@ struct HDF_Group_Names { Header_name=string("Header"); GASpart_name=string("PartType0"); DMpart_name=string("PartType1"); - EXTRADMpart_name=string("PartType2"); + EXTRADMpart_name=string("PartType2"); EXTRApart_name=string("PartType2"); TRACERpart_name=string("PartType3"); STARpart_name=string("PartType4"); @@ -636,24 +1000,24 @@ struct HDF_Group_Names { part_names[0]=GASpart_name; part_names[1]=DMpart_name; - #ifdef HIGHRES +#ifdef HIGHRES part_names[2]=EXTRADMpart_name; - #else - part_names[2]=EXTRApart_name; - #endif - part_names[3]=TRACERpart_name; +#else + part_names[2]=EXTRApart_name; +#endif + part_names[3]=TRACERpart_name; part_names[4]=STARpart_name; part_names[5]=BHpart_name; names[0]=Header_name; names[1]=GASpart_name; names[2]=DMpart_name; - #ifdef HIGHRES - names[3]=EXTRADMpart_name; - #else +#ifdef HIGHRES + names[3]=EXTRADMpart_name; +#else names[3]=EXTRApart_name; - #endif - names[4]=TRACERpart_name; +#endif + names[4]=TRACERpart_name; names[5]=STARpart_name; names[6]=BHpart_name; } @@ -704,6 +1068,20 @@ struct HDF_Header { names[itemp++]=string("Cosmology/h"); names[itemp++]=string("Cosmology/Cosmological run"); break; + case HDFOLDSWIFTEAGLENAMES: + names[itemp++]=string("Header/BoxSize"); + names[itemp++]=string("Header/MassTable"); + names[itemp++]=string("Header/NumPart_ThisFile"); + names[itemp++]=string("Header/NumPart_Total"); + names[itemp++]=string("Header/NumPart_Total_HighWord"); + names[itemp++]=string("Cosmology/Omega_m"); + names[itemp++]=string("Cosmology/Omega_lambda"); + names[itemp++]=string("Header/Redshift"); + names[itemp++]=string("Header/Time"); + 
names[itemp++]=string("Header/NumFilesPerSnapshot"); + names[itemp++]=string("Cosmology/h"); + names[itemp++]=string("Cosmology/Cosmological run"); + break; default: names[itemp++]=string("Header/BoxSize"); @@ -736,16 +1114,35 @@ struct HDF_Part_Info { int itemp=0; //gas if (ptype==HDFGASTYPE) { + + // Positions names[itemp++]=string("Coordinates"); + + // Velocities if(hdfnametype==HDFEAGLENAMES) names[itemp++]=string("Velocity"); else names[itemp++]=string("Velocities"); + + // IDs names[itemp++]=string("ParticleIDs"); + + // Masses if(hdfnametype==HDFEAGLENAMES) names[itemp++]=string("Mass"); else names[itemp++]=string("Masses"); - names[itemp++]=string("Density"); - names[itemp++]=string("InternalEnergy"); - names[itemp++]=string("StarFormationRate"); - //always place the metacallity at position 7 in naming array + + // Density + if(hdfnametype==HDFSWIFTEAGLENAMES) names[itemp++]=string("Densities"); + else names[itemp++]=string("Density"); + + // Internal energies + if(hdfnametype==HDFSWIFTEAGLENAMES) names[itemp++]=string("InternalEnergies"); + else names[itemp++]=string("InternalEnergy"); + + // SFR + if(hdfnametype==HDFSWIFTEAGLENAMES) names[itemp++]=string("StarFormationRates"); + else if(hdfnametype==HDFOLDSWIFTEAGLENAMES) names[itemp++]=string("SFR"); + else names[itemp++]=string("StarFormationRate"); + + //Metallicity. 
Note always place at position 7 in naming array if (hdfnametype==HDFILLUSTISNAMES) { propindex[HDFGASIMETAL]=itemp; names[itemp++]=string("GFM_Metallicity"); @@ -802,38 +1199,72 @@ struct HDF_Part_Info { names[itemp++]=string("Dust_Masses"); names[itemp++]=string("Dust_Metallicity");//11 metals stored in this data set } - else if (hdfnametype==HDFEAGLENAMES || hdfnametype==HDFSWIFTEAGLENAMES) { + else if (hdfnametype==HDFEAGLENAMES) { propindex[HDFGASIMETAL]=itemp; names[itemp++]=string("Metallicity"); } + else if(hdfnametype==HDFSWIFTEAGLENAMES) { + propindex[HDFGASIMETAL]=itemp; + names[itemp++]=string("MetalMassFractions"); + } + else if(hdfnametype==HDFOLDSWIFTEAGLENAMES) { + propindex[HDFGASIMETAL]=itemp; + names[itemp++]=string("Metallicity"); + } + } //dark matter if (ptype==HDFDMTYPE) { + + // Positions names[itemp++]=string("Coordinates"); + + // Velocities if(hdfnametype==HDFEAGLENAMES) names[itemp++]=string("Velocity"); else names[itemp++]=string("Velocities"); + + // IDs names[itemp++]=string("ParticleIDs"); - if (hdfnametype==HDFSWIFTEAGLENAMES) { + + // Masses + if (hdfnametype==HDFSWIFTEAGLENAMES || hdfnametype==HDFSIMBANAMES || + hdfnametype==HDFMUFASANAMES || hdfnametype==HDFOLDSWIFTEAGLENAMES) { names[itemp++]=string("Masses"); } + + // Potential if (hdfnametype==HDFSIMBANAMES||hdfnametype==HDFMUFASANAMES) { - names[itemp++]=string("Masses"); names[itemp++]=string("Potential"); } - if (hdfnametype==HDFILLUSTISNAMES) { + else if (hdfnametype==HDFILLUSTISNAMES) { names[itemp++]=string("Potential"); + } + + // Subfind properties + if (hdfnametype==HDFILLUSTISNAMES) { names[itemp++]=string("SubfindDensity"); names[itemp++]=string("SubfindHsml"); names[itemp++]=string("SubfindVelDisp"); } } + //also dark matter particles if (ptype==HDFDM1TYPE ||ptype==HDFDM2TYPE) { + + // Positions names[itemp++]=string("Coordinates"); + + // Velocities if(hdfnametype==HDFEAGLENAMES) names[itemp++]=string("Velocity"); else names[itemp++]=string("Velocities"); + + // 
IDs names[itemp++]=string("ParticleIDs"); + + // Masses names[itemp++]=string("Masses"); + + // Potential if (hdfnametype==HDFSIMBANAMES||hdfnametype==HDFMUFASANAMES) { names[itemp++]=string("Potential"); } @@ -844,12 +1275,21 @@ struct HDF_Part_Info { names[itemp++]=string("TracerID"); } if (ptype==HDFSTARTYPE) { + + // Positions names[itemp++]=string("Coordinates"); + + // Velocities if(hdfnametype==HDFEAGLENAMES) names[itemp++]=string("Velocity"); else names[itemp++]=string("Velocities"); + + // IDs names[itemp++]=string("ParticleIDs"); + + // Masses if(hdfnametype==HDFEAGLENAMES) names[itemp++]=string("Mass"); else names[itemp++]=string("Masses"); + //for stars assume star formation and metallicy are position 4, 5 in name array if (hdfnametype==HDFILLUSTISNAMES) { propindex[HDFSTARIAGE]=itemp; @@ -890,14 +1330,30 @@ struct HDF_Part_Info { propindex[HDFSTARIMETAL]=itemp; names[itemp++]=string("Metallicity"); } + else if (hdfnametype==HDFSWIFTEAGLENAMES) { + propindex[HDFSTARIAGE]=itemp; + names[itemp++]=string("BirthScaleFactors"); + propindex[HDFSTARIMETAL]=itemp; + names[itemp++]=string("MetalMassFractions"); + } } if (ptype==HDFBHTYPE) { + + // Positions names[itemp++]=string("Coordinates"); + + // Velocities if(hdfnametype==HDFEAGLENAMES) names[itemp++]=string("Velocity"); else names[itemp++]=string("Velocities"); + + // IDs names[itemp++]=string("ParticleIDs"); + + // Masses if(hdfnametype==HDFEAGLENAMES) names[itemp++]=string("Mass"); + if(hdfnametype==HDFSWIFTEAGLENAMES) names[itemp++]=string("DynamicalMasses"); else names[itemp++]=string("Masses"); + if (hdfnametype==HDFILLUSTISNAMES) { names[itemp++]=string("HostHaloMass"); names[itemp++]=string("Potential"); @@ -934,6 +1390,28 @@ struct HDF_Part_Info { //names[itemp++]=string("StellarFormationTime"); //names[itemp++]=string("Metallicity"); } + else if (hdfnametype==HDFSWIFTEAGLENAMES) { + propindex[HDFBHIAGE]=itemp; + names[itemp++]=string("FormationScaleFactors"); + propindex[HDFBHIMETAL]=itemp; + 
names[itemp++]=string("MetalMasses"); + propindex[HDFBHIMDOT]=itemp; + names[itemp++]=string("AccretionRates"); + + names[itemp++]=string("SubgridMasses"); + names[itemp++]=string("ElementMasses"); + names[itemp++]=string("MetalMassFromSNIa"); + names[itemp++]=string("MetalMassFromSNII"); + names[itemp++]=string("MetalMassFromAGB"); + names[itemp++]=string("MassesFromSNIa"); + names[itemp++]=string("MassesFromSNII"); + names[itemp++]=string("MassesFromAGB"); + names[itemp++]=string("IronMassFromSNIa"); + names[itemp++]=string("GasDensities"); + names[itemp++]=string("GasSoundSpeeds"); + names[itemp++]=string("EnergyReservoirs"); + names[itemp++]=string("TotalAccretedMasses"); + } } nentries=itemp; } @@ -944,41 +1422,41 @@ struct HDF_Part_Info { //@{ inline void HDFSetUsedParticleTypes(Options &opt, int &nusetypes, int &nbusetypes, int usetypes[]) { - nusetypes=0; - if (opt.partsearchtype==PSTALL) { - nusetypes=0; + nusetypes=0; + if (opt.partsearchtype==PSTALL) { + nusetypes=0; if (opt.iusegasparticles) usetypes[nusetypes++]=HDFGASTYPE; - if (opt.iusedmparticles) usetypes[nusetypes++]=HDFDMTYPE; + if (opt.iusedmparticles) usetypes[nusetypes++]=HDFDMTYPE; if (opt.iuseextradarkparticles) { - usetypes[nusetypes++]=HDFDM1TYPE; - if (opt.ihdfnameconvention!=HDFSWIFTEAGLENAMES) - { - usetypes[nusetypes++]=HDFDM2TYPE; - } - } + usetypes[nusetypes++]=HDFDM1TYPE; + if (opt.ihdfnameconvention!=HDFSWIFTEAGLENAMES) + { + usetypes[nusetypes++]=HDFDM2TYPE; + } + } if (opt.iusestarparticles) usetypes[nusetypes++]=HDFSTARTYPE; if (opt.iusesinkparticles) usetypes[nusetypes++]=HDFBHTYPE; if (opt.iusewindparticles) usetypes[nusetypes++]=HDFWINDTYPE; if (opt.iusetracerparticles) usetypes[nusetypes++]=HDFTRACERTYPE; - } - else if (opt.partsearchtype==PSTDARK) { - nusetypes=1;usetypes[0]=HDFDMTYPE; - if (opt.iuseextradarkparticles) { - usetypes[nusetypes++]=HDFDM1TYPE; - if (opt.ihdfnameconvention!=HDFSWIFTEAGLENAMES) - { - usetypes[nusetypes++]=HDFDM2TYPE; - } - } - if 
(opt.iBaryonSearch) { - nbusetypes=1;usetypes[nusetypes+nbusetypes++]=HDFGASTYPE; - if (opt.iusestarparticles) usetypes[nusetypes+nbusetypes++]=HDFSTARTYPE; - if (opt.iusesinkparticles) usetypes[nusetypes+nbusetypes++]=HDFBHTYPE; - } - } - else if (opt.partsearchtype==PSTGAS) {nusetypes=1;usetypes[0]=HDFGASTYPE;} - else if (opt.partsearchtype==PSTSTAR) {nusetypes=1;usetypes[0]=HDFSTARTYPE;} - else if (opt.partsearchtype==PSTBH) {nusetypes=1;usetypes[0]=HDFBHTYPE;} + } + else if (opt.partsearchtype==PSTDARK) { + nusetypes=1;usetypes[0]=HDFDMTYPE; + if (opt.iuseextradarkparticles) { + usetypes[nusetypes++]=HDFDM1TYPE; + if (opt.ihdfnameconvention!=HDFSWIFTEAGLENAMES) + { + usetypes[nusetypes++]=HDFDM2TYPE; + } + } + if (opt.iBaryonSearch) { + nbusetypes=1;usetypes[nusetypes+nbusetypes++]=HDFGASTYPE; + if (opt.iusestarparticles) usetypes[nusetypes+nbusetypes++]=HDFSTARTYPE; + if (opt.iusesinkparticles) usetypes[nusetypes+nbusetypes++]=HDFBHTYPE; + } + } + else if (opt.partsearchtype==PSTGAS) {nusetypes=1;usetypes[0]=HDFGASTYPE;} + else if (opt.partsearchtype==PSTSTAR) {nusetypes=1;usetypes[0]=HDFSTARTYPE;} + else if (opt.partsearchtype==PSTBH) {nusetypes=1;usetypes[0]=HDFBHTYPE;} } //@} @@ -997,11 +1475,11 @@ inline Int_t HDF_get_nbodies(char *fname, int ptype, Options &opt) } //H5File Fhdf; - hid_t Fhdf; + hid_t Fhdf; HDF_Group_Names hdf_gnames; //to store the groups, data sets and their associated data spaces //Attribute headerattribs; - hid_t headerattribs; + hid_t headerattribs; HDF_Header hdf_header_info = HDF_Header(opt.ihdfnameconvention); //buffers to load data string stringbuff, dataname; @@ -1009,17 +1487,17 @@ inline Int_t HDF_get_nbodies(char *fname, int ptype, Options &opt) int intbuff[NHDFTYPE]; long long longbuff[NHDFTYPE]; unsigned int uintbuff[NHDFTYPE]; - vector vuintbuff; + vector vuintbuff; int j,k,ireaderror=0; Int_t nbodies=0; //DataSpace headerdataspace; - hid_t headerdataspace; + hid_t headerdataspace; //to determine types //IntType inttype; 
//StrType stringtype; int nusetypes,usetypes[NHDFTYPE],nbusetypes; - HDFSetUsedParticleTypes(opt,nusetypes,nbusetypes,usetypes); + HDFSetUsedParticleTypes(opt,nusetypes,nbusetypes,usetypes); //Try block to detect exceptions raised by any of the calls inside it //try @@ -1030,90 +1508,86 @@ inline Int_t HDF_get_nbodies(char *fname, int ptype, Options &opt) //Open the specified file and the specified dataset in the file. //Fhdf.openFile(buf, H5F_ACC_RDONLY); - Fhdf = H5Fopen(buf, H5F_ACC_RDONLY, H5P_DEFAULT); + Fhdf = H5Fopen(buf, H5F_ACC_RDONLY, H5P_DEFAULT); cout<<"Loading HDF header info in header group: "<(Fhdf, dataname); + // Check if it is a SWIFT snapshot. + //headerattribs=get_attribute(Fhdf, "Header/Code"); + //stringtype = headerattribs.getStrType(); + //headerattribs.read(stringtype, stringbuff); + dataname = string("Header/Code"); + stringbuff = read_attribute(Fhdf, dataname); - // Read SWIFT parameters - if(!swift_str.compare(stringbuff)) { - hdf_header_info.iscosmological=read_attribute(Fhdf, hdf_header_info.names[hdf_header_info.IIsCosmological]); - - if (!hdf_header_info.iscosmological && opt.icosmologicalin) { - cout<<"Error: cosmology is turned on in the config file but the snaphot provided is a non-cosmological run."<(Fhdf, hdf_header_info.names[hdf_header_info.IIsCosmological]); + if (!hdf_header_info.iscosmological && opt.icosmologicalin) { + cout<<"Error: cosmology is turned on in the config file but the snaphot provided is a non-cosmological run."<(Fhdf, hdf_header_info.names[hdf_header_info.INumTot]); - for (j=0;j(Fhdf, hdf_header_info.names[hdf_header_info.INumTotHW]); - for (j=0;j(Fhdf, hdf_header_info.names[hdf_header_info.INumTot]); + for (j=0;j(Fhdf, hdf_header_info.names[hdf_header_info.INumTotHW]); + for (j=0;j(Fhdf, hdf_header_info.names[hdf_header_info.INumFiles]); + hdf_header_info.num_files = read_attribute(Fhdf, hdf_header_info.names[hdf_header_info.INumFiles]); } - /* + /* catch(GroupIException &error) { HDF5PrintError(error); @@ 
-1273,12 +1748,20 @@ inline Int_t HDF_get_nfiles(char *fname, int ptype) ireaderror=1; } Fhdf.close(); - */ - HDF5CloseFile(Fhdf); + */ + HDF5CloseFile(Fhdf); return nfiles = hdf_header_info.num_files; } //@} +/// \name Wrappers to write attributes to HDF file +//@{ +void WriteVELOCIraptorConfigToHDF(Options &opt, H5OutputFile &Fhdf); +///Write the simulation info (which could use input files to overwrite passed configuration options) +void WriteSimulationInfoToHDF(Options &opt, H5OutputFile &Fhdf); +///Write the unit info +void WriteUnitInfoToHDF(Options &opt, H5OutputFile &Fhdf); +//@} #endif diff --git a/src/io.cxx b/src/io.cxx index cff97d45..0af76434 100644 --- a/src/io.cxx +++ b/src/io.cxx @@ -103,6 +103,10 @@ void ReadData(Options &opt, vector &Part, const Int_t nbodies, Particl //Adjust particle data to appropriate units void AdjustHydroQuantities(Options &opt, vector &Part, const Int_t nbodies) { #ifdef GASON + for (auto &p:Part) { + if (p.GetType()!=GASTYPE) continue; + p.SetU(p.GetU()*opt.internalenergyinputconversion); + } #ifdef STARON if (opt.metallicityinputconversion!=1.0) { for (auto &p:Part) { @@ -376,19 +380,23 @@ void WritePGList(Options &opt, const Int_t ngroups, const Int_t ng, Int_t *numin void WriteGroupCatalog(Options &opt, const Int_t ngroups, Int_t *numingroup, Int_t **pglist, vector &Part, Int_t nadditional){ fstream Fout,Fout2,Fout3; - char fname[500]; - char fname2[500]; - char fname3[500]; - unsigned long noffset=0,ngtot=0,nids=0,nidstot,nuids=0,nuidstot,ng=0; - Int_t *offset; + string fname, fname2, fname3; + ostringstream os; + unsigned long long noffset=0,ngtot=0,nids=0,nidstot=0,nuids=0,nuidstot=0, ng=0, nwritecommtot=0, nuwritecommtot=0; + vector groupdata; + vector offset; + vector partdata; +#ifdef USEMPI + MPIBuildWriteComm(opt); +#endif #ifdef USEHDF - // H5File Fhdf,Fhdf3; - // H5std_string datasetname; - // DataSpace dataspace; - // DataSet dataset; - // DSetCreatPropList hdfdatasetproplist; H5OutputFile Fhdf, Fhdf3; int 
itemp=0; + int ival; +#if defined(USEMPI) && defined(USEPARALLELHDF) + vector mpi_ngoffset(NProcs); + Int_t ngoffset; +#endif #endif #ifdef USEADIOS int adios_err; @@ -406,23 +414,36 @@ void WriteGroupCatalog(Options &opt, const Int_t ngroups, Int_t *numingroup, Int int ThisTask=0,NProcs=1; #endif + os << opt.outname << ".catalog_groups"; #ifdef USEMPI - sprintf(fname,"%s.catalog_groups.%d",opt.outname,ThisTask); + if (opt.ibinaryout==OUTHDF) { +#ifdef USEPARALLELHDF + os<<"."< 1) { + for (int j=0;j1) for (Int_t i=2;i<=ngroups;i++) offset[i]=offset[i-1]+pglist[i-1][numingroup[i-1]]; + if (opt.ibinaryout==OUTBINARY) Fout.write((char*)&(offset.data())[1],sizeof(Int_t)*ngroups); #ifdef USEHDF else if (opt.ibinaryout==OUTHDF) { - unsigned int *data=new unsigned int[ng]; - for (Int_t i=1;i<=ng;i++) data[i-1]=offset[i]; - Fhdf.write_dataset(datagroupnames.group[itemp], ng, data); + groupdata.resize(ng+1,0); + if (ng > 1) for (Int_t i=1;i<=ng;i++) groupdata[i-1]=offset[i]; +#ifdef USEPARALLELHDF + nids = 0; for (Int_t i=1; i<=ng; i++) nids+=pglist[i][numingroup[i]]; + MPI_Allgather(&nids, 1, MPI_Int_t, mpi_ngoffset.data(), 1, MPI_Int_t, mpi_comm_write); + if (ThisWriteTask > 0) + { + ngoffset = 0; for (auto itask = 0; itask < ThisWriteTask; itask++) ngoffset += mpi_ngoffset[itask]; + if (ng > 1) for (Int_t i=1; i<=ng; i++) groupdata[i-1] += ngoffset; + } +#endif + Fhdf.write_dataset(datagroupnames.group[itemp], ng, groupdata.data()); itemp++; - delete[] data; } #endif #ifdef USEADIOS else if (opt.ibinaryout==OUTADIOS) { //don't delcare new group, just add data adios_err=adios_define_var(adios_grp_handle,datagroupnames.group[itemp].c_str(),"",datagroupnames.adiosgroupdatatype[itemp],"ng","ngtot","ngmpioffset"); - unsigned long *data=new unsigned long[ng]; - for (Int_t i=1;i<=ng;i++) data[i-1]=offset[i]; - adios_err=adios_write(adios_file_handle,datagroupnames.group[itemp].c_str(),data); - delete[] data; + for (Int_t i=1;i<=ng;i++) groupdata[i-1]=offset[i]; + 
adios_err=adios_write(adios_file_handle,datagroupnames.group[itemp].c_str(),groupdata.data()); itemp++; } #endif else for (Int_t i=1;i<=ngroups;i++) Fout<1) for (Int_t i=2;i<=ngroups;i++) offset[i]=offset[i-1]+numingroup[i-1]-pglist[i-1][numingroup[i-1]]; + if (opt.ibinaryout==OUTBINARY) Fout.write((char*)&(offset.data())[1],sizeof(Int_t)*ngroups); #ifdef USEHDF else if (opt.ibinaryout==OUTHDF) { - unsigned int *data=new unsigned int[ng]; - for (Int_t i=1;i<=ng;i++) data[i-1]=numingroup[i]; - Fhdf.write_dataset(datagroupnames.group[itemp], ng, data); + groupdata.resize(ng+1,0); + if (ng > 1) for (Int_t i=1;i<=ng;i++) groupdata[i-1]=offset[i]; +#ifdef USEPARALLELHDF + nuids = 0; for (Int_t i=1; i<=ng; i++) nuids+=numingroup[i]-pglist[i][numingroup[i]]; + MPI_Allgather(&nuids, 1, MPI_Int_t, mpi_ngoffset.data(), 1, MPI_Int_t, mpi_comm_write); + if (ThisWriteTask > 0) + { + ngoffset = 0; for (auto itask = 0; itask < ThisWriteTask; itask++) ngoffset += mpi_ngoffset[itask]; + for (Int_t i=1; i<=ng; i++) groupdata[i-1] += ngoffset; + } +#endif + Fhdf.write_dataset(datagroupnames.group[itemp], ng, groupdata.data()); itemp++; - delete[] data; } #endif #ifdef USEADIOS else if (opt.ibinaryout==OUTADIOS) { //don't delcare new group, just add data adios_err=adios_define_var(adios_grp_handle,datagroupnames.group[itemp].c_str(),"",datagroupnames.adiosgroupdatatype[itemp],"ng","ngtot","ngmpioffset"); - unsigned long *data=new unsigned long[ng]; - for (Int_t i=1;i<=ng;i++) data[i-1]=offset[i]; - adios_err=adios_write(adios_file_handle,datagroupnames.group[itemp].c_str(),data); - delete[] data; + groupdata.resize(ng+1,0); + for (Int_t i=1;i<=ng;i++) groupdata[i-1]=offset[i]; + adios_err=adios_write(adios_file_handle,datagroupnames.group[itemp].c_str(),groupdata.data()); itemp++; } #endif else for (Int_t i=1;i<=ngroups;i++) Fout< 1) { + MPI_Allreduce(&nids, &nidstot, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&nuids, &nuidstot, 1, MPI_UNSIGNED_LONG_LONG, 
MPI_SUM, MPI_COMM_WORLD); + } + else { + nidstot=nids; + nuidstot=nuids; + } #else nidstot=nids; nuidstot=nuids; @@ -645,6 +731,34 @@ void WriteGroupCatalog(Options &opt, const Int_t ngroups, Int_t *numingroup, Int } #ifdef USEHDF else if (opt.ibinaryout==OUTHDF) { +#ifdef USEPARALLELHDF + MPI_Allreduce(&nids, &nwritecommtot, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, mpi_comm_write); + MPI_Allreduce(&nuids, &nuwritecommtot, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, mpi_comm_write); + if (ThisWriteTask == 0) { + itemp=0; + Fhdf.write_dataset(datagroupnames.part[itemp], 1, &ThisWriteComm, -1, -1, false); + Fhdf3.write_dataset(datagroupnames.part[itemp], 1, &ThisWriteComm, -1, -1, false); + itemp++; + Fhdf.write_dataset(datagroupnames.part[itemp], 1, &NWriteComms, -1, -1, false); + Fhdf3.write_dataset(datagroupnames.part[itemp], 1, &NWriteComms, -1, -1, false); + itemp++; + Fhdf.write_dataset(datagroupnames.part[itemp], 1, &nwritecommtot, -1, -1, false); + Fhdf3.write_dataset(datagroupnames.part[itemp], 1, &nuwritecommtot, -1, -1, false); + itemp++; + Fhdf.write_dataset(datagroupnames.part[itemp], 1, &nidstot, -1, -1, false); + Fhdf3.write_dataset(datagroupnames.part[itemp], 1, &nuidstot, -1, -1, false); + itemp++; + } + else { + itemp=4; + } + Fhdf.close(); + Fhdf3.close(); + MPI_Barrier(MPI_COMM_WORLD); + //reopen for parallel write + Fhdf.append(string(fname)); + Fhdf3.append(string(fname3)); +#else itemp=0; Fhdf.write_dataset(datagroupnames.part[itemp], 1, &ThisTask); Fhdf3.write_dataset(datagroupnames.part[itemp], 1, &ThisTask); @@ -658,6 +772,8 @@ void WriteGroupCatalog(Options &opt, const Int_t ngroups, Int_t *numingroup, Int Fhdf.write_dataset(datagroupnames.part[itemp], 1, &nidstot); Fhdf3.write_dataset(datagroupnames.part[itemp], 1, &nuidstot); itemp++; +#endif + } #endif #ifdef USEADIOS @@ -699,10 +815,9 @@ void WriteGroupCatalog(Options &opt, const Int_t ngroups, Int_t *numingroup, Int if (opt.ibinaryout==OUTBINARY) Fout.write((char*)idval,sizeof(Int_t)*nids); #ifdef 
USEHDF else if (opt.ibinaryout==OUTHDF) { - long long *data=new long long[nids]; - for (Int_t i=0;i 0) { - long long *data=new long long[nids]; - for (Int_t i=0;i 0) { - long long *data=new long long[nuids]; - for (Int_t i=0;i &Part){ fstream Fout,Fout2; - char fname[2000]; - char fname2[2000]; - Int_t noffset=0,ngtot=0,nids=0,nidstot,nuids=0,nuidstot=0; + string fname, fname2; + ostringstream os, os2; + unsigned long long noffset=0,ngtot=0,nids=0,nidstot,nuids=0,nuidstot=0, nwritecommtot=0, nuwritecommtot=0; Int_t *offset; int *typeval; +#ifdef USEMPI + MPIBuildWriteComm(opt); +#endif #ifdef USEHDF - // H5File Fhdf,Fhdf2; - // H5std_string datasetname; - // DataSpace dataspace; - // DataSet dataset; - // DSetCreatPropList hdfdatasetproplist; - // hsize_t *dims,*chunk_dims; - // hsize_t rank; H5OutputFile Fhdf,Fhdf2; int itemp; + int ival; #endif #if defined(USEHDF)||defined(USEADIOS) DataGroupNames datagroupnames; #endif - #ifndef USEMPI int ThisTask=0,NProcs=1; #endif + os << opt.outname << ".catalog_parttypes"; + os2 << opt.outname << ".catalog_parttypes.unbound"; #ifdef USEMPI - sprintf(fname,"%s.catalog_parttypes.%d",opt.outname,ThisTask); - sprintf(fname2,"%s.catalog_parttypes.unbound.%d",opt.outname,ThisTask); + if (opt.ibinaryout==OUTHDF) { +#ifdef USEPARALLELHDF + os<<"."< 1) { + MPI_Allreduce(&nids, &nidstot, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&nuids, &nuidstot, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); + } + else { + nidstot=nids; + nuidstot=nuids; + } #else nidstot=nids; nuidstot=nuids; @@ -878,6 +1002,34 @@ void WriteGroupPartType(Options &opt, const Int_t ngroups, Int_t *numingroup, In } #ifdef USEHDF else if (opt.ibinaryout==OUTHDF) { +#ifdef USEPARALLELHDF + MPI_Allreduce(&nids, &nwritecommtot, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, mpi_comm_write); + MPI_Allreduce(&nuids, &nuwritecommtot, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, mpi_comm_write); + if (ThisWriteTask == 0) { + itemp=0; + 
Fhdf.write_dataset(datagroupnames.types[itemp], 1, &ThisWriteComm, -1, -1, false); + Fhdf2.write_dataset(datagroupnames.types[itemp], 1, &ThisWriteComm, -1, -1, false); + itemp++; + Fhdf.write_dataset(datagroupnames.types[itemp], 1, &NWriteComms, -1, -1, false); + Fhdf2.write_dataset(datagroupnames.types[itemp], 1, &NWriteComms, -1, -1, false); + itemp++; + Fhdf.write_dataset(datagroupnames.types[itemp], 1, &nwritecommtot, -1, -1, false); + Fhdf2.write_dataset(datagroupnames.types[itemp], 1, &nuwritecommtot, -1, -1, false); + itemp++; + Fhdf.write_dataset(datagroupnames.types[itemp], 1, &nidstot, -1, -1, false); + Fhdf2.write_dataset(datagroupnames.types[itemp], 1, &nuidstot, -1, -1, false); + itemp++; + } + else { + itemp=4; + } + Fhdf.close(); + Fhdf2.close(); + MPI_Barrier(MPI_COMM_WORLD); + //reopen for parallel write + Fhdf.append(string(fname)); + Fhdf2.append(string(fname2)); +#else itemp=0; Fhdf.write_dataset(datagroupnames.types[itemp], 1, &ThisTask); Fhdf2.write_dataset(datagroupnames.types[itemp], 1, &ThisTask); @@ -891,6 +1043,7 @@ void WriteGroupPartType(Options &opt, const Int_t ngroups, Int_t *numingroup, In Fhdf.write_dataset(datagroupnames.types[itemp], 1, &nidstot); Fhdf2.write_dataset(datagroupnames.types[itemp], 1, &nuidstot); itemp++; +#endif } #endif else { @@ -944,7 +1097,7 @@ void WriteGroupPartType(Options &opt, const Int_t ngroups, Int_t *numingroup, In #endif #ifdef USEMPI - MPI_Barrier(MPI_COMM_WORLD); + MPIBuildWriteComm(opt); #endif } @@ -954,23 +1107,25 @@ void WriteGroupPartType(Options &opt, const Int_t ngroups, Int_t *numingroup, In ///to store all ids and then copying info from the array of vectors into it. 
void WriteSOCatalog(Options &opt, const Int_t ngroups, vector *SOpids, vector *SOtypes){ fstream Fout; - char fname[500]; - unsigned long ng,noffset=0,ngtot=0,nSOids=0,nSOidstot=0; + string fname; + ostringstream os; + unsigned long ng,noffset=0,ngtot=0,nSOids=0,nSOidstot=0, nwritecommtot=0, nSOwritecommtot=0; unsigned long *offset; long long *idval; int *typeval; Int_t *numingroup; +#ifdef USEMPI + MPIBuildWriteComm(opt); +#endif #ifdef USEHDF - // H5File Fhdf; - // H5std_string datasetname; - // DataSpace dataspace; - // DataSet dataset; - // DSetCreatPropList hdfdatasetproplist; - // hsize_t *dims,*chunk_dims; - // hsize_t rank; H5OutputFile Fhdf; int itemp=0; + int ival; +#if defined(USEMPI) && defined(USEPARALLELHDF) + vector mpi_offset(NProcs); + Int_t nSOidoffset; +#endif #endif #ifdef USEADIOS int adios_err; @@ -990,30 +1145,52 @@ void WriteSOCatalog(Options &opt, const Int_t ngroups, vector *SOpids, ve ng=ngroups; #ifdef USEMPI - MPI_Allreduce(&ng, &ngtot, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); + if (NProcs >1) { + MPI_Allreduce(&ng, &ngtot, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); + } + else { + ngtot = ng; + } #else ngtot=ng; #endif for (Int_t i=1;i<=ngroups;i++) nSOids+=SOpids[i].size(); #ifdef USEMPI - MPI_Allreduce(&nSOids, &nSOidstot, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); + if (NProcs > 1) { + MPI_Allreduce(&nSOids, &nSOidstot, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); + } + else { + nSOidstot = nSOids; + } #else - nSOidstot=nSOids; + nSOidstot = nSOids; #endif + os << opt.outname <<".catalog_SOlist"; #ifdef USEMPI - sprintf(fname,"%s.catalog_SOlist.%d",opt.outname,ThisTask); + if (opt.ibinaryout==OUTHDF) { +#ifdef USEPARALLELHDF + os<<"."< *SOpids, ve } #ifdef USEHDF else if (opt.ibinaryout==OUTHDF) { +#ifdef USEPARALLELHDF + MPI_Allreduce(&ng, &nwritecommtot, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, mpi_comm_write); + MPI_Allreduce(&nSOids, &nSOwritecommtot, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, mpi_comm_write); + if (ThisWriteTask == 0) { + itemp=0; + 
Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &ThisWriteComm, -1, -1, false); + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &NWriteComms, -1, -1, false); + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &nwritecommtot, -1, -1, false); + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &ngtot, -1, -1, false); + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &nSOwritecommtot, -1, -1, false); + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &nSOidstot, -1, -1, false); + } + else { + itemp=6; + } + Fhdf.close(); + MPI_Barrier(MPI_COMM_WORLD); + //reopen for parallel write + Fhdf.append(string(fname)); +#else itemp=0; - Fhdf.write_dataset(datagroupnames.SO[itemp], 1, &ThisTask); - itemp++; - Fhdf.write_dataset(datagroupnames.SO[itemp], 1, &NProcs); - itemp++; - Fhdf.write_dataset(datagroupnames.SO[itemp], 1, &ng); - itemp++; - Fhdf.write_dataset(datagroupnames.SO[itemp], 1, &ngtot); - itemp++; - Fhdf.write_dataset(datagroupnames.SO[itemp], 1, &nSOids); - itemp++; - Fhdf.write_dataset(datagroupnames.SO[itemp], 1, &nSOidstot); - itemp++; + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &ThisTask); + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &NProcs); + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &ng); + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &ngtot); + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &nSOids); + Fhdf.write_dataset(datagroupnames.SO[itemp++], 1, &nSOidstot); +#endif } #endif #ifdef USEADIOS @@ -1141,6 +1333,14 @@ void WriteSOCatalog(Options &opt, const Int_t ngroups, vector *SOpids, ve else if (opt.ibinaryout==OUTHDF) { unsigned long *data=new unsigned long[ng]; for (Int_t i=1;i<=ng;i++) data[i-1]=offset[i]; +#ifdef USEPARALLELHDF + MPI_Allgather(&nSOids, 1, MPI_Int_t, mpi_offset.data(), 1, MPI_Int_t, mpi_comm_write); + if (ThisWriteTask > 0) + { + nSOidoffset = 0; for (auto itask = 0; itask < ThisWriteTask; itask++) nSOidoffset += mpi_offset[itask]; + for (Int_t i=1; i<=ng; i++) data[i-1] += nSOidoffset; + 
} +#endif Fhdf.write_dataset(datagroupnames.SO[itemp], ng, data); itemp++; delete[] data; @@ -1236,6 +1436,9 @@ void WriteSOCatalog(Options &opt, const Int_t ngroups, vector *SOpids, ve else if (opt.ibinaryout==OUTADIOS) adios_err=adios_close(adios_file_handle); #endif +#ifdef USEMPI + MPIFreeWriteComm(); +#endif } //@} @@ -1246,32 +1449,24 @@ void WriteSOCatalog(Options &opt, const Int_t ngroups, vector *SOpids, ve ///\todo need to add in 500crit mass and radial output in here and in \ref allvars.h void WriteProperties(Options &opt, const Int_t ngroups, PropData *pdata){ fstream Fout; - char fname[1000]; + string fname; + ostringstream os; char buf[40]; - long unsigned ngtot=0, noffset=0, ng=ngroups; + long unsigned ngtot=0, noffset=0, ng=ngroups, nwritecommtot=0; //if need to convert from physical back to comoving if (opt.icomoveunit) { opt.p*=opt.h/opt.a; for (Int_t i=1;i<=ngroups;i++) pdata[i].ConverttoComove(opt); } +#ifdef USEMPI + MPIBuildWriteComm(opt); +#endif #ifdef USEHDF - // H5File Fhdf; - // H5std_string datasetname; - // DataSpace dataspace; - // DataSet dataset; - // DataSpace attrspace; - // Attribute attr; - // float attrvalue; - // hsize_t *dims, *chunk_dims; - // - // int rank; - // DataSpace *propdataspace; - // DataSet *propdataset; - // DSetCreatPropList *hdfdatasetproplist; H5OutputFile Fhdf; int itemp=0; + int ival; #endif #if defined(USEHDF)||defined(USEADIOS) DataGroupNames datagroupnames; @@ -1279,15 +1474,31 @@ void WriteProperties(Options &opt, const Int_t ngroups, PropData *pdata){ PropDataHeader head(opt); + os << opt.outname <<".properties"; #ifdef USEMPI - sprintf(fname,"%s.properties.%d",opt.outname,ThisTask); - for (int j=0;j 1) { + for (int j=0;j0) { + for (auto &extrafield:opt.gas_internalprop_names) + { + for (Int_t i=0;i0) { + for (auto &extrafield:opt.star_internalprop_names) + { + for (Int_t i=0;i0) { + for (auto &extrafield:opt.bh_internalprop_names) + { + for (Int_t i=0;i0) { + for (auto 
&extrafield:opt.extra_dm_internalprop_names) + { + for (Int_t i=0;i0){ for (auto j=0;j0) for (auto i=1;i<=ng;i++) nhalos += (pdata[i].hostid == -1); +#ifdef USEMPI + MPIBuildWriteComm(opt); +#endif #ifdef USEHDF - // H5File Fhdf; - // H5std_string datasetname; - // DataSpace dataspace; - // DataSet dataset; - // DataSpace attrspace; - // Attribute attr; - // float attrvalue; - // hsize_t *dims, *chunk_dims; - // - // int rank; - // DataSpace *profiledataspace; - // DataSet *profiledataset; - // DSetCreatPropList *hdfdatasetprofilelist; H5OutputFile Fhdf; vector dims; #endif @@ -2205,17 +2569,34 @@ void WriteProfiles(Options &opt, const Int_t ngroups, PropData *pdata){ #endif ProfileDataHeader head(opt); + os << opt.outname << ".profiles"; #ifdef USEMPI - sprintf(fname,"%s.profiles.%d",opt.outname,ThisTask); - for (int j=0;j 1) { + for (int j=0;j 1) { + for (int j=0;j ompsubsearchnum) { -#pragma omp for schedule(dynamic) nowait +#pragma omp for schedule(static) #endif for (i=0;iTPHYS); + tree=new KDTree(ptemp,ngrid,1,tree->TPHYS, tree->KEPAN,100,0,0,0,NULL,NULL,false); #ifndef USEOPENMP nthreads=1; @@ -79,9 +79,10 @@ private(i) #ifdef USEOPENMP #pragma omp parallel default(shared) \ -private(i,w,wsum,sv,vsv,fbg,vp,maxdist,vmweighted,isvweighted,tid,tempdenv) +private(i,w,wsum,sv,vsv,fbg,vp,maxdist,vmweighted,isvweighted,tid,tempdenv) \ +if (nbodies > ompsubsearchnum) { -#pragma omp for schedule(dynamic) nowait +#pragma omp for schedule(static) #endif for (i=0;i=2) cout< rbin; + vector xbin; int nthreads=1,tid; Double_t w; - Int_t ir; -#ifdef USEOPENMP -#pragma omp parallel - { - if (omp_get_thread_num()==0) nthreads=omp_get_num_threads(); - } -#endif + unsigned int ir; //to determine initial number of bins using modified Sturges' formula nbins = ceil(log10((Double_t)nbodies)/log10(2.0)+1)*4; - //rbin=new Double_t[nbins]; - //xbin=new Double_t[nbins]; - omp_rbin=new Double_t*[nthreads]; - for (i=0;i ompperiodnum) #endif - -#ifdef USEOPENMP - if (nbodies>ompperiodnum) 
{ -#pragma omp parallel default(shared) \ -private(i,tid) -{ -#pragma omp for - for (i=1;iPart[i].GetPotential())rmina[tid]=Part[i].GetPotential(); - if (rmaxa[tid]rmina[j])rmin=rmina[j]; - if (rmaxPart[i].GetPotential())rmin=Part[i].GetPotential(); if (rmaxPart[i].GetPotential())rmin=Part[i].GetPotential(); - if (rmaxompperiodnum) { -#pragma omp parallel default(shared) -{ -#pragma omp for private(i,tid,w,ir) reduction(+:mtot) for (i=0;i= nbins) continue; //mass weighted #ifdef NOMASSWEIGHT w=1.0; #else w=Part[i].GetMass(); #endif - if (irmaxprob) maxprob=rbin[iprob=i]; } - meanr=(iprob+0.5)*deltar+rmin; //find first estimate of sdlow by going from rmin to prob and when have some expect fraction of the probability Double_t sl=1.0; + Double_t ampfac = exp(-0.5*sl*sl); for (i=iprob;i>=0;i--) { - if (rbin[i]<=exp(-0.5*sl*sl)*rbin[iprob]) { + if (rbin[i]<=ampfac*rbin[iprob]) { jprob=i; - sdlow=(meanr-(((exp(-0.5*sl*sl)*rbin[iprob]-rbin[jprob])/(rbin[jprob+1]-rbin[jprob])+jprob+0.5)*deltar+rmin))/sl; + sdlow=(meanr-(((ampfac*rbin[iprob]-rbin[jprob])/(rbin[jprob+1]-rbin[jprob])+jprob+0.5)*deltar+rmin))/sl; break; } if (i==0) { - sdlow=(iprob-jprob)*deltar/sl; + jprob=i; + sdlow=iprob*deltar/sl; } } for (i=iprob;i=2) printf("Using meanr=%e sdlow=%e sdhigh=%e\n",meanr,sdlow,sdhigh); @@ -295,41 +220,25 @@ private(i,tid) //now rebin around most probable over sl in either direction to be used to estimate dispersion //and gradually increase region till region encompases over 50% of the mass or particle numbers GMatrix W(nbins,nbins); - rbin=new Double_t[nbins]; + rbin.resize(nbins); do { mtotpeak=0; rmin=(meanr-sl*sdlow); rmax=(meanr+sl*sdhigh); int npeak=0; for (i=0;i=rmin&&Part[i].GetPotential()=rmin&&Part[i].GetPotential()=rmin && Part[i].GetPotential()maxprob)maxprob=rbin[iprob=i]; - //if (rbin[i]0.)minprob=rbin[i]; if (W(i,i)0.) 
minprob=W(i,i); xbin[i]=(i+0.5)*deltar+rmin; } @@ -365,22 +268,18 @@ private(i,tid) } meanr=(iprob+0.5)*deltar+rmin; sl=0.9; + ampfac = exp(-0.5*sl*sl); for (i=iprob;i>=0;i--) { - if (rbin[i]<=exp(-0.5*sl*sl)*rbin[iprob]) { + if (rbin[i]<=ampfac*rbin[iprob]) { jprob=i; - sdlow=(meanr-(((exp(-0.5*sl*sl)*rbin[iprob]-rbin[jprob])/(rbin[jprob+1]-rbin[jprob])+jprob+0.5)*deltar+rmin))/sl; + sdlow=(meanr-(((ampfac*rbin[iprob]-rbin[jprob])/(rbin[jprob+1]-rbin[jprob])+jprob+0.5)*deltar+rmin))/sl; break; } - /* - if (i==0) { - sdlow=iprob*deltar/sl; - } - */ } for (i=iprob;i params(nparams); //five sets of fix parameter choices so that get optimal fit given bad data. - int **fixp,nfix,itemp; + vector> fixp; + int nfix,itemp; int nfits=8; - fixp= new int*[nfits]; - for (int i=0;i<8;i++) fixp[i]=new int[nparams]; + fixp.resize(nfits); + for (int i=0;i<8;i++) fixp[i].resize(nparams); struct math_function fitfunc,*difffuncs; difffuncs=new math_function[nparams]; nparams=4; fitfunc.function=SkewGauss; + fitfunc.gsl_function=SkewGaussGSL; + fitfunc.gsl_function_df=DiffSkewGaussGSL; difffuncs[0].function=DiffSkewGaussAmp; difffuncs[1].function=DiffSkewGaussMean; difffuncs[2].function=DiffSkewGaussVar; @@ -436,7 +335,7 @@ private(i,tid) nfits=8; oldchi2=MAXVALUE; for (int i=0;i2) printf("chi2/dof=%e/%d, A=%e mu=%e var=%e s=%e\n",chi2,nbins-(nparams-nfix)-1,params[0],params[1],sqrt(params[2]),sqrt(params[3])); } - else if (oldchi20) break; else { if (opt.iverbose>2)printf("fit failed, using previous values\n"); params[0]=maxprob;params[1]=meanr;params[2]=sdhigh*sdhigh;params[3]=(sdlow*sdlow)/(sdhigh*sdhigh); } } - + delete[] difffuncs; if (opt.iverbose>=2) printf("Using meanr=%e sdlow=%e sdhigh=%e\n",meanr,sdlow,sdhigh); - //free memory - delete[] xbin; - delete[] rbin; - //rbin=NULL; - for (int j=0;j ompsubsearchnum) #endif - for (i=0;i0) tempell*=temp2; else tempell*=temp3; Part[i].SetPotential(tempell); nsubset+=(Part[i].GetPotential()>opt.ellthreshold); } -#ifdef USEOPENMP -} 
-#endif if (opt.iverbose>=2) cout<=1 && opt.partsearchtype==PSTALL))); + nimport=MPIBuildParticleNNImportList(opt, nbodies, tree, Part, (!(opt.iBaryonSearch>=1 && opt.partsearchtype==PSTALL))); int nimportsearch=opt.Nsearch; if (nimportsearch>nimport) nimportsearch=nimport; if (opt.iverbose) cout<=1 && opt.partsearchtype==PSTALL))); + nimport=MPIBuildParticleNNImportList(opt, nbodies, tree, Part,(!(opt.iBaryonSearch>=1 && opt.partsearchtype==PSTALL))); int nimportsearch=opt.Nsearch; if (nimportsearch>nimport) nimportsearch=nimport; if (opt.iverbose) cout<0) PartDataIn = new Particle[NExport]; if (NImport>0) PartDataGet = new Particle[NImport]; //run search on exported particles and determine which local particles need to be exported back (or imported) - nimport=MPIBuildParticleNNImportList(nbodies, tree, Part, (!(opt.iBaryonSearch>=1 && opt.partsearchtype==PSTALL))); + nimport=MPIBuildParticleNNImportList(opt, nbodies, tree, Part, (!(opt.iBaryonSearch>=1 && opt.partsearchtype==PSTALL))); int nimportsearch=opt.Nsearch; if (nimportsearch>nimport) nimportsearch=nimport; if (opt.iverbose) cout<0) { - pglist=SortAccordingtoBindingEnergy(opt,Nlocal,Part.data(),nhalos,pfof,numingroup,pdata);//alters pglist so most bound particles first - WriteProperties(opt,nhalos,pdata); - WriteGroupCatalog(opt, nhalos, numingroup, pglist, Part,ngroup-nhalos); - //if baryons have been searched output related gas baryon catalogue - if (opt.iBaryonSearch>0 || opt.partsearchtype==PSTALL){ - WriteGroupPartType(opt, nhalos, numingroup, pglist, Part); + if (nhalos>0) { + pglist=SortAccordingtoBindingEnergy(opt,Nlocal,Part.data(),nhalos,pfof,numingroup,pdata);//alters pglist so most bound particles first + WriteProperties(opt,nhalos,pdata); + WriteGroupCatalog(opt, nhalos, numingroup, pglist, Part,ngroup-nhalos); + //if baryons have been searched output related gas baryon catalogue + if (opt.partsearchtype==PSTALL){ + WriteGroupPartType(opt, nhalos, numingroup, pglist, Part); + } + 
WriteHierarchy(opt,ngroup,nhierarchy,psldata->nsinlevel,nsub,parentgid,stype); + for (Int_t i=1;i<=nhalos;i++) delete[] pglist[i]; + delete[] pglist; } - WriteHierarchy(opt,ngroup,nhierarchy,psldata->nsinlevel,nsub,parentgid,stype); - for (Int_t i=1;i<=nhalos;i++) delete[] pglist[i]; - delete[] pglist; - } - else { - WriteGroupCatalog(opt,nhalos,numingroup,NULL,Part); - WriteHierarchy(opt,nhalos,nhierarchy,psldata->nsinlevel,nsub,parentgid,stype); - if (opt.iBaryonSearch>0 || opt.partsearchtype==PSTALL){ - WriteGroupPartType(opt, nhalos, numingroup, NULL, Part); + else { +#ifdef USEMPI + //if calculating inclusive masses at end, must call SortAccordingtoBindingEnergy if + //MPI as domain, despite having no groups might need to exchange particles + if (opt.iInclusiveHalo==3) SortAccordingtoBindingEnergy(opt,Nlocal,Part.data(),nhalos,pfof,numingroup,pdata); +#endif + WriteGroupCatalog(opt,nhalos,numingroup,NULL,Part); + WriteHierarchy(opt,nhalos,nhierarchy,psldata->nsinlevel,nsub,parentgid,stype); + if (opt.partsearchtype==PSTALL){ + WriteGroupPartType(opt, nhalos, numingroup, NULL, Part); + } } } - } Int_t indexii=0; ng=ngroup; //if separate files, alter offsets @@ -499,23 +506,28 @@ int main(int argc,char **argv) } if (ng>0) { - pglist=SortAccordingtoBindingEnergy(opt,nbodies,Part.data(),ng,pfof,&numingroup[indexii],&pdata[indexii],indexii);//alters pglist so most bound particles first + pglist=SortAccordingtoBindingEnergy(opt,Nlocal,Part.data(),ng,pfof,&numingroup[indexii],&pdata[indexii],indexii);//alters pglist so most bound particles first WriteProperties(opt,ng,&pdata[indexii]); WriteGroupCatalog(opt, ng, &numingroup[indexii], pglist, Part); if (opt.iseparatefiles) WriteHierarchy(opt,ngroup,nhierarchy,psldata->nsinlevel,nsub,parentgid,stype,1); else WriteHierarchy(opt,ngroup,nhierarchy,psldata->nsinlevel,nsub,parentgid,stype,-1); - if (opt.iBaryonSearch>0 || opt.partsearchtype==PSTALL){ + if (opt.partsearchtype==PSTALL){ WriteGroupPartType(opt, ng, 
&numingroup[indexii], pglist, Part); } for (Int_t i=1;i<=ng;i++) delete[] pglist[i]; delete[] pglist; } else { +#ifdef USEMPI + //if calculating inclusive masses at end, must call SortAccordingtoBindingEnergy if + //MPI as domain, despite having no groups might need to exchange particles + if (opt.iInclusiveHalo==3) SortAccordingtoBindingEnergy(opt,Nlocal,Part.data(),ng,pfof,numingroup,pdata); +#endif WriteProperties(opt,ng,NULL); WriteGroupCatalog(opt,ng,&numingroup[indexii],NULL,Part); if (opt.iseparatefiles) WriteHierarchy(opt,ngroup,nhierarchy,psldata->nsinlevel,nsub,parentgid,stype,1); else WriteHierarchy(opt,ngroup,nhierarchy,psldata->nsinlevel,nsub,parentgid,stype,-1); - if (opt.iBaryonSearch>0 || opt.partsearchtype==PSTALL){ + if (opt.partsearchtype==PSTALL){ WriteGroupPartType(opt, ng, &numingroup[indexii], NULL, Part); } } @@ -523,13 +535,20 @@ int main(int argc,char **argv) if (opt.iprofilecalc) WriteProfiles(opt, ngroup, pdata); #ifdef EXTENDEDHALOOUTPUT - if (opt.iExtendedOutput) WriteExtendedOutput (opt, ngroup, nbodies, pdata, Part, pfof); + if (opt.iExtendedOutput) WriteExtendedOutput (opt, ngroup, Nlocal, pdata, Part, pfof); #endif + delete[] pfof; delete[] numingroup; delete[] pdata; delete psldata; + + delete[] nsub; + delete[] parentgid; + delete[] uparentgid; + delete[] stype; + tottime=MyGetTime()-tottime; cout<<"TIME::"<(Fhdf, hdf_header_info.names[hdf_header_info.IBoxSize]); + if (opt.ihdfnameconvention == HDFSWIFTEAGLENAMES || opt.ihdfnameconvention == HDFOLDSWIFTEAGLENAMES) + { + /* SWIFT can have non-cubic boxes; but for cosmological runs they will always be cubes. + * This makes the BoxSize a vector attribute, with it containing three values, but they + * will always be the same. 
*/ + hdf_header_info.BoxSize = read_attribute_v(Fhdf, hdf_header_info.names[hdf_header_info.IBoxSize])[0]; + } + else + { + hdf_header_info.BoxSize = read_attribute(Fhdf, hdf_header_info.names[hdf_header_info.IBoxSize]); + } } /* catch(GroupIException error) @@ -148,7 +158,7 @@ void MPINumInDomainHDF(Options &opt) hdf_parts[5]=&hdf_bh_info; //to store the groups, data sets and their associated data spaces - HDF_Header *hdf_header_info; + vector hdf_header_info; vectorFhdf; vectorheaderattribs; vectorheaderdataspace; @@ -185,7 +195,7 @@ void MPINumInDomainHDF(Options &opt) int usetypes[NHDFTYPE]; if (ireadtask[ThisTask]>=0) { HDFSetUsedParticleTypes(opt,nusetypes,nbusetypes,usetypes); - hdf_header_info=new HDF_Header[opt.num_files]; + hdf_header_info.resize(opt.num_files); Fhdf.resize(opt.num_files); headerdataspace.resize(opt.num_files); headerattribs.resize(opt.num_files); @@ -202,7 +212,7 @@ void MPINumInDomainHDF(Options &opt) // Fhdf[i].openFile(buf, H5F_ACC_RDONLY); Fhdf[i]=H5Fopen(buf, H5F_ACC_RDONLY, H5P_DEFAULT); //get number in file - if (opt.ihdfnameconvention==HDFSWIFTEAGLENAMES) { + if (opt.ihdfnameconvention==HDFSWIFTEAGLENAMES || opt.ihdfnameconvention==HDFOLDSWIFTEAGLENAMES) { vlongbuff = read_attribute_v(Fhdf[i], hdf_header_info[i].names[hdf_header_info[i].INuminFile]); for (k=0;k &indices, vector &propbuff, bool resetbuff=false) +{ +#ifdef GASON + Int_t num = 0, numextrafields = 0, index, offset = 0; + string field; + indices.resize(0); + propbuff.resize(0); + + numextrafields = opt.gas_internalprop_names.size() + opt.gas_chem_names.size() + opt.gas_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i &indices, vector &propbuff, bool resetbuff=false) +{ +#ifdef STARON + Int_t num = 0, numextrafields = 0, index, offset = 0; + string field; + indices.resize(0); + propbuff.resize(0); + + numextrafields = opt.star_internalprop_names.size() + opt.star_chem_names.size() + opt.star_chemproduction_names.size(); + if 
(numextrafields == 0) return; + for (auto i=0;i &indices, vector &propbuff, bool resetbuff=false) +{ +#ifdef BHON + Int_t num = 0, numextrafields = 0, index, offset = 0; + string field; + indices.resize(0); + propbuff.resize(0); + + numextrafields = opt.bh_internalprop_names.size() + opt.bh_chem_names.size() + opt.bh_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i &indices, vector &propbuff, bool iforexport) +{ +#ifdef GASON + Int_t num = 0, numextrafields = 0, index, offset = 0; + string field; + vector npertask(NProcs); + indices.resize(0); + propbuff.resize(0); + + numextrafields = opt.gas_internalprop_names.size() + opt.gas_chem_names.size() + opt.gas_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto itask=0;itask indices_gas, indices_star, indices_bh, indices_extradm; + Int_t num = 0, numextrafields = 0, index, offset = 0; + vector propbuff_gas, propbuff_star, propbuff_bh, propbuff_extradm; + string field; + +#ifdef GASON + numextrafields = opt.gas_internalprop_names.size() + opt.gas_chem_names.size() + opt.gas_chemproduction_names.size(); + if (numextrafields > 0) + { + for (auto i=0;i0) { + propbuff_gas.resize(numextrafields*num); + for (auto i=0;i 0) + { + for (auto i=0;i 0) + { + propbuff_star.resize(numextrafields*num); + for (auto i=0;i 0) + { + for (auto i=0;i 0) + { + propbuff_bh.resize(numextrafields*num); + for (auto i=0;i 0) + { + for (auto i=0;i0) { + propbuff_extradm.resize(numextrafields*num); + for (auto i=0;i 0) + { + num = indices_gas.size(); + MPI_Send(&num,sizeof(Int_t),MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + if (num > 0) + { + MPI_Send(indices_gas.data(),sizeof(Int_t)*num,MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + MPI_Send(propbuff_gas.data(),sizeof(float)*num*numextrafields,MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + } + } +#endif +#ifdef STARON + numextrafields = opt.star_internalprop_names.size() + opt.star_chem_names.size() + opt.star_chemproduction_names.size(); + if 
(numextrafields > 0) + { + num = indices_star.size(); + MPI_Send(&num,sizeof(Int_t),MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + if (num > 0) + { + MPI_Send(indices_star.data(),sizeof(Int_t)*num,MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + MPI_Send(propbuff_star.data(),sizeof(float)*num*numextrafields,MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + } + } +#endif +#ifdef BHON + numextrafields = opt.bh_internalprop_names.size() + opt.bh_chem_names.size() + opt.bh_chemproduction_names.size(); + if (numextrafields > 0) + { + num = indices_bh.size(); + MPI_Send(&num,sizeof(Int_t),MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + if (num > 0) + { + MPI_Send(indices_bh.data(),sizeof(Int_t)*num,MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + MPI_Send(propbuff_bh.data(),sizeof(float)*num*numextrafields,MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + } + } +#endif +#ifdef EXTRADMON + numextrafields = opt.extra_dm_internalprop_names.size(); + if (numextrafields > 0) + { + num = indices_gas.size(); + MPI_Send(&num,sizeof(Int_t),MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + if (num > 0) + { + MPI_Send(indices_extradm.data(),sizeof(Int_t)*num,MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + MPI_Send(propbuff_extradm.data(),sizeof(float)*num*numextrafields,MPI_BYTE,taskID,taskID,MPI_COMM_WORLD); + } + } +#endif +} + +void MPISendHydroInfoFromReadThreads(Options &opt, Int_t nlocalbuff, Particle *Part, int taskID) +{ +#ifdef GASON + MPI_Status status; + vector indices; + Int_t num = 0, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + + numextrafields = opt.gas_internalprop_names.size() + opt.gas_chem_names.size() + opt.gas_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i indices; + Int_t num = 0, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + + numextrafields = opt.star_internalprop_names.size() + opt.star_chem_names.size() +opt.star_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i indices; + Int_t num = 
0, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + + numextrafields = opt.bh_internalprop_names.size() + opt.bh_chem_names.size() + opt.bh_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i indices; + Int_t num = 0, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + + numextrafields = opt.extra_dm_internalprop_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i indices; + Int_t num = 0, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + + numextrafields = opt.gas_internalprop_names.size() + opt.gas_chem_names.size() + opt.gas_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i indices; + Int_t num = 0, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + + numextrafields = opt.star_internalprop_names.size() + opt.star_chem_names.size() + opt.star_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i indices; + Int_t num = 0, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + + numextrafields = opt.bh_internalprop_names.size() + opt.bh_chem_names.size() + opt.bh_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i indices; + Int_t num = 0, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + + numextrafields = opt.extra_dm_internalprop_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i*&Preadbuf){ +void MPIAddParticletoAppropriateBuffer(Options &opt, const int &ibuf, Int_t ibufindex, int *&ireadtask, const Int_t &BufSize, Int_t *&Nbuf, Particle *&Pbuf, Int_t &numpart, Particle *Part, Int_t *&Nreadbuf, vector*&Preadbuf){ if (ibuf==ThisTask) { Nbuf[ibuf]--; Part[numpart++]=Pbuf[ibufindex]; @@ -238,6 +924,10 @@ void MPIAddParticletoAppropriateBuffer(const int &ibuf, Int_t ibufindex, int *&i if(Nbuf[ibuf]==BufSize&&ireadtask[ibuf]<0) { MPI_Send(&Nbuf[ibuf], 1, MPI_Int_t, ibuf, 
ibuf+NProcs, MPI_COMM_WORLD); MPI_Send(&Pbuf[ibuf*BufSize],sizeof(Particle)*Nbuf[ibuf],MPI_BYTE,ibuf,ibuf,MPI_COMM_WORLD); + MPISendHydroInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); + MPISendStarInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); + MPISendBHInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); + MPISendExtraDMInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); Nbuf[ibuf]=0; } else if (ireadtask[ibuf]>=0) { @@ -533,82 +1223,1131 @@ void MPIDistributeReadTasks(Options&opt, int *&ireadtask, int*&readtaskID){ for (int i=0;i 1) { + ThisWriteComm = (int)(floor(ThisTask/(float)opt.mpinprocswritesize)); + NWriteComms = (int)(ceil(NProcs/(float)opt.mpinprocswritesize)); + MPI_Comm_split(MPI_COMM_WORLD, ThisWriteComm, ThisTask, &mpi_comm_write); + MPI_Comm_rank(mpi_comm_write, &ThisWriteTask); + MPI_Comm_size(mpi_comm_write, &NProcsWrite); + } +#endif +} +void MPIFreeWriteComm(){ + if (mpi_comm_write != MPI_COMM_WORLD) MPI_Comm_free(&mpi_comm_write); + mpi_comm_write = MPI_COMM_WORLD; + ThisWriteTask = ThisTask; + ThisWriteComm = ThisTask; + NProcsWrite = NProcs; + NWriteComms = NProcs; +} +//@} + +/// \name Routines involved in exporting particles +//@{ + +void MPIReceiveHydroInfoFromReadThreads(Options &opt, Int_t nlocalbuff, Particle *Part, int readtaskID) +{ +#ifdef GASON + MPI_Status status; + vector indices; + Int_t num, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + HydroProperties x; + numextrafields = opt.gas_internalprop_names.size() + opt.gas_chem_names.size() + opt.gas_chemproduction_names.size(); + if (numextrafields == 0) return; + MPI_Recv(&num, 1, MPI_Int_t, readtaskID, ThisTask, MPI_COMM_WORLD, &status); + if (num == 0) return; + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built 
function. + for (auto i=0;i indices; + Int_t num, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + StarProperties x; + numextrafields = opt.star_internalprop_names.size() + opt.star_chem_names.size() + opt.star_chemproduction_names.size(); + if (numextrafields == 0) return; + MPI_Recv(&num, 1, MPI_Int_t, readtaskID, ThisTask, MPI_COMM_WORLD, &status); + if (num == 0) return; + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. + for (auto i=0;i indices; + Int_t num, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + BHProperties x; + numextrafields = opt.bh_internalprop_names.size() +opt.bh_chem_names.size() + opt.bh_chemproduction_names.size(); + if (numextrafields == 0) return; + MPI_Recv(&num, 1, MPI_Int_t, readtaskID, ThisTask, MPI_COMM_WORLD, &status); + if (num == 0) return; + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. + for (auto i=0;i indices; + Int_t num, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + ExtraDMProperties x; + numextrafields = opt.extra_dm_internalprop_names.size(); + if (numextrafields == 0) return; + MPI_Recv(&num, 1, MPI_Int_t, readtaskID, ThisTask, MPI_COMM_WORLD, &status); + if (num == 0) return; + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. 
+ for (auto i=0;i indices; + Int_t num, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + HydroProperties x; + numextrafields = opt.gas_internalprop_names.size() + opt.gas_chem_names.size() + opt.gas_chemproduction_names.size(); + if (numextrafields == 0) return; + MPI_Recv(&num, 1, MPI_Int_t, sourceTaskID, tag, MPI_COMM_WORLD,&status); + if (num == 0) return; + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. + for (auto i=0;i indices; + Int_t num, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + StarProperties x; + numextrafields = opt.star_internalprop_names.size() + opt.star_chem_names.size() + opt.star_chemproduction_names.size(); + if (numextrafields == 0) return; + MPI_Recv(&num, 1, MPI_Int_t, sourceTaskID, tag, MPI_COMM_WORLD,&status); + if (num == 0) return; + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. + for (auto i=0;i indices; + Int_t num, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + BHProperties x; + numextrafields = opt.bh_internalprop_names.size() + opt.bh_chem_names.size() + opt.bh_chemproduction_names.size(); + if (numextrafields == 0) return; + MPI_Recv(&num, 1, MPI_Int_t, sourceTaskID, tag, MPI_COMM_WORLD,&status); + if (num == 0) return; + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. 
+ for (auto i=0;i indices; + Int_t num, numextrafields = 0, index, offset = 0; + vector propbuff; + string field; + ExtraDMProperties x; + numextrafields = opt.extra_dm_internalprop_names.size(); + if (numextrafields == 0) return; + MPI_Recv(&num, 1, MPI_Int_t, sourceTaskID, tag, MPI_COMM_WORLD,&status); + if (num == 0) return; + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. + for (auto i=0;i0) { + MPI_Recv(&Part[Nlocal],sizeof(Particle)*Nlocalthreadbuf[i],MPI_BYTE,readtaskID[i],ThisTask, MPI_COMM_WORLD,&status); + MPIReceiveHydroInfoFromReadThreads(opt, Nlocalthreadbuf[i], &Part[Nlocal], readtaskID[i]); + MPIReceiveStarInfoFromReadThreads(opt, Nlocalthreadbuf[i], &Part[Nlocal], readtaskID[i]); + MPIReceiveBHInfoFromReadThreads(opt, Nlocalthreadbuf[i], &Part[Nlocal], readtaskID[i]); + MPIReceiveExtraDMInfoFromReadThreads(opt, Nlocalthreadbuf[i], &Part[Nlocal], readtaskID[i]); + Nlocal+=Nlocalthreadbuf[i]; + Nlocaltotalbuf+=Nlocalthreadbuf[i]; + mpi_irecvflag[i]=0; + MPI_Irecv(&Nlocalthreadbuf[i], 1, MPI_Int_t, readtaskID[i], ThisTask+NProcs, MPI_COMM_WORLD, &mpi_request[i]); + } + else { + irecv[i]=0; + } + } + } + } + for (i=0;i0); + //now that data is local, must adjust data iff a separate baryon search is required. + if (opt.partsearchtype==PSTDARK && opt.iBaryonSearch) { + for (i=0;i indicessend, indicesrecv; + Int_t numsend, numrecv, numextrafields = 0, index, offset = 0; + vector propsendbuff, proprecvbuff; + string field; + HydroProperties x; + + numextrafields = opt.gas_internalprop_names.size() + opt.gas_chem_names.size() + opt.gas_chemproduction_names.size(); + if (numextrafields == 0) return; + + //first determine what needs to be sent. 
+ for (auto i=0;i0) { + propsendbuff.resize(numextrafields*numsend); + for (auto i=0;i0) { + indicesrecv.resize(numrecv); + proprecvbuff.resize(numrecv*numextrafields); + } + //send the information. If size is zero, resize vector so .data() points to valid address + if (numsend == 0) {indicessend.resize(1);propsendbuff.resize(1);} + if (numrecv == 0) {indicesrecv.resize(1);proprecvbuff.resize(1);} + MPI_Sendrecv(indicessend.data(),numsend, MPI_Int_t, recvTask, + tag*2, indicesrecv.data(),numrecv, MPI_Int_t, recvTask, tag*2, mpi_comm, &status); + MPI_Sendrecv(propsendbuff.data(),numsend, MPI_FLOAT, recvTask, + tag*3, proprecvbuff.data(),numrecv, MPI_FLOAT, recvTask, tag*3, mpi_comm, &status); + + if (numrecv == 0) return; + //and then update the local information + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. + for (auto i=0;i indicessend, indicesrecv; + Int_t numsend, numrecv, numextrafields = 0, index, offset = 0; + vector propsendbuff, proprecvbuff; + string field; + StarProperties x; + + numextrafields = opt.star_internalprop_names.size() + opt.star_chem_names.size() + opt.star_chemproduction_names.size(); + if (numextrafields == 0) return; + + //first determine what needs to be sent. + for (auto i=0;i0) { + propsendbuff.resize(numextrafields*numsend); + for (auto i=0;i0) { + indicesrecv.resize(numrecv); + proprecvbuff.resize(numrecv*numextrafields); + } + //send the information. 
If size is zero, resize vector so .data() points to valid address + if (numsend == 0) {indicessend.resize(1);propsendbuff.resize(1);} + if (numrecv == 0) {indicesrecv.resize(1);proprecvbuff.resize(1);} + MPI_Sendrecv(indicessend.data(),numsend, MPI_Int_t, recvTask, + tag*2, indicesrecv.data(),numrecv, MPI_Int_t, recvTask, tag*2, mpi_comm, &status); + MPI_Sendrecv(propsendbuff.data(),numsend, MPI_FLOAT, recvTask, + tag*3, proprecvbuff.data(),numrecv, MPI_FLOAT, recvTask, tag*3, mpi_comm, &status); + + if (numrecv == 0) return; + //and then update the local information + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. + for (auto i=0;i indicessend, indicesrecv; + Int_t numsend, numrecv, numextrafields = 0, index, offset = 0; + vector propsendbuff, proprecvbuff; + string field; + BHProperties x; + + numextrafields = opt.bh_internalprop_names.size() + opt.bh_chem_names.size() + opt.bh_chemproduction_names.size(); + if (numextrafields == 0) return; + + //first determine what needs to be sent. + for (auto i=0;i0) { + propsendbuff.resize(numextrafields*numsend); + for (auto i=0;i0) { + indicesrecv.resize(numrecv); + proprecvbuff.resize(numrecv*numextrafields); + } + //send the information. 
If size is zero, resize vector so .data() points to valid address + if (numsend == 0) {indicessend.resize(1);propsendbuff.resize(1);} + if (numrecv == 0) {indicesrecv.resize(1);proprecvbuff.resize(1);} + MPI_Sendrecv(indicessend.data(),numsend, MPI_Int_t, recvTask, + tag*2, indicesrecv.data(),numrecv, MPI_Int_t, recvTask, tag*2, mpi_comm, &status); + MPI_Sendrecv(propsendbuff.data(),numsend, MPI_FLOAT, recvTask, + tag*3, proprecvbuff.data(),numrecv, MPI_FLOAT, recvTask, tag*3, mpi_comm, &status); + + if (numrecv == 0) return; + //and then update the local information + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. + for (auto i=0;i indicessend, indicesrecv; + Int_t numsend, numrecv, numextrafields = 0, index, offset = 0; + vector propsendbuff, proprecvbuff; + string field; + ExtraDMProperties x; + + numextrafields = opt.extra_dm_internalprop_names.size(); + if (numextrafields == 0) return; + + //first determine what needs to be sent. + for (auto i=0;i0) { + propsendbuff.resize(numextrafields*numsend); + for (auto i=0;i0) { + indicesrecv.resize(numrecv); + proprecvbuff.resize(numrecv*numextrafields); + } + //send the information. 
If size is zero, resize vector so .data() points to valid address + if (numsend == 0) {indicessend.resize(1);propsendbuff.resize(1);} + if (numrecv == 0) {indicesrecv.resize(1);proprecvbuff.resize(1);} + MPI_Sendrecv(indicessend.data(),numsend, MPI_Int_t, recvTask, + tag*2, indicesrecv.data(),numrecv, MPI_Int_t, recvTask, tag*2, mpi_comm, &status); + MPI_Sendrecv(propsendbuff.data(),numsend, MPI_FLOAT, recvTask, + tag*3, proprecvbuff.data(),numrecv, MPI_FLOAT, recvTask, tag*3, mpi_comm, &status); + + if (numrecv == 0) return; + //and then update the local information + //explicitly NULLing copied information which was done with a BYTE copy + //The unique pointers will have meaningless info so NULL them (by relasing ownership) + //and then setting the released pointer to null via in built function. + for (auto i=0;i indicessend(0), indicesrecv(0); + Int_t numsend, numrecv, numextrafields = 0, index, offset = 0; + vector propsendbuff(0), proprecvbuff(0); + string field; + HydroProperties x; + + numextrafields = opt.gas_internalprop_names.size() + opt.gas_chem_names.size() + opt.gas_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i0) { + indicesrecv.resize(numrecv); + proprecvbuff.resize(numrecv*numextrafields); + } + if (numsend >0) { + propsendbuff.resize(numextrafields*numsend); + for (auto i=0;i indicessend(0), indicesrecv(0); + Int_t numsend, numrecv, numextrafields = 0, index, offset = 0; + vector propsendbuff(0), proprecvbuff(0); + string field; + StarProperties x; + + numextrafields = opt.star_internalprop_names.size() + opt.star_chem_names.size() + opt.star_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i0) { + indicesrecv.resize(numrecv); + proprecvbuff.resize(numrecv*numextrafields); + } + if (numsend >0) { + propsendbuff.resize(numextrafields*numsend); + for (auto i=0;i indicessend(0), indicesrecv(0); + Int_t numsend, numrecv, numextrafields = 0, index, offset = 0; + vector propsendbuff(0), 
proprecvbuff(0); + string field; + BHProperties x; + + numextrafields = opt.bh_internalprop_names.size() + opt.bh_chem_names.size() + opt.bh_chemproduction_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i0) { + indicesrecv.resize(numrecv); + proprecvbuff.resize(numrecv*numextrafields); + } + if (numsend >0) { + propsendbuff.resize(numextrafields*numsend); + for (auto i=0;i indicessend(0), indicesrecv(0); + Int_t numsend, numrecv, numextrafields = 0, index, offset = 0; + vector propsendbuff(0), proprecvbuff(0); + string field; + ExtraDMProperties x; + + numextrafields = opt.extra_dm_internalprop_names.size(); + if (numextrafields == 0) return; + for (auto i=0;i0) { + indicesrecv.resize(numrecv); + proprecvbuff.resize(numrecv*numextrafields); } - Nlocaltotalbuf=0; - //non-blocking receives for the number of particles one expects to receive - do { - irecvflag=0; - for (i=0;i0) { - MPI_Recv(&Part[Nlocal],sizeof(Particle)*Nlocalthreadbuf[i],MPI_BYTE,readtaskID[i],ThisTask, MPI_COMM_WORLD,&status); - Nlocal+=Nlocalthreadbuf[i]; - Nlocaltotalbuf+=Nlocalthreadbuf[i]; - mpi_irecvflag[i]=0; - MPI_Irecv(&Nlocalthreadbuf[i], 1, MPI_Int_t, readtaskID[i], ThisTask+NProcs, MPI_COMM_WORLD, &mpi_request[i]); - } - else { - irecv[i]=0; - } - } + if (numsend >0) { + propsendbuff.resize(numextrafields*numsend); + for (auto i=0;i0); - //now that data is local, must adjust data iff a separate baryon search is required. 
- if (opt.partsearchtype==PSTDARK && opt.iBaryonSearch) { - for (i=0;i=0) { //split the communication into small buffers int icycle=0,ibuf; @@ -657,6 +2396,10 @@ void MPISendParticlesBetweenReadThreads(Options &opt, Particle *&Pbuf, Particle MPI_Sendrecv(&Pbuf[nreadoffset[ireadtask[recvTask]]+sendoffset],sizeof(Particle)*cursendchunksize, MPI_BYTE, recvTask, TAG_IO_A+isendrecv, &Part[Nlocal],sizeof(Particle)*currecvchunksize, MPI_BYTE, recvTask, TAG_IO_A+isendrecv, MPI_COMM_WORLD, &status); + MPISendReceiveHydroInfoBetweenThreads(opt, cursendchunksize, &Pbuf[nreadoffset[ireadtask[recvTask]]+sendoffset], currecvchunksize, &Part[Nlocal], recvTask, TAG_IO_A+isendrecv, mpi_comm_read); + MPISendReceiveStarInfoBetweenThreads(opt, cursendchunksize, &Pbuf[nreadoffset[ireadtask[recvTask]]+sendoffset], currecvchunksize, &Part[Nlocal], recvTask, TAG_IO_A+isendrecv, mpi_comm_read); + MPISendReceiveBHInfoBetweenThreads(opt, cursendchunksize, &Pbuf[nreadoffset[ireadtask[recvTask]]+sendoffset], currecvchunksize, &Part[Nlocal], recvTask, TAG_IO_A+isendrecv, mpi_comm_read); + MPISendReceiveExtraDMInfoBetweenThreads(opt, cursendchunksize, &Pbuf[nreadoffset[ireadtask[recvTask]]+sendoffset], currecvchunksize, &Part[Nlocal], recvTask, TAG_IO_A+isendrecv, mpi_comm_read); Nlocal+=currecvchunksize; sendoffset+=cursendchunksize; recvoffset+=currecvchunksize; @@ -685,6 +2428,10 @@ void MPISendParticlesBetweenReadThreads(Options &opt, Particle *&Pbuf, Particle MPI_Sendrecv(&Pbuf[nreadoffset[ireadtask[recvTask]]+mpi_nsend[ThisTask * NProcs + recvTask]+sendoffset],sizeof(Particle)*cursendchunksize, MPI_BYTE, recvTask, TAG_IO_B+isendrecv, &Pbaryons[Nlocalbaryon[0]],sizeof(Particle)*currecvchunksize, MPI_BYTE, recvTask, TAG_IO_B+isendrecv, MPI_COMM_WORLD, &status); + MPISendReceiveHydroInfoBetweenThreads(opt, cursendchunksize, &Pbuf[nreadoffset[ireadtask[recvTask]]+sendoffset], currecvchunksize, &Pbaryons[Nlocalbaryon[0]], recvTask, TAG_IO_B+isendrecv, mpi_comm_read); + 
MPISendReceiveStarInfoBetweenThreads(opt, cursendchunksize, &Pbuf[nreadoffset[ireadtask[recvTask]]+sendoffset], currecvchunksize, &Pbaryons[Nlocalbaryon[0]], recvTask, TAG_IO_B+isendrecv, mpi_comm_read); + MPISendReceiveBHInfoBetweenThreads(opt, cursendchunksize, &Pbuf[nreadoffset[ireadtask[recvTask]]+sendoffset], currecvchunksize, &Pbaryons[Nlocalbaryon[0]], recvTask, TAG_IO_B+isendrecv, mpi_comm_read); + MPISendReceiveExtraDMInfoBetweenThreads(opt, cursendchunksize, &Pbuf[nreadoffset[ireadtask[recvTask]]+sendoffset], currecvchunksize, &Pbaryons[Nlocalbaryon[0]], recvTask, TAG_IO_B+isendrecv, mpi_comm_read); Nlocalbaryon[0]+=currecvchunksize; sendoffset+=cursendchunksize; recvoffset+=currecvchunksize; @@ -742,6 +2489,10 @@ void MPISendParticlesBetweenReadThreads(Options &opt, vector *&Preadbu MPI_Sendrecv(&Preadbuf[recvTask][sendoffset],sizeof(Particle)*cursendchunksize, MPI_BYTE, recvTask, TAG_IO_A+isendrecv, &(Part[Nlocal]),sizeof(Particle)*currecvchunksize, MPI_BYTE, recvTask, TAG_IO_A+isendrecv, mpi_comm_read, &status); + MPISendReceiveHydroInfoBetweenThreads(opt, cursendchunksize, &Preadbuf[recvTask][sendoffset], currecvchunksize, &Part[Nlocal], recvTask, TAG_IO_A+isendrecv, mpi_comm_read); + MPISendReceiveStarInfoBetweenThreads(opt, cursendchunksize, &Preadbuf[recvTask][sendoffset], currecvchunksize, &Part[Nlocal], recvTask, TAG_IO_A+isendrecv, mpi_comm_read); + MPISendReceiveBHInfoBetweenThreads(opt, cursendchunksize, &Preadbuf[recvTask][sendoffset], currecvchunksize, &Part[Nlocal], recvTask, TAG_IO_A+isendrecv, mpi_comm_read); + MPISendReceiveExtraDMInfoBetweenThreads(opt, cursendchunksize, &Preadbuf[recvTask][sendoffset], currecvchunksize, &Part[Nlocal], recvTask, TAG_IO_A+isendrecv, mpi_comm_read); Nlocal+=currecvchunksize; sendoffset+=cursendchunksize; recvoffset+=currecvchunksize; @@ -770,6 +2521,10 @@ void MPISendParticlesBetweenReadThreads(Options &opt, vector *&Preadbu MPI_Sendrecv(&Preadbuf[recvTask][mpi_nsend_readthread[sendTask * opt.nsnapread + 
recvTask]+sendoffset],sizeof(Particle)*cursendchunksize, MPI_BYTE, recvTask, TAG_IO_B+isendrecv, &Pbaryons[Nlocalbaryon[0]],sizeof(Particle)*currecvchunksize, MPI_BYTE, recvTask, TAG_IO_B+isendrecv, mpi_comm_read, &status); + MPISendReceiveHydroInfoBetweenThreads(opt, cursendchunksize, &Preadbuf[recvTask][mpi_nsend_readthread[sendTask * opt.nsnapread + recvTask]+sendoffset], currecvchunksize, &Pbaryons[Nlocalbaryon[0]], recvTask, TAG_IO_B+isendrecv, mpi_comm_read); + MPISendReceiveStarInfoBetweenThreads(opt, cursendchunksize, &Preadbuf[recvTask][mpi_nsend_readthread[sendTask * opt.nsnapread + recvTask]+sendoffset], currecvchunksize, &Pbaryons[Nlocalbaryon[0]], recvTask, TAG_IO_B+isendrecv, mpi_comm_read); + MPISendReceiveBHInfoBetweenThreads(opt, cursendchunksize, &Preadbuf[recvTask][mpi_nsend_readthread[sendTask * opt.nsnapread + recvTask]+sendoffset], currecvchunksize, &Pbaryons[Nlocalbaryon[0]], recvTask, TAG_IO_B+isendrecv, mpi_comm_read); + MPISendReceiveExtraDMInfoBetweenThreads(opt, cursendchunksize, &Preadbuf[recvTask][mpi_nsend_readthread[sendTask * opt.nsnapread + recvTask]+sendoffset], currecvchunksize, &Pbaryons[Nlocalbaryon[0]], recvTask, TAG_IO_B+isendrecv, mpi_comm_read); Nlocalbaryon[0]+=currecvchunksize; sendoffset+=cursendchunksize; recvoffset+=currecvchunksize; @@ -864,12 +2619,19 @@ void MPIGetExportNumUsingMesh(Options &opt, const Int_t nbodies, Particle *Part, /*! 
Determine which particles have a spatial linking length such that linking overlaps the domain of another processor store the necessary information to send that data and then send that information */ -void MPIBuildParticleExportList(const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_tree_t *&Len, Double_t rdist){ +void MPIBuildParticleExportList(Options &opt, const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_tree_t *&Len, Double_t rdist){ Int_t i, j,nthreads,nexport=0,nimport=0; Int_t nsend_local[NProcs],noffset[NProcs],nbuffer[NProcs]; Double_t xsearch[3][2]; Int_t sendTask,recvTask; + int maxchunksize=LOCAL_MAX_MSGSIZE/NProcs/sizeof(Particle); + int nsend,nrecv,nsendchunks,nrecvchunks,numsendrecv; + int sendoffset,recvoffset; + int isendrecv; + int cursendchunksize,currecvchunksize; MPI_Status status; + MPI_Comm mpi_comm = MPI_COMM_WORLD; + int mpi_tag, mpi_tag_offset; ///\todo would like to add openmp to this code. In particular, loop over nbodies but issue is nexport. ///This would either require making a FoFDataIn[nthreads][NExport] structure so that each omp thread @@ -884,7 +2646,6 @@ void MPIBuildParticleExportList(const Int_t nbodies, Particle *Part, Int_t *&pfo //determine if search region is not outside of this processors domain if(MPIInDomain(xsearch,mpi_domain[j].bnd)) { - //FoFDataIn[nexport].Part=Part[i]; FoFDataIn[nexport].Index = i; FoFDataIn[nexport].Task = j; FoFDataIn[nexport].iGroup = pfof[Part[i].GetID()];//set group id @@ -897,9 +2658,23 @@ void MPIBuildParticleExportList(const Int_t nbodies, Particle *Part, Int_t *&pfo } } if (nexport>0) { - //sort the export data such that all particles to be passed to thread j are together in ascending thread number - qsort(FoFDataIn, nexport, sizeof(struct fofdata_in), fof_export_cmp); - for (i=0;i= maxNumPart || nsend_local[recvTask] >= maxNumPart ) bufferFlag++; - } - } - //if buffer is too large, split sends - if (bufferFlag) - { - MPI_Request rqst; - int numBuffersToSend [NProcs]; - int 
numBuffersToRecv [NProcs]; - int numPartInBuffer = maxNumPart * 0.9; - int maxnbufferslocal=0,maxnbuffers; - for (j = 0; j < NProcs; j++) - { - numBuffersToSend[j] = 0; - numBuffersToRecv[j] = 0; - if (nsend_local[j] > 0) - numBuffersToSend[j] = (nsend_local[j]/numPartInBuffer) + 1; - } - for (int i = 1; i < NProcs; i++) - { - int src = (ThisTask + NProcs - i) % NProcs; - int dst = (ThisTask + i) % NProcs; - MPI_Isend (&numBuffersToSend[dst], 1, MPI_INT, dst, 0, MPI_COMM_WORLD, &rqst); - MPI_Recv (&numBuffersToRecv[src], 1, MPI_INT, src, 0, MPI_COMM_WORLD, &status); - } - MPI_Barrier (MPI_COMM_WORLD); - //find max to be transfer, allows appropriate tagging of messages - for (int i=0;imaxnbufferslocal) maxnbufferslocal=numBuffersToRecv[i]; - for (int i=0;imaxnbufferslocal) maxnbufferslocal=numBuffersToSend[i]; - MPI_Allreduce (&maxnbufferslocal, &maxnbuffers, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); - - for (int i = 1; i < NProcs; i++) + if (nexport>0||nimport>0) { + for(j=0;j 0 && numBuffersToSend[dst] > 0) - { - MPI_Isend (&size, 1, MPI_Int_t, dst, (int)(numBuffersToSend[dst]), MPI_COMM_WORLD, &rqst); - MPI_Isend (&FoFDataIn[noffset[dst] + buffOffset], sizeof(struct fofdata_in)*size, - MPI_BYTE, dst, (int)(TAG_FOF_A*maxnbuffers+numBuffersToSend[dst]), MPI_COMM_WORLD, &rqst); - MPI_Isend (&PartDataIn[noffset[dst] + buffOffset], sizeof(Particle)*size, - MPI_BYTE, dst, (int)(TAG_FOF_B*maxnbuffers*3+numBuffersToSend[dst]), MPI_COMM_WORLD, &rqst); - } - // Receive Buffers - buffOffset = 0; - for (int jj = 0; jj < numBuffersToRecv[src]; jj++) - { - Int_t numInBuffer = 0; - MPI_Recv (&numInBuffer, 1, MPI_Int_t, src, (int)(jj+1), MPI_COMM_WORLD, &status); - MPI_Recv (&FoFDataGet[nbuffer[src] + buffOffset], sizeof(struct fofdata_in)*numInBuffer, - MPI_BYTE, src, (int)(TAG_FOF_A*maxnbuffers+jj+1), MPI_COMM_WORLD, &status); - MPI_Recv (&PartDataGet[nbuffer[src] + buffOffset], sizeof(Particle)*numInBuffer, - MPI_BYTE, src, (int)(TAG_FOF_B*maxnbuffers*3+jj+1), MPI_COMM_WORLD, 
&status); - buffOffset += numInBuffer; - } - } - } - else - { - if (nexport>0||nimport>0) { - for(j=0;j 0 || mpi_nsend[recvTask * NProcs + ThisTask] > 0) { - sendTask = ThisTask; - recvTask = j;//ThisTask^j;//bitwise XOR ensures that recvTask cycles around sendTask - nbuffer[recvTask]=0; - for (int k=0;k 0 || mpi_nsend[recvTask * NProcs + ThisTask] > 0) + + nsend=mpi_nsend[sendTask * NProcs + recvTask]; + nrecv=mpi_nsend[recvTask * NProcs + sendTask]; + //calculate how many send/recvs are needed + nsendchunks=ceil((double)nsend/(double)maxchunksize); + nrecvchunks=ceil((double)nrecv/(double)maxchunksize); + numsendrecv=max(nsendchunks,nrecvchunks); + //initialize the offset in the particle array + sendoffset=0; + recvoffset=0; + isendrecv=1; + do { + //determine amount to be sent + cursendchunksize=min(maxchunksize,nsend-sendoffset); + currecvchunksize=min(maxchunksize,nrecv-recvoffset); + //blocking point-to-point send and receive. Here must determine the appropriate offset point in the local export buffer //for sending data and also the local appropriate offset in the local the receive buffer for information sent from the local receiving buffer //first send FOF data and then particle data - MPI_Sendrecv(&FoFDataIn[noffset[recvTask]], - nsend_local[recvTask] * sizeof(struct fofdata_in), MPI_BYTE, + MPI_Sendrecv(&FoFDataIn[noffset[recvTask]+sendoffset], + cursendchunksize * sizeof(struct fofdata_in), MPI_BYTE, recvTask, TAG_FOF_A, - &FoFDataGet[nbuffer[recvTask]], - mpi_nsend[ThisTask+recvTask * NProcs] * sizeof(struct fofdata_in), + &FoFDataGet[nbuffer[recvTask]+recvoffset], + currecvchunksize * sizeof(struct fofdata_in), MPI_BYTE, recvTask, TAG_FOF_A, MPI_COMM_WORLD, &status); - MPI_Sendrecv(&PartDataIn[noffset[recvTask]], - nsend_local[recvTask] * sizeof(Particle), MPI_BYTE, + MPI_Sendrecv(&PartDataIn[noffset[recvTask]+sendoffset], + cursendchunksize * sizeof(Particle), MPI_BYTE, recvTask, TAG_FOF_B, - &PartDataGet[nbuffer[recvTask]], - 
mpi_nsend[ThisTask+recvTask * NProcs] * sizeof(Particle), + &PartDataGet[nbuffer[recvTask]+recvoffset], + currecvchunksize * sizeof(Particle), MPI_BYTE, recvTask, TAG_FOF_B, MPI_COMM_WORLD, &status); - } + MPISendReceiveHydroInfoBetweenThreads(opt, cursendchunksize, &PartDataIn[noffset[recvTask]+sendoffset], currecvchunksize, &PartDataGet[nbuffer[recvTask]+recvoffset], recvTask, TAG_FOF_B_HYDRO, mpi_comm); + MPISendReceiveStarInfoBetweenThreads(opt, cursendchunksize, &PartDataIn[noffset[recvTask]+sendoffset], currecvchunksize, &PartDataGet[nbuffer[recvTask]+recvoffset], recvTask, TAG_FOF_B_STAR, mpi_comm); + MPISendReceiveBHInfoBetweenThreads(opt, cursendchunksize, &PartDataIn[noffset[recvTask]+sendoffset], currecvchunksize, &PartDataGet[nbuffer[recvTask]+recvoffset], recvTask, TAG_FOF_B_BH, mpi_comm); + MPISendReceiveExtraDMInfoBetweenThreads(opt, cursendchunksize, &PartDataIn[noffset[recvTask]+sendoffset], currecvchunksize, &PartDataGet[nbuffer[recvTask]+recvoffset], recvTask, TAG_FOF_B_EXTRA_DM, mpi_comm); + sendoffset+=cursendchunksize; + recvoffset+=currecvchunksize; + isendrecv++; + } while (isendrecv<=numsendrecv); } } } @@ -1036,6 +2750,7 @@ void MPIBuildParticleExportListUsingMesh(Options &opt, const Int_t nbodies, Part Double_t xsearch[3][2]; Int_t sendTask,recvTask; MPI_Status status; + MPI_Comm mpi_comm = MPI_COMM_WORLD; vectorsent_mpi_domain(NProcs); ///\todo would like to add openmp to this code. In particular, loop over nbodies but issue is nexport. 
@@ -1054,7 +2769,6 @@ void MPIBuildParticleExportListUsingMesh(Options &opt, const Int_t nbodies, Part const int cellnodeID = opt.cellnodeids[j]; /// Only check if particles have overlap with neighbouring cells that are on another MPI domain and have not already been sent to if (sent_mpi_domain[cellnodeID] == 1) continue; - //FoFDataIn[nexport].Part=Part[i]; FoFDataIn[nexport].Index = i; FoFDataIn[nexport].Task = cellnodeID; FoFDataIn[nexport].iGroup = pfof[Part[i].GetID()];//set group id @@ -1067,9 +2781,23 @@ void MPIBuildParticleExportListUsingMesh(Options &opt, const Int_t nbodies, Part } if (nexport>0) { - //sort the export data such that all particles to be passed to thread j are together in ascending thread number - qsort(FoFDataIn, nexport, sizeof(struct fofdata_in), fof_export_cmp); - for (i=0;i &leafnodes, KDTree *&tree){ - Int_t i, j,nthreads,nexport=0,nimport=0; - Int_t nsend_local[NProcs],noffset[NProcs],nbuffer[NProcs]; - Double_t xsearch[3][2]; - Int_t sendTask,recvTask; - MPI_Status status; - int indomain; - int *sent_mpi_domain = new int[NProcs]; - int inode; - - ///\todo would like to add openmp to this code. In particular, loop over nbodies but issue is nexport. - ///This would either require making a FoFDataIn[nthreads][NExport] structure so that each omp thread - ///can only access the appropriate memory and adjust nsend_local.\n - ///\em Or outer loop is over threads, inner loop over nbodies and just have a idlist of size Nlocal that tags particles - ///which must be exported. 
Then its a much quicker follow up loop (no if statement) that stores the data - for (j=0;jFindLeafNode(i) - for(int k=0;k<3;k++) {xsearch[k][0]=Part[i].GetPosition(k)-leafnodes[inode].searchdist;xsearch[k][1]=Part[i].GetPosition(k)+leafnodes[inode].searchdist;} - vector celllist=MPIGetCellListInSearchUsingMesh(opt,xsearch); - for (auto j:celllist) { - const int cellnodeID = opt.cellnodeids[j]; - /// Only check if particles have overlap with neighbouring cells that are on another MPI domain and have not already been sent to - if (sent_mpi_domain[cellnodeID] == 1) continue; - vector celllist=MPIGetCellListInSearchUsingMesh(opt,xsearch); - for (auto j:celllist) { - const int cellnodeID = opt.cellnodeids[j]; - /// Only check if particles have overlap with neighbouring cells that are on another MPI domain and have not already been sent to - if (sent_mpi_domain[cellnodeID] == 1) continue; - nexport++; - nsend_local[cellnodeID]++; - sent_mpi_domain[cellnodeID]++; - } - } - } - //and then gather the number of particles to be sent from mpi thread m to mpi thread n in the mpi_nsend[NProcs*NProcs] array via [n+m*NProcs] - MPI_Allgather(nsend_local, NProcs, MPI_Int_t, mpi_nsend, NProcs, MPI_Int_t, MPI_COMM_WORLD); - NImport=0; - for (j=0;jmpi_nsend[recvTask+ThisTask * NProcs]-sendoffset)cursendchunksize=mpi_nsend[recvTask+ThisTask * NProcs]-sendoffset; @@ -2032,11 +3734,13 @@ void MPIGetHaloSearchImportNum(const Int_t nbodies, KDTree *tree, Particle *Part for (j=0;jmpi_nsend[recvTask+ThisTask * NProcs]-sendoffset)cursendchunksize=mpi_nsend[recvTask+ThisTask * NProcs]-sendoffset; @@ -2133,6 +3843,8 @@ Int_t MPIBuildHaloSearchImportList(const Int_t nbodies, KDTree *tree, Particle * } } ncount=0;for (int k=0;kmpi_nsend[recvTask+ThisTask * NProcs]-sendoffset)cursendchunksize=mpi_nsend[recvTask+ThisTask * NProcs]-sendoffset; @@ -2270,6 +3988,7 @@ void MPIAdjustLocalGroupIDs(const Int_t nbodies, Int_t *pfof){ for (int j=0;j0) FoFGroupDataExport=new fofid_in[nexport]; - else 
FoFGroupDataExport=new fofid_in[1]; Int_t *storeval=new Int_t[nbodies]; Noldlocal=nbodies-nexport; - //for (i=0;impi_nsend[recvTask+ThisTask * NProcs]-sendoffset)cursendchunksize=mpi_nsend[recvTask+ThisTask * NProcs]-sendoffset; @@ -2645,7 +4377,7 @@ Int_t MPIGroupExchange(const Int_t nbodies, Particle *Part, Int_t *&pfof){ /*! The baryon equivalent of \ref MPIGroupExchange. Here assume baryons are searched afterwards */ -Int_t MPIBaryonGroupExchange(const Int_t nbodies, Particle *Part, Int_t *&pfof){ +Int_t MPIBaryonGroupExchange(Options &opt, const Int_t nbodies, Particle *Part, Int_t *&pfof){ Int_t i, j,nthreads,nexport,nimport,nlocal,n; Int_t nsend_local[NProcs],noffset_import[NProcs],noffset_export[NProcs],nbuffer[NProcs]; int sendTask,recvTask; @@ -2788,7 +4520,7 @@ Int_t MPIBaryonGroupExchange(const Int_t nbodies, Particle *Part, Int_t *&pfof){ } ///Determine the local number of groups and their sizes (groups must be local to an mpi thread) -Int_t MPICompileGroups(const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t minsize){ +Int_t MPICompileGroups(Options &opt, const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t minsize){ Int_t i,j,start,ngroups; Int_t *numingroup,*groupid,**plist; ngroups=0; @@ -2843,7 +4575,7 @@ Int_t MPICompileGroups(const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t ///Similar to \ref MPICompileGroups but optimised for separate baryon search ///\todo need to update to reflect vector implementation -Int_t MPIBaryonCompileGroups(const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t minsize, int iorder){ +Int_t MPIBaryonCompileGroups(Options &opt, const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t minsize, int iorder){ Int_t i,j,start,ngroups; Int_t *numingroup,*groupid,**plist; ngroups=0; @@ -3001,7 +4733,7 @@ private(i,j,k,tid,p1,pindex,x1,D2,dval,rval,nnID,dist2) return nexport; } -Int_t MPIBaryonExchange(const Int_t nbaryons, Particle *Pbaryons, Int_t *pfofbaryons){ +Int_t MPIBaryonExchange(Options &opt, const 
Int_t nbaryons, Particle *Pbaryons, Int_t *pfofbaryons){ Int_t i, j,nthreads,nexport,nimport,nlocal,n; Int_t nsend_local[NProcs],noffset_import[NProcs],noffset_export[NProcs],nbuffer[NProcs]; int sendTask,recvTask; @@ -3383,6 +5115,18 @@ void MPISwiftExchange(vector &Part){ if (nexport >0) { PartBufSend=new Particle[nexport]; for (i=0;i &Part, const Int_t nbodies,Part #ifdef USEMPI MPI_Bcast(&LN, 1, MPI_Real_t, 0, MPI_COMM_WORLD); #endif + opt.internalenergyinputconversion = opt.velocityinputconversion*opt.velocityinputconversion; ///if not an individual halo, assume cosmological and store scale of the highest resolution interparticle spacing to scale the physical FOF linking length if (opt.iSingleHalo==0) { @@ -814,9 +815,6 @@ void ReadNchilada(Options &opt, vector &Part, const Int_t nbodies,Part Part[i].SetMass(Part[i].GetMass()*mscale); for (int j=0;j<3;j++) Part[i].SetVelocity(j,Part[i].GetVelocity(j)*opt.velocityinputconversion*sqrt(opt.a)+Hubbleflow*Part[i].GetPosition(j)); for (int j=0;j<3;j++) Part[i].SetPosition(j,Part[i].GetPosition(j)*lscale); -#ifdef GASON - if (Part[i].GetType()==GASTYPE) Part[i].SetU(Part[i].GetU()*opt.velocityinputconversion*opt.velocityinputconversion); -#endif } if (Pbaryons!=NULL && opt.iBaryonSearch==1) { for (i=0;i &Part, const Int_t nbodies,Part Pbaryons[i].SetMass(Pbaryons[i].GetMass()*mscale); for (int j=0;j<3;j++) Pbaryons[i].SetVelocity(j,Pbaryons[i].GetVelocity(j)*opt.velocityinputconversion*sqrt(opt.a)+Hubbleflow*Pbaryons[i].GetPosition(j)); for (int j=0;j<3;j++) Pbaryons[i].SetPosition(j,Pbaryons[i].GetPosition(j)*lscale); -#ifdef GASON - Pbaryons[i].SetU(Pbaryons[i].GetU()*opt.velocityinputconversion*opt.velocityinputconversion); -#endif } } #endif diff --git a/src/omproutines.cxx b/src/omproutines.cxx index 59bcf7a9..3a2c175a 100644 --- a/src/omproutines.cxx +++ b/src/omproutines.cxx @@ -49,9 +49,10 @@ KDTree **OpenMPBuildLocalTrees(Options &opt, const Int_t numompregions, vector

TPHYS,tree3dfofomp[i]->KEPAN,100,0,0,0,period); + tree3dfofomp[i] = new KDTree(&Part.data()[ompdomain[i].noffset],ompdomain[i].ncount,opt.Bsize,tree3dfofomp[i]->TPHYS,tree3dfofomp[i]->KEPAN,100,0,0,0,period,NULL,false); + tree3dfofomp[i]->OverWriteInputOrder(); } } return tree3dfofomp; @@ -287,48 +288,55 @@ Int_t OpenMPResortParticleandGroups(Int_t nbodies, vector &Part, Int_t #ifndef USEMPI int ThisTask=0,NProcs=1; #endif - Int_t start, ngroups=0; - Int_t *numingroup, **plist; - //now get number of groups and reorder group ids - for (auto i=0;i numingroup, index; + unordered_map pfofoldtonew; + PriorityQueue *pq; - //determine the # of groups, their size and the current group ID - for (auto i=0,start=0;i0) numingroup[pfof[i]]++; + for (auto i=1;i<=ngroups;i++) { + if (numingroup[i]Push(index[i],numingroup[i]); + } + ngroups = newnumgroups; + newnumgroups = 0; + //generate map + for (auto i=0;iTopQueue()] = ++newnumgroups; + pq->Pop(); + } + delete pq; + //set new group id values stored in pfof + for (auto i=0;i details; + int thread_count, active_threads; + + /* Public block */ + public: + void throttle_up(){ + // TODO: add the following logic: + // OMP_Threadpool_datastructure update state of thread from inactive to active + active_threads += 1; + } + + void throttle_down(){ + // TODO: add the following logic: + // OMP_Threadpool_datastructure update state of thread from active to inactive + active_threads -= 1; + } + + void set_total_threads(int thread_count){ + thread_count = thread_count; + } + + int get_total_threads(){ + return thread_count; + } + + int get_active_threads(){ + return active_threads; + } + + /* Confirmation Functions */ + void print_total_threads(){ + printf("Total Threads: %d\n", thread_count); + } + + void print_active_threads(){ + printf("Active Threads: %d\n", active_threads); + } +}; diff --git a/src/proto.h b/src/proto.h index 3ed3982a..6f9d37b1 100644 --- a/src/proto.h +++ b/src/proto.h @@ -21,7 +21,7 @@ void usage(void); void 
GetArgs(const int argc, char *argv[], Options &opt); void GetParamFile(Options &opt); -inline void ConfigCheck(Options &opt); +void ConfigCheck(Options &opt); //@} @@ -92,6 +92,7 @@ void WriteVELOCIraptorConfig(Options &opt); void WriteSimulationInfo(Options &opt); ///Write the unit info void WriteUnitInfo(Options &opt); + ///Write particle ids of those within spherical overdensity of a field halo void WriteSOCatalog(Options &opt, const Int_t ngroups, vector *SOpids, vector *SOtypes=NULL); ///Write profiles @@ -212,6 +213,8 @@ void HaloCoreGrowth(Options &opt, const Int_t nsubset, Particle *&Partsubset, In void MergeSubstructuresCoresPhase(Options &opt, const Int_t nsubset, Particle *&Partsubset, Int_t *&pfof, Int_t &numsubs, Int_t &numcores); ///merge substructures if phase-space positions overlap void MergeSubstructuresPhase(Options &opt, const Int_t nsubset, Particle *&Partsubset, Int_t *&pfof, Int_t &numgroups, Int_t &numsubs, Int_t &numcores); +///remove spurious dynamical substructures that comprise most of their host halo +void RemoveSpuriousDynamicalSubstructures(Options &opt, const Int_t nsubset, Int_t *&pfof, Int_t &numgroups, Int_t &numsubs, Int_t &numcores); ///Check significance of each group int CheckSignificance(Options &opt, const Int_t nsubset, Particle *Partsubset, Int_t &numgroups, Int_t *numingroups, Int_t *pfof, Int_t **pglist); ///Search for Baryonic structures associated with dark matter structures in phase-space @@ -277,6 +280,7 @@ int Unbind(Options &opt, Particle *Part, Int_t &numgroups, Int_t *&numingroup, I ///calculate the potential of an array of particles void Potential(Options &opt, Int_t nbodies, Particle *Part, Double_t *potV); void Potential(Options &opt, Int_t nbodies, Particle *Part); +void PotentialPP(Options &opt, Int_t nbodies, Particle *Part); //@} /// \name Routines to determine bulk quantities of halo and adjust halo @@ -381,6 +385,16 @@ void AddDataToRadialBinInclusive(Options &opt, Double_t rval, Double_t massval, 
Double_t srfval, int typeval, #endif Double_t irnorm, int &ibin, PropData &pdata); + +///calculate extra hydro properties +void GetExtraHydroProperties(Options &opt, PropData &pdata, Int_t n, Particle *Pval); +///calculate extra star properties +void GetExtraStarProperties(Options &opt, PropData &pdata, Int_t n, Particle *Pval); +///calculate extra bh properties +void GetExtraBHProperties(Options &opt, PropData &pdata, Int_t n, Particle *Pval); +///calculate extra dm properties +void GetExtraDMProperties(Options &opt, PropData &pdata, Int_t n, Particle *Pval); + ///calculate spherical overdensity from vector of radii, masses and indices Int_t CalculateSphericalOverdensity(Options &opt, PropData &pdata, vector &radii, vector &masses, vector &indices, @@ -514,6 +528,13 @@ void MPIDistributeReadTasks(Options&opt, int *&ireadtask, int*&readtaskID); ///set which file a given task will read int MPISetFilesRead(Options&opt, int *&ireadfile, int *&ireadtask); +///generic init of write communicator to mpi world; +void MPIInitWriteComm(); +///determine how to group mpi threads together when writing +void MPIBuildWriteComm(Options &opt); +///free communicator if needed +void MPIFreeWriteComm(); + /// Determine number of local particles for tipsy void MPINumInDomainTipsy(Options &opt); /// Determine number of local particles for gadget @@ -555,14 +576,83 @@ vector MPIGetCellListInSearchUsingMesh(Options &opt, Double_t xsearch[3][2] /// \name MPI send/recv related routines when reading input data /// see \ref mpiroutines.cxx for implementation //@{ + ///adds particles to appropriate send buffers and initiates sends if necessary. 
-void MPIAddParticletoAppropriateBuffer(const int &ibuf, Int_t ibufindex, int *&ireadtask, const Int_t &Bufsize, Int_t *&Nbuf, Particle *&Pbuf, Int_t &numpart, Particle *Part, Int_t *&Nreadbuf, vector*&Preadbuf); +void MPIAddParticletoAppropriateBuffer(Options &opt, const int &ibuf, Int_t ibufindex, int *&ireadtask, const Int_t &Bufsize, Int_t *&Nbuf, Particle *&Pbuf, Int_t &numpart, Particle *Part, Int_t *&Nreadbuf, vector*&Preadbuf); +///Send particle information from read threads to non read threads using MPI_COMM_WORLD +void MPISendParticlesFromReadThreads(Options &opt, Int_t nlocalbuff, Particle *Part, int taskID); ///recv particle data from read threads void MPIReceiveParticlesFromReadThreads(Options &opt, Particle *&Pbuf, Particle *Part, int *&readtaskID, int *&irecv, int *&mpi_irecvflag, Int_t *&Nlocalthreadbuf, MPI_Request *&mpi_request, Particle *&Pbaryons); ///Send/recv particle data read from input files between the various read threads; void MPISendParticlesBetweenReadThreads(Options &opt, Particle *&Pbuf, Particle *Part, Int_t *&nreadoffset, int *&ireadtask, int *&readtaskID, Particle *&Pbaryons, Int_t *&mpi_nsend_baryon); ///Send/recv particle data stored in vector using the read thread communication domain void MPISendParticlesBetweenReadThreads(Options &opt, vector *&Pbuf, Particle *Part, int *&ireadtask, int *&readtaskID, Particle *&Pbaryons, MPI_Comm &mpi_read_comm, Int_t *&mpi_nsend_readthread, Int_t *&mpi_nsend_readthread_baryon); + +///Interrupt send of particle information to destination taskID using MPI_COMM_WORLD +void MPIISendParticleInfo(Options &opt, Int_t nlocalbuff, Particle *Part, int taskID, int tag, MPI_Request &rqst); +///Receive Particle information send with specific tag +void MPIReceiveParticleInfo(Options &opt, Int_t nlocalbuff, Particle *Part, int sendingTaskID, int tag); + +///Send hydro information from read threads to non read threads using MPI_COMM_WORLD +void MPISendHydroInfoFromReadThreads(Options &opt, Int_t nlocalbuff, 
Particle *Part, int taskID); +///Send star information from read threads to non read threads using MPI_COMM_WORLD +void MPISendStarInfoFromReadThreads(Options &opt, Int_t nlocalbuff, Particle *Part, int taskID); +///Send bh information from read threads to non read threads using MPI_COMM_WORLD +void MPISendBHInfoFromReadThreads(Options &opt, Int_t nlocalbuff, Particle *Part, int taskID); +///Send extra dm information from read threads to non read threads using MPI_COMM_WORLD +void MPISendExtraDMInfoFromReadThreads(Options &opt, Int_t nlocalbuff, Particle *Part, int taskID); + +///Receive hydro information from read threads using MPI_COMM_WORLD +void MPIReceiveHydroInfoFromReadThreads(Options &opt, Int_t nlocalbuff, Particle *Part, int readtaskID); +///Receive star information from read threads using MPI_COMM_WORLD +void MPIReceiveStarInfoFromReadThreads(Options &opt, Int_t nlocalbuff, Particle *Part, int readtaskID); +///Receive bh information from read threads using MPI_COMM_WORLD +void MPIReceiveBHInfoFromReadThreads(Options &opt, Int_t nlocalbuff, Particle *Part, int readtaskID); +///Receive extra dm information from read threads using MPI_COMM_WORLD +void MPIReceiveExtraDMInfoFromReadThreads(Options &opt, Int_t nlocalbuff, Particle *Part, int readtaskID); + +///Interrupt send of hydro information to destination taskID using MPI_COMM_WORLD +void MPIISendHydroInfo(Options &opt, Int_t nlocalbuff, Particle *Part, int taskID, int tag, MPI_Request &rqst); +///Interrupt send of star information to destination taskID using MPI_COMM_WORLD +void MPIISendStarInfo(Options &opt, Int_t nlocalbuff, Particle *Part, int taskID, int tag, MPI_Request &rqst); +///Interrupt send of BH information to destination taskID using MPI_COMM_WORLD +void MPIISendBHInfo(Options &opt, Int_t nlocalbuff, Particle *Part, int taskID, int tag, MPI_Request &rqst); +///Interrupt send of extra dm information to destination taskID using MPI_COMM_WORLD +void MPIISendExtraDMInfo(Options &opt, Int_t 
nlocalbuff, Particle *Part, int taskID, int tag, MPI_Request &rqst); + +///Receive Hydro information send with specific tag +void MPIReceiveHydroInfo(Options &opt, Int_t nlocalbuff, Particle *Part, int sendingTaskID, int tag); +///Receive star information send with specific tag +void MPIReceiveStarInfo(Options &opt, Int_t nlocalbuff, Particle *Part, int sendingTaskID, int tag); +///Receive BH information send with specific tag +void MPIReceiveBHInfo(Options &opt, Int_t nlocalbuff, Particle *Part, int sendingTaskID, int tag); +///Receive Extra DM information send with specific tag +void MPIReceiveExtraDMInfo(Options &opt, Int_t nlocalbuff, Particle *Part, int sendingTaskID, int tag); + +///Send/Receive hydro information between read threads using the MPI communicator +void MPISendReceiveHydroInfoBetweenThreads(Options &opt, Int_t nlocalbuff, Particle *Pbuf, Int_t nlocal, Particle *Part, int recvTask, int tag, MPI_Comm &mpi_comm); +///Send/Receive star information between read threads using the MPI communicator +void MPISendReceiveStarInfoBetweenThreads(Options &opt, Int_t nlocalbuff, Particle *Pbuf, Int_t nlocal, Particle *Part, int recvTask, int tag, MPI_Comm &mpi_comm); +///Send/Receive BH information between read threads using the MPI communicator +void MPISendReceiveBHInfoBetweenThreads(Options &opt, Int_t nlocalbuff, Particle *Pbuf, Int_t nlocal, Particle *Part, int recvTask, int tag, MPI_Comm &mpi_comm); +///Send/Receive BH information between read threads using the MPI communicator +void MPISendReceiveExtraDMInfoBetweenThreads(Options &opt, Int_t nlocalbuff, Particle *Pbuf, Int_t nlocal, Particle *Part, int recvTask, int tag, MPI_Comm &mpi_comm); + +///Filling extra buffers with hydro data for particles that are to be exported as part of +///a FOF group. 
+void MPIFillFOFBuffWithHydroInfo(Options &opt, Int_t *numbuff, Int_t *numoffset, Particle *&Part, fofid_in *&FoFGroupData, vector &indices, vector &propbuff, bool iforexport=false); +///Send/Receive hydro information between read threads using the MPI communicator +///Using a FOF filled buffer +void MPISendReceiveFOFHydroInfoBetweenThreads(Options &opt, Int_t nexport, fofid_in *FoFGroupDataExport, Int_t nlocal, fofid_in *FoFGroupDataLocal, Particle *&Part, int recvTask, int tag, MPI_Comm &mpi_comm); +///Send/Receive star information between read threads using the MPI communicator +///Using a FOF filled buffer +void MPISendReceiveFOFStarInfoBetweenThreads(Options &opt, Int_t nexport, fofid_in *FoFGroupDataExport, Int_t nlocal, fofid_in *FoFGroupDataLocal, Particle *&Part, int recvTask, int tag, MPI_Comm &mpi_comm); +///Send/Receive BH information between read threads using the MPI communicator +///Using a FOF filled buffer +void MPISendReceiveFOFBHInfoBetweenThreads(Options &opt, Int_t nexport, fofid_in *FoFGroupDataExport, Int_t nlocal, fofid_in *FoFGroupDataLocal, Particle *&Part, int recvTask, int tag, MPI_Comm &mpi_comm); +///Send/Receive ExtraDM information between read threads using the MPI communicator +///Using a FOF filled buffer +void MPISendReceiveFOFExtraDMInfoBetweenThreads(Options &opt, Int_t nexport, fofid_in *FoFGroupDataExport, Int_t nlocal, fofid_in *FoFGroupDataLocal, Particle *&Part, int recvTask, int tag, MPI_Comm &mpi_comm); //@} /// \name MPI search related routines @@ -580,7 +670,7 @@ void MPIGetExportNum(const Int_t nbodies, Particle *Part, Double_t rdist); void MPIGetExportNumUsingMesh(Options &opt, const Int_t nbodies, Particle *Part, Double_t rdist); #endif ///Determine and send particles that need to be exported to another mpi thread from local mpi thread based on rdist -void MPIBuildParticleExportList(const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_tree_t *&Len, Double_t rdist); +void MPIBuildParticleExportList(Options &opt, 
const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_tree_t *&Len, Double_t rdist); ///Determine and send particles that need to be exported to another mpi thread from local mpi thread based on rdist using the SWIFT mesh void MPIBuildParticleExportListUsingMesh(Options &opt, const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_tree_t *&Len, Double_t rdist); ///Link groups across MPI threads using a physical search @@ -592,13 +682,13 @@ Int_t MPILinkAcross(const Int_t nbodies, KDTree *&tree, Particle *Part, Int_t *& ///update export list after after linking across void MPIUpdateExportList(const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_tree_t *&Len); ///localize groups to a single mpi thread -Int_t MPIGroupExchange(const Int_t nbodies, Particle *Part, Int_t *&pfof); +Int_t MPIGroupExchange(Options &opt, const Int_t nbodies, Particle *Part, Int_t *&pfof); ///Determine the local number of groups and their sizes (groups must be local to an mpi thread) -Int_t MPICompileGroups(const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t minsize); +Int_t MPICompileGroups(Options &opt, const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t minsize); ///similar to \ref MPIGroupExchange but optimised for separate baryon search, assumes only looking at baryons -Int_t MPIBaryonGroupExchange(const Int_t nbodies, Particle *Part, Int_t *&pfof); +Int_t MPIBaryonGroupExchange(Options &opt, const Int_t nbodies, Particle *Part, Int_t *&pfof); ///similar to \ref MPICompileGroups but optimised for separate baryon search, assumes only looking at baryons -Int_t MPIBaryonCompileGroups(const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t minsize, int iorder=1); +Int_t MPIBaryonCompileGroups(Options &opt, const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t minsize, int iorder=1); ///localize baryons particle members of groups to a single mpi thread ///Collect FOF from all void MPICollectFOF(const Int_t nbodies, Int_t *&pfof); @@ -607,7 +697,7 @@ int fof_export_cmp(const 
void *a, const void *b); ///comparison function to order particles for export and fof group localization. int fof_id_cmp(const void *a, const void *b); ///similar to \ref MPIBuildParticleExportList but specific interface for baryon search -void MPIBuildParticleExportBaryonSearchList(const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t *ids, Int_t *numingroup, Double_t rdist); +void MPIBuildParticleExportBaryonSearchList(Options &opt, const Int_t nbodies, Particle *Part, Int_t *&pfof, Int_t *ids, Int_t *numingroup, Double_t rdist); ///search local baryons with exported particle list. Int_t MPISearchBaryons(const Int_t nbaryons, Particle *&Pbaryons, Int_t *&pfofbaryons, Int_t *numingroup, Double_t *localdist, Int_t nsearch, Double_t *param, Double_t *period); ///localize the baryons to the mpi thread on which their associated DM group exists. @@ -635,7 +725,7 @@ void MPIBuildParticleNNExportListUsingMesh(Options &opt, const Int_t nbodies, Pa ///Determine number of local particles that need to be exported back based on ball search. void MPIGetNNImportNum(const Int_t nbodies, KDTree *tree, Particle *Part, int iallflag=1); ///Determine local particles that need to be exported back based on ball search. 
-Int_t MPIBuildParticleNNImportList(const Int_t nbodies, KDTree *tree, Particle *Part, int iallflag=1); +Int_t MPIBuildParticleNNImportList(Options &opt, const Int_t nbodies, KDTree *tree, Particle *Part, int iallflag=1); ///comparison function to order particles for export int nn_export_cmp(const void *a, const void *b); ///Determine number of halos whose search regions overlap other mpi domains @@ -653,7 +743,7 @@ void MPIBuildHaloSearchExportListUsingMesh(Options &opt, const Int_t ngroup, Pro ///Determine number of imported particles based on halo search regions void MPIGetHaloSearchImportNum(const Int_t nbodies, KDTree *tree, Particle *Part); ///Builds the import list of particles based on halo positions -Int_t MPIBuildHaloSearchImportList(const Int_t nbodies, KDTree *tree, Particle *Part); +Int_t MPIBuildHaloSearchImportList(Options &opt, const Int_t nbodies, KDTree *tree, Particle *Part); #ifdef SWIFTINTERFACE ///Exchange Particles so that particles in group are back original swift task void MPISwiftExchange(vector &Part); @@ -722,6 +812,40 @@ int GetMillSpan(int ); int CompareInt(const void *, const void *); ///get a time double MyGetTime(); + +//@} + +/// \name Compilation functions +/// Functions defined when certain compilation options enabled. Allows easy check +///of a library file produced. 
see \ref utilities.cxx for implementation +//@{ +#ifdef NOMASS +extern "C" void VR_NOMASS(); +#endif +#ifdef GASON +extern "C" void VR_GASON(); +#endif +#ifdef STARON +extern "C" void VR_STARON(); +#endif +#ifdef BHON +extern "C" void VR_BHON(); +#endif +#ifdef USEMPI +extern "C" void VR_MPION(); +#endif +#ifdef USEOPENMP +extern "C" void VR_OPENMPON(); +#endif +#ifdef HIGHRES +extern "C" void VR_ZOOMSIMON(); +#endif +#ifdef USEHDF +extern "C" void VR_HDFON(); +#ifdef USEPARALLELHDF +extern "C" void VR_PARALLELHDFON(); +#endif +#endif //@} #endif diff --git a/src/ramsesio.cxx b/src/ramsesio.cxx index f1209921..5ad10e0a 100644 --- a/src/ramsesio.cxx +++ b/src/ramsesio.cxx @@ -826,7 +826,7 @@ void ReadRamses(Options &opt, vector &Part, const Int_t nbodies, Parti } #endif Nbuf[ibuf]++; - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); #else Part[count2]=Particle(mtemp*mscale, xtemp[0]*lscale,xtemp[1]*lscale,xtemp[2]*lscale, @@ -864,7 +864,7 @@ void ReadRamses(Options &opt, vector &Part, const Int_t nbodies, Parti #endif //ensure that store number of particles to be sent to other reading threads Nbuf[ibuf]++; - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); #else Part[count2]=Particle(mtemp*mscale, xtemp[0]*lscale,xtemp[1]*lscale,xtemp[2]*lscale, @@ -907,7 +907,7 @@ void ReadRamses(Options &opt, vector &Part, const Int_t nbodies, Parti if (k==RAMSESSTARTYPE) Nlocalbaryon[2]++; else if (k==RAMSESSINKTYPE) Nlocalbaryon[3]++; } - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocalbaryon[0], Pbaryons, Nreadbuf, Preadbuf); + 
MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocalbaryon[0], Pbaryons, Nreadbuf, Preadbuf); #else Pbaryons[bcount2]=Particle(mtemp*mscale, xtemp[0]*lscale,xtemp[1]*lscale,xtemp[2]*lscale, @@ -948,7 +948,7 @@ void ReadRamses(Options &opt, vector &Part, const Int_t nbodies, Parti } #endif Nbuf[ibuf]++; - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); #else Part[count2]=Particle(mtemp*mscale, xtemp[0]*lscale,xtemp[1]*lscale,xtemp[2]*lscale, @@ -1179,7 +1179,7 @@ void ReadRamses(Options &opt, vector &Part, const Int_t nbodies, Parti #endif //ensure that store number of particles to be sent to the threads involved with reading snapshot files Nbuf[ibuf]++; - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocal, Part.data(), Nreadbuf, Preadbuf); #else Part[count2]=Particle(mtemp*mscale, xpos[0]*lscale,xpos[1]*lscale,xpos[2]*lscale, @@ -1233,7 +1233,7 @@ void ReadRamses(Options &opt, vector &Part, const Int_t nbodies, Parti if (ibuf==ThisTask) { Nlocalbaryon[1]++; } - MPIAddParticletoAppropriateBuffer(ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocalbaryon[0], Pbaryons, Nreadbuf, Preadbuf); + MPIAddParticletoAppropriateBuffer(opt, ibuf, ibufindex, ireadtask, BufSize, Nbuf, Pbuf, Nlocalbaryon[0], Pbaryons, Nreadbuf, Preadbuf); #else Pbaryons[bcount2]=Particle(mtemp*mscale, xpos[0]*lscale,xpos[1]*lscale,xpos[2]*lscale, @@ -1290,6 +1290,9 @@ void ReadRamses(Options &opt, vector &Part, const Int_t nbodies, Parti MPI_Ssend(&Nbuf[ibuf],1,MPI_Int_t, ibuf, ibuf+NProcs, MPI_COMM_WORLD); if (Nbuf[ibuf]>0) { MPI_Ssend(&Pbuf[ibuf*BufSize], 
sizeof(Particle)*Nbuf[ibuf], MPI_BYTE, ibuf, ibuf, MPI_COMM_WORLD); + MPISendHydroInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); + MPISendStarInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); + MPISendBHInfoFromReadThreads(opt, Nbuf[ibuf], &Pbuf[ibuf*BufSize], ibuf); Nbuf[ibuf]=0; //last broadcast with Nbuf[ibuf]=0 so that receiver knows no more particles are to be broadcast MPI_Ssend(&Nbuf[ibuf],1,MPI_Int_t,ibuf,ibuf+NProcs,MPI_COMM_WORLD); @@ -1340,6 +1343,9 @@ void ReadRamses(Options &opt, vector &Part, const Int_t nbodies, Parti MPI_Bcast(&(Ntotal),sizeof(Ntotal),MPI_BYTE,0,MPI_COMM_WORLD); MPI_Bcast(&opt.zoomlowmassdm,sizeof(opt.zoomlowmassdm),MPI_BYTE,0,MPI_COMM_WORLD); #endif + //store how to convert input internal energies to physical output internal energies + //as we already convert ramses units to sensible output units, nothing to do. + opt.internalenergyinputconversion = 1.0; //a bit of clean up #ifdef USEMPI diff --git a/src/search.cxx b/src/search.cxx index 8a96ba89..ef0e6dcd 100644 --- a/src/search.cxx +++ b/src/search.cxx @@ -56,6 +56,8 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, if (omp_get_thread_num()==0) nthreads=omp_get_num_threads(); } OMP_Domain *ompdomain; + int numompregions = ceil(nbodies/(float)opt.openmpfofsize); + bool runompfof = (numompregions>=2 && nthreads > 1 && opt.iopenmpfof == 1); #endif if (opt.p>0) { period=new Double_t[3]; @@ -66,8 +68,10 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, minsize=opt.HaloMinSize; #ifdef USEMPI //if using MPI, lower minimum number - if (NProcs>1) minsize=MinNumMPI; - iorder = 0; + if (NProcs>1) { + minsize=MinNumMPI; + iorder = 0; + } #endif time1=MyGetTime(); @@ -80,12 +84,12 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, #ifdef USEOPENMP //if using openmp produce tree with large buckets as a decomposition of the local mpi domain //to then run local fof searches on each domain before 
stitching - int numompregions = ceil(nbodies/(float)opt.openmpfofsize); - if (numompregions >= 4 && nthreads > 1 && opt.iopenmpfof == 1) { + if (runompfof) { time3=MyGetTime(); Double_t rdist = sqrt(param[1]); //determine the omp regions; tree = new KDTree(Part.data(),nbodies,opt.openmpfofsize,tree->TPHYS,tree->KEPAN,100); + tree->OverWriteInputOrder(); numompregions=tree->GetNumLeafNodes(); ompdomain = OpenMPBuildDomains(opt, numompregions, tree, rdist); storeorgIndex = new Int_t[nbodies]; @@ -97,11 +101,13 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, else { time3=MyGetTime(); tree = new KDTree(Part.data(),nbodies,opt.Bsize,tree->TPHYS,tree->KEPAN,1000,0,0,0,period); + tree->OverWriteInputOrder(); if (opt.iverbose) cout<TPHYS,tree->KEPAN,1000,0,0,0,period); + tree->OverWriteInputOrder(); #endif cout<<"Done"< &Part, //if enough regions then search each individually //then link across omp domains Int_t ompminsize = 2; - if (numompregions>=4 && nthreads > 1 && opt.iopenmpfof == 1){ + if (runompfof){ time3=MyGetTime(); Int_t orgIndex, omp_import_total; int omptask; @@ -199,13 +205,23 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, } else { //posible alteration for all particle search - if (opt.partsearchtype==PSTALL && opt.iBaryonSearch>1) pfof=tree->FOFCriterionSetBasisForLinks(fofcmp,param,numgroups,minsize,iorder,0,FOFchecktype,Head,Next); - else pfof=tree->FOF(sqrt(param[1]),numgroups,minsize,iorder,Head,Next); + if (opt.partsearchtype==PSTALL && opt.iBaryonSearch>1) { + pfof=tree->FOFCriterionSetBasisForLinks(fofcmp,param,numgroups,minsize, + iorder,0,FOFchecktype,Head,Next); + } + else { + pfof=tree->FOF(sqrt(param[1]),numgroups,minsize,iorder,Head,Next); + } } #else //posible alteration for all particle search - if (opt.partsearchtype==PSTALL && opt.iBaryonSearch>1) pfof=tree->FOFCriterionSetBasisForLinks(fofcmp,param,numgroups,minsize,iorder,0,FOFchecktype,Head,Next); - else 
pfof=tree->FOF(sqrt(param[1]),numgroups,minsize,iorder,Head,Next); + if (opt.partsearchtype==PSTALL && opt.iBaryonSearch>1) { + pfof=tree->FOFCriterionSetBasisForLinks(fofcmp,param,numgroups,minsize, + iorder,0,FOFchecktype,Head,Next); + } + else { + pfof=tree->FOF(sqrt(param[1]),numgroups,minsize,iorder,Head,Next); + } #endif #ifndef USEMPI @@ -213,27 +229,33 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, //if this flag is set, calculate localfield value here for particles possibly resident in a field structure #ifdef STRUCDEN if (numgroups>0 && (opt.iSubSearch==1&&opt.foftype!=FOF6DCORE)) { - numingroup=BuildNumInGroup(nbodies, numgroups, pfof); - storetype=new Int_t[nbodies]; - for (i=0;i0) - if (!(opt.iBaryonSearch>=1 && opt.partsearchtype==PSTALL)) for (i=0;i=MINSUBSIZE); - //otherwise set type to group value for dark matter - else { - for (i=0;i=MINSUBSIZE); - else Part[i].SetType(-1); + numingroup=BuildNumInGroup(nbodies, numgroups, pfof); + storetype=new Int_t[nbodies]; + for (i=0;i0) + if (!(opt.iBaryonSearch>=1 && opt.partsearchtype==PSTALL)) { + for (i=0;i=MINSUBSIZE); + } } - } - for (i=0;i0) numinstrucs++; - if (opt.iverbose) cout<<"Number of particles in large subhalo searchable structures "<0) GetVelocityDensity(opt, nbodies, Part.data(), tree); + //otherwise set type to group value for dark matter + else { + for (i=0;i=MINSUBSIZE); + else Part[i].SetType(-1); + } + } + for (i=0;i0) numinstrucs++; + if (opt.iverbose) { + cout<<"Number of particles in large subhalo searchable structures "<0) GetVelocityDensity(opt, nbodies, Part.data(), tree); - for (i=0;iFOF6D) delete[] numingroup; + for (i=0;iFOF6D) delete[] numingroup; } #endif delete tree; @@ -243,6 +265,8 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, if (NProcs==1) { totalgroups=numgroups; if (tree != NULL) delete tree; + delete[] Head; + delete[] Next; } else { mpi_foftask=MPISetTaskID(Nlocal); @@ -289,7 +313,7 @@ Int_t* SearchFullSet(Options 
&opt, const Int_t nbodies, vector &Part, #ifdef SWIFTINTERFACE MPIBuildParticleExportListUsingMesh(libvelociraptorOpt, nbodies, Part.data(), pfof, Len, sqrt(param[1])); #else - MPIBuildParticleExportList(nbodies, Part.data(), pfof, Len, sqrt(param[1])); + MPIBuildParticleExportList(opt, nbodies, Part.data(), pfof, Len, sqrt(param[1])); #endif MPI_Barrier(MPI_COMM_WORLD); //Now that have FoFDataGet (the exported particles) must search local volume using said particles @@ -325,7 +349,7 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, delete[] Len; //Now redistribute groups so that they are local to a processor (also orders the group ids according to size opt.HaloMinSize=MinNumOld;//reset minimum size - Int_t newnbodies=MPIGroupExchange(nbodies,Part.data(),pfof); + Int_t newnbodies=MPIGroupExchange(opt, nbodies, Part.data(), pfof); //once groups are local, can free up memory. Might need to increase size //of vector if (Nmemlocal &Part, delete[] pfof; pfof=new Int_t[newnbodies]; //And compile the information and remove groups smaller than minsize - numgroups=MPICompileGroups(newnbodies,Part.data(),pfof,opt.HaloMinSize); + numgroups=MPICompileGroups(opt, newnbodies, Part.data(), pfof, opt.HaloMinSize); //and free up some memory if vector doesn't need to be as big if (Nmemlocal>Nlocal) {Part.resize(Nlocal);Nmemlocal=Nlocal;} cout<<"MPI thread "< &Part, } #endif if (opt.iverbose>=2) { - Int_t sum=0; - for (i=0;i0); + minsize=opt.HaloMinSize; + Int_t sum=0, maxgroupsize=0; + for (i=0;i &Part, Part[i].SetType((numingroup[pfof[i]]>=MINSUBSIZE)); numlocalden += (Part[i].GetType()>0); } + delete[] numingroup; + numingroup=NULL; } //otherwise set type to group value for dark matter else { @@ -376,13 +408,19 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, else Part[i].SetType(0); numlocalden += (Part[i].GetType()>0); } + delete[] numingroup; + numingroup=NULL; } for (i=0;i0);} - if (opt.iverbose) 
cout<TPHYS,tree->KEPAN,100,0,0,0,period); - GetVelocityDensity(opt, Nlocal, Part.data(),tree); - delete tree; + Int_t numlocalden_total; + MPI_Allreduce(&numlocalden, &numlocalden_total, 1, MPI_Int_t, MPI_SUM, MPI_COMM_WORLD); + if (numlocalden_total > 0) { + if (opt.iverbose) cout<TPHYS,tree->KEPAN,100,0,0,0,period); + GetVelocityDensity(opt, Nlocal, Part.data(),tree); + delete tree; + } for (i=0;i &Part, minsize=opt.HaloMinSize; if (opt.fofbgtype!=FOFSTNOSUBSET) fofcmp=&FOF6d; else fofcmp=&FOFStream; - - cout<0)*pfof[i]); - npartingroups+=(Int_t)(pfof[i]>0); - iend+=(pfof[i]==1); - numingroup[pfof[i]]++; - } - for (i=2;i<=numgroups;i++) noffset[i]=noffset[i-1]+numingroup[i-1]; - qsort(Part.data(), Nlocal, sizeof(Particle), PIDCompare); - //sort(Part.begin(),Part.end(),PIDCompareVec); - for (i=0;i 0 then sort particles for 6dfof search + if (numgroups > 0) { + cout<0)*pfof[i]); + npartingroups+=(Int_t)(pfof[i]>0); + iend+=(pfof[i]==1); + numingroup[pfof[i]]++; + } + for (i=2;i<=numgroups;i++) noffset[i]=noffset[i-1]+numingroup[i-1]; + qsort(Part.data(), Nlocal, sizeof(Particle), PIDCompare); + //sort(Part.begin(),Part.end(),PIDCompareVec); + for (i=0;i0) { + vscale2=mtotregion=vx=vy=vz=0; + for (i=0;i0) vscale2/=mtotregion; } - if (mtotregion>0) vscale2/=mtotregion; - #ifdef USEMPI Double_t mpi_vscale2; MPI_Allreduce(&vscale2,&mpi_vscale2,1,MPI_Real_t,MPI_MAX,MPI_COMM_WORLD); @@ -470,32 +515,36 @@ Int_t* SearchFullSet(Options &opt, const Int_t nbodies, vector &Part, } //otherwise each object has its own velocity scale else if(opt.fofbgtype==FOF6DADAPTIVE || opt.iKeepFOF){ + //if local mpi domain has groups, proceed wih calculation + //of velocity scales vscale2array=new Double_t[numgroups+1]; + if (numgroups > 0) { #ifdef USEOPENMP #pragma omp parallel default(shared) \ private(i,vscale2,mtotregion,vx,vy,vz,vmean) { #pragma omp for schedule(dynamic,1) nowait #endif - for (i=1;i<=numgroups;i++) { - vscale2=mtotregion=vx=vy=vz=0; - for (Int_t j=0;j 0) + { #ifdef 
USEOPENMP #pragma omp parallel default(shared) \ private(i,tid,xscaling,vscaling) { #pragma omp for schedule(dynamic,1) nowait #endif - for (i=1;i<=iend;i++) { + for (i=1;i<=iend;i++) { #ifdef USEOPENMP - tid=omp_get_thread_num(); + tid=omp_get_thread_num(); #else - tid=0; + tid=0; #endif - //if adaptive 6dfof, set params - if (opt.fofbgtype==FOF6DADAPTIVE) paramomp[2+tid*20]=paramomp[7+tid*20]=vscale2array[i]; - /* - treeomp[tid]=new KDTree(&Part[noffset[i]],numingroup[i],opt.Bsize,treeomp[tid]->TPHYS,tree->KEPAN,100); - pfofomp[i]=treeomp[tid]->FOFCriterion(fofcmp,¶momp[tid*20],ngomp[i],minsize,1,0,Pnocheck,&Head[noffset[i]],&Next[noffset[i]],&Tail[noffset[i]],&Len[noffset[i]]); - */ - //scale particle positions - xscaling=1.0/sqrt(paramomp[1+tid*20]);vscaling=1.0/sqrt(paramomp[2+tid*20]); - for (Int_t j=0;jTPHS,tree->KEPAN,100); - pfofomp[i]=treeomp[tid]->FOF(1.0,ngomp[i],minsize,1,&Head[noffset[i]],&Next[noffset[i]],&Tail[noffset[i]],&Len[noffset[i]]); - delete treeomp[tid]; - for (Int_t j=0;jTPHS,tree->KEPAN,100); + pfofomp[i]=treeomp[tid]->FOF(1.0,ngomp[i],minsize,1,&Head[noffset[i]],&Next[noffset[i]],&Tail[noffset[i]],&Len[noffset[i]]); + delete treeomp[tid]; + for (Int_t j=0;j0) { if (opt.iverbose>=2 && ThisTask==0) cout<<"Storing the 3D fof envelopes of the 6d fof structures found"< 0){ ng=0; for (i=0;i=bgoffset+1) { for (i=0;ibgoffset) pfof[i]=numgroups+(pfofbg[i]-bgoffset); numgroupsbg-=bgoffset; - //if (numgroups>0 && opt.coresubmergemindist>0) MergeSubstructuresCoresPhase(opt, nsubset, Partsubset, pfof, numgroups, numgroupsbg); numgroups+=numgroupsbg; } if (opt.iverbose>=2) cout<0 && opt.coresubmergemindist>0 && nsubset>=MINSUBSIZE) MergeSubstructuresPhase(opt, nsubset, Partsubset, pfof, numgroups, numsubs, numgroupsbg); - + RemoveSpuriousDynamicalSubstructures(opt,nsubset, pfof, numgroups, numsubs, numgroupsbg); #ifdef USEMPI //now if substructures are subsubstructures, then the region of interest has already been localized to a single MPI domain @@ 
-1745,7 +1801,7 @@ private(i,tid) #ifdef SWIFTINTERFACE MPIBuildParticleExportListUsingMesh(libvelociraptorOpt, nsubset, Partsubset, pfof, Len, sqrt(param[1])); #else - MPIBuildParticleExportList(nsubset, Partsubset, pfof, Len, sqrt(param[1])); + MPIBuildParticleExportList(opt, nsubset, Partsubset, pfof, Len, sqrt(param[1])); #endif //Now that have FoFDataGet (the exported particles) must search local volume using said particles //This is done by finding all particles in the search volume and then checking if those particles meet the FoF criterion @@ -1771,7 +1827,7 @@ private(i,tid) //Now redistribute groups so that they are local to a processor (also orders the group ids according to size if (opt.iSingleHalo) opt.MinSize=MinNumOld;//reset minimum size - Int_t newnbodies=MPIGroupExchange(nsubset,Partsubset,pfof); + Int_t newnbodies=MPIGroupExchange(opt, nsubset,Partsubset,pfof); ///\todo need to clean up this mpi section for single halo /* #ifndef MPIREDUCEMEM @@ -1789,7 +1845,7 @@ private(i,tid) ///\todo Before final compilation of data, should have unbind here but must adjust unbind so it ///does not call reordergroupids in it though it might be okay. 
//And compile the information and remove groups smaller than minsize - numgroups=MPICompileGroups(newnbodies,Partsubset,pfof,opt.MinSize); + numgroups=MPICompileGroups(opt, newnbodies,Partsubset,pfof,opt.MinSize); MPI_Barrier(MPI_COMM_WORLD); cout<<"MPI thread "< mcore(numgroupsbg+1, 0.0); + vector ncore(numgroupsbg+1, 0); + vector newcore(numgroupsbg+1, 0); + Int_t newnumgroupsbg=0; int nsearch=opt.Nvel; int mincoresize; int tid,i; @@ -1833,8 +1890,8 @@ void HaloCoreGrowth(Options &opt, const Int_t nsubset, Particle *&Partsubset, In Double_t **dist2; PriorityQueue *pq; Int_t nactivepart=nsubset; + vector noffset(numgroupsbg+1,0); - for (i=0;i<=numgroupsbg;i++)ncore[i]=mcore[i]=0; //determine the weights for the cores dispersions factors for (i=0;i0) { @@ -1850,7 +1907,6 @@ void HaloCoreGrowth(Options &opt, const Int_t nsubset, Particle *&Partsubset, In } //if number of particles in core less than number in subset then start assigning particles if (nincoreompperiodnum) { int nreduce=0; #pragma omp parallel default(shared) \ -private(i,tid,Pval,D2,dval,mval,pid) +private(i,tid,Pval,D2,dval,mval,pid,weight) { #pragma omp for reduction(+:nreduce) for (i=0;iGetPhase(k)-cmphase[1](k,0); dval=(dist[tid].Transpose()*invdisp[1]*dist[tid])(0,0); pfofbg[pid]=1; - for (int j=2;j<=numgroupsbg;j++) if (mcore[j]>0 && corelevel[j]>=iloop){ - for (int k=0;k<6;k++) dist[tid](k,0)=Pval->GetPhase(k)-cmphase[j](k,0); - D2=(dist[tid].Transpose()*invdisp[j]*dist[tid])(0,0); - if (dval*dispfac[pfofbg[pid]]>D2*dispfac[j]) {dval=D2;mval=mcore[j];pfofbg[pid]=j;} + for (int j=2;j<=numgroupsbg;j++) { + if (mcore[j]>0 && corelevel[j]>=iloop){ + weight = 1.0/sqrt(mcore[j]/mval); + for (int k=0;k<6;k++) dist[tid](k,0)=Pval->GetPhase(k)-cmphase[j](k,0); + D2=(dist[tid].Transpose()*invdisp[j]*dist[tid])(0,0) * weight; + if (dval*dispfac[pfofbg[pid]]>D2*dispfac[j]) { + dval=D2; + mval=mcore[j]; + pfofbg[pid]=j; + } + } } //if particle assigned to a core remove from search Pval->SetType(-1); @@ 
-1954,10 +2015,17 @@ private(i,tid,Pval,D2,dval,mval,pid) for (int k=0;k<6;k++) dist[tid](k,0)=Pval->GetPhase(k)-cmphase[1](k,0); dval=(dist[tid].Transpose()*invdisp[1]*dist[tid])(0,0); pfofbg[pid]=1; - for (int j=2;j<=numgroupsbg;j++) if (mcore[j]>0 && corelevel[j]>=iloop){ - for (int k=0;k<6;k++) dist[tid](k,0)=Pval->GetPhase(k)-cmphase[j](k,0); - D2=(dist[tid].Transpose()*invdisp[j]*dist[tid])(0,0); - if (dval*dispfac[pfofbg[pid]]>D2*dispfac[j]) {dval=D2;mval=mcore[j];pfofbg[pid]=j;} + for (int j=2;j<=numgroupsbg;j++) { + if (mcore[j]>0 && corelevel[j]>=iloop){ + weight = 1.0/sqrt(mcore[j]/mval); + for (int k=0;k<6;k++) dist[tid](k,0)=Pval->GetPhase(k)-cmphase[j](k,0); + D2=(dist[tid].Transpose()*invdisp[j]*dist[tid])(0,0) * weight; + if (dval*dispfac[pfofbg[pid]]>D2*dispfac[j]) { + dval=D2; + mval=mcore[j]; + pfofbg[pid]=j; + } + } } Pval->SetType(-1); nactivepart--; @@ -2093,8 +2161,8 @@ private(i,tid,Pval,x1,D2,dval,mval,pid,pidcore) delete tcore; delete[] Pcore; for (i=0;i numingroup, noffset, taggedsubs; struct mergeinfo { + Int_t originalpfofval; Int_t pfofval; Int_t numingroup; int type; int nummerged; bool ismerged; + int mergeindex; vector mergedlist; mergeinfo(){ - nummerged=0; - ismerged=false; + nummerged = 0; + ismerged = false; + mergeindex = -1; }; - //Int_t numingroup; }; vector subs; vector minfo; KDTree *tree; - //vector phasetensorsubs(numsubs,GMatrix(6,6)), phasetensorcores(numcores,GMatrix(6,6)); - vector sigXsubs(numgroups), sigVsubs(numgroups), sigXcores(numgroups), sigVcores(numgroups); + //vector phasetensorsubs(numgroups+1,GMatrix(6,6)); + vector sigXsubs(numgroups+1), sigVsubs(numgroups+1); + Double_t searchdist; struct indexfof { Int_t fofval; Int_t index; }; vector indexing; -//cout<numsubs)); - minfo[pfofval-1].pfofval=pfofval; - minfo[pfofval-1].type = x.GetType(); - minfo[pfofval-1].numingroup=numingroup[pfofval]; - pfofval++; - for (auto k=0;k<6;k++) x.SetPhase(k,x.GetPhase(k)/x.GetMass()); + + //set sub properties. 
+ for (auto i=0;inumsubs)); + subs[i].SetPID(i); + subs[i].SetID(i); + minfo[i].originalpfofval = i; + minfo[i].pfofval = i; + minfo[i].type = subs[i].GetType(); + minfo[i].numingroup = numingroup[i]; + for (auto k=0;k<6;k++) subs[i].SetPhase(k,subs[i].GetPhase(k)/subs[i].GetMass()); } - //sort indices by fof value + + //sort indices by original fof value sort(indexing.begin(), indexing.end(), [](indexfof &a, indexfof &b){ return a.fofval < b.fofval; }); + //get the dispersions for (auto i=0;iTPHYS,tree->KEPAN,100,0,0,0); - //tree = new KDTree(subs.data(),numlargesubs,1,tree->TPHYS,tree->KEPAN,100,0,0,0); - //check all cores to see if they overlap significantly with substructures + tree = new KDTree(subs.data(),subs.size(),1,tree->TPHYS,tree->KEPAN,100,0,0,0); + + //first check all cores to see if they overlap significantly with dynamically distince + //substructures. Since cores are after subs in id value, this removes a core + //and adds particles to a substructure for (auto i=0;iSearchBallPosTagged(i, sigXsubs[index1]*fdist2); -//cout<SearchBallPosTagged(i, searchdist); if (taggedsubs.size()<=1) continue; //if objects are within search window of core, get min phase distance imerge=-1; mindist2=MAXVALUE; - for (auto j=0;j=2) cout< b.numingroup); + else if (a.type==b.type) { + if (a.numingroup > b.numingroup) return true; + else if (a.numingroup < b.numingroup) return false; + else { + return (a.originalpfofval < b.originalpfofval); + } + } else return false; }); + //store old to new pfof values + map oldtonewindex; + for (auto i=0;i= 0) newnumgroups++; + minfo[i].pfofval = newnumgroups; if (minfo[i].type == 1) newnumcores++; + } + //update the values to new pfof values + for (auto i=0;i 0) { + for (auto &mergedgroup:minfo[i].mergedlist) mergedgroup = oldtonewindex[mergedgroup]; + } + } + + //now update the pfof array as necessary + for (auto i=0;i0) { + pfofval = minfo[i].pfofval; + for (auto &mergedgroup:minfo[i].mergedlist) { + index1 = 
minfo[mergedgroup].originalpfofval; + for (auto j=noffset[index1];jnewnumgroups) cout<<"WTF incorrect pfof value "<=nsubset*opt.minfracsubsizeforremoval && numinlargest>=nsubset*opt.minfracsubsizeforremoval) { + if (opt.iverbose>=2) cout<0) pfof[i]--; + } +} + +///adjust to phase centre +inline void AdjustSubPartToPhaseCM(Int_t num, Particle *subPart, GMatrix &cmphase) { int nthreads = 1; #ifdef USEOPENMP - cout << "OPTIMISATION-01: BEGIN PRINT" << endl; - cout << "OPTIMISATION-01: num[" << num << "]" << endl; - cout << "OPTIMISATION-01: omp_get_max_threads[" << omp_get_max_threads() << "]" << endl; - cout << "OPTIMISATION-01: ompsearchnum[" << ompsearchnum << "]" << endl; - nthreads = max(1, (int)(num/(float)ompsearchnum)); - cout << "OPTIMISATION-01: nthreads.1[" << nthreads << "]" << endl; - nthreads = min(nthreads,omp_get_max_threads()); - cout << "OPTIMISATION-01: nthreads.2[" << nthreads << "]" << endl; - cout << "OPTIMISATION-01: END PRINT" << endl; #pragma omp parallel for \ default(shared) \ -num_threads(nthreads) +num_threads(nthreads) if (num > ompperiodnum) #endif for (auto j=0;j=MINSUBSIZE&&opt.foftype!=FOF6DCORE) { + #ifndef USEMPI + int ThisTask = 0; + #endif + KDTree *tree; + Int_t ngrid; + GridCell *grid; + Coordinate *gvel; + Matrix *gveldisp; + + if (subnumingroup>=MINSUBSIZE&&opt.foftype!=FOF6DCORE) { //now if object is large enough for phase-space decomposition and search, compare local field to bg field - opt.Ncell=opt.Ncellfac*subnumingroup[i]; + opt.Ncell=opt.Ncellfac*subnumingroup; //if ncell is such that uncertainty would be greater than 0.5% based on Poisson noise, increase ncell till above unless cell would contain >25% - while (opt.Ncellopt.Ncell) opt.Ncell*=2; - tree=InitializeTreeGrid(opt,subnumingroup[i],subPart); + while (opt.Ncellopt.Ncell) opt.Ncell*=2; + tree=InitializeTreeGrid(opt,subnumingroup,subPart); ngrid=tree->GetNumLeafNodes(); - if (opt.iverbose) cout<opt.HaloVelDispScale) opt.HaloVelDispScale=opt.HaloSigmaV; #ifdef 
HALOONLYDEN - GetVelocityDensity(opt,subnumingroup[i],subPart); + GetVelocityDensity(opt,subnumingroup,subPart); #endif - GetDenVRatio(opt,subnumingroup[i],subPart,ngrid,grid,gvel,gveldisp); - GetOutliersValues(opt,subnumingroup[i],subPart,sublevel); + GetDenVRatio(opt,subnumingroup, subPart, ngrid, grid, gvel, gveldisp); + GetOutliersValues(opt,subnumingroup, subPart, sublevel); opt.idenvflag++;//largest field halo used to deteremine statistics of ratio } //otherwise only need to calculate a velocity scale for merger separation else { Matrix eigvec(0.),I(0.); Double_t sigma2x,sigma2y,sigma2z; - CalcVelSigmaTensor(subnumingroup[i], subPart, sigma2x, sigma2y, sigma2z, eigvec, I); + CalcVelSigmaTensor(subnumingroup, subPart, sigma2x, sigma2y, sigma2z, eigvec, I); opt.HaloLocalSigmaV=opt.HaloSigmaV=pow(sigma2x*sigma2y*sigma2z,1.0/3.0); } } -// ENCAPSULATED: ENCAPSULATION-03 -void SearchSubStruct(Int_t *numcores, Int_t i, Options &opt, Int_t ng, bool iunbindflag, Int_t *subnumingroup, - Particle *subPart, Int_t *subngroup, Int_t *subpfof, Int_t **subsubnumingroup, Int_t ***subsubpglist, - Int_t *&pfof, Int_t **subpglist, Int_t &ngroup, Int_t ngroupidoffset) { +inline void CleanAndUpdateGroupsFromSubSearch(Options &opt, + Int_t &subnumingroup, Particle *subPart, Int_t *&subpfof, + Int_t &subngroup, Int_t *&subsubnumingroup, + Int_t **&subsubpglist, Int_t &numcores, + Int_t *&subpglist, + Int_t *&pfof, Int_t &ngroup, Int_t &ngroupidoffset) +{ + bool iunbindflag; + Int_t ng=subngroup; Int_t *coreflag; - ng=subngroup[i]; - subsubnumingroup[i]=BuildNumInGroup(subnumingroup[i], subngroup[i], subpfof); - subsubpglist[i]=BuildPGList(subnumingroup[i], subngroup[i], subsubnumingroup[i], subpfof); - if (opt.uinfo.unbindflag&&subngroup[i]>0) { - //if also keeping track of cores then must allocate coreflag - if (numcores[i]>0 && opt.iHaloCoreSearch>=1) { - coreflag=new Int_t[ng+1]; - for (int icore=1;icore<=ng;icore++) coreflag[icore]=1+(icore>ng-numcores[i]); - } - else 
{coreflag=NULL;} - iunbindflag=CheckUnboundGroups(opt,subnumingroup[i],subPart,subngroup[i],subpfof,subsubnumingroup[i],subsubpglist[i],1, coreflag); - if (iunbindflag) { - for (int j=1;j<=ng;j++) delete[] subsubpglist[i][j]; - delete[] subsubnumingroup[i]; - delete[] subsubpglist[i]; - if (subngroup[i]>0) { - subsubnumingroup[i]=BuildNumInGroup(subnumingroup[i], subngroup[i], subpfof); - subsubpglist[i]=BuildPGList(subnumingroup[i], subngroup[i], subsubnumingroup[i], subpfof); - } - //if need to update number of cores, - if (numcores[i]>0 && opt.iHaloCoreSearch>=1) { - numcores[i]=0; - for (int icore=1;icore<=subngroup[i];icore++)numcores[i]+=(coreflag[icore]==2); - delete[] coreflag; - } - }} - for (Int_t j=0;j0) pfof[subpglist[i][j]]=ngroup+ngroupidoffset+subpfof[j]; + if (subngroup == 0) return; + + subsubnumingroup = BuildNumInGroup(subnumingroup, subngroup, subpfof); + subsubpglist = BuildPGList(subnumingroup, subngroup, subsubnumingroup, subpfof); + if (opt.uinfo.unbindflag&&subngroup>0) { + //if also keeping track of cores then must allocate coreflag + if (numcores>0 && opt.iHaloCoreSearch>=1) { + coreflag=new Int_t[subngroup+1]; + for (auto icore=1;icore<=subngroup;icore++) coreflag[icore]=1+(icore>subngroup-numcores); + } + else { + coreflag=NULL; + } + iunbindflag = CheckUnboundGroups(opt, subnumingroup, subPart, + subngroup, subpfof, subsubnumingroup, subsubpglist, 1, coreflag); + if (iunbindflag) { + for (auto j=1;j<=ng;j++) delete[] subsubpglist[j]; + delete[] subsubnumingroup; + delete[] subsubpglist; + if (subngroup>0) { + subsubnumingroup = BuildNumInGroup(subnumingroup, subngroup, subpfof); + subsubpglist = BuildPGList(subnumingroup, subngroup, subsubnumingroup, subpfof); + } + //if need to update number of cores, + if (numcores>0 && opt.iHaloCoreSearch>=1) { + numcores=0; + for (auto icore=1;icore<=subngroup;icore++) numcores += (coreflag[icore]==2); + delete[] coreflag; + } + } + } + + for (auto j=0;j0) 
pfof[subpglist[j]]=ngroup+ngroupidoffset+subpfof[j]; + } + + //ngroupidoffset+=subngroup; //now alter subsubpglist so that index pointed is global subset index as global subset is used to get the particles to be searched for subsubstructure - for (Int_t j=1;j<=subngroup[i];j++) for (Int_t k=0;k &ngroupidoffset_old, vector &ngroupidoffset_new) +{ + //now adjust the group ids to the new offsets. + ngroupidoffset += ns; + for (auto i=2;i<=activenumgroups;i++) + ngroupidoffset_new[i] = ngroupidoffset_new[i-1]+subngroup[i-1]; + +#ifdef USEOPENMP + #pragma omp parallel for \ + default(shared) schedule(static) if (activenumgroups > 2) +#endif + for (auto i=1;i<=activenumgroups;i++) { + if (subngroup[i]==0) continue; + for (auto j=0;j &Partsubset, Int_t *&pfof, Int_t &ngroup, Int_t &nhalos, PropData *pdata) { @@ -2593,16 +2801,14 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse Int_t **subsubnumingroup, ***subsubpglist; Int_t *numcores,*coreflag; Int_t *subpfofold; - Coordinate *gvel; - Matrix *gveldisp; - KDTree *tree; - GridCell *grid; - Coordinate cm,cmvel; + vector ngroupidoffset_old, ngroupidoffset_new; + vector ompactivesubgroups; //variables to keep track of structure level, pfof values (ie group ids) and their parent structure //use to point to current level StrucLevelData *pcsld; //use to store total number in sublevel; Int_t ns; + int minsizeforsubsearch = opt.MinSize*2; #ifndef USEMPI int ThisTask=0,NProcs=1; #endif @@ -2631,7 +2837,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse pcsld=psldata->nextlevel; nsubsearch=ngroup-opt.num3dfof; } - for (Int_t i=firstgroup;i<=ngroup;i++) if (numingroup[i]0); if (iflag) { @@ -2651,8 +2857,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse delete[] pglist; delete[] numingroup; //now start searching while there are still sublevels to be searched - - while (iflag) { + while (iflag) { if (opt.iverbose) cout< &Partsubse numcores=new 
Int_t[nsubsearch+1]; subpfofold=new Int_t[nsubsearch+1]; ns=0; - // START: ENCAPSULATE - //here loop over all sublevel groups that need to be searched for substructure - // ADACS: this loop proceses halos (that are independent) sequentially. This is unecessary. - // #pragma omp parallel for... - // parallelise loop, collection, and/or calculations per collection - // TODO: give a collection a pool of threads thread_pool + + ngroupidoffset_old.resize(oldnsubsearch+1); + ngroupidoffset_new.resize(oldnsubsearch+1); + ngroupidoffset_new[1] = ngroupidoffset; + ngroupidoffset_old[1] = ngroupidoffset; + for (auto i=2;i<=oldnsubsearch;i++) ngroupidoffset_old[i] = ngroupidoffset_old[i-1]+ceil(subnumingroup[i-1]/opt.MinSize)+1; +#ifdef USEOPENMP + //vector that will store the subgroups that are small + //enough to be searched fully in parallel. + ompactivesubgroups.resize(0); +#endif + for (Int_t i=1;i<=oldnsubsearch;i++) { + // try running loop over largest objects in serial with parallel inside calls + // so skip of group is small enough and running with openmp +#ifdef USEOPENMP + if (subnumingroup[i] < ompsplitsubsearchnum) { + ompactivesubgroups.push_back(i); + continue; + } +#endif subpfofold[i]=pfof[subpglist[i][0]]; subPart=new Particle[subnumingroup[i]]; for (Int_t j=0;j0 change the pfof ids of these particles in question and see if there are any substrucures that can be searched again. - //the group ids must be stored along with the number of groups in this substructure that will be searched at next level. 
- //now check if self bound and if not, id doesn't change from original subhalo,ie: subpfof[j]=0 - // SearchSubStruct(subngroup, i, ng, subsubnumingroup, subnumingroup, subpfof, subsubpglist, opt, - // numcores, coreflag, iunbindflag, subPart, pfof, subpglist, ngroup, ngroupidoffset); - if (subngroup[i]) { - SearchSubStruct(numcores, i, opt, ng, iunbindflag, subnumingroup, - subPart, subngroup, subpfof, subsubnumingroup, subsubpglist, - pfof, subpglist, ngroup, ngroupidoffset); - - ngroupidoffset+=subngroup[i]; - } - - delete[] subpfof; // freeze - delete[] subPart; // freeze - //increase tot num of objects at sublevel - //ADACS: this would need a reduction at the end. - ns+=subngroup[i]; // reduction - } - // END: ENCAPSULATE + // TODO: Optimise bottleneck + double time_temp; + time_temp = MyGetTime(); + PreCalcSearchSubSet(opt, subnumingroup[i], subPart, sublevel); + cout<<"DURATION[PreCalcSearchSubSet]: "<0) { + Int_t oldns = ns; + ns = 0; + Options opt2; + #pragma omp parallel for \ + default(shared) private(subPart, subpfof, opt2) schedule(dynamic) \ + reduction(+:ns) + for (auto iomp=0;iomp &Partsubse Particle *Pparentheadval; //here adjust head particle of parent structure if necessary. 
Search for first instance where //the pfof value of the particles originally associated with the parent structure have a value - //less than the expected values for substructures while (pfof[subpglist[i][ii]]>ngroup+ngroupidoffset-ns && iingroup+ngroupidoffset-ns) { @@ -2811,13 +3050,14 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse } if (opt.iverbose) cout<MINSUBSIZE) + for (Int_t j=1;j<=subngroup[i];j++) if (subsubnumingroup[i][j]>=minsizeforsubsearch) nsubsearch++; if (nsubsearch>0) { subnumingroup=new Int_t[nsubsearch+1]; @@ -2825,7 +3065,7 @@ void SearchSubSub(Options &opt, const Int_t nsubset, vector &Partsubse nsubsearch=1; for (Int_t i=1;i<=oldnsubsearch;i++) { for (Int_t j=1;j<=subngroup[i];j++) - if (subsubnumingroup[i][j]>MINSUBSIZE) { + if (subsubnumingroup[i][j]>=minsizeforsubsearch) { subnumingroup[nsubsearch]=subsubnumingroup[i][j]; subpglist[nsubsearch]=new Int_t[subnumingroup[nsubsearch]]; for (Int_t k=0;k remap; Int_t newng=0, oldpid, newpid; remap[0]=0; - for (i=1;i<=ng;i++) { + for (auto i=1;i<=ng;i++) { if (ningall[i]>0) { newng++; remap[i]=newng; diff --git a/src/stf-fitting.h b/src/stf-fitting.h index 4da08caf..8a59e38d 100644 --- a/src/stf-fitting.h +++ b/src/stf-fitting.h @@ -5,6 +5,11 @@ #ifndef STFFITTING_H #define STFFITTING_H +///Math code +#include +using namespace Math; + + ///\name Functions for Skew Gaussian distribution ///Here param[0] is amplitude, param[1] is mean, param[2] is variance and param[3] is s^2, the skew parameter //@{ @@ -46,6 +51,122 @@ inline Double_t DiffSkewGaussSkew(Double_t x, void *param){ if (x<=params[1]) return params[0]*exp(-0.5*dx2*ivar*is2)*dx2*ivar*is2*is2*0.5; else return 0.0; } + +inline int SkewGaussGSL(const gsl_vector *param, void *data, gsl_vector *f){ + gsl_fitting_data *fitdata = (gsl_fitting_data *)data; + vector params(fitdata->nallparams); + for (int i=0;inallparams;i++) params[i]=fitdata->allparams[i]; + for (int i=0;inpar;i++) params[fitdata->iparindex[i]] = 
gsl_vector_get(param, i); + double x, y; + for (int i=0; in; i++) { + x = fitdata ->x[i]; + if (x<=params[1]) y = params[0]*exp(-0.5*(x-params[1])*(x-params[1])/(params[2]*params[3])); + else y = params[0]*exp(-0.5*(x-params[1])*(x-params[1])/(params[2])); + gsl_vector_set(f,i,y-fitdata->y[i]); + } + return GSL_SUCCESS; +} + +inline int DiffSkewGaussAmpGSL(const gsl_vector *param, void *data, gsl_vector *f){ + gsl_fitting_data *fitdata = (gsl_fitting_data *)data; + vector params(fitdata->nallparams); + for (int i=0;inallparams;i++) params[i]=fitdata->allparams[i]; + for (int i=0;inpar;i++) params[fitdata->iparindex[i]] = gsl_vector_get(param, i); + double ivar=1.0/params[2]; + double is2=1.0/params[3]; + double x, y, dx2; + for (int i=0; in; i++) { + x = fitdata->x[i]; + dx2=(x-params[1])*(x-params[1]); + if (x<=params[1]) y = exp(-0.5*dx2*ivar*is2); + else y= exp(-0.5*dx2*ivar); + gsl_vector_set(f,i,y); + } + return GSL_SUCCESS; +} + +inline int DiffSkewGaussMeanGSL(const gsl_vector *param, void *data, gsl_vector *f){ + gsl_fitting_data *fitdata = (gsl_fitting_data *)data; + vector params(fitdata->nallparams); + for (int i=0;inallparams;i++) params[i]=fitdata->allparams[i]; + for (int i=0;inpar;i++) params[fitdata->iparindex[i]] = gsl_vector_get(param, i); + double ivar=1.0/params[2]; + double is2=1.0/params[3]; + double x, y, dx2; + for (int i=0; in; i++) { + x = fitdata->x[i]; + dx2=(x-params[1])*(x-params[1]); + if (x params(fitdata->nallparams); + for (int i=0;inallparams;i++) params[i]=fitdata->allparams[i]; + for (int i=0;inpar;i++) params[fitdata->iparindex[i]] = gsl_vector_get(param, i); + double ivar=1.0/params[2]; + double is2=1.0/params[3]; + double x, y, dx2; + for (int i=0; in; i++) { + x = fitdata->x[i]; + dx2=(x-params[1])*(x-params[1]); + if (x<=params[1]) y = params[0]*exp(-0.5*dx2*ivar*is2)*dx2*ivar*ivar*is2*0.5; + else y = params[0]*exp(-0.5*dx2*ivar)*dx2*ivar*ivar*0.5; + gsl_vector_set(f,i,y); + } + return GSL_SUCCESS; +} + +inline int 
DiffSkewGaussSkewGSL(const gsl_vector *param, void *data, gsl_vector *f){ + gsl_fitting_data *fitdata = (gsl_fitting_data *)data; + vector params(fitdata->nallparams); + for (int i=0;inallparams;i++) params[i]=fitdata->allparams[i]; + for (int i=0;inpar;i++) params[fitdata->iparindex[i]] = gsl_vector_get(param, i); + double ivar=1.0/params[2]; + double is2=1.0/params[3]; + double x, y, dx2; + for (int i=0; in; i++) { + x = fitdata->x[i]; + dx2=(x-params[1])*(x-params[1]); + if (x<=params[1]) y = params[0]*exp(-0.5*dx2*ivar*is2)*dx2*ivar*is2*is2*0.5; + else y = 0.0; + gsl_vector_set(f,i,y); + } + return GSL_SUCCESS; +} + +inline int DiffSkewGaussGSL(const gsl_vector *param, void *data, gsl_matrix *J){ + gsl_fitting_data *fitdata = (gsl_fitting_data *)data; + gsl_vector * f = gsl_vector_alloc(fitdata->n); + for (int iparam = 0; iparam < fitdata->npar; iparam++) { + int activeparam = fitdata->iparindex[iparam]; + switch(activeparam) + { + case 0: + DiffSkewGaussAmpGSL(param, data, f); + break; + case 1: + DiffSkewGaussMeanGSL(param, data, f); + break; + case 2: + DiffSkewGaussVarGSL(param, data, f); + break; + case 3: + DiffSkewGaussSkewGSL(param, data, f); + break; + } + for (int i=0;in;i++) { + gsl_matrix_set (J, i, iparam, gsl_vector_get(f,i)); + } + } + gsl_vector_free(f); + return GSL_SUCCESS; +} //@} ///\name Functions for Gaussian distribution diff --git a/src/stf.h b/src/stf.h index 964d7ca0..5d8165d8 100644 --- a/src/stf.h +++ b/src/stf.h @@ -51,11 +51,12 @@ outputing files containing bulk properties and particles IDs belonging to (sub)s It is primarily designed to use the VELOCIraptor/STF algorithm (which generates an outlier subset) but can also use the 3DFOF and 6DFOF algorithm (effectively finds regions of high physical or phase-space density).\n The program can be altered to read different formats by altering \ref proto.h, \ref io.cxx, specifically \ref ReadData -and providing the appropriate io.cxx file like \ref gadgetio.cxx. 
The overall flow of the program is outlined in \ref main.cxx -and a brief description of the searches available is in \ref searching +and providing the appropriate io.cxx file like \ref gadgetio.cxx. The overall flow of the program is outlined in \ref main.cxx. A full account of the numerical algorithms employed by the code is given -in the code paper, Elahi et al. (2011) +in the code papers, +Elahi et al., (2011), +Elahi et al., (2019) and detailed instructions for usage of the code are given in the included code documentation. For a discussion of tidal debris in cosmological simulations and a comparison of the few codes capable in principle of identifying physically diffuse tidal debris see Elahi et al. (2013). @@ -63,272 +64,6 @@ of identifying physically diffuse tidal debris see TreeFrog). -- \b OrbWeaver - A code designed to produce halo merger trees or cross correlate two different halo (structure) catalogues (\ref OrbWeaver). - -\section prelim Getting started - - Getting started is as simple as copying Makefile.config.template to Makefile.config, editing the Makefile.config file (see \ref STF-makeflags) - typing make and then running the code (see \ref howtorun). - -\section install Compilation - -VELOCIraptor/STF needs the following non-standard libraries for compilation: - -- \b GSL - the GNU scientific library. This open-source package can be - obtained at http://www.gnu.org/software/gsl. VELOCIraptor - needs this library for a few special function calls - and for random number generation. - -- \b libNBody - a scientific library included with VELOCIraptor (\ref libNBody). - VELOCIraptor needs this library for a number of structures, classes, and methods it provides. - -VELOCIraptor for parallel use may need the following non-standard libraries for compilation -depending on the compilation flags used: - -- \b MPI - the Message Passing Interface (version 1.0 or higher). 
Many - vendor supplied versions exist, in addition to excellent open source - implementations, e.g. MPICH - (http://www-unix.mcs.anl.gov/mpi/mpich/) or LAM - (http://www.lam-mpi.org/). - -- \b OpenMP - the OpenMP API generally included with many compilers - -- \b CUDA - NOT IMPLEMENTED YET. - -One could in principle alter the code to use high precision libraries - -- \b QD - for higher precision (like double-double and quad-quad). - (http://crd.lbl.gov/~dhbailey/mpdist/) - -- \b ARPREC - also for arbitrarily high precision. - (http://crd.lbl.gov/~dhbailey/mpdist/) - -VELOCIraptor also can output in a variety of formats ASCII, binary, HDF and ADIOS. -HDF and ADIOS can be enabled and disabled, and require libaries. - -- Hiearchical Data Format (HDF) - self describing data format. -(https://www.hdfgroup.org/) - -- Adaptable IO System (ADIOS) - self describing data format. -(https://www.olcf.ornl.gov/center-projects/adios/) - -VELOCIraptor also can read a variety of particle inputs. Most are specific binary formats -but some require extra libraries. HDF inputs require the HDF library and Nchilada requires -some extra libraries. - -Note that if any of the above libraries is not installed in standard -locations on your system, the \ref STF-makeflags "Makefile.config" provided with -the code may need slight adjustments. Similarly, compiler options, -particularly with respect to optimisations, may need adjustment to the -C++-compiler that is used. - -The provided makefile is compatible with GNU-make, i.e. typing \b make or -\b gmake should then build the executable STF along with associated analysis tools -and specific purpose libraries included with VELOCIraptor. If your site -does not have GNU-make, get it. 
- -To compile the code simply - cp Makefile.config.template Makefile.config -Edit Makefile.config with you favourite editor and type - make -A list of compile time options is listed in \ref STF-makeflags - -\section howtorun Running the code - -A typical command to start the code looks like: \n \n - - ./stf < args > \n \n - -with OpenMP, setting the environment variable OMP_NUM_THREADS=8 will then run -the code with 8 threads in the openmp sections. - -with MPI: \n \n - - mpirun -np 8 ./stf < args > \n \n - -This would start the code using 8 processors, assuming that the parallel -environment uses the mpirun command to start MPI -applications. Depending on the operating system, other commands may be -required for this task, e.g. poe on IBM/AIX machines. Note that -the code can in principle be started using an arbitrary number of -processors, but the communication algorithms will be most efficient for -powers of 2. - -Note that at the moment, mpirun assumes that a single structure can fit onto the shared -memory local to the mpi thread. If larger haloes are to be analyzed, it is suggested that -the iSingleHalo option be set to 1, and the analysis is done on a shared memory machine -with enough memory. A more complete version capable of handling large structures across -mpi domains that are then searched for substructures is in the works. - -\section param Parameters -The code has several parameters that can be adjusted through a configuration file. The following commands -are accepted (more info can be found in \ref Options struct and \ref ui.cxx for user interface or the sample configuration file -in the examples directory). 
\n \n - \subsection configfile Preferred interface: Use a configuration file - for details of configuration options see \ref configopt - \arg \b \e -i < input file > \ref Options.fname \n - \arg \b \e -s < number of files per snapshot for gadget input, 0 for tipsy [default] > \ref Options.num_files \n - \arg \b \e -Z < number of files to read in parallel (when mpi is invoked) > \ref Options.nsnapread \n - \arg \b \e -o < output base name (this can be overwritten by a configuration option in the config file. Suggestion would be to not use this option in the config file, use explicit command> \ref Options.outname \n - \arg \b \e -C < Config file name (see \ref configopt for discussion of what is contained in this ascii parameter file) > \ref Options.pname \n - \n - -\section searching Altering the search for substructures - -The algorithm searches for substructures in a specific fashion but if the user wishes to modify the search in some way, -this can be done by altering \ref search.cxx (along with \ref proto.h, \ref allvars.h and \ref fofalgo.h as necessary). -However, before altering the search it is useful to understand what searches are available. - -First, note that implemented in the code are a variety of FOF criteria. Second, also implemented is an interative search. -Given the added complexity of the iterative search, an aside is necessary. First, the iterative search finds candidate objects -using the criteria passed, then relaxes the criteria, thus if an iterative search is used, -one should use it with more restrictive fof criteria than one would normally do. The iterative search -can correct for a bias present in large subhaloes AND also find extended portions (typically unbound portions) -of a substructure. As candidate tidal debris substructure can be split into several "groups" using the smaller -search window and have links when using the more relaxed criteria. If enough connections are present between -candidate substructures they are merged. 
The iterative parameters indicating how much the initial parameters -are increased by are listed in \ref Options - -\section unbinding Unbinding when searching for tidal debris -As the algorithm is designed to identify dynamically distinct but not necessarily self-bound structures residing in a background -(that is roughly in equilibrium), the code allows the user to specific the ratio between kinetic and potential energy of particles -AND whether particles are removed from the background potential when unbinding. Typically codes of this nature, i.e. (sub)halo finders, -REQUIRE and unbinding step to function properly. This is not the case here. Tests show that requiring the ratio -|Epot|/Ekin >0.2 does not throw up too many suprious objects. One can be even more relaxed (and use slightly cpu resources) if -one does not require particles that do not meet the energy criterion to be ignored when estimating the potential. The idea would be for -extended tidal debris with a very small loosely unbound core, the sea of tidal debris particles contributes negligibly to the potential -and so updating this is not cost effective. - -But for the default user interested in only completely self-bound objects, a kinetic ratio of 1.0 and ignore the bacground potential are -the options to use. - -\section outputs Outputs - - \subsection prop Structure properties - Contains a variety of properties calculate for each structure identified and also contains - some information regarding the relationship of this object to others (if object is a substructure, - what is its hostHaloID). A list out possible outputs can be found in \ref allvars.h, specifically \ref PropData - This output can be in ASCII, binary, HDF, and ADIOS - - \subsection cat Catalog_ files (.catalog_groups, .catalog_particles, .catalog_parttypes) - Contains particle information to extract read particles from an input file or for tracking (ie: producing halo merger trees). 
- The catalog_groups contains the sizes of groups and the offsets to read the associated .catalog_particles (.catalog_particles.unbound) - .catalog_parttypes (.catalog_parttypes.unbound) which just listed the IDS and Types of particles belonging to groups. - For examples of how to read this information, see the python tools included. When combined with raw particle data can - be used for extra processing (such as calculating properties/profiles not calculated by default by VELOCIraptor). - These files are also necessary if one wishes to construct "halo merger trees" or cross match haloes between catalogues. - This output can be in ASCII, binary, HDF, and ADIOS - - \subsection hierarchy Field Structure / Substructure relationships - Contains the substructure hierarchy information, such as the hostID (which is -1 if it is a field structure) - an objects ID, number of direct substructures. - This output can be in ASCII, binary, HDF, and ADIOS - - \subsection foflists Structure ID lists - Optional, contains a simple list which is particle id ordered that simply has the (sub)halo of a paritcle - (and is zero if particle doesn't belong to a list.) These outputs are outname.fof.grp. Note that the fof.grp - format is collected from all MPI threads and is only ASCII output. - - \subsection mergertrees Merger trees produced by TreeFrog - The TreeFrog code located within the analysis directory. It is a particle correlator that can build a halo merger tree linking across - multiple snapshots to identify optimal progenitors. - - \subsection baryonic_analysis Analysing baryons - Code to analyse baryonic component of haloes. Obsolete/In need of revision. - - \section modifications Modifications/Searching for other types of Structures - -The code can be modified to search for other types of substructures by altering the definition of an outlier -particle and the linking criteria used. 
For instance, to search a hydrodynamical simulation for dynamically -distinct gas substructures which are also metalicity outliers would require: - \arg altering the \ref NBody::Particle class in \ref Particle.h and \ref Particle.cxx. The particle could be - used to store extra quantities such as temperature, metalicity, ionization fraction, etc. - \arg Then one would need to alter the quantities calculated for the background in \ref bgfield.cxx to - calculate for instance, the mean mass or volume weighted metalicity and the variance - \arg Then one would have to alter \ref localfield.cxx to calculate the local velocity distribution - AND the local metalicity distribution. - \arg One would need to alter \ref localbgcomp.cxx to compare the predicted bg to the actual - local value for each quantity independently. (Here one might assume that the metalicity distribution is lognormal. - \arg Finally, one would need to alter the search criterion used in \ref search.cxx (and \ref fofalgo.h). - -*/ - -/*! \page STF-makeflags Makefile.config - -A number of features of VELOCIraptor are controlled with compile-time options -in the makefile rather than by arguments. The Makefile.config.template file contains -a list of all available compile-time options, with most of them commented out by default. -To activate a certain feature, the corresponding parameter should be commented in, -and given the desired value, where appropriate. Below, a brief guide to these options is -included. Copy Makefile.config.template to Makefile.config and edit as necessary. - -Important Note: Whenever one of the compile-time options -described below is modified, a full recompilation of the code may be -necessary. To guarantee that this is done when a simple make is -specified, all source files have been specified in the Makefile as being -dependent on the Makefile and Makefile.config files. Alternatively, one can also issue the -command make clean, which will erase all object files, followed -by make. 
- -\n -\section secmake1 Basic operation mode of code -- \b STRUCDEN \n Set this if you want to calculate the velocity density function used to find (sub)structures \e ONLY for particles resident in structure -- \b HALOONLYDEN \n Set this if you want to calculate the velocity density function used to find (sub)structures \e ONLY for particles resident in structure \em USING -\em ONLY \em PARTICLES in the parent structure. \b STRUCDEN is overridden by \b HALOONLYDEN -(technically these are incompatible with each other as HALOONLYDEN is faster but is biased and -the number of particles for which the local distribution function density is calculated for is even more incomplete). -- \b ZOOMSIM \n Set this if code to naturally account for a zoom simulation with high resolution particles and low resolution particles. - -\n -\section secmake2 Computational flags -- \b SINGLEPARTICLEPRECISION \n Set in \ref NBody::Particle contained in file \ref Particle.h reduces memory allocation and sets phase-space position of particles to floats instead of doubles -- \b LONGIDS \n If this is set, the code assumes that particle-IDs are stored as 64-bit long integers and general ints are also now 64 long ints. This is only really needed if you want - to go beyond ~2 billion particles. -- \b SINGLEPRECISION, \n Code is compiled with floats instead of doubles (normal) -- \b LARGEKDTREE, \n Code is compiled such that a single KD Tree can handle > MAXINT 2 billion particles, increasing the memory footprint. Typically -never used as would use mpi with each mpi process having fewer than MAXINT particles to deal with and build kd trees for. - -\n -\section secmake3 Particle Properties flags -- \b NOMASS \n Mass no longer stored per particle. Instead all assumed to have the same mass. Set in \ref NBody::Particle contained in file \ref Particle.h reduces memory allocation -- \b MASSVAL \n Sets the return value of the mass of a particle. 
See \ref NBody::Particle, file \ref Particle.h, \ref NBody::Particle.GetMass -- \b USEGAS \n Particles now can also be gas particles and have extra properties. Set in \ref NBody::Particle contained in file \ref Particle.h increases memory allocation. -- \b USESTAR \n Particles now can also be star particles and have extra properties. Set in \ref NBody::Particle contained in file \ref Particle.h increases memory allocation. -- \b USEBH \n Particles now can also be black hole particles and have extra properties (currently none defined though this will change). Set in \ref NBody::Particle contained in file \ref Particle.h increases memory allocation. -- \b USEBARYONS \n Particles now can also be gas/star particles and have extra properties . Set in \ref NBody::Particle contained in file \ref Particle.h increases memory allocation. -- \b USEHYDRO \n Particles now can also be gas/star/bh particles and have extra properties. Set in \ref NBody::Particle contained in file \ref Particle.h increases memory allocation. -- \b USECOSMICRAYS \n Particles now can also be "cosmic ray" particles and have extra properties (currently none defined though this will change). Set in \ref NBody::Particle contained in file \ref Particle.h increases memory allocation. - -\n -\section secmake4 Parallel -Note that for practical reasons, the combination of OpenMP/MPI only works on correctly setup environments where one can explicitly state how many MPI threads to start on a given node. Otherwise, many systems will fill up a node with mpi threads and do so till all asked for mpi threads are active. Consequently, the openmp threads spawned by the MPI threads will compete with the MPI threads on the same node and the other OpenMP threads started by other MPI threads. -- \b USEOPENMP \n Code is compiled with openmp -- \b USEMPI \n Code is compiled with mpi. 
must also set the appropriate compiler -- \b MPIREDUCEMEM \n If this flag is set, then the amount of memory allocated for MPI routines is reduced and one does not have to worry as much about the -MPI factors \ref Options. -- \b LARGEMPIDOMAIN \n If set, number of mpi threads can be > maxshort - -\n -\section secmake5 IO flags when reading gadget or other formats -- \b HDFENABLE \n HDF io enabled. VELOCIraptor has hdf formatted outputs -- \b ADIOSENABLE \n ADIOS io enabled. VELOCIraptor has (alpha) adios formatted outputs -- \b XDRENABLE \n nchilada input uses some XDR routines so this must be enabled and libraries present to read this input. -- \b GLONGID \n Gadget binary input particle file stores 64 long ids -- \b GDPOS \n Gadget binary input particle file stores double precision x,v -- \b GSMASS \n Gadget binary input particle file stores single precision mass -- \b GSHEAD \n Gadget binary input particle file has version 2 headers between each block of data - -\n -\section secmake6 Things for special behaviour -- \b OLDCCOMPILER \n code uses some standard c++ libraries but if compiler is too old, these options might not be available. - +the source-code documentation, documentation can be found +online. 
*/ diff --git a/src/substructureproperties.cxx b/src/substructureproperties.cxx index 0c0d0017..e120c93f 100644 --- a/src/substructureproperties.cxx +++ b/src/substructureproperties.cxx @@ -373,6 +373,7 @@ private(EncMassSF,EncMassNSF,Krot_sf,Krot_nsf,Ekin_sf,Ekin_nsf) Ekin=0.; pdata[i].gJ[0]=pdata[i].gJ[1]=pdata[i].gJ[2]=0.; Coordinate J; + vc = 0; for (j=0;jGetMass()*(vx*vx+vy*vy+vz*vz); +#ifdef GASON + if (Pval->GetType()==GASTYPE) Ekin+=2.0*Pval->GetU()*Pval->GetMass(); +#endif pdata[i].gveldisp(0,0)+=vx*vx*Pval->GetMass(); pdata[i].gveldisp(1,1)+=vy*vy*Pval->GetMass(); pdata[i].gveldisp(2,2)+=vz*vz*Pval->GetMass(); @@ -469,6 +473,9 @@ private(EncMassSF,EncMassNSF,Krot_sf,Krot_nsf,Ekin_sf,Ekin_nsf) vy = (*Pval).Vy()-pdata[i].gcmvel[1]; vz = (*Pval).Vz()-pdata[i].gcmvel[2]; RV_Ekin+=Pval->GetMass()*(vx*vx+vy*vy+vz*vz); +#ifdef GASON + if (Pval->GetType()==GASTYPE) RV_Ekin+=2.0*Pval->GetU()*Pval->GetMass(); +#endif pdata[i].RV_J=pdata[i].RV_J+Coordinate(Pval->GetPosition()).Cross(Coordinate(vx,vy,vz))*Pval->GetMass(); pdata[i].RV_veldisp(0,0)+=vx*vx*Pval->GetMass(); pdata[i].RV_veldisp(1,1)+=vy*vy*Pval->GetMass(); @@ -504,6 +511,13 @@ private(EncMassSF,EncMassNSF,Krot_sf,Krot_nsf,Ekin_sf,Ekin_nsf) #ifdef NOMASS pdata[i].RV_Krot*=opt.MassValue; #endif + +#if defined(EXTRADMON) + for (j=0;jGetType()==GASTYPE) { - x = (*Pval).X();//-pdata[i].cm_gas[0]; - y = (*Pval).Y();//-pdata[i].cm_gas[1]; - z = (*Pval).Z();//-pdata[i].cm_gas[2]; - vx = (*Pval).Vx()-pdata[i].gcmvel[0];//-pdata[i].cmvel_gas[0]; - vy = (*Pval).Vy()-pdata[i].gcmvel[1];//-pdata[i].cmvel_gas[1]; - vz = (*Pval).Vz()-pdata[i].gcmvel[2];//-pdata[i].cmvel_gas[2]; + x = (*Pval).X(); + y = (*Pval).Y(); + z = (*Pval).Z(); + vx = (*Pval).Vx()-pdata[i].gcmvel[0]; + vy = (*Pval).Vy()-pdata[i].gcmvel[1]; + vz = (*Pval).Vz()-pdata[i].gcmvel[2]; mval=Pval->GetMass(); EncMass+=mval; r2=x*x+y*y+z*z; @@ -693,8 +707,13 @@ private(EncMassSF,EncMassNSF,Krot_sf,Krot_nsf,Ekin_sf,Ekin_nsf) if (r2<=opt.lengthtokpc50pow2) 
pdata[i].M_gas_50kpc+=mval; if (r2<=pdata[i].gR500c*pdata[i].gR500c) pdata[i].M_gas_500c+=mval; if (EncMass>0.5*pdata[i].M_gas && pdata[i].Rhalfmass_gas==0) pdata[i].Rhalfmass_gas=rc; - if (Rdist>0) pdata[i].Krot_gas+=mval*(jzval*jzval/(Rdist*Rdist)); - Ekin+=mval*(vx*vx+vy*vy+vz*vz); + double ekin_i, ethermal_i, krot_i; + ekin_i = mval*(vx*vx+vy*vy+vz*vz); + ethermal_i = 2.0*mval*Pval->GetU(); + krot_i = mval*(jzval*jzval/(Rdist*Rdist)); + if (Rdist>0) pdata[i].Krot_gas+=krot_i; + Ekin+=ekin_i; + Ekin+=ethermal_i; if (opt.iextragasoutput) { if (rc<=pdata[i].gR200c_excl) { pdata[i].M_200crit_excl_gas+=mval; @@ -714,8 +733,9 @@ private(EncMassSF,EncMassNSF,Krot_sf,Krot_nsf,Ekin_sf,Ekin_nsf) if (SFR>opt.gas_sfr_threshold){ EncMassSF+=mval; if (EncMassSF>0.5*pdata[i].M_gas_sf && pdata[i].Rhalfmass_gas_sf==0) pdata[i].Rhalfmass_gas_sf=rc; - if (Rdist>0)pdata[i].Krot_gas_sf+=mval*(jzval*jzval/(Rdist*Rdist)); - Ekin_sf+=mval*(vx*vx+vy*vy+vz*vz); + if (Rdist>0)pdata[i].Krot_gas_sf+=krot_i; + Ekin_sf+=ekin_i; + Ekin_sf+=ethermal_i; if (opt.iextragasoutput) { if (rc<=pdata[i].gR200c_excl) { pdata[i].M_200crit_excl_gas_sf+=mval; @@ -734,8 +754,9 @@ private(EncMassSF,EncMassNSF,Krot_sf,Krot_nsf,Ekin_sf,Ekin_nsf) else { EncMassNSF+=mval; if (EncMassNSF>0.5*pdata[i].M_gas_nsf && pdata[i].Rhalfmass_gas_nsf==0) pdata[i].Rhalfmass_gas_nsf=rc; - if (Rdist>0)pdata[i].Krot_gas_nsf+=mval*(jzval*jzval/(Rdist*Rdist)); - Ekin_nsf+=mval*(vx*vx+vy*vy+vz*vz); + if (Rdist>0)pdata[i].Krot_gas_nsf+=krot_i; + Ekin_nsf+=ekin_i; + Ekin_nsf+=ethermal_i; if (opt.iextragasoutput) { if (rc<=pdata[i].gR200c_excl) { pdata[i].M_200crit_excl_gas_nsf+=mval; @@ -754,11 +775,11 @@ private(EncMassSF,EncMassNSF,Krot_sf,Krot_nsf,Ekin_sf,Ekin_nsf) #endif } } - pdata[i].Krot_gas*=0.5/Ekin; + pdata[i].Krot_gas/=Ekin; pdata[i].T_gas=0.5*Ekin; #ifdef STARON - if (pdata[i].M_gas_sf>0) pdata[i].Krot_gas_sf*=0.5/Ekin_sf; - if (pdata[i].M_gas_nsf>0) pdata[i].Krot_gas_nsf*=0.5/Ekin_nsf; + if (pdata[i].M_gas_sf>0) 
pdata[i].Krot_gas_sf/=Ekin_sf; + if (pdata[i].M_gas_nsf>0) pdata[i].Krot_gas_nsf/=Ekin_nsf; #endif } if (pdata[i].n_gas>=PROPMORPHMINNUM) GetGlobalSpatialMorphology(numingroup[i], &Part[noffset[i]], pdata[i].q_gas, pdata[i].s_gas, 1e-2, pdata[i].eigvec_gas,0,GASTYPE,0); @@ -879,12 +900,12 @@ private(EncMassSF,EncMassNSF,Krot_sf,Krot_nsf,Ekin_sf,Ekin_nsf) for (j=0;jGetType()==STARTYPE) { - x = (*Pval).X();//-pdata[i].cm_star[0]; - y = (*Pval).Y();//-pdata[i].cm_star[1]; - z = (*Pval).Z();//-pdata[i].cm_star[2]; - vx = (*Pval).Vx()-pdata[i].gcmvel[0];//-pdata[i].cmvel_star[0]; - vy = (*Pval).Vy()-pdata[i].gcmvel[1];//-pdata[i].cmvel_star[1]; - vz = (*Pval).Vz()-pdata[i].gcmvel[2];//-pdata[i].cmvel_star[2]; + x = (*Pval).X(); + y = (*Pval).Y(); + z = (*Pval).Z(); + vx = (*Pval).Vx()-pdata[i].gcmvel[0]; + vy = (*Pval).Vy()-pdata[i].gcmvel[1]; + vz = (*Pval).Vz()-pdata[i].gcmvel[2]; mval=Pval->GetMass(); EncMass+=mval; r2=x*x+y*y+z*z; @@ -916,8 +937,9 @@ private(EncMassSF,EncMassNSF,Krot_sf,Krot_nsf,Ekin_sf,Ekin_nsf) } } } - pdata[i].Krot_star/=Ekin; - pdata[i].T_star=0.5*Ekin; + Ekin *= 0.5; + pdata[i].Krot_star /= Ekin; + pdata[i].T_star = Ekin; } if (pdata[i].n_star>=PROPMORPHMINNUM) GetGlobalSpatialMorphology(numingroup[i], &Part[noffset[i]], pdata[i].q_star, pdata[i].s_star, 1e-2, pdata[i].eigvec_star,0,STARTYPE,0); #endif @@ -931,6 +953,19 @@ private(EncMassSF,EncMassNSF,Krot_sf,Krot_nsf,Ekin_sf,Ekin_nsf) } } #endif +#ifdef GASON + GetExtraHydroProperties(opt, pdata[i], numingroup[i], &Part[noffset[i]]); +#endif +#ifdef STARON + GetExtraStarProperties(opt, pdata[i], numingroup[i], &Part[noffset[i]]); +#endif +#ifdef BHON + GetExtraBHProperties(opt, pdata[i], numingroup[i], &Part[noffset[i]]); +#endif +#ifdef EXTRADMON + GetExtraDMProperties(opt, pdata[i], numingroup[i], &Part[noffset[i]]); +#endif + #ifdef HIGHRES for (j=0;jGetType()==GASTYPE) Ekin+=mval*Pval->GetU(); + #endif } #ifdef USEOPENMP } @@ -1115,6 +1153,7 @@ 
private(j,Pval,x,y,z,vx,vy,vz,jval,jzval,zdist,Rdist) #ifdef NOMASS pdata[i].Krot*=opt.MassValue; #endif + vc = 0; for (j=0;jGetMass(); @@ -1154,6 +1193,9 @@ private(j,Pval,x,y,z,vx,vy,vz,J,mval) sxz+=vx*vz*mval; syz+=vy*vz*mval; Ekin+=(vx*vx+vy*vy+vz*vz)*mval; + #ifdef GASON + if (Pval->GetType()==GASTYPE) Ekin+=2.0*mval*Pval->GetU(); + #endif } #ifdef USEOPENMP } @@ -1198,6 +1240,12 @@ private(j,Pval,x,y,z,vx,vy,vz,jval,jzval,zdist,Rdist) pdata[i].RV_Krot=0.5*Krot/Ekin; #ifdef NOMASS pdata[i].RV_Krot*=opt.MassValue; +#endif +#if defined(EXTRADMON) + for (j=0;jGetType()==GASTYPE) { - x = (*Pval).X();//-pdata[i].cm_gas[0]; - y = (*Pval).Y();//-pdata[i].cm_gas[1]; - z = (*Pval).Z();//-pdata[i].cm_gas[2]; + x = (*Pval).X(); + y = (*Pval).Y(); + z = (*Pval).Z(); r2=x*x+y*y+z*z; rc=sqrt(r2); mval = Pval->GetMass(); @@ -1446,37 +1494,44 @@ private(j,Pval,x,y,z,vx,vy,vz,J,mval,SFR) Krot_sf=Krot_nsf=Ekin_sf=Ekin_nsf=0; #ifdef USEOPENMP #pragma omp parallel default(shared) \ -private(j,Pval,x,y,z,vx,vy,vz,jval,jzval,zdist,Rdist) +private(j,Pval,x,y,z,vx,vy,vz,jval,jzval,zdist,Rdist,mval) { #pragma omp for reduction(+:Krot,Ekin,Krot_sf,Ekin_sf,Krot_nsf,Ekin_nsf) #endif for (j=0;jGetType()!=GASTYPE) continue; - x = (*Pval).X();//-pdata[i].cm_gas[0]; - y = (*Pval).Y();//-pdata[i].cm_gas[1]; - z = (*Pval).Z();//-pdata[i].cm_gas[2]; - vx = (*Pval).Vx()-pdata[i].gcmvel[0];//-pdata[i].cmvel_gas[0]; - vy = (*Pval).Vy()-pdata[i].gcmvel[1];//-pdata[i].cmvel_gas[1]; - vz = (*Pval).Vz()-pdata[i].gcmvel[2];//-pdata[i].cmvel_gas[2]; + x = (*Pval).X(); + y = (*Pval).Y(); + z = (*Pval).Z(); + vx = (*Pval).Vx()-pdata[i].gcmvel[0]; + vy = (*Pval).Vy()-pdata[i].gcmvel[1]; + vz = (*Pval).Vz()-pdata[i].gcmvel[2]; + mval = Pval->GetMass(); jval=Coordinate(x,y,z).Cross(Coordinate(vx,vy,vz)); jzval=(jval*pdata[i].L_gas)/pdata[i].L_gas.Length(); zdist=(Coordinate(x,y,z)*pdata[i].L_gas)/pdata[i].L_gas.Length(); Rdist=sqrt(x*x+y*y+z*z-zdist*zdist); - if 
(Rdist>0)Krot+=Pval->GetMass()*(jzval*jzval/(Rdist*Rdist)); - Ekin+=Pval->GetMass()*(vx*vx+vy*vy+vz*vz); - #ifdef STARON + double ekin_i, ethermal_i, krot_i; + ekin_i = mval*(vx*vx+vy*vy+vz*vz); + ethermal_i = 2.0*mval*Pval->GetU(); + krot_i = mval*(jzval*jzval/(Rdist*Rdist)); + if (Rdist>0)Krot+=krot_i; + Ekin+=ekin_i; + Ekin+=ethermal_i; + #ifdef STARON SFR = Pval->GetSFR(); if (SFR>opt.gas_sfr_threshold) { - if (Rdist>0)Krot_sf+=Pval->GetMass()*(jzval*jzval/(Rdist*Rdist)); - Ekin_sf+=Pval->GetMass()*(vx*vx+vy*vy+vz*vz); + if (Rdist>0)Krot_sf+=krot_i; + Ekin_sf+=ekin_i; + Ekin_sf+=ethermal_i; } else { - if (Rdist>0)Krot_nsf+=Pval->GetMass()*(jzval*jzval/(Rdist*Rdist)); - Ekin_nsf+=Pval->GetMass()*(vx*vx+vy*vy+vz*vz); - + if (Rdist>0)Krot_nsf+=krot_i; + Ekin_nsf+=ekin_i; + Ekin_nsf+=ethermal_i; } - #endif +#endif } #ifdef USEOPENMP } @@ -1484,8 +1539,8 @@ private(j,Pval,x,y,z,vx,vy,vz,jval,jzval,zdist,Rdist) pdata[i].Krot_gas=Krot/Ekin; pdata[i].T_gas=0.5*Ekin; #ifdef STARON - if (pdata[i].M_gas_sf>0) pdata[i].Krot_gas_sf*=0.5/Ekin_sf; - if (pdata[i].M_gas_nsf>0) pdata[i].Krot_gas_nsf*=0.5/Ekin_nsf; + if (pdata[i].M_gas_sf>0) pdata[i].Krot_gas_sf=Krot_sf/Ekin_sf; + if (pdata[i].M_gas_nsf>0) pdata[i].Krot_gas_nsf=Krot_nsf/Ekin_nsf; #endif } if (pdata[i].n_gas>=PROPMORPHMINNUM) GetGlobalSpatialMorphology(numingroup[i], &Part[noffset[i]], pdata[i].q_gas, pdata[i].s_gas, 1e-2, pdata[i].eigvec_gas,0,GASTYPE,0); @@ -1702,6 +1757,20 @@ private(j,Pval,x,y,z,vx,vy,vz,jval,jzval,zdist,Rdist) } } #endif + +#ifdef GASON + GetExtraHydroProperties(opt, pdata[i], numingroup[i], &Part[noffset[i]]); +#endif +#ifdef STARON + GetExtraStarProperties(opt, pdata[i], numingroup[i], &Part[noffset[i]]); +#endif +#ifdef BHON + GetExtraBHProperties(opt, pdata[i], numingroup[i], &Part[noffset[i]]); +#endif +#ifdef EXTRADMON + GetExtraDMProperties(opt, pdata[i], numingroup[i], &Part[noffset[i]]); +#endif + #ifdef HIGHRES for (j=0;j0) treeimport=new 
KDTree(PartDataGet,nimport,opt.HaloMinSize,tree->TPHYS,tree->KEPAN,100,0,0,0,period); } #endif @@ -2740,7 +2809,7 @@ void GetSOMasses(Options &opt, const Int_t nbodies, Particle *Part, Int_t ngroup { Particle *Pval; KDTree *tree; - Double_t *period=NULL; + Double_t period[3]; Int_t i,j,k, nhalos = 0; if (opt.iverbose) { cout<<"Get inclusive masses"< maxrdist(ngroup+1); //to store particle ids of those in SO volume. vector SOpids; - vector *SOpartlist=new vector[ngroup+1]; + vector *SOpartlist = new vector[ngroup+1]; vector *SOparttypelist = NULL; #if defined(GASON) || defined(STARON) || defined(BHON) || defined(HIGHRES) @@ -2806,7 +2875,6 @@ void GetSOMasses(Options &opt, const Int_t nbodies, Particle *Part, Int_t ngroup //set period if (opt.p>0) { - period=new Double_t[3]; for (int j=0;j<3;j++) period[j]=opt.p; #ifdef USEMPI mpi_period=opt.p; @@ -2859,7 +2927,7 @@ void GetSOMasses(Options &opt, const Int_t nbodies, Particle *Part, Int_t ngroup PartDataIn = new Particle[NExport+1]; PartDataGet = new Particle[NImport+1]; //run search on exported particles and determine which local particles need to be exported back (or imported) - nimport=MPIBuildParticleNNImportList(nbodies, tree, Part); + nimport=MPIBuildParticleNNImportList(opt, nbodies, tree, Part); if (nimport>0) treeimport=new KDTree(PartDataGet,nimport,opt.HaloMinSize,tree->TPHYS,tree->KEPAN,100,0,0,0,period); } #endif @@ -3063,9 +3131,15 @@ private(i,j,k,taggedparts,radii,masses,indices,posref,posparts,velparts,typepart ///\todo need to update to allow for star forming/non-star forming profiles ///by storing the star forming value. 
double sfrval = 0; + int typeval = DARKTYPE; +#if defined(GASON) || defined(STARON) || defined(BHON) + if (opt.iextragasoutput || opt.iextrastaroutput || opt.iextrainterloperoutput || opt.iSphericalOverdensityPartList) + typeval = typeparts[indices[j]]; +#endif + AddDataToRadialBinInclusive(opt, radii[indices[j]], masses[indices[j]], #if defined(GASON) || defined(STARON) || defined(BHON) - sfrval, typeparts[indices[j]], + sfrval, typeval, #endif irnorm, ibin, pdata[i]); } @@ -3105,11 +3179,9 @@ private(i,j,k,taggedparts,radii,masses,indices,posref,posparts,velparts,typepart //write the particle lists if (opt.iSphericalOverdensityPartList) { WriteSOCatalog(opt, nhalos, SOpartlist, SOparttypelist); - delete[] SOpartlist; -#if defined(GASON) || defined(STARON) || defined(BHON) || defined(HIGHRES) - delete[] SOparttypelist; -#endif } + delete[] SOpartlist; + delete[] SOparttypelist; #ifdef USEMPI mpi_period=0; if (NProcs>1) { @@ -3906,7 +3978,7 @@ void GetBindingEnergy(Options &opt, const Int_t nbodies, Particle *Part, Int_t n Double_t eps2=opt.uinfo.eps*opt.uinfo.eps; //useful variables to store temporary results - Double_t r2,v2,Ti,poti,pot,Ei; + Double_t r2,v2,Ti,poti,pot,Ei,mval; Double_t Tval,Potval,Efracval,Eval,Emostbound,Eunbound; Int_t imostbound,iunbound; Double_t Efracval_gas,Efracval_star; @@ -3914,12 +3986,16 @@ void GetBindingEnergy(Options &opt, const Int_t nbodies, Particle *Part, Int_t n Double_t potmin,menc; Int_t npot,ipotmin; Coordinate cmpotmin; + vector npartspertype(NPARTTYPES); + Int_t n_gas, n_star, n_interloper, n_bh, n_dm; //used to temporarily store pids. Needed for large groups as the tree code used to calculate potential overwrites the id of particles so that once //finished it puts the particles back into the input order. 
Therefore store id values in PID value (which can be over written) //also if wish to use the deepest potential as a reference, then used to store original order Int_t *storepid; + double time2 = MyGetTime(); + if (opt.uinfo.icalculatepotential) { //small groups with PP calculations of potential. #ifdef USEOPENMP @@ -3928,34 +4004,51 @@ private(i,j,k,r2,v2,poti,Ti,pot,Eval,npot,storepid,menc,potmin,ipotmin) { #pragma omp for schedule(dynamic) nowait #endif - for (i=1;i<=ngroup;i++) if (numingroup[i]=POTOMPCALCNUM) { + storepid=new Int_t[numingroup[i]]; + for (j=0;j0)pdata[i].Efrac_gas/=(Double_t)pdata[i].n_gas; -#endif -#ifdef STARON - if (pdata[i].n_star>0)pdata[i].Efrac_star/=(Double_t)pdata[i].n_star; -#endif - } -#ifdef USEOPENMP -} -#endif - - //begin large groups - if (opt.uinfo.icalculatepotential) { - //loop for large groups with tree calculation - for (i=1;i<=ngroup;i++) if (numingroup[i]>=ompunbindnum) { - storepid=new Int_t[numingroup[i]]; - for (j=0;j=ompunbindnum) for (j=0;j=ompunbindnum) { Tval=0;Potval=0;Efracval=0; #ifdef GASON Efracval_gas=0.; + n_gas = 0; #endif #ifdef STARON Efracval_star=0.; + n_star = 0; #endif #ifdef USEOPENMP #pragma omp parallel default(shared) \ -private(j,v2,Ti,Ei) +private(j,v2,Ti,Ei,mval) { - #pragma omp for reduction(+:Tval,Efracval,Potval,Efracval_gas,Efracval_star) + #pragma omp for reduction(+:Tval,Efracval,Potval,Efracval_gas,Efracval_star,n_star,n_gas) #endif for (j=0;j0)pdata[i].Efrac_gas=Efracval_gas/(Double_t)pdata[i].n_gas; + if (n_gas>0)pdata[i].Efrac_gas=Efracval_gas/(Double_t)n_gas; #endif #ifdef STARON - if (pdata[i].n_star>0)pdata[i].Efrac_star=Efracval_star/(Double_t)pdata[i].n_star; + if (n_star>0)pdata[i].Efrac_star=Efracval_star/(Double_t)n_star; #endif } - //get most bound particle #ifdef USEOPENMP #pragma omp parallel default(shared) \ @@ -4337,13 +4414,20 @@ private(i,j) //before used to store the id in pglist and then have to reset particle order so that Ids correspond to indices //but to reduce 
computing time could just store index and leave particle array unchanged but only really necessary //if want to have separate field and subhalo files - Int_t **pglist=new Int_t*[ngroup+1]; - for (i=1;i<=ngroup;i++){ - pglist[i]=new Int_t[numingroup[i]+1];//here store in very last position at n+1 the unbound particle point - if (opt.iseparatefiles) for (j=0;j0) pglist[i][numingroup[i]]=pdata[i].iunbound; - else pglist[i][0]=0; + + Int_t **pglist; + pglist=NULL; + if (ngroup>0) { + pglist = new Int_t*[ngroup+1]; + pglist[0] = NULL; + for (i=1;i<=ngroup;i++){ + pglist[i]=NULL; + pglist[i]=new Int_t[numingroup[i]+1];//here store in very last position at n+1 the unbound particle point + if (opt.iseparatefiles) for (j=0;j0) pglist[i][numingroup[i]]=pdata[i].iunbound; + else pglist[i][0]=0; + } } delete[] noffset; //reset particles back to id order @@ -4552,22 +4636,21 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop Double_t EncMass=0, EncMassGas=0, EncMassGasSF=0, EncMassGasNSF=0, EncMassStar=0, EncMassBH=0, EncMassInterloper=0; Double_t EncVelDisp=0, EncVelDispGas=0, EncVelDispGasSF=0, EncVelDispGasNSF=0, EncVelDispStar=0, EncVelDispBH=0, EncVelDispInterloper=0; Double_t EncVRDisp=0, EncVRDispGas=0, EncVRDispGasSF=0, EncVRDispGasNSF=0, EncVRDispStar=0, EncVRDispBH=0, EncVRDispInterloper=0; - Double_t EncSFR=0; + Double_t EncSFR=0, EncZmetGas=0, EncZmetGasSF=0, EncZmetGasNSF=0, EncZmetStar=0; int iaptindex=0, numapttotal, type; - Double_t mass, rc, oldrc, veldisp, vrdisp, SFR; + Double_t mass, rc, oldrc, veldisp, vrdisp, SFR, Zmet; Double_t oldrc_gas,oldrc_gas_sf,oldrc_gas_nsf,oldrc_star,oldrc_bh; Particle *Pval; Coordinate x2; struct projectedmass { int type; - float mass; + float mass, mass_sf, mass_nsf; #if defined(GASON) && defined(STARON) - float SFR; + float SFR, Zmet; #endif Coordinate rproj; }; vector proj(ning); - //first calculate 3d aperture values; if (opt.aperturenum>0) { for (auto j=0;jGetType(); #if defined(GASON) && 
defined(STARON) SFR = Pval->GetSFR(); + Zmet = Pval->GetZmet()*mass; #endif veldisp = 0; for (auto k=0;k<3;k++) veldisp += pow(Pval->GetVelocity(k)-pdata.gcmvel[k],2.0); veldisp *= mass; vrdisp = 0; for (auto k=0;k<3;k++) vrdisp += pow((Pval->GetVelocity(k)-pdata.gcmvel[k])*Pval->GetPosition(k),2.0); vrdisp *= mass/(rc*rc); @@ -4592,10 +4676,13 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop if (EncMassGas>0) pdata.aperture_vrdisp_gas[iaptindex]=EncVRDispGas/EncMassGas; #ifdef STARON pdata.aperture_SFR_gas[iaptindex]=EncSFR; + pdata.aperture_Z_gas[iaptindex]=EncZmetGas; pdata.aperture_npart_gas_sf[iaptindex]=NinsideGasSF; pdata.aperture_npart_gas_nsf[iaptindex]=NinsideGasNSF; pdata.aperture_mass_gas_sf[iaptindex]=EncMassGasSF; pdata.aperture_mass_gas_nsf[iaptindex]=EncMassGasNSF; + if (EncMassGasSF>0) pdata.aperture_Z_gas_sf[iaptindex]=EncZmetGasSF/EncMassGasSF; + if (EncMassGasNSF>0) pdata.aperture_Z_gas_nsf[iaptindex]=EncZmetGasNSF/EncMassGasNSF; if (EncMassGasSF>0) pdata.aperture_veldisp_gas_sf[iaptindex]=EncVelDispGasSF/EncMassGasSF; if (EncMassGasNSF>0) pdata.aperture_veldisp_gas_nsf[iaptindex]=EncVelDispGasNSF/EncMassGasNSF; if (EncMassGasSF>0) pdata.aperture_vrdisp_gas_sf[iaptindex]=EncVRDispGasSF/EncMassGasSF; @@ -4605,6 +4692,7 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop #ifdef STARON pdata.aperture_npart_star[iaptindex]=NinsideStar; pdata.aperture_mass_star[iaptindex]=EncMassStar; + if (EncMassStar>0) pdata.aperture_Z_star[iaptindex]=EncZmetStar/EncMassStar; if (EncMassStar>0) pdata.aperture_veldisp_star[iaptindex]=EncVelDispStar/EncMassStar; if (EncMassStar>0) pdata.aperture_vrdisp_star[iaptindex]=EncVRDispStar/EncMassStar; #endif @@ -4626,18 +4714,21 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop EncVelDispGas += veldisp; EncVRDispGas += vrdisp; #ifdef STARON - EncSFR+=SFR; + EncSFR += SFR; + EncZmetGas += Zmet; if (SFR>opt.gas_sfr_threshold) 
{ NinsideGasSF++; EncMassGasSF+=mass; EncVelDispGasSF += veldisp; EncVRDispGasSF += vrdisp; + EncZmetGasSF += Zmet; } else { NinsideGasNSF++; - EncMassGasNSF+=mass; + EncMassGasNSF += mass; EncVelDispGasNSF += veldisp; EncVRDispGasNSF += vrdisp; + EncZmetGasNSF += Zmet; } #endif } @@ -4645,9 +4736,10 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop #ifdef STARON if (type==STARTYPE) { NinsideStar++; - EncMassStar+=mass; + EncMassStar += mass; EncVelDispStar += veldisp; EncVRDispStar += vrdisp; + EncZmetStar += Zmet; } #endif #ifdef HIGHRES @@ -4683,6 +4775,7 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop if (EncMassGas>0) pdata.aperture_vrdisp_gas[j]=EncVRDispGas/EncMassGas; #ifdef STARON pdata.aperture_SFR_gas[j]=EncSFR; + if (EncMassGas>0) pdata.aperture_Z_gas[j]=EncZmetGas/EncMassGas; #endif } #ifdef STARON @@ -4692,6 +4785,7 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop pdata.aperture_mass_gas_sf[j]=EncMassGasSF; if (EncMassGasSF>0) pdata.aperture_veldisp_gas_sf[j]=EncVelDispGasSF/EncMassGasSF; if (EncMassGasSF>0) pdata.aperture_vrdisp_gas_sf[j]=EncVRDispGasSF/EncMassGasSF; + if (EncMassGasSF>0) pdata.aperture_Z_gas_sf[j]=EncZmetGasSF/EncMassGasSF; } if (pdata.aperture_mass_gas_nsf[j]==-1) { @@ -4699,6 +4793,7 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop pdata.aperture_npart_gas_nsf[j]=NinsideGasNSF; if (EncMassGasNSF>0) pdata.aperture_veldisp_gas_nsf[j]=EncVelDispGasNSF/EncMassGasNSF; if (EncMassGasNSF>0) pdata.aperture_vrdisp_gas_nsf[j]=EncVRDispGasNSF/EncMassGasNSF; + if (EncMassGasNSF>0) pdata.aperture_Z_gas_nsf[j]=EncZmetGasNSF/EncMassGasNSF; } #endif #endif @@ -4709,6 +4804,7 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop pdata.aperture_mass_star[j]=EncMassStar; if (EncMassStar>0) pdata.aperture_veldisp_star[j]=EncVelDispStar/EncMassStar; if (EncMassStar>0) 
pdata.aperture_vrdisp_star[j]=EncVRDispStar/EncMassStar; + if (EncMassStar>0) pdata.aperture_Z_star[j]=EncZmetStar/EncMassStar; } #endif #ifdef HIGHRES @@ -4740,9 +4836,6 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop rc=Pval->Radius(); mass = Pval->GetMass(); type = Pval->GetType(); - #if defined(GASON) && defined(STARON) - SFR = Pval->GetSFR(); - #endif EncMass+=mass; #ifdef GASON if (type==GASTYPE) { @@ -4835,6 +4928,7 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop proj[j].type = Pval->GetType(); #if defined(GASON) && defined(STARON) proj[j].SFR = Pval->GetSFR(); + proj[j].Zmet = Pval->GetZmet()*mass; #endif for (auto k=0;k<3;k++) {x2[k]=Pval->GetPosition(k);x2[k]=x2[k]*x2[k];} proj[j].rproj[0]=sqrt(x2[0]+x2[1]);proj[j].rproj[1]=sqrt(x2[0]+x2[2]);proj[j].rproj[2]=sqrt(x2[1]+x2[2]); @@ -4859,13 +4953,14 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop } iaptindex=0; EncMass=EncMassGas=EncMassGasSF=EncMassGasNSF=EncMassStar=EncMassBH=EncMassInterloper=0; - EncSFR=0; + EncSFR=EncZmetGas=EncZmetGasSF=EncZmetGasNSF=EncZmetStar=0; for (auto j=0;j=opt.aperture_proj_values_kpc[iaptindex]) { pdata.aperture_mass_proj[iaptindex][k]=EncMass; @@ -4873,12 +4968,16 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop pdata.aperture_mass_proj_gas[iaptindex][k]=EncMassGas; #ifdef STARON pdata.aperture_SFR_proj_gas[iaptindex][k]=EncSFR; + pdata.aperture_Z_proj_gas[iaptindex][k]=EncZmetGas; pdata.aperture_mass_proj_gas_sf[iaptindex][k]=EncMassGasSF; pdata.aperture_mass_proj_gas_nsf[iaptindex][k]=EncMassGasNSF; + pdata.aperture_Z_proj_gas_sf[iaptindex][k]=EncZmetGasSF; + pdata.aperture_Z_proj_gas_nsf[iaptindex][k]=EncZmetGasNSF; #endif #endif #ifdef STARON pdata.aperture_mass_proj_star[iaptindex][k]=EncMassStar; + pdata.aperture_Z_proj_star[iaptindex][k]=EncZmetStar; #endif iaptindex++; } @@ -4889,17 +4988,23 @@ void 
CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop EncMassGas+=mass; #ifdef STARON EncSFR+=SFR; + EncZmetGas += Zmet; if (SFR>opt.gas_sfr_threshold) { EncMassGasSF+=mass; + EncZmetGasSF += Zmet; } else { EncMassGasNSF+=mass; + EncZmetGasNSF += Zmet; } #endif } #endif #ifdef STARON - if (type==STARTYPE) EncMassStar+=mass; + if (type==STARTYPE) { + EncMassStar+=mass; + EncZmetStar += Zmet; + } #endif #ifdef BHON if (type==BHTYPE) EncMassBH+=mass; @@ -4913,15 +5018,25 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop pdata.aperture_mass_proj_gas[j][k]=EncMassGas; #ifdef STARON pdata.aperture_SFR_proj_gas[j][k]=EncSFR; + pdata.aperture_Z_proj_gas[j][k]=EncZmetGas; #endif } #ifdef STARON - if (pdata.aperture_mass_proj_gas_sf[j][k]==-1) pdata.aperture_mass_proj_gas_sf[j][k]=EncMassGasSF; - if (pdata.aperture_mass_proj_gas_nsf[j][k]==-1) pdata.aperture_mass_proj_gas_nsf[j][k]=EncMassGasNSF; + if (pdata.aperture_mass_proj_gas_sf[j][k]==-1) { + pdata.aperture_mass_proj_gas_sf[j][k]=EncMassGasSF; + pdata.aperture_Z_proj_gas_sf[j][k]=EncZmetGasSF; + } + if (pdata.aperture_mass_proj_gas_nsf[j][k]==-1) { + pdata.aperture_mass_proj_gas_nsf[j][k]=EncMassGasNSF; + pdata.aperture_Z_proj_gas_nsf[j][k]=EncZmetGasNSF; + } #endif #endif #ifdef STARON - if (pdata.aperture_mass_proj_star[j][k]==-1) pdata.aperture_mass_proj_star[j][k]=EncMassStar; + if (pdata.aperture_mass_proj_star[j][k]==-1) { + pdata.aperture_mass_proj_star[j][k]=EncMassStar; + pdata.aperture_Z_proj_star[j][k]=EncZmetStar; + } #endif } //then determine half mass radii @@ -5026,10 +5141,10 @@ void CalculateApertureQuantities(Options &opt, Int_t &ning, Particle *Part, Prop pdata.aperture_mass_star[j]*=opt.MassValue; #endif #ifdef BHON - //pdata.aperture_mass_bh[j]*=opt.MassValue; + pdata.aperture_mass_bh[j]*=opt.MassValue; #endif #ifdef HIGHRES - //pdata.aperture_mass_interloper[j]*=opt.MassValue; + pdata.aperture_mass_interloper[j]*=opt.MassValue; #endif } for 
(auto j=0;j value; + string extrafield; + HydroProperties x; + double weight, sum; + //initialize map stored in the properties data + for (auto iextra=0;iextra 0) + { + sum = 1.0/sum; + for (auto iextra=0;iextra value; + string extrafield; + StarProperties x; + double weight, sum; + //initialize map stored in the properties data + for (auto iextra=0;iextra 0) + { + sum = 1.0/sum; + for (auto iextra=0;iextra value; + string extrafield; + BHProperties x; + double weight, sum; + //initialize map stored in the properties data + for (auto iextra=0;iextra 0) + { + sum = 1.0/sum; + for (auto iextra=0;iextra value; + string extrafield; + ExtraDMProperties x; + double weight, sum; + for (auto iextra=0;iextra 0) + { + sum = 1.0/sum; + for (auto iextra=0;iextra parts; + #ifdef GASON + HydroProperties hydro; + #endif + #ifdef STARON + StarProperties star; + #endif + #ifdef BHON + BHProperties bh; + #endif Particle *pbaryons; Int_t *pfof, *pfofall, *pfofbaryons, *numingroup,**pglist; - Int_t nbaryons, ndark; + Int_t nbaryons, ndark, index; Int_t ngroup, nhalos; groupinfo *group_info; //KDTree *tree; @@ -337,7 +455,8 @@ groupinfo *InvokeVelociraptor(const int snapnum, char* outputname, cout<<"Copying particle data..."<< endl; time1=MyGetTime(); - ndark = num_gravity_parts - num_hydro_parts - num_star_parts, nbaryons = num_hydro_parts+num_star_parts; + ndark = num_gravity_parts - num_hydro_parts - num_star_parts - num_bh_parts; + nbaryons = num_hydro_parts + num_star_parts + num_bh_parts; Nlocalbaryon[0]=nbaryons; Nmemlocalbaryon=Nlocalbaryon[0]; @@ -401,6 +520,40 @@ groupinfo *InvokeVelociraptor(const int snapnum, char* outputname, parts[i] = Particle(swift_parts[i]); } } + //if extra information has been passed then store it + #ifdef GASON + if (swift_gas_parts != NULL) + { + for (auto i=0; i0) parts[i].SetPID((pfof[i]+ngoffset)+libvelociraptorOpt.snapshotvalue); else parts[i].SetPID(0); } + delete [] pfof; #ifdef USEMPI if (NProcs > 1) { for (auto i=0;i &Part, const Int_t 
nbodies,Particl LN=opt.p/(Double_t)opt.Neff; } #endif + opt.internalenergyinputconversion = opt.velocityinputconversion*opt.velocityinputconversion; + //adjust physical scales by the inferred interparticle spacing opt.ellxscale=LN; opt.uinfo.eps*=LN; diff --git a/src/ui.cxx b/src/ui.cxx index da79e2ef..ef51aee5 100644 --- a/src/ui.cxx +++ b/src/ui.cxx @@ -10,6 +10,9 @@ void GetArgs(int argc, char *argv[], Options &opt) { #ifndef USEMPI int ThisTask =0, NProcs =1; +#endif +#if defined(USEMPI) && defined(USEPARALLELHDF) + opt.mpinprocswritesize=NProcs; #endif int option; int NumArgs = 0; @@ -461,8 +464,11 @@ void GetParamFile(Options &opt) opt.halocorenumfaciter = atof(vbuff); else if (strcmp(tbuff, "Halo_core_phase_significance")==0) opt.halocorephasedistsig = atof(vbuff); - else if (strcmp(tbuff, "Halo_core_phase_merge_dist")==0) + //cleaning up substructures by merging if phase distance is small + else if (strcmp(tbuff, "Structure_phase_merge_dist")==0) opt.coresubmergemindist = atof(vbuff); + else if (strcmp(tbuff, "Apply_phase_merge_to_host")==0) + opt.icoresubmergewithbg = atoi(vbuff); //for changing factors used in iterative search else if (strcmp(tbuff, "Iterative_threshold_factor")==0) @@ -509,6 +515,8 @@ void GetParamFile(Options &opt) opt.Omega_Lambda = atof(vbuff); else if (strcmp(tbuff, "Omega_DE")==0) opt.Omega_de = atof(vbuff); + else if (strcmp(tbuff, "Omega_k")==0) + opt.Omega_k = atof(vbuff); else if (strcmp(tbuff, "Omega_cdm")==0) opt.Omega_cdm= atof(vbuff); else if (strcmp(tbuff, "Omega_b")==0) @@ -668,11 +676,103 @@ void GetParamFile(Options &opt) //mpi memory related else if (strcmp(tbuff, "MPI_part_allocation_fac")==0) opt.mpipartfac = atof(vbuff); + else if (strcmp(tbuff, "MPI_number_of_tasks_per_write")==0) + opt.mpinprocswritesize = atoi(vbuff); ///OpenMP related else if (strcmp(tbuff, "OMP_run_fof")==0) opt.iopenmpfof = atoi(vbuff); else if (strcmp(tbuff, "OMP_fof_region_size")==0) opt.openmpfofsize = atoi(vbuff); + else if (strcmp(tbuff, 
"Gas_internal_property_names")==0) { + pos=0; + dataline=string(vbuff); + while ((pos = dataline.find(delimiter)) != string::npos) { + token = dataline.substr(0, pos); + opt.gas_internalprop_names.push_back(token); + dataline.erase(0, pos + delimiter.length()); + } + } + else if (strcmp(tbuff, "Star_internal_property_names")==0) { + pos=0; + dataline=string(vbuff); + while ((pos = dataline.find(delimiter)) != string::npos) { + token = dataline.substr(0, pos); + opt.star_internalprop_names.push_back(token); + dataline.erase(0, pos + delimiter.length()); + } + } + else if (strcmp(tbuff, "BH_internal_property_names")==0) { + pos=0; + dataline=string(vbuff); + while ((pos = dataline.find(delimiter)) != string::npos) { + token = dataline.substr(0, pos); + opt.bh_internalprop_names.push_back(token); + dataline.erase(0, pos + delimiter.length()); + } + } + else if (strcmp(tbuff, "Extra_DM_internal_property_names")==0) { + pos=0; + dataline=string(vbuff); + while ((pos = dataline.find(delimiter)) != string::npos) { + token = dataline.substr(0, pos); + opt.extra_dm_internalprop_names.push_back(token); + dataline.erase(0, pos + delimiter.length()); + } + } + else if (strcmp(tbuff, "Gas_chemistry_names")==0) { + pos=0; + dataline=string(vbuff); + while ((pos = dataline.find(delimiter)) != string::npos) { + token = dataline.substr(0, pos); + opt.gas_chem_names.push_back(token); + dataline.erase(0, pos + delimiter.length()); + } + } + else if (strcmp(tbuff, "Star_chemistry_names")==0) { + pos=0; + dataline=string(vbuff); + while ((pos = dataline.find(delimiter)) != string::npos) { + token = dataline.substr(0, pos); + opt.star_chem_names.push_back(token); + dataline.erase(0, pos + delimiter.length()); + } + } + else if (strcmp(tbuff, "BH_chemistry_names")==0) { + pos=0; + dataline=string(vbuff); + while ((pos = dataline.find(delimiter)) != string::npos) { + token = dataline.substr(0, pos); + opt.bh_chem_names.push_back(token); + dataline.erase(0, pos + delimiter.length()); + } + 
} + else if (strcmp(tbuff, "Gas_chemistry_production_names")==0) { + pos=0; + dataline=string(vbuff); + while ((pos = dataline.find(delimiter)) != string::npos) { + token = dataline.substr(0, pos); + opt.gas_chem_names.push_back(token); + dataline.erase(0, pos + delimiter.length()); + } + } + else if (strcmp(tbuff, "Star_chemistry_production_names")==0) { + pos=0; + dataline=string(vbuff); + while ((pos = dataline.find(delimiter)) != string::npos) { + token = dataline.substr(0, pos); + opt.star_chem_names.push_back(token); + dataline.erase(0, pos + delimiter.length()); + } + } + else if (strcmp(tbuff, "BH_chemistry_production_names")==0) { + pos=0; + dataline=string(vbuff); + while ((pos = dataline.find(delimiter)) != string::npos) { + token = dataline.substr(0, pos); + opt.bh_chem_names.push_back(token); + dataline.erase(0, pos + delimiter.length()); + } + } //output related @@ -690,6 +790,8 @@ void GetParamFile(Options &opt) opt.iSortByBindingEnergy = atoi(vbuff); else if (strcmp(tbuff, "SUBFIND_like_output")==0) opt.isubfindoutput = atoi(vbuff); + else if (strcmp(tbuff, "No_particle_ID_list_output")==0) + opt.inoidoutput = atoi(vbuff); //gadget io related to extra info for sph, stars, bhs, else if (strcmp(tbuff, "NSPH_extra_blocks")==0) @@ -748,11 +850,14 @@ inline void errormessage(string message) { if (ThisTask==0) cerr<1){ errormessage("WARNING: MPI Particle allocation factor is high (>1)."); } + if (opt.mpinprocswritesize<1){ + #ifdef USEPARALLELHDF + errormessage("WARNING: Number of MPI task writing collectively < 1. Setting to 1 ."); + opt.mpinprocswritesize = 1; + #endif + } + if (opt.mpinprocswritesize>NProcs){ + #ifdef USEPARALLELHDF + errormessage("WARNING: Number of MPI task writing collectively > NProcs. 
Setting to NProcs."); + opt.mpinprocswritesize = NProcs; + #endif + } #endif #ifdef USEOPENMP diff --git a/src/unbind.cxx b/src/unbind.cxx index 7656d6f3..7be744a6 100644 --- a/src/unbind.cxx +++ b/src/unbind.cxx @@ -60,7 +60,7 @@ inline bool CheckGroupForBoundness(Options &opt, Double_t &Efrac, Double_t &maxE } inline void FillUnboundArrays(Options &opt, int maxunbindsize, - Int_t ning, Particle *&groupPart, const Double_t &Efrac, + Int_t ning, Particle *groupPart, const Double_t &Efrac, Int_t *&nEplusid, int *&Eplusflag, Int_t &nEplus, bool &unbindcheck ) { @@ -98,7 +98,7 @@ inline void FillUnboundArrays(Options &opt, int maxunbindsize, } ///remove particles deemed unbound -inline void RemoveUnboundParticles(Int_t i, Int_t *&pfof, Int_t &ning, Int_t *&pglist, Particle *&groupPart, +inline void RemoveUnboundParticles(Int_t i, Int_t *&pfof, Int_t &ning, Int_t *&pglist, Particle *groupPart, Int_t &nEplus, Int_t *&nEplusid, int *&Eplusflag) { //adjust the pfof array and move unbound particles to the end of the array @@ -107,10 +107,18 @@ inline void RemoveUnboundParticles(Int_t i, Int_t *&pfof, Int_t &ning, Int_t *&p ning-=nEplus; } +inline void RemoveUnboundParticles(Int_t i, Int_t *&pfof, Int_t &ning, Particle *groupPart, + Int_t &nEplus, Int_t *&nEplusid, int *&Eplusflag) +{ + //adjust the pfof array and move unbound particles to the end of the array + for (auto j=0;j~log(numingroup[i]) particles. + //we set the limit at 2*log(numingroup[i]) to account for overhead in producing tree and calculating new potential + iunbindsizeflag=(nEplus<2.0*log((double)nig)); + if (iunbindsizeflag==0) Potential(opt, nig, groupPart); + else { + for (auto k=0;k0) { @@ -303,80 +379,17 @@ int CheckUnboundGroups(Options opt, const Int_t nbodies, Particle *Part, Int_t & return iflag; } - -/*! - Unbinding algorithm that checks to see if a group is self-bound. 
For small groups the potential is calculated using a PP algorithm, for large groups a tree-potential using kd-tree and monopole is calculated. \n - There are several ways a group can be defined as bound, it total energy is negative, its least bound particle has negative energy, or both. Also one can have different - kinetic reference frames. By default, uses the CM velocity of the total system BUT could also use the velocity of a region centred on the minimum potential well. \n - - If a group is not self bound, the least bound particle is removed from the group. Then - \arg if CM reference frame, the centre-of-mass velocity is recalculated and kinetic energies are recalculated (if it has changed enough). \n - \arg the potential can be left unchanged or if one ignores the background unbound but linked particles, the energies are adjusted for the loss of the particle from the group. - NOTE that for groups where the tree-potential was calculated, at the moment, the subtracted energy corresponds to the PP calculation which can lead to decrepancies. However, unless one is - worried about the exact details of when an object is self-bound, this is not an issue. \n - - Finally, this routines assumes that the pglist passed to the routine is for a gPart array that was build in id order from pfof and a local particle array. -*/ -int Unbind(Options &opt, Particle **gPart, Int_t &numgroups, Int_t *numingroup, Int_t *pfof, Int_t **pglist, int ireorder) +///Calculate potential of groups +inline void CalculatePotentials(Options &opt, Particle **gPart, Int_t &numgroups, Int_t *numingroup) { - //flag which is changed if any groups are altered as groups may need to be reordered. 
- int iunbindflag=0; - //flag used to determine what style of update to the potential is done for larger groups as - //if the amount of particles removed is large enough for large groups, it is more efficient to - //recalculate the entire potential using a Tree code than it is removing the contribution of each removed particle from - //all other particles - int iunbindsizeflag; - int maxnthreads,nthreads=1,l,n; - Int_t i,j,k,ng=numgroups, oldnumingroup; - int unbindloops; - bool sortflag; - Double_t maxE,v2,r2,poti,Ti,eps2=opt.uinfo.eps*opt.uinfo.eps,mv2=opt.MassValue*opt.MassValue,Efrac; - Double_t *gmass; - Int_t nEplus,maxunbindsize, nEfrac, nunbound; - Int_t *nEplusid; - int *Eplusflag; - bool unbindcheck; - Coordinate *cmvel; - //for tree code potential calculation - KDTree *tree; - Int_t ncell,ntreecell,nleafcell; - Int_t *start,*end; - Double_t *cmtot,*cBmax,*cR2max, **r2val; - Coordinate *cellcm; - Node *root,**nodelist, **npomp; - Int_t **marktreecell,**markleafcell; - - //used to determine potential based reference velocity frame - Double_t potmin,menc; - Int_t npot,ipotmin; - Coordinate potpos; - Int_t *storeval; + int maxnthreads,nthreads=1; #ifndef USEMPI int ThisTask=0,NProcs=1; #endif - //note that it is possible that called as a library, velociraptor - //does not need to calculate potentials itself - //in that case do not calculate potentials but instead - //copy relevant information - cmvel =new Coordinate[numgroups+1]; - gmass =new Double_t[numgroups+1]; - for (i=1;i<=numgroups;i++) { - cmvel[i]=Coordinate(0.); - gmass[i]=0.; - if (opt.uinfo.icalculatepotential) { - for (j=0;jUNBINDNUM) { + if (numingroup[i]>POTPPCALCNUM) { Potential(opt, numingroup[i], gPart[i]); } } - - }//end of check whether we calculate potential - - //Now set the kinetic reference frame - //if using standard frame, then using CMVEL of the entire structure - if (opt.uinfo.fracpotref==1.0) { -#ifdef USEOPENMP -#pragma omp parallel default(shared) \ -private(i,j,k) -{ - #pragma 
omp for schedule(dynamic,1) nowait -#endif - for (i=1;i<=numgroups;i++) - { - for (k=0;k<3;k++) cmvel[i][k]=0; - for (j=0;j=ompunbindnum) - { - unbindloops=0; - oldnumingroup = numingroup[i]; - GetBoundFractionAndMaxE(opt, numingroup[i], gPart[i], cmvel[i], Efrac, maxE, nunbound); - //if amount unbound is very large, just remove group entirely - if (nunbound>=opt.uinfo.maxunboundfracforiterativeunbind*numingroup[i]) { - for (j=0;j~log(numingroup[i]) particles. Here - //we set the limit at 2*log(numingroup[i]) to account for overhead in producing tree and calculating new potential - iunbindsizeflag=(nEplus<2.0*log((double)numingroup[i])); - if (opt.uinfo.bgpot==0) { - if (iunbindsizeflag==0) Potential(opt, numingroup[i], gPart[i]); - else { - for (k=0;k0 - //otherwise, end unbinding. - if (nEplus>opt.uinfo.maxallowedunboundfrac*numingroup[i]) { - unbindcheck=false; - continue; - } - else{ - sortflag=false; - if ((oldnumingroup-numingroup[i])>opt.uinfo.maxallowedunboundfrac*oldnumingroup) { - oldnumingroup=numingroup[i]; - sortflag=true; - } - //recalculate kinetic energies since cmvel has changed - GetBoundFractionAndMaxE(opt, numingroup[i], gPart[i], cmvel[i], Efrac, maxE, nunbound,sortflag); - //determine if any particle number of particle with positive energy upto opt.uinfo.maxunbindfrac*numingroup+1 - maxunbindsize=(Int_t)(opt.uinfo.maxunbindfrac*nunbound+1); - unbindcheck = CheckGroupForBoundness(opt,Efrac,maxE,numingroup[i]); - FillUnboundArrays(opt, maxunbindsize, numingroup[i], gPart[i], Efrac, nEplusid, Eplusflag, nEplus, unbindcheck); - } - } - AdjustPGListForUnbinding(unbindloops,numingroup[i],pglist[i],gPart[i]); - RemoveGroup(opt, numingroup[i], pfof, gPart[i], iunbindflag); - delete[] nEplusid; - delete[] Eplusflag; - } - } - //now for small groups loop over groups -#ifdef USEOPENMP -#pragma omp parallel default(shared) \ 
-private(i,j,k,n,maxE,maxunbindsize,nEplus,nEplusid,Eplusflag,v2,Ti,unbindcheck,Efrac,nEfrac,nunbound,r2,poti,unbindloops,sortflag,oldnumingroup) +///Calculate potential of groups, assumes particle list is ordered by group +///and accessed by numingroup and noffset; +inline void CalculatePotentials(Options &opt, Particle *gPart, Int_t &numgroups, Int_t *numingroup, Int_t *noffset) { - #pragma omp for schedule(dynamic) nowait reduction(+:iunbindflag) -#endif - for (i=1;i<=numgroups;i++) if (numingroup[i]<=ompunbindnum && numingroup[i]>0) - { - maxE=-MAXVALUE; - nEplus=0; - Efrac=0.; - unbindloops=0; - GetBoundFractionAndMaxE(opt, numingroup[i], gPart[i], cmvel[i], Efrac, maxE,nunbound); - if (nunbound>=opt.uinfo.maxunboundfracforiterativeunbind*numingroup[i]) { - for (j=0;j0 - //otherwise, end unbinding. - if (nEplusopt.uinfo.maxallowedunboundfrac*oldnumingroup) { - oldnumingroup=numingroup[i]; - sortflag=true; - } - //recalculate kinetic energies since cmvel has changed - GetBoundFractionAndMaxE(opt, numingroup[i], gPart[i], cmvel[i], Efrac, maxE,nunbound, sortflag); - maxunbindsize=(Int_t)(opt.uinfo.maxunbindfrac*nunbound+1); - unbindcheck = CheckGroupForBoundness(opt,Efrac,maxE,numingroup[i]); - FillUnboundArrays(opt, maxunbindsize, numingroup[i], gPart[i], Efrac, nEplusid, Eplusflag, nEplus, unbindcheck); - } - } - //if group too small remove entirely - AdjustPGListForUnbinding(unbindloops,numingroup[i],pglist[i],gPart[i]); - RemoveGroup(opt, numingroup[i], pfof, gPart[i], iunbindflag); - delete[] nEplusid; - delete[] Eplusflag; - } - } -#ifdef USEOPENMP -} -#endif - for (i=1;i<=numgroups;i++) if (numingroup[i]==0) ng--; - if (ireorder==1 && iunbindflag&&ng>0) ReorderGroupIDs(numgroups,ng,numingroup,pfof,pglist); - delete[] cmvel; - delete[] gmass; - numgroups=ng; - //return if any unbinding done indicating groups have been reordered - if (iunbindflag) return 1; - else return 0; -} - -///Similar to unbind algorithm but assumes particles are ordered. 
saves memory but more computations -int Unbind(Options &opt, Particle *&gPart, Int_t &numgroups, Int_t *&numingroup, Int_t *&noffset, Int_t *&pfof) -{ - //flag which is changed if any groups are altered as groups may need to be reordered. - int iunbindflag=0; - //flag used to determine what style of update to the potential is done for larger groups as - //if the amount of particles removed is large enough for large groups, it is more efficient to - //recalculate the entire potential using a Tree code than it is removing the contribution of each removed particle from - //all other particles - int iunbindsizeflag; - int maxnthreads,nthreads=1,l,n; - Int_t i,j,k,ng=numgroups; - Double_t maxE,totT,v2,r2,poti,Ti,eps2=opt.uinfo.eps*opt.uinfo.eps,mv2=opt.MassValue*opt.MassValue,Efrac; - Double_t *gmass,*totV; - PriorityQueue *pq; - Int_t nEplus,pqsize,nEfrac; - Int_t *nEplusid; - int *Eplusflag; - bool unbindcheck; - Coordinate *cmvel; - Particle Ptemp; - - //for tree code potential calculation - KDTree *tree; - Int_t ncell,ntreecell,nleafcell; - Int_t *start,*end; - Double_t *cmtot,*cBmax,*cR2max, **r2val; - Coordinate *cellcm; - Node *root,**nodelist, **npomp; - Int_t **marktreecell,**markleafcell; - - //used to determine potential based reference velocity frame - Double_t potmin,menc; - Int_t npot,ipotmin; - Coordinate potpos; - Int_t *storeval; - + int maxnthreads,nthreads=1; #ifndef USEMPI int ThisTask=0,NProcs=1; #endif + if (!opt.uinfo.icalculatepotential) return; - cmvel =new Coordinate[numgroups+1]; - gmass =new Double_t[numgroups+1]; - totV =new Double_t[numgroups+1]; - for (i=1;i<=numgroups;i++) { - cmvel[i]=Coordinate(0.); - gmass[i]=totV[i]=0.; - if (opt.uinfo.icalculatepotential) { - for (j=0;jUNBINDNUM) { - //to make this memory efficient really need just KDTree that uses Coordinates - tree=new KDTree(&gPart[noffset[i]],numingroup[i],opt.uinfo.BucketSize,tree->TPHYS); - - ncell=tree->GetNumNodes(); - root=tree->GetRoot(); - //to store particles in a given 
node - start=new Int_t[ncell]; - end=new Int_t[ncell]; - //distance calculations used to determine when one uses cell or when one uses particles - cmtot=new Double_t[ncell]; - cBmax=new Double_t[ncell]; - cR2max=new Double_t[ncell]; - cellcm=new Coordinate[ncell]; - //to store note list - nodelist=new Node*[ncell]; + if (numingroup[i]<0) continue; + if (numingroup[i]>POTPPCALCNUM) { + Potential(opt, numingroup[i], &gPart[noffset[i]]); + } + } +} - //search tree - for (j=0;jGetStart(); - end[j]=(nodelist[j])->GetEnd(); - cellcm[j][0]=cellcm[j][1]=cellcm[j][2]=0.; - cmtot[j]=0; - for (k=start[j];kGetRoot(); - Coordinate xpos(gPart[noffset[i]+j].GetPosition()); - nleafcell=ntreecell=0; - MarkCell(npomp[tid],marktreecell[tid], markleafcell[tid],ntreecell,nleafcell,r2val[tid],opt.uinfo.BucketSize, cR2max, cellcm, cmtot, xpos, eps2); - poti=0; - for (k=0;k=ompunbindnum) - { - totT=0; - Efrac=0; +///loop over groups and get velocity frame +inline void CalculateBindingReferenceFrame(Options &opt, + Particle *gPart, Int_t &numgroups, Int_t *numingroup, Int_t *noffset, + Double_t *&gmass, Coordinate *&cmvel) +{ + Double_t potmin,menc; + Int_t npot,ipotmin; + Coordinate potpos; + Int_t *storeval; + //if using standard frame, then using CMVEL of the entire structure + if (opt.uinfo.fracpotref==1.0) { #ifdef USEOPENMP -#pragma omp parallel default(shared) \ -private(j,k,v2,Ti,unbindcheck) +#pragma omp parallel default(shared) { - #pragma omp for reduction(+:totT,Efrac) -#endif - for (j=0;j0))&&(numingroup[i]>=opt.MinSize)) unbindcheck=true; - else unbindcheck=false; - } - else if (opt.uinfo.unbindtype==UPART) { - if ((maxE>0)&&(numingroup[i]>=opt.MinSize))unbindcheck=true; - else unbindcheck=false; - } - if (unbindcheck) { - for (j=0;jPush(j,gPart[noffset[i]+j].GetDensity()); - for (j=pqsize;jgPart[noffset[i]+pq->TopQueue()].GetDensity()) {pq->Pop();pq->Push(j,gPart[noffset[i]+j].GetDensity());} - nEplus=0; - //if just looking at particle then add to removal list till energy >0 
- if (opt.uinfo.unbindtype==UPART) { - for (j=0;jTopQueue()].GetDensity()>0) {nEplusid[nEplus++]=pq->TopQueue();Eplusflag[pq->TopQueue()]=1;pq->Pop();} - else break; - } - } - //otherwise, remove all positive energies and also if Efrac< minEfrac, keep adding to removal list - else if (opt.uinfo.unbindtype==USYSANDPART) { - nEfrac=0; - if (EfracTopQueue()].GetDensity()>0 || nEplusTopQueue();Eplusflag[pq->TopQueue()]=1;pq->Pop();} - else break; - } - } - delete pq; - } - while(unbindcheck) - { - iunbindflag++; - //first correct for removal of all least bound particle - double temp=1.0/gmass[i], temp2=0.; - if (opt.uinfo.cmvelreftype==CMVELREF) { - for (j=0;j~log(numingroup[i]) particles. Here - //we set the limit at 2*log(numingroup[i]) to account for overhead in producing tree and calculating new potential - iunbindsizeflag=(nEplus<2.0*log((double)numingroup[i])); - if (iunbindsizeflag) { - if (opt.uinfo.bgpot==0) { - for (k=0;k0 - //otherwise, end unbinding. - if (nEplus>=0.1*pqsize+0.5) { - - //recalculate kinetic energies since cmvel has changed - totT=0.; - Efrac=0.; + } + //if using potential then must identify minimum potential. 
+ //Note that most computations involve sorts, so parallize over groups + else if (opt.uinfo.cmvelreftype==POTREF) { #ifdef USEOPENMP #pragma omp parallel default(shared) \ -private(j,k,v2,Ti,unbindcheck) +private(npot,menc,potmin,ipotmin,potpos,storeval) { - #pragma omp for reduction(+:totT,Efrac) -#endif - for (j=0;j0))&&(numingroup[i]>=opt.MinSize)) unbindcheck=true; - else unbindcheck=false; - } - else if (opt.uinfo.unbindtype==UPART) { - if ((maxE>0)&&(numingroup[i]>=opt.MinSize))unbindcheck=true; - else unbindcheck=false; - } - if (unbindcheck) { - pq=new PriorityQueue(pqsize); - nEplus=0; - for (j=0;jPush(j,gPart[noffset[i]+j].GetDensity()); - for (j=pqsize;jgPart[noffset[i]+pq->TopQueue()].GetDensity()) {pq->Pop();pq->Push(j,gPart[noffset[i]+j].GetDensity());} - //if just looking at particle then add to removal list till energy >0 - if (opt.uinfo.unbindtype==UPART) { - for (j=0;jTopQueue()].GetDensity()>0) {nEplusid[nEplus++]=pq->TopQueue();Eplusflag[pq->TopQueue()]=1;pq->Pop();} - else break; - } - } - //otherwise, remove all positive energies and also if Efrac< minEfrac, keep adding to removal list - else if (opt.uinfo.unbindtype==USYSANDPART) { - nEfrac=0; - if (EfracTopQueue()].GetDensity()>0 || nEplusTopQueue();Eplusflag[pq->TopQueue()]=1;pq->Pop();} - else break; - } - } - delete pq; - } - } - else unbindcheck=false; + + //note that it is possible that called as a library, velociraptor + //does not need to calculate potentials itself + //in that case do not calculate potentials but instead + //copy relevant information + cmvel =new Coordinate[numgroups+1]; + gmass =new Double_t[numgroups+1]; + for (i=1;i<=numgroups;i++) { + cmvel[i]=Coordinate(0.); + gmass[i]=0.; + if (opt.uinfo.icalculatepotential) { + for (j=0;j0) { - totT=0; maxE=-MAXVALUE; nEplus=0; Efrac=0.; - for (j=0;j0))&&(numingroup[i]>=opt.MinSize)) unbindcheck=true; - else unbindcheck=false; - } - else if (opt.uinfo.unbindtype==UPART) { - if 
((maxE>0)&&(numingroup[i]>=opt.MinSize))unbindcheck=true; - else unbindcheck=false; - } - if (unbindcheck) { - for (j=0;jPush(j,gPart[noffset[i]+j].GetDensity()); - for (j=pqsize;jgPart[noffset[i]+pq->TopQueue()].GetDensity()) {pq->Pop();pq->Push(j,gPart[noffset[i]+j].GetDensity());} - nEplus=0; - //if just looking at particle then add to removal list till energy >0 - if (opt.uinfo.unbindtype==UPART) { - for (j=0;jTopQueue()].GetDensity()>0) {nEplusid[nEplus++]=pq->TopQueue();Eplusflag[pq->TopQueue()]=1;pq->Pop();} - else break; - } - } - //otherwise, remove all positive energies and also if Efrac< minEfrac, keep adding to removal list - else if (opt.uinfo.unbindtype==USYSANDPART) { - nEfrac=0; - if (EfracTopQueue()].GetDensity()>0 || nEplusTopQueue();Eplusflag[pq->TopQueue()]=1;pq->Pop();} - else break; - } - } - delete pq; - } - while(unbindcheck) - { + unbindloops=0; + GetBoundFractionAndMaxE(opt, numingroup[i], gPart[i], cmvel[i], Efrac, maxE,nunbound); + if (nunbound>=opt.uinfo.maxunboundfracforiterativeunbind*numingroup[i]) { + for (j=0;j0 + //otherwise, end unbinding. 
+ if (nEplusopt.uinfo.maxallowedunboundfrac*oldnumingroup) { + oldnumingroup=numingroup[i]; + sortflag=true; } + //recalculate kinetic energies since cmvel has changed + GetBoundFractionAndMaxE(opt, numingroup[i], gPart[i], cmvel[i], Efrac, maxE,nunbound, sortflag); + maxunbindsize=(Int_t)(opt.uinfo.maxunbindfrac*nunbound+1); + unbindcheck = CheckGroupForBoundness(opt,Efrac,maxE,numingroup[i]); + FillUnboundArrays(opt, maxunbindsize, numingroup[i], gPart[i], Efrac, nEplusid, Eplusflag, nEplus, unbindcheck); } } - //remove particles with positive energy - for (j=0;j=0.1*pqsize+0.5) { - - //recalculate kinetic energies since cmvel has changed - totT=0; - maxE=-MAXVALUE; - for (j=0;j0))&&(numingroup[i]>=opt.MinSize)) unbindcheck=true; - else unbindcheck=false; - } - else if (opt.uinfo.unbindtype==UPART) { - if ((maxE>0)&&(numingroup[i]>=opt.MinSize))unbindcheck=true; - else unbindcheck=false; - } - if (unbindcheck) { - //determine if any particle number of particle with positive energy upto opt.uinfo.maxunbindfrac*numingroup+1 - pqsize=(Int_t)(opt.uinfo.maxunbindfrac*numingroup[i]+1); - pq=new PriorityQueue(pqsize); - for (j=0;jPush(j,gPart[noffset[i]+j].GetDensity()); - for (j=pqsize;jgPart[noffset[i]+pq->TopQueue()].GetDensity()) {pq->Pop();pq->Push(j,gPart[noffset[i]+j].GetDensity());} - nEplus=0; - //if just looking at particle then add to removal list till energy >0 - if (opt.uinfo.unbindtype==UPART) { - for (j=0;jTopQueue()].GetDensity()>0) {nEplusid[nEplus++]=pq->TopQueue();Eplusflag[pq->TopQueue()]=1;pq->Pop();} - else break; - } + for (i=1;i<=numgroups;i++) if (numingroup[i]>=ompunbindnum) + { + unbindloops=0; + oldnumingroup = numingroup[i]; + GetBoundFractionAndMaxE(opt, numingroup[i], gPart[i], cmvel[i], Efrac, maxE, nunbound); + //if amount unbound is very large, just remove group entirely + if (nunbound>=opt.uinfo.maxunboundfracforiterativeunbind*numingroup[i]) { + for (j=0;j0 + //otherwise, end unbinding. 
+ if (nEplus>opt.uinfo.maxallowedunboundfrac*numingroup[i]) { + unbindcheck=false; + continue; } - //otherwise, remove all positive energies and also if Efrac< minEfrac, keep adding to removal list - else if (opt.uinfo.unbindtype==USYSANDPART) { - nEfrac=0; - if (EfracTopQueue()].GetDensity()>0 || nEplusTopQueue();Eplusflag[pq->TopQueue()]=1;pq->Pop();} - else break; + else{ + sortflag=false; + if ((oldnumingroup-numingroup[i])>opt.uinfo.maxallowedunboundfrac*oldnumingroup) { + oldnumingroup=numingroup[i]; + sortflag=true; } + //recalculate kinetic energies since cmvel has changed + GetBoundFractionAndMaxE(opt, numingroup[i], gPart[i], cmvel[i], Efrac, maxE, nunbound,sortflag); + //determine if any particle number of particle with positive energy upto opt.uinfo.maxunbindfrac*numingroup+1 + maxunbindsize=(Int_t)(opt.uinfo.maxunbindfrac*nunbound+1); + unbindcheck = CheckGroupForBoundness(opt,Efrac,maxE,numingroup[i]); + FillUnboundArrays(opt, maxunbindsize, numingroup[i], gPart[i], Efrac, nEplusid, Eplusflag, nEplus, unbindcheck); } - delete pq; } - } - else unbindcheck=false; - } - //if group too small remove entirely - if (numingroup[i]0) ReorderGroupIDs(numgroups,ng,numingroup,pfof,pglist); delete[] cmvel; + delete[] gmass; numgroups=ng; //return if any unbinding done indicating groups have been reordered if (iunbindflag) return 1; @@ -1481,7 +934,7 @@ void Potential(Options &opt, Int_t nbodies, Particle *Part, Double_t *potV) { int maxnthreads,nthreads,l,n; Int_t i,j,k,ntreecell,nleafcell; - Double_t v2,r2,Ti,eps2=opt.uinfo.eps*opt.uinfo.eps; + Double_t v2,r2,Ti,eps2=opt.uinfo.eps*opt.uinfo.eps, mv2=opt.MassValue*opt.MassValue; //for tree code potential calculation Int_t ncell; Int_t *start,*end; @@ -1596,6 +1049,9 @@ private(j,k,l,n,ntreecell,nleafcell,r2) } } potV[Part[j].GetID()]*=opt.G; + #ifdef NOMASS + potV[Part[j].GetID()]*=mv2; + #endif } #ifdef USEOPENMP } @@ -1608,7 +1064,7 @@ void Potential(Options &opt, Int_t nbodies, Particle *Part) { int 
maxnthreads,nthreads,l,n; Int_t i,j,k,ntreecell,nleafcell; - Double_t v2,r2,Ti,eps2=opt.uinfo.eps*opt.uinfo.eps; + Double_t v2,r2,Ti,eps2=opt.uinfo.eps*opt.uinfo.eps, mv2=opt.MassValue*opt.MassValue; //for tree code potential calculation Int_t ncell; Int_t *start,*end; @@ -1619,10 +1075,12 @@ void Potential(Options &opt, Int_t nbodies, Particle *Part) Int_t **marktreecell,**markleafcell; //Double_t **nnr2; KDTree *tree; + bool runomp = false; //for parallel environment store maximum number of threads nthreads=1; #ifdef USEOPENMP + runomp = (nbodies > POTOMPCALCNUM); #pragma omp parallel { if (omp_get_thread_num()==0) maxnthreads=nthreads=omp_get_num_threads(); @@ -1631,7 +1089,7 @@ void Potential(Options &opt, Int_t nbodies, Particle *Part) //otherwise use tree tree gravity calculation //here openmp is per group since each group is large //to make this memory efficient really need just KDTree that uses Coordinates - tree=new KDTree(Part,nbodies,opt.uinfo.BucketSize,tree->TPHYS); + tree=new KDTree(Part,nbodies,opt.uinfo.BucketSize,tree->TPHYS, tree->KEPAN,100,0,0,0,NULL,NULL,runomp); ncell=tree->GetNumNodes(); root=tree->GetRoot(); //to store particles in a given node @@ -1660,9 +1118,9 @@ void Potential(Options &opt, Int_t nbodies, Particle *Part) //determine cm for all cells and openings #ifdef USEOPENMP #pragma omp parallel default(shared) \ -private(j,k,n) +private(j,k,n) if (runomp) { - #pragma omp for schedule(dynamic,1) nowait + #pragma omp for schedule(static) #endif for (j=0;jGetStart(); @@ -1691,9 +1149,9 @@ private(j,k,n) //for marked cells calculate pp, for every other cell just use the CM of the cell to calculate the potential. 
#ifdef USEOPENMP #pragma omp parallel default(shared) \ -private(j,k,l,n,ntreecell,nleafcell,r2) +private(j,k,l,n,ntreecell,nleafcell,r2) if (runomp) { - #pragma omp for schedule(dynamic,1) nowait + #pragma omp for schedule(static) #endif for (j=0;j Date: Fri, 31 Jan 2020 12:00:19 +1100 Subject: [PATCH 68/71] Added configuration and script subfolders --- examples/{testing => profiling}/flamegraph.Makefile | 0 examples/{ => stfconfig}/baryonsample.cfg | 0 examples/{ => stfconfig}/genesis2019_configuration.cfg | 0 examples/{ => stfconfig}/sample.cfg | 0 examples/{ => stfconfig}/sample_dmcosmological_run.cfg | 0 examples/{ => stfconfig}/sample_eaglehydro_3dfof_subhalo.cfg | 0 examples/{ => stfconfig}/sample_eaglehydro_6dfof_subhalo.cfg | 0 examples/{ => stfconfig}/sample_galaxycatalog_run.cfg | 0 examples/{ => stfconfig}/sample_hydrocosmological_run.cfg | 0 examples/{ => stfconfig}/sample_swiftdm_3dfof_subhalo.cfg | 0 examples/{ => stfconfig}/sample_swifthydro_3dfof_subhalo.cfg | 0 .../sample_swifthydro_3dfof_subhalo_extra_properties.cfg | 0 examples/{ => stfconfig}/sample_swifthydro_6dfof_subhalo.cfg | 0 examples/{ => stfconfig}/sample_zoomdmcosmological_run.cfg | 0 examples/{ => stfconfig}/sample_zoomhydrocosmological_run.cfg | 0 examples/{ => stfconfig}/surfs2018_configuration.cfg | 0 16 files changed, 0 insertions(+), 0 deletions(-) rename examples/{testing => profiling}/flamegraph.Makefile (100%) rename examples/{ => stfconfig}/baryonsample.cfg (100%) rename examples/{ => stfconfig}/genesis2019_configuration.cfg (100%) rename examples/{ => stfconfig}/sample.cfg (100%) rename examples/{ => stfconfig}/sample_dmcosmological_run.cfg (100%) rename examples/{ => stfconfig}/sample_eaglehydro_3dfof_subhalo.cfg (100%) rename examples/{ => stfconfig}/sample_eaglehydro_6dfof_subhalo.cfg (100%) rename examples/{ => stfconfig}/sample_galaxycatalog_run.cfg (100%) rename examples/{ => stfconfig}/sample_hydrocosmological_run.cfg (100%) rename examples/{ => 
stfconfig}/sample_swiftdm_3dfof_subhalo.cfg (100%) rename examples/{ => stfconfig}/sample_swifthydro_3dfof_subhalo.cfg (100%) rename examples/{ => stfconfig}/sample_swifthydro_3dfof_subhalo_extra_properties.cfg (100%) rename examples/{ => stfconfig}/sample_swifthydro_6dfof_subhalo.cfg (100%) rename examples/{ => stfconfig}/sample_zoomdmcosmological_run.cfg (100%) rename examples/{ => stfconfig}/sample_zoomhydrocosmological_run.cfg (100%) rename examples/{ => stfconfig}/surfs2018_configuration.cfg (100%) diff --git a/examples/testing/flamegraph.Makefile b/examples/profiling/flamegraph.Makefile similarity index 100% rename from examples/testing/flamegraph.Makefile rename to examples/profiling/flamegraph.Makefile diff --git a/examples/baryonsample.cfg b/examples/stfconfig/baryonsample.cfg similarity index 100% rename from examples/baryonsample.cfg rename to examples/stfconfig/baryonsample.cfg diff --git a/examples/genesis2019_configuration.cfg b/examples/stfconfig/genesis2019_configuration.cfg similarity index 100% rename from examples/genesis2019_configuration.cfg rename to examples/stfconfig/genesis2019_configuration.cfg diff --git a/examples/sample.cfg b/examples/stfconfig/sample.cfg similarity index 100% rename from examples/sample.cfg rename to examples/stfconfig/sample.cfg diff --git a/examples/sample_dmcosmological_run.cfg b/examples/stfconfig/sample_dmcosmological_run.cfg similarity index 100% rename from examples/sample_dmcosmological_run.cfg rename to examples/stfconfig/sample_dmcosmological_run.cfg diff --git a/examples/sample_eaglehydro_3dfof_subhalo.cfg b/examples/stfconfig/sample_eaglehydro_3dfof_subhalo.cfg similarity index 100% rename from examples/sample_eaglehydro_3dfof_subhalo.cfg rename to examples/stfconfig/sample_eaglehydro_3dfof_subhalo.cfg diff --git a/examples/sample_eaglehydro_6dfof_subhalo.cfg b/examples/stfconfig/sample_eaglehydro_6dfof_subhalo.cfg similarity index 100% rename from examples/sample_eaglehydro_6dfof_subhalo.cfg rename to 
examples/stfconfig/sample_eaglehydro_6dfof_subhalo.cfg diff --git a/examples/sample_galaxycatalog_run.cfg b/examples/stfconfig/sample_galaxycatalog_run.cfg similarity index 100% rename from examples/sample_galaxycatalog_run.cfg rename to examples/stfconfig/sample_galaxycatalog_run.cfg diff --git a/examples/sample_hydrocosmological_run.cfg b/examples/stfconfig/sample_hydrocosmological_run.cfg similarity index 100% rename from examples/sample_hydrocosmological_run.cfg rename to examples/stfconfig/sample_hydrocosmological_run.cfg diff --git a/examples/sample_swiftdm_3dfof_subhalo.cfg b/examples/stfconfig/sample_swiftdm_3dfof_subhalo.cfg similarity index 100% rename from examples/sample_swiftdm_3dfof_subhalo.cfg rename to examples/stfconfig/sample_swiftdm_3dfof_subhalo.cfg diff --git a/examples/sample_swifthydro_3dfof_subhalo.cfg b/examples/stfconfig/sample_swifthydro_3dfof_subhalo.cfg similarity index 100% rename from examples/sample_swifthydro_3dfof_subhalo.cfg rename to examples/stfconfig/sample_swifthydro_3dfof_subhalo.cfg diff --git a/examples/sample_swifthydro_3dfof_subhalo_extra_properties.cfg b/examples/stfconfig/sample_swifthydro_3dfof_subhalo_extra_properties.cfg similarity index 100% rename from examples/sample_swifthydro_3dfof_subhalo_extra_properties.cfg rename to examples/stfconfig/sample_swifthydro_3dfof_subhalo_extra_properties.cfg diff --git a/examples/sample_swifthydro_6dfof_subhalo.cfg b/examples/stfconfig/sample_swifthydro_6dfof_subhalo.cfg similarity index 100% rename from examples/sample_swifthydro_6dfof_subhalo.cfg rename to examples/stfconfig/sample_swifthydro_6dfof_subhalo.cfg diff --git a/examples/sample_zoomdmcosmological_run.cfg b/examples/stfconfig/sample_zoomdmcosmological_run.cfg similarity index 100% rename from examples/sample_zoomdmcosmological_run.cfg rename to examples/stfconfig/sample_zoomdmcosmological_run.cfg diff --git a/examples/sample_zoomhydrocosmological_run.cfg b/examples/stfconfig/sample_zoomhydrocosmological_run.cfg 
similarity index 100% rename from examples/sample_zoomhydrocosmological_run.cfg rename to examples/stfconfig/sample_zoomhydrocosmological_run.cfg diff --git a/examples/surfs2018_configuration.cfg b/examples/stfconfig/surfs2018_configuration.cfg similarity index 100% rename from examples/surfs2018_configuration.cfg rename to examples/stfconfig/surfs2018_configuration.cfg From ae91dd04584e04881f81f2ebde64e5de85692d36 Mon Sep 17 00:00:00 2001 From: Jesmigel Cantos Date: Fri, 31 Jan 2020 12:02:49 +1100 Subject: [PATCH 69/71] Dockerfile for local compilation within a container --- Dockerfile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index df7b5e7f..25af8b0d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,13 +7,12 @@ ENV BRANCH=feature/threadpool-struct RUN apt update && \ apt install -y g++ libomp-dev libgsl-dev libhdf5-serial-dev git cmake -WORKDIR /home/ubuntu/ +WORKDIR /home/ubuntu/VELOCIraptor-STF # INITIALISE PROJECT DIRECTORY -RUN git clone https://github.com/pelahi/VELOCIraptor-STF.git && \ - cd VELOCIraptor-STF && git checkout ${BRANCH} && git submodule update --init --recursive - -WORKDIR /home/ubuntu/VELOCIraptor-STF +# RUN git clone https://github.com/pelahi/VELOCIraptor-STF.git && \ +# cd VELOCIraptor-STF && git checkout ${BRANCH} && git submodule update --init --recursive +COPY . . # BUILD BINARY RUN mkdir build && cd build && cmake .. 
&& make all From 4456db6a59aa4842531d2d82bb45de6806bbbb30 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Fri, 21 Feb 2020 08:57:01 +1100 Subject: [PATCH 70/71] removed flamegraph duplicate --- examples/testing/flamegraph.Makefile | 48 ---------------------------- 1 file changed, 48 deletions(-) delete mode 100644 examples/testing/flamegraph.Makefile diff --git a/examples/testing/flamegraph.Makefile b/examples/testing/flamegraph.Makefile deleted file mode 100644 index e300654e..00000000 --- a/examples/testing/flamegraph.Makefile +++ /dev/null @@ -1,48 +0,0 @@ -# This Makefile profiles stf using perf to produce flame graphs -.PHONEY: info record_prereq fold flamegraph run display - -#script that produces lots of qsub scripts to run velociraptor on simulation output -info: - @echo "This script profiles a VR run." - -# PERF -_PERF_OPT_RECORD=--call-graph lbr -g -s -o -_PERF_PATH_ROOT=`pwd`/fg -_PERF_FILE_RAW=$(_PERF_PATH_ROOT)/perf.raw.`date +%Y%m%d`.`hostname`.data - -# FLAMEGRAPH -_FLAMEGRAPH_GIT_URL=https://github.com/brendangregg/FlameGraph.git -_FLAMEGRAPH_PATH=$(_PERF_PATH_ROOT)/FlameGraph -_FLAMEGRAPH_FILE_FOLDED=$(_PERF_PATH_FOLDED)/perf.processed.`date +%Y%m%d`.`hostname`.folded -_FLAMEGRAPH_SCRIPT_STACKCOLLAPSE=$(_FLAMEGRAPH_PATH)/stackcollapse-perf.pl -_FLAMEGRAPH_SCRIPT_MAIN=$(_FLAMEGRAPH_PATH)/flamegraph.pl -_FLAMEGRAPH_FILE_SVG=$(_PERF_PATH_ROOT) - -# STF -# Dynamic stf input parameters through environment variables to be set in an input payload "./payloadname.env " -_STF_PARAMETERS="DEFAULT" -include ./payloadname.env - -record_prereq: - @mkdir -p $(_PERF_PATH_ROOT) - @cd $(_PERF_PATH_ROOT) && git clone $(_FLAMEGRAPH_GIT_URL) && cd.. 
- -record: record_prereq - perf record $(_PERF_OPT_RECORD) -o $(_PERF_FILE_RAW) ./stf $(_STF_PARAMETERS) - @echo "To manually test recorded data, execute: " - @echo "perf report -i $(_PERF_FILE_RAW)" - -fold: - perf script -i $(_PERF_FILE_RAW) | $(_FLAMEGRAPH_SCRIPT_STACKCOLLAPSE) > $(_FLAMEGRAPH_FILE_FOLDED) - -flamegraph: - cat $(_FLAMEGRAPH_FILE_FOLDED) | $(_FLAMEGRAPH_SCRIPT_MAIN) > $(_FLAMEGRAPH_FILE_SVG) - -run: record fold flamegraph - -# Display output for proofreading -# It can be appended to job scripts or test run in CLI -display: - @echo "perf record $(_PERF_OPT_RECORD) -o $(_PERF_FILE_RAW) ./stf $(_STF_PARAMETERS)" - @echo "perf script -i $(_PERF_FILE_RAW) | $(_FLAMEGRAPH_SCRIPT_STACKCOLLAPSE) > $(_FLAMEGRAPH_FILE_FOLDED)" - @echo "cat $(_FLAMEGRAPH_FILE_FOLDED) | $(_FLAMEGRAPH_SCRIPT_MAIN) > $(_FLAMEGRAPH_FILE_SVG)" \ No newline at end of file From 6503a20136c86dacd88ecd6df05f9876f968e3e3 Mon Sep 17 00:00:00 2001 From: jesmigel Date: Wed, 4 Mar 2020 10:44:40 +1100 Subject: [PATCH 71/71] Updated repo source to local --- Dockerfile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 80f2abfd..e4522b12 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,11 +9,14 @@ RUN apt update && \ WORKDIR /home/ubuntu/ -# INITIALISE PROJECT DIRECTORY -RUN git clone https://github.com/pelahi/VELOCIraptor-STF.git && \ - cd VELOCIraptor-STF && git checkout ${BRANCH} && git submodule update --init --recursive +# COPY PROJECT DIRECTORY +# RUN git clone https://github.com/pelahi/VELOCIraptor-STF.git +COPY . /home/ubuntu/VELOCIraptor-STF WORKDIR /home/ubuntu/VELOCIraptor-STF +#RUN git checkout ${BRANCH} +#&& git submodule update --init --recursive + # BUILD BINARY RUN mkdir build && cd build && cmake .. && make all