diff --git a/.gitignore b/.gitignore index 6cf2be5..c158828 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ Data/* test/* Results/* Results_R/* +R_Packages/* +all_sets_test/* rrintcc_BOOST rrintcc_BOOST.log *.o diff --git a/READ.ME b/READ.ME index 32412e6..3b5c57b 100644 --- a/READ.ME +++ b/READ.ME @@ -50,5 +50,86 @@ plink1.9 --bfile MIGen_QC --recode --out MIGen_QC .frq file can be generated using plink1.9, e.g. plink1.9 --file MIGen_QC --freq --out MIGen_QC plink1.9 --bfile MIGen_QC --freq --out MIGen_QC + + +****************************************************************** +4. Install R, Rcpp and RInside dependencies +****************************************************************** + +1). Need to install R base env +sudo apt install r-base-core + +2). Also Need to install mvtnorm, Rcpp and RInside (The packages can be downloaded online) +sudo R CMD INSTALL xxx.tar.gz + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/configNames.txt b/configNames.txt index 5c81969..bd9eb62 100644 --- a/configNames.txt +++ b/configNames.txt @@ -8,19 +8,22 @@ # The first word in a line is the variable name in rrintcc # The second word in a line is the content for this variable # Variables that need to be specified in this file: -# foutpath, resname, logname, filename, mapname, setpath, setname +# foutpath, resname, logname, filename, mapname, setname +# setnumber, setpath (These two are only needed under --all/-a) ################################################################# -foutpath Results/snp_results/ -resname Results/region_pair_results.txt +foutpath all_sets_test/Results/snp_results/ +resname all_sets_test/Results/region_pair_results.txt logname Results/rrintcc_BOOST.log -filename Data/filenamelist.txt -mapname Data/example_bt_tag.map +filename all_sets_test/Data/filenamelist.txt +mapname all_sets_test/Data/MIGen_QC.map setname Data/example_bt_tag.set -# Format of .set/.snps names: locipair1/2/.../.set/snp -setpath all_sets/ +# setpath is used for real data, so that we only read the BOOST data once +# in order to calculate the set pairs inside one run of rrintcc +setpath all_sets_test/ +setnumber 3 # mapname CUHK_HKDRGWA_6445CC_Clean_Ch1-22.map diff --git a/configNames_example.txt b/configNames_example.txt index 5c81969..52c1284 100644 --- a/configNames_example.txt +++ b/configNames_example.txt @@ -8,7 +8,8 @@ # The first word in a line is the variable name in rrintcc # The second word in a line is the content for this variable # Variables that need to be specified in this file: -# foutpath, resname, logname, filename, mapname, setpath, setname +# foutpath, resname, logname, filename, mapname, setname +# setnumber, setpath (These two are only needed under --all/-a) ################################################################# foutpath Results/snp_results/ @@ -19,8 +20,10 @@ filename Data/filenamelist.txt mapname Data/example_bt_tag.map setname Data/example_bt_tag.set -# Format of .set/.snps names: locipair1/2/.../.set/snp +# setpath is used for real data, so that we only read the BOOST data once +# in order to calculate the set pairs inside one run of rrintcc setpath all_sets/ +setnumber 1 # mapname CUHK_HKDRGWA_6445CC_Clean_Ch1-22.map diff --git a/rrintcc_BOOST.cpp b/rrintcc_BOOST.cpp index b2fa19d..4e53b76 100755 --- a/rrintcc_BOOST.cpp +++ b/rrintcc_BOOST.cpp @@ -19,7 +19,9 @@ The contigency table collection part is modified from BOOSTx64.c (Can YANG, 2010 */ -// Format: ./rrintcc_BOOST --config configNames.txt --silent --max-cov 10000 +// Format: ./rrintcc_BOOST --config configNames.txt --silent --max-cov 10000 --all +// --all means will read all .set files in setpath (with format: locipair0.set, locipair1.set,...) +// setnumber and setpath in config file will only be activated under --all (-a) #include "utility.h" @@ -41,6 +43,7 @@ int main(int argc, char* argv[]) bool show_message = true; bool skip_symm = false; bool set_test = true; + bool all_sets_flag = false; // Used for combined p value calculation double myth_pgates = 0.05; @@ -70,7 +73,8 @@ int main(int argc, char* argv[]) max_cov_cnt = atoi(argv[i+1]); } - //if (strcmp(argv[i], "--set") == 0 || strcmp(argv[i], "-s") == 0) + if (strcmp(argv[i], "--all") == 0 || strcmp(argv[i], "-a") == 0) + all_sets_flag = true; } @@ -92,7 +96,7 @@ int main(int argc, char* argv[]) int n, p, ncase, nctrl;; // n: number of samples; p: number of varibles RInside R(argc, argv); - R.parseEvalQ("library(mvtnorm); library(corpcor)"); + R.parseEvalQ("library(mvtnorm)"); clock_t st, ed; @@ -104,7 +108,8 @@ int main(int argc, char* argv[]) printf("start getting the file names...\n"); } st = clock(); - GetFileNames(configname, foutpath, resname, logname, filename, mapname, setpath, setname, show_message); + GetFileNames(configname, foutpath, resname, logname, filename, mapname, setpath, setname, numSets, show_message); + ed = clock(); if (show_message) { @@ -170,23 +175,33 @@ int main(int argc, char* argv[]) printf("start calculating the region interactions...\n"); } // time(&st); + + if (all_sets_flag && isFileExist(resname.c_str())) + remove(resname.c_str()); + + if (!all_sets_flag) + numSets = 1; + for(int i = 0; i < numSets; i++) { + string fout = foutpath; + st = clock(); - if (show_message && i > 0 && i%1000 == 0) + if (show_message && i%100 == 0) { printf("%d sets have been analyzed\n", i); } - - //string setname; - //setname = setpath + "locipair" + to_string(i+1) + ".set"; - - string fout = foutpath; - fout.append("snp_pair_results"); - fout.append(to_string(i+1)); - fout.append(".txt"); - + if (all_sets_flag) + { + setname = setpath + "locipair" + to_string(i) + ".set"; + fout.append("snp_pair_results"); + fout.append(to_string(i)); + fout.append(".txt"); + } else + { + fout.append("snp_pair_results.txt"); + } // load .set data: Write sA, sB, and skip_symm inside sA.clear(); @@ -201,7 +216,7 @@ int main(int argc, char* argv[]) ofstream EPI; EPI.open(resname.c_str(), ofstream::app); EPI.precision(4); - EPI << setw(8) << "Pair " << to_string(i+1) << " | " + EPI << setw(8) << "Pair " << to_string(i) << " | " << setw(8) << "pmin: " << " " << setw(15) << pmin << "\n"; EPI.flush(); @@ -209,9 +224,9 @@ int main(int argc, char* argv[]) ed = clock(); - if (show_message) + /*if (show_message) { printf("cputime for calculating the region interactions: %f seconds.\n", (double)(ed - st)/ CLOCKS_PER_SEC); - } + }*/ } diff --git a/utility.cpp b/utility.cpp index 61a822d..f4eb3e6 100755 --- a/utility.cpp +++ b/utility.cpp @@ -22,7 +22,13 @@ string char2str(char *f) return s2.str(); } -void GetFileNames(string cfname, string &foutpath, string &resname, string &logname, string &filename, string &mapname, string &setpath, string &setname, bool show_message) +bool isFileExist(const char *fileName) +{ + std::ifstream infile(fileName); + return infile.good(); +} + +void GetFileNames(string cfname, string &foutpath, string &resname, string &logname, string &filename, string &mapname, string &setpath, string &setname, int &numSets, bool show_message) { // Should not use printLOG cuz logname is not yet ready if (cfname.empty()) { @@ -33,7 +39,7 @@ void GetFileNames(string cfname, string &foutpath, string &resname, string &logn if (show_message) { - printf("file for filename configuration: %s\n", cfname); + printf("file for filename configuration: %s\n", cfname.c_str()); } //printLOG("file for filename configuration: " + char2str(cfname) + "\n"); @@ -73,6 +79,13 @@ void GetFileNames(string cfname, string &foutpath, string &resname, string &logn if (id == "setname") iss >> setname; + if (id == "setnumber") + { + string tmpnumber; + iss >> tmpnumber; + numSets = stoi(tmpnumber); + } + } } else { fprintf(stderr, "can't open file: %s\n", cfname); @@ -793,10 +806,10 @@ double CalcRegionInter(RInside &R, string fout, vector &pheno, BYTE **geno double pmin = R.parseEval("1-pmvnorm(lower=qnorm(minpv/2),upper=-qnorm(minpv/2),mean=rep(0, numpv),corr=cori)"); ed = clock(); - if (show_message) + /*if (show_message) { printf("cputime for calling R fucntions pmvnorm: %f seconds.\n", (double)(ed - st)/ CLOCKS_PER_SEC); - } + }*/ return pmin; } diff --git a/utility.h b/utility.h index af643c5..8de41af 100755 --- a/utility.h +++ b/utility.h @@ -49,7 +49,7 @@ string int2str(int n); string char2str(char *f); -void GetFileNames(string cfname, string &foutpath, string &resname, string &logname, string &filename, string &mapname, string &setpath, string &setname, bool show_message); +void GetFileNames(string cfname, string &foutpath, string &resname, string &logname, string &filename, string &mapname, string &setpath, string &setname, int &numSets, bool show_message); void GetSnpInfo(string filename, vector &snpchr, vector &snpname, bool show_message); @@ -63,4 +63,5 @@ double CalcRegionInter(RInside &R, string fout, vector &pheno, BYTE **geno void LDContrastTest(vector &pheno, vector &zlist, vector &plist, vector &cov_index, BYTE **geno, double **geno_bar, vector &sA, vector &sB, bool skip_symm, int p, int n, int ncase, int nctrl, bool show_message); +bool isFileExist(const char *fileName); #endif