Skip to content

Commit

Permalink
Add --all mode
Browse files Browse the repository at this point in the history
  • Loading branch information
SenZHANG-GitHub committed Aug 14, 2017
1 parent d30963b commit 97e63ff
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 31 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ Data/*
test/*
Results/*
Results_R/*
R_Packages/*
all_sets_test/*
rrintcc_BOOST
rrintcc_BOOST.log
*.o
81 changes: 81 additions & 0 deletions READ.ME
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,86 @@ plink1.9 --bfile MIGen_QC --recode --out MIGen_QC
.frq file can be generated using plink1.9, e.g.
plink1.9 --file MIGen_QC --freq --out MIGen_QC
plink1.9 --bfile MIGen_QC --freq --out MIGen_QC


******************************************************************
4. Install R, Rcpp and RInside dependencies
******************************************************************

1). Install the R base environment:
sudo apt install r-base-core

2). Also install the R packages mvtnorm, Rcpp and RInside (the package tarballs can be downloaded online), one tarball at a time:
sudo R CMD INSTALL xxx.tar.gz








































































17 changes: 10 additions & 7 deletions configNames.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,22 @@
# The first word in a line is the variable name in rrintcc
# The second word in a line is the content for this variable
# Variables that need to be specified in this file:
# foutpath, resname, logname, filename, mapname, setpath, setname
# foutpath, resname, logname, filename, mapname, setname
# setnumber, setpath (These two are only needed under --all/-a)
#################################################################

foutpath Results/snp_results/
resname Results/region_pair_results.txt
foutpath all_sets_test/Results/snp_results/
resname all_sets_test/Results/region_pair_results.txt
logname Results/rrintcc_BOOST.log

filename Data/filenamelist.txt
mapname Data/example_bt_tag.map
filename all_sets_test/Data/filenamelist.txt
mapname all_sets_test/Data/MIGen_QC.map
setname Data/example_bt_tag.set

# Format of .set/.snps names: locipair1/2/.../.set/snp
setpath all_sets/
# setpath is the directory holding the .set files (locipair0.set, locipair1.set, ...)
# read under --all, so that the BOOST data is loaded only once and all set pairs
# are calculated within a single run of rrintcc
setpath all_sets_test/
setnumber 3

# mapname CUHK_HKDRGWA_6445CC_Clean_Ch1-22.map

Expand Down
7 changes: 5 additions & 2 deletions configNames_example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
# The first word in a line is the variable name in rrintcc
# The second word in a line is the content for this variable
# Variables that need to be specified in this file:
# foutpath, resname, logname, filename, mapname, setpath, setname
# foutpath, resname, logname, filename, mapname, setname
# setnumber, setpath (These two are only needed under --all/-a)
#################################################################

foutpath Results/snp_results/
Expand All @@ -19,8 +20,10 @@ filename Data/filenamelist.txt
mapname Data/example_bt_tag.map
setname Data/example_bt_tag.set

# Format of .set/.snps names: locipair1/2/.../.set/snp
# setpath is the directory holding the .set files (locipair0.set, locipair1.set, ...)
# read under --all, so that the BOOST data is loaded only once and all set pairs
# are calculated within a single run of rrintcc
setpath all_sets/
setnumber 1

# mapname CUHK_HKDRGWA_6445CC_Clean_Ch1-22.map

Expand Down
49 changes: 32 additions & 17 deletions rrintcc_BOOST.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ The contigency table collection part is modified from BOOSTx64.c (Can YANG, 2010
*/

// Format: ./rrintcc_BOOST --config configNames.txt --silent --max-cov 10000
// Format: ./rrintcc_BOOST --config configNames.txt --silent --max-cov 10000 --all
// --all (-a): read all .set files in setpath (named locipair0.set, locipair1.set, ...)
// setnumber and setpath in the config file are only used under --all (-a)

#include "utility.h"

Expand All @@ -41,6 +43,7 @@ int main(int argc, char* argv[])
bool show_message = true;
bool skip_symm = false;
bool set_test = true;
bool all_sets_flag = false;

// Used for combined p value calculation
double myth_pgates = 0.05;
Expand Down Expand Up @@ -70,7 +73,8 @@ int main(int argc, char* argv[])
max_cov_cnt = atoi(argv[i+1]);
}

//if (strcmp(argv[i], "--set") == 0 || strcmp(argv[i], "-s") == 0)
if (strcmp(argv[i], "--all") == 0 || strcmp(argv[i], "-a") == 0)
all_sets_flag = true;

}

Expand All @@ -92,7 +96,7 @@ int main(int argc, char* argv[])
int n, p, ncase, nctrl;; // n: number of samples; p: number of varibles

RInside R(argc, argv);
R.parseEvalQ("library(mvtnorm); library(corpcor)");
R.parseEvalQ("library(mvtnorm)");

clock_t st, ed;

Expand All @@ -104,7 +108,8 @@ int main(int argc, char* argv[])
printf("start getting the file names...\n");
}
st = clock();
GetFileNames(configname, foutpath, resname, logname, filename, mapname, setpath, setname, show_message);
GetFileNames(configname, foutpath, resname, logname, filename, mapname, setpath, setname, numSets, show_message);

ed = clock();
if (show_message)
{
Expand Down Expand Up @@ -170,23 +175,33 @@ int main(int argc, char* argv[])
printf("start calculating the region interactions...\n");
}
// time(&st);

if (all_sets_flag && isFileExist(resname.c_str()))
remove(resname.c_str());

if (!all_sets_flag)
numSets = 1;

for(int i = 0; i < numSets; i++)
{
string fout = foutpath;

st = clock();
if (show_message && i > 0 && i%1000 == 0)
if (show_message && i%100 == 0)
{
printf("%d sets have been analyzed\n", i);
}


//string setname;
//setname = setpath + "locipair" + to_string(i+1) + ".set";

string fout = foutpath;
fout.append("snp_pair_results");
fout.append(to_string(i+1));
fout.append(".txt");

if (all_sets_flag)
{
setname = setpath + "locipair" + to_string(i) + ".set";
fout.append("snp_pair_results");
fout.append(to_string(i));
fout.append(".txt");
} else
{
fout.append("snp_pair_results.txt");
}

// load .set data: Write sA, sB, and skip_symm inside
sA.clear();
Expand All @@ -201,17 +216,17 @@ int main(int argc, char* argv[])
ofstream EPI;
EPI.open(resname.c_str(), ofstream::app);
EPI.precision(4);
EPI << setw(8) << "Pair " << to_string(i+1) << " | "
EPI << setw(8) << "Pair " << to_string(i) << " | "
<< setw(8) << "pmin: " << " "
<< setw(15) << pmin << "\n";
EPI.flush();
EPI.close();

ed = clock();

if (show_message)
/*if (show_message)
{ printf("cputime for calculating the region interactions: %f seconds.\n", (double)(ed - st)/ CLOCKS_PER_SEC);
}
}*/
}


Expand Down
21 changes: 17 additions & 4 deletions utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@ string char2str(char *f)
return s2.str();
}

void GetFileNames(string cfname, string &foutpath, string &resname, string &logname, string &filename, string &mapname, string &setpath, string &setname, bool show_message)
// Reports whether the file named by fileName can be opened for reading.
// The probe stream is closed automatically when it goes out of scope.
bool isFileExist(const char *fileName)
{
    std::ifstream probe;
    probe.open(fileName);
    const bool opened = probe.good();
    return opened;
}

void GetFileNames(string cfname, string &foutpath, string &resname, string &logname, string &filename, string &mapname, string &setpath, string &setname, int &numSets, bool show_message)
{
// Should not use printLOG cuz logname is not yet ready
if (cfname.empty()) {
Expand All @@ -33,7 +39,7 @@ void GetFileNames(string cfname, string &foutpath, string &resname, string &logn

if (show_message)
{
printf("file for filename configuration: %s\n", cfname);
printf("file for filename configuration: %s\n", cfname.c_str());
}
//printLOG("file for filename configuration: " + char2str(cfname) + "\n");

Expand Down Expand Up @@ -73,6 +79,13 @@ void GetFileNames(string cfname, string &foutpath, string &resname, string &logn
if (id == "setname")
iss >> setname;

if (id == "setnumber")
{
string tmpnumber;
iss >> tmpnumber;
numSets = stoi(tmpnumber);
}

}
} else {
fprintf(stderr, "can't open file: %s\n", cfname);
Expand Down Expand Up @@ -793,10 +806,10 @@ double CalcRegionInter(RInside &R, string fout, vector<bool> &pheno, BYTE **geno

double pmin = R.parseEval("1-pmvnorm(lower=qnorm(minpv/2),upper=-qnorm(minpv/2),mean=rep(0, numpv),corr=cori)");
ed = clock();
if (show_message)
/*if (show_message)
{
printf("cputime for calling R fucntions pmvnorm: %f seconds.\n", (double)(ed - st)/ CLOCKS_PER_SEC);
}
}*/
return pmin;

}
Expand Down
3 changes: 2 additions & 1 deletion utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ string int2str(int n);

string char2str(char *f);

void GetFileNames(string cfname, string &foutpath, string &resname, string &logname, string &filename, string &mapname, string &setpath, string &setname, bool show_message);
void GetFileNames(string cfname, string &foutpath, string &resname, string &logname, string &filename, string &mapname, string &setpath, string &setname, int &numSets, bool show_message);

void GetSnpInfo(string filename, vector<int> &snpchr, vector<string> &snpname, bool show_message);

Expand All @@ -63,4 +63,5 @@ double CalcRegionInter(RInside &R, string fout, vector<bool> &pheno, BYTE **geno

void LDContrastTest(vector<bool> &pheno, vector<double> &zlist, vector<double> &plist, vector<int> &cov_index, BYTE **geno, double **geno_bar, vector<int> &sA, vector<int> &sB, bool skip_symm, int p, int n, int ncase, int nctrl, bool show_message);

// Returns true when fileName can be opened for reading, false otherwise.
bool isFileExist(const char *fileName);
#endif

0 comments on commit 97e63ff

Please sign in to comment.