Skip to content

Commit

Permalink
Standardize output names and gzip LCA
Browse files Browse the repository at this point in the history
  • Loading branch information
fgvieira committed Dec 12, 2023
1 parent 60a9999 commit 80e2d64
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 25 deletions.
4 changes: 2 additions & 2 deletions Aggregate_stat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ int HelpPageAggregate(FILE *fp){
fprintf(fp,"--names \t names.dmp.gz\n");
fprintf(fp,"--nodes \t nodes.dmp.gz\n");
fprintf(fp,"--lca \t\t lcaout.stat lca produced statistics\n");
fprintf(fp,"--out \t\t Suffix of outputname with the predetermined prefix (.aggregate.stat.txt.gz)\n");
fprintf(fp,"--out \t\t Suffix of outputname with the predetermined prefix (.stat.gz)\n");

exit(1);
return 0;
Expand Down Expand Up @@ -145,7 +145,7 @@ int main_aggregate(int argc, char **argv) {
if(outfile_name==NULL)
outfile_name = strdup(infile_bdamage);
char buf[1024];
snprintf(buf, 1024, "%s.aggregate.stat.txt.gz", outfile_name);
snprintf(buf, 1024, "%s.stat.gz", outfile_name);
fprintf(stderr, "\t-> Dumping file: \'%s\'\n", buf);
BGZF *fpfpfp = bgzf_open(buf, "wb");
kstring_t *kstr = new kstring_t;
Expand Down
10 changes: 5 additions & 5 deletions ngsLCA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -790,17 +790,17 @@ int main_lca(int argc, char **argv) {
#endif
// p->header points to bam_hdr_t what is expected here?
for (int2int::iterator it = specWeight.begin(); 0 && it != specWeight.end(); it++)
fprintf(p->fp2, "%d\t%s\t%d\n", it->first, name_map[it->first], it->second);
gzprintf(p->fp2, "%d\t%s\t%d\n", it->first, name_map[it->first], it->second);

fprintf(stderr, "\t-> [ALL done] walltime used = %.2f sec\n", (float)(time(NULL) - t2));

if (usedreads_sam != NULL)
sam_close(usedreads_sam);
if (p->fp_lcadist) {
fprintf(p->fp_lcadist,"taxid\tnreads\tmea_len\tvar_len\tmean_gc\tvar_gc\tlca\trank\n");
gzprintf(p->fp_lcadist,"taxid\tnreads\tmea_len\tvar_len\tmean_gc\tvar_gc\tlca\trank\n");
for (std::map<int, lcatriplet>::iterator it = lcastat.begin(); it != lcastat.end(); it++) {
lcatriplet tmp = it->second;
fprintf(p->fp_lcadist, "%d\t%d\t%f\t%f\t%f\t%f", it->first, tmp.nalignments, mean(tmp.readlengths), var(tmp.readlengths), mean(tmp.gccontents), var(tmp.gccontents));
gzprintf(p->fp_lcadist, "%d\t%d\t%f\t%f\t%f\t%f", it->first, tmp.nalignments, mean(tmp.readlengths), var(tmp.readlengths), mean(tmp.gccontents), var(tmp.gccontents));
int2char::iterator it1 = name_map.find(it->first);
int2char::iterator it2 = rank.find(it->first);
char *namnam, *rankrank;
Expand All @@ -809,10 +809,10 @@ int main_lca(int argc, char **argv) {
namnam = it1->second;
if (it2 != rank.end())
rankrank = it2->second;
fprintf(p->fp_lcadist, "\t\"%s\"\t\"%s\"\n", namnam, rankrank);
gzprintf(p->fp_lcadist, "\t\"%s\"\t\"%s\"\n", namnam, rankrank);
}
}
fclose(p->fp_lcadist);

for (int2char::iterator it = name_map.begin(); it != name_map.end(); it++)
free(it->second);
for (int2char::iterator it = rank.begin(); it != rank.end(); it++)
Expand Down
16 changes: 8 additions & 8 deletions ngsLCA_cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ pars *pars_init() {
p->editdistMax = 10;
p->simscoreLow = 0;
p->simscoreHigh = 1;
p->fp1 = Z_NULL;
p->fp2 = p->fp_lcadist = NULL;
p->fp1 = p->fp2 = p->fp_lcadist = Z_NULL;
p->outnames = strdup("outnames");
p->minmapq = 0;
p->discard = 516; // discard unmapped and read fail
Expand All @@ -50,8 +49,9 @@ pars *pars_init() {

void pars_free(pars *p) {
gzclose(p->fp1);
// fclose(p->fp2);
// fclose(p->fp3);
gzclose(p->fp2);
gzclose(p->fp_lcadist);
//gzclose(p->fp3);

if (p->header)
sam_hdr_destroy(p->header);
Expand Down Expand Up @@ -268,14 +268,14 @@ pars *get_pars(int argc, char **argv) {
fprintf(stderr, "\t-> Will output lca results in file:\t\t\'%s\'\n", buf);
p->fp1 = gzopen(buf, "wb");
assert(p->fp1);
snprintf(buf, 1024, "%s.stat", p->outnames);
snprintf(buf, 1024, "%s.stat.gz", p->outnames);
fprintf(stderr, "\t-> Will output lca distribution in file:\t\t\'%s\'\n", buf);
p->fp_lcadist = NULL;
p->fp_lcadist = fopen(buf, "wb");
p->fp_lcadist = gzopen(buf, "wb");
assert(p->fp_lcadist);
snprintf(buf, 1024, "%s.wlca", p->outnames);
snprintf(buf, 1024, "%s.wlca.gz", p->outnames);
fprintf(stderr, "\t-> Will output lca weight in file:\t\t\'%s\'\n", buf);
// p->fp2 = fopen(buf,"wb");
// p->fp2 = gzopen(buf,"wb");
#if 0
snprintf(buf, 1024, "%s.log", p->outnames);
fprintf(stderr, "\t-> Will output log info (problems) in file:\t\'%s\'\n", buf);
Expand Down
4 changes: 2 additions & 2 deletions ngsLCA_cli.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ typedef struct {
int skipnorank;
char *outnames;
gzFile fp1;
FILE *fp_lcadist;
FILE *fp2;
gzFile fp_lcadist;
gzFile fp2;
// FILE *fp3; // this is the logfile that fgv thinks should be removed
int minmapq;
int discard; // or bitoperation with the flag of the read
Expand Down
13 changes: 6 additions & 7 deletions profile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -686,20 +686,19 @@ std::map<int, mydataD> load_bdamage_full(const char *fname, int &printlength) {

std::map<int, mydata2> load_lcastat(const char *fname,int skipfirstline) {
// fprintf(stderr,"./metadamage print file.bdamage.gz [-names file.gz -bam file.bam]\n");
const char *infile = fname;
// fprintf(stderr,"infile: %s howmany: %d \n",infile,howmany);
// fprintf(stderr,"fname: %s howmany: %d \n",fname,howmany);

FILE *fp = NULL;
gzFile fp = Z_NULL;

if (((fp = fopen(infile, "r"))) == NULL) {
fprintf(stderr, "Could not open input lcastat file: %s\n", infile);
if (((fp = gzopen(fname, "r"))) == NULL) {
fprintf(stderr, "Could not open input lcastat file: %s\n", fname);
exit(1);
}

std::map<int, mydata2> retmap;
char buffer[4096];
int atline = 0;
while (fgets(buffer, 4096, fp)) {
while (gzgets(fp, buffer, 4096)) {
atline++;
if(skipfirstline>0&&atline==1)
continue;
Expand All @@ -714,7 +713,7 @@ std::map<int, mydata2> load_lcastat(const char *fname,int skipfirstline) {
}

if (fp)
fclose(fp);
gzclose(fp);

fprintf(stderr, "\t-> Done loading lcastat file It contains: %lu\n", retmap.size());
for (std::map<int, mydata2>::iterator it = retmap.begin(); 0 && it != retmap.end(); it++)
Expand Down
1 change: 1 addition & 0 deletions profile.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <htslib/sam.h> // for bam1_t, bam_hdr_t, BAM_FDUP, BAM_FPAIRED
#include <stdio.h> // for FILE
#include <stdlib.h> // for calloc, free, NULL, size_t
#include <zlib.h> // for gzFile

#include <map> // for map
#include <vector> // for vector
Expand Down
2 changes: 1 addition & 1 deletion test/testAll.sh
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ if [[ $? -ne 0 ]]; then
RVAL=$((512+RVAL))
fi

CMD="${PRG} print_ugly output/test_lca.bdamage.gz --names data/names.dmp.gz --nodes data/nodes.dmp.gz --lcastat output/test_lca.stat --out_prefix output/test_lca_taxa"
CMD="${PRG} print_ugly output/test_lca.bdamage.gz --names data/names.dmp.gz --nodes data/nodes.dmp.gz --lcastat output/test_lca.stat.gz --out_prefix output/test_lca_taxa"
${CMD} >> ${LOG} 2>&1
if [[ $? -ne 0 ]]; then
echo "Problem running command: ${CMD}"
Expand Down

0 comments on commit 80e2d64

Please sign in to comment.