Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finish converting calcstate to use DataSet/DataFile framework, add data file 'groupby' options #635

Merged
merged 12 commits into from
Sep 6, 2018
47 changes: 38 additions & 9 deletions src/Analysis_State.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ Analysis::RetType Analysis_State::Setup(ArgList& analyzeArgs, AnalysisSetup& set
curveOut_ = setup.DFL().AddDataFile( analyzeArgs.GetStringKey("curveout"), analyzeArgs );
lifeOut_ = setup.DFL().AddDataFile( analyzeArgs.GetStringKey("stateout"), analyzeArgs );
countOut_ = setup.DFL().AddDataFile( analyzeArgs.GetStringKey("countout"), analyzeArgs );
transOut_ = setup.DFL().AddCpptrajFile(analyzeArgs.GetStringKey("transout"),
"Transitions Output", DataFileList::TEXT, true);
if (transOut_ == 0) return Analysis::ERR;
transOut_ = setup.DFL().AddDataFile( analyzeArgs.GetStringKey("transout"), analyzeArgs );
normalize_ = analyzeArgs.hasKey("norm");
// Get definitions of states if present.
// Define states as 'state <#>,<dataset>,<min>,<max>'
Expand Down Expand Up @@ -125,6 +123,24 @@ Analysis::RetType Analysis_State::Setup(ArgList& analyzeArgs, AnalysisSetup& set
if (ds == 0) return Analysis::ERR;
ds->SetDim(0, Xdim);
state_names_ = ds;
// Set up transition data sets
Xdim = Dimension(1, 1, "Index");
ds = setup.DSL().AddSet(DataSet::INTEGER, MetaData(state_data_->Meta().Name(), "Xlifetimes"));
if (ds == 0) return Analysis::ERR;
ds->SetDim(0, Xdim);
trans_lifetimes_ = ds;
ds = setup.DSL().AddSet(DataSet::DOUBLE, MetaData(state_data_->Meta().Name(), "Xavglife"));
if (ds == 0) return Analysis::ERR;
ds->SetDim(0, Xdim);
trans_avglife_ = ds;
ds = setup.DSL().AddSet(DataSet::INTEGER, MetaData(state_data_->Meta().Name(), "Xmaxlife"));
if (ds == 0) return Analysis::ERR;
ds->SetDim(0, Xdim);
trans_maxlife_ = ds;
ds = setup.DSL().AddSet(DataSet::STRING, MetaData(state_data_->Meta().Name(), "Xname"));
if (ds == 0) return Analysis::ERR;
ds->SetDim(0, Xdim);
trans_names_ = ds;

if (countOut_ != 0) {
countOut_->AddDataSet( state_counts_ );
Expand All @@ -137,6 +153,13 @@ Analysis::RetType Analysis_State::Setup(ArgList& analyzeArgs, AnalysisSetup& set
lifeOut_->AddDataSet( state_maxlife_ );
lifeOut_->AddDataSet( state_names_ );
}
if (transOut_ != 0) {
transOut_->ProcessArgs("noxcol");
transOut_->AddDataSet( trans_lifetimes_ );
transOut_->AddDataSet( trans_avglife_ );
transOut_->AddDataSet( trans_maxlife_ );
transOut_->AddDataSet( trans_names_ );
}

mprintf(" STATE: The following state definitions have been set up:\n");
for (StateArray::const_iterator state = States_.begin(); state != States_.end(); ++state)
Expand All @@ -153,7 +176,7 @@ Analysis::RetType Analysis_State::Setup(ArgList& analyzeArgs, AnalysisSetup& set
mprintf("\tState lifetime output to file '%s'\n", lifeOut_->DataFilename().full());
if (countOut_ != 0)
mprintf("\tState counts output to file '%s'\n", countOut_->DataFilename().full());
mprintf("\tTransitions output to file '%s'\n", transOut_->Filename().full());
mprintf("\tTransitions output to file '%s'\n", transOut_->DataFilename().full());
if (normalize_)
mprintf("\tCurves will be normalized to 1.0\n");

Expand Down Expand Up @@ -302,12 +325,18 @@ Analysis::RetType Analysis_State::Analyze() {
state_maxlife_->Add(idx, &ival);
}
// Calculate transitions
transOut_->Printf("%-12s %12s %12s %s\n", "#N", "Average", "Max", "Transition");
int jdx = 0;
for (TransMapType::const_iterator trans = TransitionMap_.begin();
trans != TransitionMap_.end(); ++trans)
transOut_->Printf("%-12i %12.4f %12i %s\n", trans->second.Nlifetimes(),
trans->second.Avg(), trans->second.Max(),
trans->second.DS().legend());
trans != TransitionMap_.end(); ++trans, jdx++)
{
int ival = trans->second.Nlifetimes();
trans_lifetimes_->Add(jdx, &ival);
double dval = trans->second.Avg();
trans_avglife_->Add(jdx, &dval);
ival = trans->second.Max();
trans_maxlife_->Add(jdx, &ival);
trans_names_->Add(jdx, trans->second.DS().legend());
}
if (normalize_) {
for (std::vector<Transition>::const_iterator s = Status.begin();
s != Status.end(); ++s)
Expand Down
6 changes: 5 additions & 1 deletion src/Analysis_State.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,15 @@ class Analysis_State : public Analysis {
DataSet* state_avglife_;
DataSet* state_maxlife_;
DataSet* state_names_;
DataSet* trans_lifetimes_;
DataSet* trans_avglife_;
DataSet* trans_maxlife_;
DataSet* trans_names_;
DataSetList* masterDSL_;
DataFile* curveOut_;
DataFile* lifeOut_;
DataFile* countOut_;
CpptrajFile* transOut_;
DataFile* transOut_;
int debug_;
bool normalize_;
};
Expand Down
136 changes: 93 additions & 43 deletions src/DataIO_Std.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ DataIO_Std::DataIO_Std() :
DataIO(true, true, true), // Valid for 1D, 2D, 3D
mode_(READ1D),
prec_(UNSPEC),
group_(NO_TYPE),
indexcol_(-1),
isInverted_(false),
groupByName_(false),
hasXcolumn_(true),
writeHeader_(true),
square2d_(false),
Expand Down Expand Up @@ -719,23 +719,42 @@ int DataIO_Std::Read_Mat3x3(std::string const& fname,

// -----------------------------------------------------------------------------
void DataIO_Std::WriteHelp() {
mprintf("\tnoheader : Do not print header line.\n"
"\tinvert : Flip X/Y axes (1D).\n"
"\tgroupbyname : (1D) group data sets by name,\n"
"\tnoxcol : Do not print X (index) column (1D).\n"
"\tsquare2d : Write 2D data sets in matrix-like format.\n"
"\tnosquare2d : Write 2D data sets as '<X> <Y> <Value>'.\n"
"\tnosparse : Write all 3D grid voxels (default).\n"
"\tsparse : Only write 3D grid voxels with value > cutoff (default 0).\n"
"\t\tcut : Cutoff for 'sparse'; default 0.\n");
mprintf("\tnoheader : Do not print header line.\n"
"\tinvert : Flip X/Y axes (1D).\n"
"\tgroupby <type> : (1D) group data sets by <type>.\n"
"\t\tname : Group by name.\n"
"\t\taspect : Group by aspect.\n"
"\t\tidx : Group by index.\n"
"\t\tens : Group by ensemble number.\n"
"\t\tdim : Group by dimension.\n"
"\tnoxcol : Do not print X (index) column (1D).\n"
"\tsquare2d : Write 2D data sets in matrix-like format.\n"
"\tnosquare2d : Write 2D data sets as '<X> <Y> <Value>'.\n"
"\tnosparse : Write all 3D grid voxels (default).\n"
"\tsparse : Only write 3D grid voxels with value > cutoff (default 0).\n"
"\t\tcut <cut> : Cutoff for 'sparse'; default 0.\n");
}

// DataIO_Std::processWriteArgs()
int DataIO_Std::processWriteArgs(ArgList &argIn) {
if (!isInverted_ && argIn.hasKey("invert"))
isInverted_ = true;
if (!groupByName_ && argIn.hasKey("groupbyname"))
groupByName_ = true;
std::string grouparg = argIn.GetStringKey("groupby");
if (!grouparg.empty()) {
if (group_ != BY_NAME && grouparg == "name")
group_ = BY_NAME;
else if (group_ != BY_ASPECT && grouparg == "aspect")
group_ = BY_ASPECT;
else if (group_ != BY_IDX && grouparg == "idx")
group_ = BY_IDX;
else if (group_ != BY_ENS && grouparg == "ens")
group_ = BY_ENS;
else if (group_ != BY_DIM && grouparg == "dim")
group_ = BY_DIM;
else {
mprintf("Warning: Unrecognized arg for 'groupby' (%s), ignoring.\n", grouparg.c_str());
}
}
if (hasXcolumn_ && argIn.hasKey("noxcol"))
hasXcolumn_ = false;
if (writeHeader_ && argIn.hasKey("noheader"))
Expand Down Expand Up @@ -786,6 +805,65 @@ void DataIO_Std::WriteNameToBuffer(CpptrajFile& fileIn, std::string const& label
}
}

// DataIO_Std::WriteByGroup()
int DataIO_Std::WriteByGroup(CpptrajFile& file, DataSetList const& SetList, GroupType gtype)
{
int err = 0;
bool firstWrite = true;
DataSetList tmpdsl;
std::vector<bool> setIsWritten(SetList.size(), false);
unsigned int startIdx = 0;
unsigned int nWritten = 0;
while (nWritten < SetList.size()) {
std::string currentName;
Dimension currentDim;
int currentNum = -1;
switch (gtype) {
case BY_NAME : currentName = SetList[startIdx]->Meta().Name(); break;
case BY_ASPECT : currentName = SetList[startIdx]->Meta().Aspect(); break;
case BY_IDX : currentNum = SetList[startIdx]->Meta().Idx(); break;
case BY_ENS : currentNum = SetList[startIdx]->Meta().EnsembleNum(); break;
case BY_DIM : currentDim = SetList[startIdx]->Dim(0); break;
case NO_TYPE : return 1;
}
int firstNonMatch = -1;
for (unsigned int idx = startIdx; idx != SetList.size(); idx++)
{
if (!setIsWritten[idx])
{
bool match = false;
switch (gtype) {
case BY_NAME : match = (currentName == SetList[idx]->Meta().Name()); break;
case BY_ASPECT : match = (currentName == SetList[idx]->Meta().Aspect()); break;
case BY_IDX : match = (currentNum == SetList[idx]->Meta().Idx()); break;
case BY_ENS : match = (currentNum == SetList[idx]->Meta().EnsembleNum()); break;
case BY_DIM : match = (currentDim == SetList[idx]->Dim(0)); break;
case NO_TYPE : return 1;
}
if (match)
{
tmpdsl.AddCopyOfSet( SetList[idx] );
setIsWritten[idx] = true;
nWritten++;
} else if (firstNonMatch == -1)
firstNonMatch = (int)idx;
}
}
if (firstNonMatch > -1)
startIdx = (unsigned int)firstNonMatch;
if (!firstWrite)
file.Printf("\n");
else
firstWrite = false;
if (isInverted_)
err += WriteDataInverted(file, tmpdsl);
else
err += WriteDataNormal(file, tmpdsl);
tmpdsl.ClearAll();
}
return err;
}

// DataIO_Std::WriteData()
int DataIO_Std::WriteData(FileName const& fname, DataSetList const& SetList)
{
Expand All @@ -799,41 +877,13 @@ int DataIO_Std::WriteData(FileName const& fname, DataSetList const& SetList)
// Special case of 2D - may have sieved frames.
err = WriteCmatrix(file, SetList);
} else if (SetList[0]->Ndim() == 1) {
if (groupByName_) {
DataSetList tmpdsl;
std::vector<bool> setIsWritten(SetList.size(), false);
unsigned int startIdx = 0;
unsigned int nWritten = 0;
while (nWritten < SetList.size()) {
std::string currentName = SetList[startIdx]->Meta().Name();
int firstNonMatch = -1;
for (unsigned int idx = startIdx; idx != SetList.size(); idx++)
{
if (!setIsWritten[idx])
{
if (currentName == SetList[idx]->Meta().Name())
{
tmpdsl.AddCopyOfSet( SetList[idx] );
setIsWritten[idx] = true;
nWritten++;
} else if (firstNonMatch == -1)
firstNonMatch = (int)idx;
}
}
if (firstNonMatch > -1)
startIdx = (unsigned int)firstNonMatch;
if (isInverted_)
err += WriteDataInverted(file, tmpdsl);
else
err += WriteDataNormal(file, tmpdsl);
tmpdsl.ClearAll();
}
} else {
if (group_ == NO_TYPE) {
if (isInverted_)
err = WriteDataInverted(file, SetList);
else
err = WriteDataNormal(file, SetList);
}
} else
err = WriteByGroup(file, SetList, group_);
} else if (SetList[0]->Ndim() == 2)
err = WriteData2D(file, SetList);
else if (SetList[0]->Ndim() == 3)
Expand Down
11 changes: 8 additions & 3 deletions src/DataIO_Std.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ class DataIO_Std : public DataIO {
private:
static const char* SEPARATORS;
static const int IS_ASCII_CMATRIX;

enum GroupType { NO_TYPE = 0, BY_NAME, BY_ASPECT, BY_IDX, BY_ENS, BY_DIM };
enum modeType {READ1D=0, READ2D, READ3D, READVEC, READMAT3X3};
enum precType {UNSPEC, FLOAT, DOUBLE};

static int Get3Double(std::string const&, Vec3&, bool&);
int Read_1D(std::string const&,DataSetList&,std::string const&);
int ReadCmatrix(FileName const&, DataSetList&, std::string const&);
Expand All @@ -24,20 +29,20 @@ class DataIO_Std : public DataIO {
int Read_Vector(std::string const&,DataSetList&,std::string const&);
int Read_Mat3x3(std::string const&,DataSetList&,std::string const&);
static void WriteNameToBuffer(CpptrajFile&, std::string const&, int, bool);
int WriteByGroup(CpptrajFile&, DataSetList const&, GroupType);
int WriteCmatrix(CpptrajFile&, DataSetList const&);
int WriteDataNormal(CpptrajFile&,DataSetList const&);
int WriteDataInverted(CpptrajFile&,DataSetList const&);
int WriteData2D(CpptrajFile&, DataSetList const&);
int WriteData3D(CpptrajFile&, DataSetList const&);
int WriteSet2D(DataSet const&, CpptrajFile&);
int WriteSet3D(DataSet const&, CpptrajFile&);
enum modeType {READ1D=0, READ2D, READ3D, READVEC, READMAT3X3};
enum precType {UNSPEC, FLOAT, DOUBLE};

modeType mode_; ///< Read mode
precType prec_; ///< 3d reads, data set precision
GroupType group_; ///< 1D, control data set grouping
int indexcol_; ///< Read: column containing index (X) values
bool isInverted_; ///< For 1D writes invert X/Y.
bool groupByName_; ///< For 1D writes, group sets with same name together
bool hasXcolumn_;
bool writeHeader_;
bool square2d_;
Expand Down
5 changes: 5 additions & 0 deletions src/Dimension.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ class Dimension {
if (min_ != rhs.min_ || step_ != rhs.step_) return true;
return false;
}
/// \return true if this dim min/step equal to given min/step.
bool operator==(const Dimension& rhs) const {
if (min_ != rhs.min_ || step_ != rhs.step_) return false;
return true;
}
/// Set dimension with given min, step, and label.
void SetDimension(double m, double s, std::string const& l) {
label_ = l;
Expand Down
8 changes: 4 additions & 4 deletions test/Test_CalcState/trans.dat.save
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#N Average Max Transition
2 1.0000 1 Undefined->C0
2 1.0000 1 C0->Undefined
1 3.0000 3 C0->C1
#State[Xlifetimes] State[Xavglife] State[Xmaxlife] State[Xname]
2 1.0000 1 Undefined->C0
2 1.0000 1 C0->Undefined
1 3.0000 3 C0->C1
28 changes: 27 additions & 1 deletion test/Test_Datafile/RunTest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
. ../MasterTest.sh

# Clean
CleanFiles prec.in prec.dat a1.dat a1.agr xprec.dat
CleanFiles prec.in prec.dat a1.dat a1.agr xprec.dat byname.dat dssp.dat \
byidx.dat

TESTNAME='Data file tests'

Expand Down Expand Up @@ -48,6 +49,31 @@ EOF
RunCpptraj "X column format/precision test."
DoTest xprec.dat.save xprec.dat

# Grouping
cat > prec.in <<EOF
readdata ../Test_RemdTraj/d1.offset.dat.save name d1
readdata ../Test_Diffusion/diff.2.dat.save index 1 name Diff
list dataset
writedata byname.dat d1 Diff groupby name
EOF
RunCpptraj "Data file group by name test"
DoTest byname.dat.save byname.dat

CheckFor netcdf maxthreads 10
if [ $? -eq 0 ] ; then
TOP="../DPDP.parm7"
cat > prec.in <<EOF
trajin ../DPDP.nc 1 10
secstruct DSSP
run
create dssp.dat DSSP*[*] groupby aspect
create byidx.dat DSSP*[*] groupby idx
EOF
RunCpptraj "Data file group by aspect test"
DoTest dssp.dat.save dssp.dat
DoTest byidx.dat.save byidx.dat
fi

EndTest

exit 0
Loading