Skip to content

Commit

Permalink
Added collate sub-command implementation and refactored the program.
Browse files Browse the repository at this point in the history
  • Loading branch information
Maozi Chen committed Sep 25, 2020
1 parent b17d343 commit 4d1bff1
Show file tree
Hide file tree
Showing 45 changed files with 2,628 additions and 119 deletions.
538 changes: 533 additions & 5 deletions Actions/CollateAction.cs

Large diffs are not rendered by default.

47 changes: 24 additions & 23 deletions Actions/SearchAction.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using mccsx.Statistics;
using mccsx.Helpers;
using mccsx.Statistics;
using System;
using System.Collections.Generic;
using System.Diagnostics;
Expand All @@ -11,7 +12,7 @@ namespace mccsx
{
internal class SearchAction : IAction<SearchOptions>
{
public SearchModel? Model { get; private set; }
public SearchParameters? Parameters { get; private set; }

public int Setup(SearchOptions options)
{
Expand Down Expand Up @@ -61,11 +62,11 @@ public int Setup(SearchOptions options)
Console.Error.WriteLine($"WARN: Some patterns are missing: {string.Join(", ", errorList)}");

// Finally, set the model for running this action
Model = new SearchModel(
Parameters = new SearchParameters(
options.Library,
options.Out,
options.Count,
new(options.Measure, options.Measure.SimilarityMeasure()),
new(options.Measure),
patternName,
patternCsvs
);
Expand All @@ -78,11 +79,11 @@ private record Result(string Ligand, string ConfName, double Similarity);
public int Run()
{
// Must have been set in the Setup method
Debug.Assert(Model != null);
Debug.Assert(Parameters != null);

Console.WriteLine($"Using {Model.Similarity.Type} similarity measure");
Console.WriteLine($"Using {Parameters.Similarity.Type} similarity measure");

Parallel.ForEach(Model.PatternCsvs, o =>
Parallel.ForEach(Parameters.PatternCsvs, o =>
{
var (category, patternFile) = o;
Console.WriteLine($"Searching in category {category}");
Expand All @@ -93,11 +94,11 @@ public int Run()
.ParseCsvRows(2, 3) // "Residue sequence" column and the first conformation
.ToDictionary(o => o[0], o => double.TryParse(o[1], out double val) ? val : 0.0);

var patternVec = new MapVector<string>(patternResDict, Model.PatternName);
var patternVec = new MapVector<string>(patternResDict, Parameters.PatternName);
var results = new List<Result>();

int count = 0;
foreach (var candidateFile in Model.LibraryDir.EnumerateFiles($"*_{category}.csv", SearchOption.TopDirectoryOnly))
foreach (var candidateFile in Parameters.LibraryDir.EnumerateFiles($"*_{category}.csv", SearchOption.TopDirectoryOnly))
{
// Read and parse the csv file
string[] csvLines = File.ReadAllLines(candidateFile.FullName);
Expand All @@ -116,22 +117,22 @@ public int Run()

// Load vectors for all conformations
var vecs = Enumerable.Range(0, confNames.Length)
.Select // column vectors, no specific order required since they're being reordered while clustering
.Select // Column vectors
(
i => new MapVector<string>
(
Enumerable.Range(0, resSeq.Length)
.ToDictionary(
j => resSeq[j], // row key
j => double.TryParse(data[j][i + 1], out double val) ? val : 0.0 // score value
j => resSeq[j], // Row key
j => double.TryParse(data[j][i + 1], out double val) ? val : 0.0 // Score value
),
confNames[i] // column key
confNames[i] // Column key
)
).ToArray();

// Sort to find the best conformation
var best = vecs
.Select(o => new { ConfName = o.Name, Similarity = Model.Similarity.Measure.Measure(o, patternVec) })
.Select(o => new { ConfName = o.Name, Similarity = Parameters.Similarity.Measure.Measure(o, patternVec) })
.OrderByDescending(o => o.Similarity)
.First();

Expand All @@ -141,15 +142,15 @@ public int Run()
count++;
}

Console.WriteLine($"Generating top {Model.ResultCount} matches out of {count} {category} vectors");
Console.WriteLine($"Generating top {Parameters.ResultCount} matches out of {count} {category} vectors");

// Prepare the category specific directory for storing output
string categoryDir = Path.Combine(Model.OutputDir.FullName, category.ToString());
string categoryDir = Path.Combine(Parameters.OutputDir.FullName, category.ToString());
Directory.CreateDirectory(categoryDir);

results = results.OrderByDescending(o => o.Similarity).ToList();

var bestMatches = results.Take(Model.ResultCount);
var bestMatches = results.Take(Parameters.ResultCount);

// Output top N best matches in separate directories
int rank = 1;
Expand All @@ -162,13 +163,13 @@ public int Run()
Directory.CreateDirectory(outputDir);

// Copy the best matched conformation to the output directory
string inputPdbqtFile = Path.Combine(Model.LibraryDir.FullName, $"{ligand}.pdbqt");
string outputPdbqtFile = Path.Combine(outputDir, $"{Model.PatternName}.pdbqt");
string inputPdbqtFile = Path.Combine(Parameters.LibraryDir.FullName, $"{ligand}.pdbqt");
string outputPdbqtFile = Path.Combine(outputDir, $"{Parameters.PatternName}.pdbqt");
CopyBestConformation(inputPdbqtFile, outputPdbqtFile, confId);

// Copy the best matched vector to the output directory
string inputCsvFile = Path.Combine(Model.LibraryDir.FullName, $"{ligand}_{category}.csv");
string outputCsvFile = Path.Combine(outputDir, $"{Model.PatternName}_{category}.csv");
string inputCsvFile = Path.Combine(Parameters.LibraryDir.FullName, $"{ligand}_{category}.csv");
string outputCsvFile = Path.Combine(outputDir, $"{Parameters.PatternName}_{category}.csv");

string[] headers = new[] { "Chain ID", "Residue name", "Residue sequence", confName };
string[][]? csvContent = File.ReadAllLines(inputCsvFile)
Expand All @@ -178,10 +179,10 @@ public int Run()
}

// Output summarized search report in CSV
string outputReportFile = Path.Combine(Model.OutputDir.FullName, $"searchreport_{category}.csv");
string outputReportFile = Path.Combine(Parameters.OutputDir.FullName, $"searchreport_{category}.csv");
File.WriteAllLines(outputReportFile, results
.Select(o => new[] { o.Ligand, o.ConfName, o.Similarity.ToString() })
.FormatCsvRows(new[] { "Drug", "Best Conf", $"{Model.Similarity.Measure.Name} Similarity" }));
.FormatCsvRows(new[] { "Drug", "Best Conf", $"{Parameters.Similarity.Measure.Name} Similarity" }));
});

return 0;
Expand Down
15 changes: 15 additions & 0 deletions Attributes/AminoAcidNamesAttribute.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
using System;

namespace mccsx
{
    /// <summary>
    /// Field-level attribute that carries a short name and a one-character code,
    /// e.g. <c>("Arg", 'R')</c>. It can be applied at most once per field and is not inherited.
    /// </summary>
    [AttributeUsage(AttributeTargets.Field, AllowMultiple = false, Inherited = false)]
    internal sealed class AminoAcidNamesAttribute : Attribute
    {
        /// <summary>
        /// Creates the annotation from its two components.
        /// </summary>
        /// <param name="shortName">The short name to expose through <see cref="ShortName"/>.</param>
        /// <param name="code">The character to expose through <see cref="Code"/>.</param>
        public AminoAcidNamesAttribute(string shortName, char code)
        {
            ShortName = shortName;
            Code = code;
        }

        /// <summary>The short name given to the constructor.</summary>
        public string ShortName { get; }

        /// <summary>The character code given to the constructor.</summary>
        public char Code { get; }
    }
}
13 changes: 13 additions & 0 deletions Attributes/LinkageImplAttribute.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using System;

namespace mccsx
{
    /// <summary>
    /// Field-level attribute that associates a <see cref="Type"/> with the annotated field.
    /// It can be applied at most once per field and is not inherited.
    /// </summary>
    [AttributeUsage(AttributeTargets.Field, AllowMultiple = false, Inherited = false)]
    internal sealed class LinkageImplAttribute : Attribute
    {
        /// <summary>
        /// Creates the annotation.
        /// </summary>
        /// <param name="linkageClass">The type to expose through <see cref="LinkageClass"/>.</param>
        public LinkageImplAttribute(Type linkageClass)
        {
            LinkageClass = linkageClass;
        }

        /// <summary>The type given to the constructor.</summary>
        public Type LinkageClass { get; }
    }
}
51 changes: 51 additions & 0 deletions Enums/AminoAcid.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
namespace mccsx
{
    /// <summary>
    /// Amino acids, each annotated through <see cref="AminoAcidNamesAttribute"/> with its
    /// short name and single-character code. Members are grouped by side-chain character;
    /// the declaration order (and therefore the underlying values) must not change.
    /// </summary>
    public enum AminoAcid
    {
        // ---- Charged: side chains often make salt bridges ----
        [AminoAcidNames("Arg", 'R')] Arginine,
        [AminoAcidNames("Lys", 'K')] Lysine,
        [AminoAcidNames("Asp", 'D')] AsparticAcid,
        [AminoAcidNames("Glu", 'E')] GlutamicAcid,

        // ---- Polar: usually participate in hydrogen bonds as proton donors or acceptors ----
        [AminoAcidNames("Gln", 'Q')] Glutamine,
        [AminoAcidNames("Asn", 'N')] Asparagine,
        [AminoAcidNames("His", 'H')] Histidine,
        [AminoAcidNames("Ser", 'S')] Serine,
        [AminoAcidNames("Thr", 'T')] Threonine,
        [AminoAcidNames("Tyr", 'Y')] Tyrosine,
        [AminoAcidNames("Cys", 'C')] Cysteine,
        [AminoAcidNames("Trp", 'W')] Tryptophan,

        // ---- Hydrophobic: normally buried inside the protein core ----
        [AminoAcidNames("Ala", 'A')] Alanine,
        [AminoAcidNames("Ile", 'I')] Isoleucine,
        [AminoAcidNames("Leu", 'L')] Leucine,
        [AminoAcidNames("Met", 'M')] Methionine,
        [AminoAcidNames("Phe", 'F')] Phenylalanine,
        [AminoAcidNames("Val", 'V')] Valine,
        [AminoAcidNames("Pro", 'P')] Proline,
        [AminoAcidNames("Gly", 'G')] Glycine,
    }
}
9 changes: 8 additions & 1 deletion Enums/Linkage.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
namespace mccsx
using mccsx.Statistics;

namespace mccsx
{
    /// <summary>
    /// Available linkage options. Each member is tagged through
    /// <see cref="LinkageImplAttribute"/> with the class associated with it.
    /// </summary>
    internal enum Linkage
    {
        [LinkageImpl(typeof(FarthestPointMethod))] farthest,
        [LinkageImpl(typeof(NearestPointMethod))] nearest,
        [LinkageImpl(typeof(AverageDistanceMethod))] average,
    }
}
20 changes: 20 additions & 0 deletions Exceptions/FilterColumnException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using System;
using System.Runtime.Serialization;

namespace mccsx
{
    /// <summary>
    /// Exception that can optionally carry the name of a filter in <see cref="FilterName"/>.
    /// </summary>
    /// <remarks>
    /// The type is marked <see cref="SerializableAttribute"/>, so <see cref="FilterName"/> is
    /// written in <see cref="GetObjectData"/> and read back in the serialization constructor;
    /// otherwise the value would be silently dropped on a serialization round-trip.
    /// </remarks>
    [Serializable]
    public class FilterColumnException : Exception
    {
        /// <summary>Creates the exception with a default message and no filter name.</summary>
        public FilterColumnException() { }

        /// <summary>Creates the exception with the given message and no filter name.</summary>
        /// <param name="message">The error message.</param>
        public FilterColumnException(string message) : base(message) { }

        /// <summary>Creates the exception with the given message and inner exception.</summary>
        /// <param name="message">The error message.</param>
        /// <param name="inner">The exception that caused this one.</param>
        public FilterColumnException(string message, Exception inner) : base(message, inner) { }

        /// <summary>Creates the exception with the given message and filter name.</summary>
        /// <param name="message">The error message.</param>
        /// <param name="filterName">The value to expose through <see cref="FilterName"/>.</param>
        public FilterColumnException(string message, string filterName) : base(message)
            => FilterName = filterName;

        /// <summary>Creates the exception with the given message, filter name and inner exception.</summary>
        /// <param name="message">The error message.</param>
        /// <param name="filterName">The value to expose through <see cref="FilterName"/>.</param>
        /// <param name="inner">The exception that caused this one.</param>
        public FilterColumnException(string message, string filterName, Exception inner) : base(message, inner)
            => FilterName = filterName;

        /// <summary>Serialization constructor; restores <see cref="FilterName"/> from <paramref name="info"/>.</summary>
        /// <param name="info">The serialized object data.</param>
        /// <param name="context">The source or destination context.</param>
        protected FilterColumnException(
            SerializationInfo info,
            StreamingContext context) : base(info, context)
            => FilterName = info.GetString(nameof(FilterName));

        /// <summary>The filter name supplied at construction, or <c>null</c> when none was given.</summary>
        public string? FilterName { get; }

        /// <summary>
        /// Adds <see cref="FilterName"/> to the serialized data on top of what the base class stores.
        /// </summary>
        /// <param name="info">The object data to populate.</param>
        /// <param name="context">The source or destination context.</param>
        public override void GetObjectData(SerializationInfo info, StreamingContext context)
        {
            // The base call also validates that info is not null.
            base.GetObjectData(info, context);
            info.AddValue(nameof(FilterName), FilterName, typeof(string));
        }
    }
}
20 changes: 20 additions & 0 deletions Exceptions/RawRecvDataFormatException.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using System;
using System.Runtime.Serialization;

namespace mccsx
{
    /// <summary>
    /// Exception that can optionally carry a file name in <see cref="FileName"/>.
    /// </summary>
    /// <remarks>
    /// The type is marked <see cref="SerializableAttribute"/>, so <see cref="FileName"/> is
    /// written in <see cref="GetObjectData"/> and read back in the serialization constructor;
    /// otherwise the value would be silently dropped on a serialization round-trip.
    /// </remarks>
    [Serializable]
    public class RawRecvDataFormatException : Exception
    {
        /// <summary>Creates the exception with a default message and no file name.</summary>
        public RawRecvDataFormatException() { }

        /// <summary>Creates the exception with the given message and no file name.</summary>
        /// <param name="message">The error message.</param>
        public RawRecvDataFormatException(string message) : base(message) { }

        /// <summary>Creates the exception with the given message and inner exception.</summary>
        /// <param name="message">The error message.</param>
        /// <param name="inner">The exception that caused this one.</param>
        public RawRecvDataFormatException(string message, Exception inner) : base(message, inner) { }

        /// <summary>Creates the exception with the given message and file name.</summary>
        /// <param name="message">The error message.</param>
        /// <param name="fileName">The value to expose through <see cref="FileName"/>.</param>
        public RawRecvDataFormatException(string message, string fileName) : base(message)
            => FileName = fileName;

        /// <summary>Creates the exception with the given message, file name and inner exception.</summary>
        /// <param name="message">The error message.</param>
        /// <param name="fileName">The value to expose through <see cref="FileName"/>.</param>
        /// <param name="inner">The exception that caused this one.</param>
        public RawRecvDataFormatException(string message, string fileName, Exception inner) : base(message, inner)
            => FileName = fileName;

        /// <summary>Serialization constructor; restores <see cref="FileName"/> from <paramref name="info"/>.</summary>
        /// <param name="info">The serialized object data.</param>
        /// <param name="context">The source or destination context.</param>
        protected RawRecvDataFormatException(
            SerializationInfo info,
            StreamingContext context) : base(info, context)
            => FileName = info.GetString(nameof(FileName));

        /// <summary>The file name supplied at construction, or <c>null</c> when none was given.</summary>
        public string? FileName { get; }

        /// <summary>
        /// Adds <see cref="FileName"/> to the serialized data on top of what the base class stores.
        /// </summary>
        /// <param name="info">The object data to populate.</param>
        /// <param name="context">The source or destination context.</param>
        public override void GetObjectData(SerializationInfo info, StreamingContext context)
        {
            // The base call also validates that info is not null.
            base.GetObjectData(info, context);
            info.AddValue(nameof(FileName), FileName, typeof(string));
        }
    }
}
63 changes: 63 additions & 0 deletions Extensions/AminoAcidExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
using mccsx.Helpers;
using System;
using System.Collections.Generic;
using System.Linq;

namespace mccsx.Extensions
{
    /// <summary>
    /// Extension helpers for converting between strings and <see cref="AminoAcid"/> values,
    /// and for extracting the numeric part of residue labels such as <c>Ala379</c>.
    /// </summary>
    internal static class AminoAcidExtensions
    {
        // Short name -> enum member. The comparer is ordinal and case-insensitive so that lookups
        // do not depend on the current culture's casing rules (culture-sensitive ToUpper() can map
        // "i" to a dotted capital under some cultures and make "ILE" and "Ile" disagree).
        private static readonly IDictionary<string, AminoAcid> NameDict = EnumAnnotationHelper<AminoAcid>
            .Enums
            .ToDictionary(
                o => EnumAnnotationHelper<AminoAcid>.GetAttribute<AminoAcidNamesAttribute>(o).ShortName,
                StringComparer.OrdinalIgnoreCase);

        /// <summary>
        /// Converts a three-letter short name (any letter case) to its <see cref="AminoAcid"/> value.
        /// </summary>
        /// <param name="s">The short name, e.g. <c>Ala</c>.</param>
        /// <returns>The matching enum member.</returns>
        /// <exception cref="InvalidCastException">
        /// <paramref name="s"/> is not exactly three characters long or is not a known short name.
        /// </exception>
        public static AminoAcid ParseAminoAcid(this string s)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            if (s.Length == 3 && NameDict.TryGetValue(s, out AminoAcid value))
                return value;

            throw new InvalidCastException($"'{s}' is not a recognized three-letter amino acid name.");
        }

        /// <summary>
        /// Tries to convert a short name (any letter case) to its <see cref="AminoAcid"/> value.
        /// </summary>
        /// <param name="s">The short name, e.g. <c>Ala</c>.</param>
        /// <param name="value">The matching enum member when the method returns <c>true</c>.</param>
        /// <returns><c>true</c> when <paramref name="s"/> is a known short name; otherwise <c>false</c>.</returns>
        public static bool TryParseAminoAcid(this string s, out AminoAcid value)
        {
            return NameDict.TryGetValue(s, out value);
        }

        /// <summary>
        /// Parses the integer part of a residue label written as <c>Ala379</c>, <c>A379</c> or <c>379</c>.
        /// </summary>
        /// <param name="s">The residue label.</param>
        /// <returns>The parsed integer.</returns>
        /// <exception cref="FormatException">The remaining text is not a valid integer.</exception>
        /// <exception cref="OverflowException">The number does not fit in an <see cref="int"/>.</exception>
        public static int ParseResidueSequence(this string s)
        {
            return int.Parse(
                StripResidueNamePrefix(s),
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Tries to parse the integer part of a residue label written as
        /// <c>Ala379</c>, <c>A379</c> or <c>379</c>.
        /// </summary>
        /// <param name="s">The residue label.</param>
        /// <param name="resSeq">The parsed integer when the method returns <c>true</c>.</param>
        /// <returns><c>true</c> when the remaining text is a valid integer; otherwise <c>false</c>.</returns>
        public static bool TryParseResidueSequence(this string s, out int resSeq)
        {
            return int.TryParse(
                StripResidueNamePrefix(s),
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out resSeq);
        }

        /// <summary>
        /// Returns the short name attached to <paramref name="value"/> via <see cref="AminoAcidNamesAttribute"/>.
        /// </summary>
        public static string GetShortName(this AminoAcid value)
            => EnumAnnotationHelper<AminoAcid>.GetAttribute<AminoAcidNamesAttribute>(value).ShortName;

        /// <summary>
        /// Returns the character code attached to <paramref name="value"/> via <see cref="AminoAcidNamesAttribute"/>.
        /// </summary>
        public static char GetCode(this AminoAcid value)
            => EnumAnnotationHelper<AminoAcid>.GetAttribute<AminoAcidNamesAttribute>(value).Code;

        // Drops the leading residue-name part of a label and returns the rest unchanged.
        private static string StripResidueNamePrefix(string s)
        {
            // Ala379: only the third character is inspected to detect a three-character prefix
            if (s.Length > 3 && char.IsLetter(s[2]))
                return s[3..];

            // A379: a single leading letter
            if (s.Length > 1 && char.IsLetter(s[0]))
                return s[1..];

            // 379: nothing to strip
            return s;
        }
    }
}
Loading

0 comments on commit 4d1bff1

Please sign in to comment.