Skip to content

Commit

Permalink
Merge pull request #3 from PankratzLab/sample_status_output
Browse files Browse the repository at this point in the history
Add case/control output file
  • Loading branch information
jameeters authored Dec 22, 2022
2 parents e6a6b38 + 99d5b16 commit 1aa4bde
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 9 deletions.
24 changes: 19 additions & 5 deletions src/main/java/org/pankratzlab/kdmatch/KDMatch.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
Expand All @@ -18,7 +19,6 @@
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPOutputStream;

public class KDMatch {

Expand All @@ -35,12 +35,12 @@ public class KDMatch {
// controls. Note: this portion is multi-threaded within communities (i.e. optimizes within
// communities of matches that are connected by at least one control)

private static void run(Path inputFileAnchor, Path inputFileBarns, Path ouputDir,
private static void run(Path inputFileAnchor, Path inputFileBarns, Path outputDir,
int initialNumSelect, int finalNumSelect, int threads,
Logger log) throws IOException, InterruptedException, ExecutionException {
String[] headerA = Files.lines(inputFileAnchor).findFirst().get().toString().trim().split("\t");
String[] headerB = Files.lines(inputFileBarns).findFirst().get().toString().trim().split("\t");
new File(ouputDir.toString()).mkdirs();
new File(outputDir.toString()).mkdirs();

if (Arrays.equals(headerA, headerB)) {
KDTree<Sample> kdTree = new KDTree<>(headerA.length - 1);// dimension of the data to be
Expand All @@ -59,13 +59,16 @@ private static void run(Path inputFileAnchor, Path inputFileBarns, Path ouputDir
getSampleStreamFromFile(inputFileAnchor),
initialNumSelect)
.collect(Collectors.toList());
String outputBase = ouputDir + File.separator + "test.match.AllowDups.txt.gz";
String outputBase = outputDir + File.separator + "test.match.AllowDups.txt.gz";

log.info("reporting full baseline selection of " + initialNumSelect + " nearest neighbors to "
+ outputBase);
writeToFile(naiveMatches.stream(), outputBase, headerA, headerB, initialNumSelect);

String outputOpt = ouputDir + File.separator + "test.match.optimized.txt.gz";
String statusBase = outputDir + File.separator + "test.status.AllowDups.txt";
writeSampleStatusFile(naiveMatches.stream(), statusBase, initialNumSelect);

String outputOpt = outputDir + File.separator + "test.match.optimized.txt.gz";

log.info("selecting " + naiveMatches + " optimized nearest neighbors");

Expand All @@ -77,6 +80,8 @@ private static void run(Path inputFileAnchor, Path inputFileBarns, Path ouputDir

writeToFile(optimizedMatches, outputOpt, headerA, headerB, finalNumSelect);

String statusOptimized = outputDir + File.separator + "test.status.optimized.txt";
writeSampleStatusFile(optimizedMatches, statusOptimized, finalNumSelect);
}

}
Expand Down Expand Up @@ -119,6 +124,15 @@ private static void addHeader(int numToSelect, String[] headerA, String[] header
writer.println(header);
}

/**
 * Writes a tab-delimited case/control status file for the given matches.
 *
 * <p>The file starts with the header {@code id\tstatus\tmatched_case_id}, followed by every line
 * returned by {@code Match.getStatusFileLines(numToSelect)} for each match, in stream order.
 *
 * <p>An existing file at {@code outputFileName} is overwritten. A header is written on every
 * call, so appending to the output of a previous run would leave a second header and stale rows
 * mixed in with the new results.
 *
 * @param matches the matches to report; the stream is consumed by this method
 * @param outputFileName path of the status file to write
 * @param numToSelect passed through to {@code Match.getStatusFileLines} for each match
 * @throws FileNotFoundException if the file cannot be created or opened for writing
 */
public static void writeSampleStatusFile(Stream<Match> matches, String outputFileName,
    int numToSelect) throws FileNotFoundException {
  // append = false: start from an empty file so the header appears exactly once.
  try (PrintWriter writer = new PrintWriter(new FileOutputStream(outputFileName, false))) {
    writer.println("id\tstatus\tmatched_case_id");
    matches.flatMap(m -> m.getStatusFileLines(numToSelect)).forEach(writer::println);
  }
}

public static void main(String[] args) {

// Assumed that the input files are tab delimited with a header, first column is IDs and the
Expand Down
17 changes: 13 additions & 4 deletions src/main/java/org/pankratzlab/kdmatch/Match.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
import java.util.Set;
import java.util.StringJoiner;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Holds a sample and its potential matches (i.e. nearest neighbors)
*
*/
public class Match {
Sample sample;
Expand Down Expand Up @@ -80,13 +80,13 @@ String getFormattedResults(int numToSelect) {
results.add(Double.toString(control.dim[j]));
}
if (!control.getGroup().equals("")) {
results.add(control.getGroup());
results.add(control.getGroup());
} else {
results.add("no_group");
results.add("no_group");
}
} else {
// TODO untested
results.add("no-match");
results.add("no-match");
results.add(Double.toString(Double.NaN));
for (int j = 0; j < sample.dim.length; j++) {
results.add(Double.toString(Double.NaN));
Expand All @@ -96,6 +96,15 @@ String getFormattedResults(int numToSelect) {
}
results.add(Boolean.toString(hungarian));
return results.toString();
}

/**
 * Produces the status-file rows for this match as tab-separated strings.
 *
 * <p>The first row describes this match's own sample: its ID, the status {@code "1"}, and its ID
 * again. It is followed by one row per selected entry of {@code matches}, taken from the front of
 * the list: the entry's ID, the status {@code "0"}, and this match's sample ID.
 *
 * @param numToSelect maximum number of entries of {@code matches} to report; capped at the
 *        number of entries available
 * @return a stream of the rows, in the order described above
 */
Stream<String> getStatusFileLines(int numToSelect) {
  final int controlCount = Math.min(numToSelect, this.matches.size());
  final Stream.Builder<String> rows = Stream.builder();

  // Row for the sample itself: it is listed as its own matched case.
  rows.accept(String.join("\t", sample.ID, "1", sample.ID));

  // One row per selected match, each pointing back at this sample.
  this.matches.subList(0, controlCount)
      .forEach(control -> rows.accept(String.join("\t", control.ID, "0", sample.ID)));

  return rows.build();
}
}

0 comments on commit 1aa4bde

Please sign in to comment.