From 244eeebe7796563324b82acf4870e42450513146 Mon Sep 17 00:00:00 2001 From: Brian Pratt Date: Mon, 29 Jul 2024 12:43:42 -0700 Subject: [PATCH] Parallelize feature finding (#3041) Improved Skyline's Hardklor/Bullseye-based feature finding by processing files in parallel. Also made some performance improvements in Hardklor, Bullseye, and our own RT alignment code. --- .../CommonUtil/SystemUtil/ProcessRunner.cs | 5 +- pwiz_tools/Skyline/Executables/BullseyeSharp | 2 +- .../Skyline/Executables/Hardklor/Hardklor | 2 +- .../PeptideSearch/EncyclopeDiaSearchDlg.cs | 51 +- .../PeptideSearch/HardklorSearchControl.cs | 154 ++++ .../PeptideSearch/ImportPeptideSearchDlg.cs | 11 +- .../FileUI/PeptideSearch/SearchControl.cs | 58 +- .../DdaSearch/DdaSearchResources.designer.cs | 69 +- .../Model/DdaSearch/DdaSearchResources.resx | 28 +- .../Model/DdaSearch/HardklorSearchEngine.cs | 735 +++++++++++++----- .../Spectra/Alignment/SimilarityGrid.cs | 78 +- .../Spectra/Alignment/SpectrumSummary.cs | 26 +- .../Spectra/Alignment/SpectrumSummaryList.cs | 8 +- .../Model/RetentionTimes/KdeAligner.cs | 15 +- pwiz_tools/Skyline/Skyline.csproj | 5 +- .../SpectrumRetentionTimeAlignmentTest.cs | 2 +- .../Skyline/TestPerf/FeatureDetectionTest.cs | 185 +++-- 17 files changed, 1032 insertions(+), 402 deletions(-) create mode 100644 pwiz_tools/Skyline/FileUI/PeptideSearch/HardklorSearchControl.cs diff --git a/pwiz_tools/Shared/CommonUtil/SystemUtil/ProcessRunner.cs b/pwiz_tools/Shared/CommonUtil/SystemUtil/ProcessRunner.cs index 53a13f76d4..2ee0c103de 100644 --- a/pwiz_tools/Shared/CommonUtil/SystemUtil/ProcessRunner.cs +++ b/pwiz_tools/Shared/CommonUtil/SystemUtil/ProcessRunner.cs @@ -165,7 +165,10 @@ public void Run(ProcessStartInfo psi, string stdin, IProgressMonitor progress, r { if (progress.IsCanceled) { - proc.Kill(); + if (!proc.HasExited) + { + proc.Kill(); + } progress.UpdateProgress(status = status.Cancel()); CleanupTmpDir(psi); // Clean out any tempfiles left behind, if forceTempfilesCleanup was set return; diff --git a/pwiz_tools/Skyline/Executables/BullseyeSharp b/pwiz_tools/Skyline/Executables/BullseyeSharp index 4ecb2b1ec2..237d348eff 160000 --- a/pwiz_tools/Skyline/Executables/BullseyeSharp +++ b/pwiz_tools/Skyline/Executables/BullseyeSharp @@ -1 +1 @@ -Subproject commit 4ecb2b1ec2984fd23059e7d937ffd13ade9026e2 +Subproject commit 237d348effe9064b36e0f39617454d0b57754657 diff --git a/pwiz_tools/Skyline/Executables/Hardklor/Hardklor b/pwiz_tools/Skyline/Executables/Hardklor/Hardklor index d2b08f8689..71140105aa 160000 --- a/pwiz_tools/Skyline/Executables/Hardklor/Hardklor +++ b/pwiz_tools/Skyline/Executables/Hardklor/Hardklor @@ -1 +1 @@ -Subproject commit d2b08f8689edab7ee72cfdedd98deece21eb7adf +Subproject commit 71140105aabde41257e4a2d7f2c1e01eb0f2b4ac diff --git a/pwiz_tools/Skyline/FileUI/PeptideSearch/EncyclopeDiaSearchDlg.cs b/pwiz_tools/Skyline/FileUI/PeptideSearch/EncyclopeDiaSearchDlg.cs index 83da4f7b6f..eb53d897fb 100644 --- a/pwiz_tools/Skyline/FileUI/PeptideSearch/EncyclopeDiaSearchDlg.cs +++ b/pwiz_tools/Skyline/FileUI/PeptideSearch/EncyclopeDiaSearchDlg.cs @@ -22,12 +22,10 @@ using System.IO; using System.Linq; using System.Security.Cryptography; -using System.Text.RegularExpressions; using System.Threading; using System.Windows.Forms; using pwiz.Common.Chemistry; using pwiz.Common.Collections; -using pwiz.Common.Controls; using pwiz.Common.SystemUtil; using pwiz.Skyline.Alerts; using pwiz.Skyline.Controls; @@ -613,54 +611,7 @@ public EncyclopeDiaSearchControl(EncyclopeDiaSearchDlg hostControl) public EncyclopeDiaSearchDlg.EncyclopeDiaSettings Settings { get; set; } public string EncyclopeDiaChromLibraryPath { get; private set; } public string EncyclopeDiaQuantLibraryPath { get; private set; } - - public class ParallelRunnerProgressControl : MultiProgressControl, IProgressMonitor - { - private readonly EncyclopeDiaSearchControl _hostControl; - - public ParallelRunnerProgressControl(EncyclopeDiaSearchControl hostControl) - { - _hostControl = hostControl; - ProgressSplit.Panel2Collapsed = true; - } - - // ReSharper disable once InconsistentlySynchronizedField - public bool IsCanceled => _hostControl.IsCanceled; - - public UpdateProgressResponse UpdateProgress(IProgressStatus status) - { - if (IsCanceled || status.IsCanceled) - return UpdateProgressResponse.cancel; - - var match = Regex.Match(status.Message, @"(.*)\:\:(.*)"); - Assume.IsTrue(match.Success && match.Groups.Count == 3, - @"ParallelRunnerProgressDlg requires a message like file::message to indicate which file's progress is being updated"); - - lock(this) - { - // only make the MultiProgressControl visible if it's actually used - if (RowCount == 0) - { - var hostDialog = _hostControl.HostDialog; - hostDialog.BeginInvoke(new MethodInvoker(() => - { - _hostControl.progressSplitContainer.Panel1Collapsed = false; - hostDialog.Size = new Size(Math.Min( - Screen.FromControl(hostDialog).Bounds.Width * 90 / 100, - hostDialog.Width * 2), hostDialog.Height); - })); - } - - string name = match.Groups[1].Value; - string message = match.Groups[2].Value; - Update(name, status.PercentComplete, message, status.ErrorException != null); - return IsCanceled ? UpdateProgressResponse.cancel : UpdateProgressResponse.normal; - } - } - - public bool HasUI => true; - } - + private bool Search(EncyclopeDiaSearchDlg.EncyclopeDiaSettings settings, CancellationTokenSource token, IProgressStatus status) { ParallelRunnerProgressControl multiProgressControl = null; diff --git a/pwiz_tools/Skyline/FileUI/PeptideSearch/HardklorSearchControl.cs b/pwiz_tools/Skyline/FileUI/PeptideSearch/HardklorSearchControl.cs new file mode 100644 index 0000000000..743f994e65 --- /dev/null +++ b/pwiz_tools/Skyline/FileUI/PeptideSearch/HardklorSearchControl.cs @@ -0,0 +1,154 @@ +/* + * Original author: Brian Pratt + * + * Copyright 2024 University of Washington - Seattle, WA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +using System; +using System.Threading; +using System.Windows.Forms; +using pwiz.Common.SystemUtil; +using pwiz.Skyline.Model; +using pwiz.Skyline.Properties; +using pwiz.Skyline.Util.Extensions; +using pwiz.Skyline.Model.DdaSearch; + +namespace pwiz.Skyline.FileUI.PeptideSearch +{ + public class HardklorSearchControl : SearchControl + { + private ImportPeptideSearch ImportPeptideSearch; + private ImportPeptideSearch.HardklorSettings _settings; + private int _totalSteps; + private int _currentStep; + HardklorSearchEngine FeatureFinder => ImportPeptideSearch.SearchEngine as HardklorSearchEngine; + + + public HardklorSearchControl(ImportPeptideSearch importPeptideSearch) + { + ImportPeptideSearch = importPeptideSearch; + } + + // Steps, in parallel, for n files: + // Threads 0 through (n-1): per file, convert to mzML, then do HardKlor then Bullseye + // Thread n: Perform alignments on all mzMLs as they become available + // Finally combine the results across replicates + + private bool Search(ImportPeptideSearch.HardklorSettings settings, CancellationTokenSource token) + { + _settings = settings; + ParallelRunnerProgressControl multiProgressControl = null; + try + { + Invoke(new MethodInvoker(() => + { + multiProgressControl = new ParallelRunnerProgressControl(this); + multiProgressControl.Dock = DockStyle.Fill; + progressSplitContainer.Panel1.Controls.Add(multiProgressControl); + })); + + FeatureFinder.Generate(multiProgressControl, this, token.Token); + } + catch (OperationCanceledException e) + { + UpdateProgress(_status.ChangeWarningMessage(e.InnerException?.Message ?? e.Message)); + return false; + } + catch (Exception e) + { + UpdateProgress(_status.ChangeErrorException(e)); + return false; + } + finally + { + Invoke(new MethodInvoker(() => + { + progressSplitContainer.Panel1Collapsed = true; + progressSplitContainer.Panel1.Controls.Clear(); + multiProgressControl?.Dispose(); + })); + } + + return !token.IsCancellationRequested; + } + + public override UpdateProgressResponse UpdateProgress(IProgressStatus status) + { + if (IsCanceled) + { + return UpdateProgressResponse.cancel; + } + + lock (this) + { + _status = _status.ChangeMessage(status.Message).ChangePercentComplete((100 * _currentStep++) / _totalSteps); + } + + BeginInvoke(new MethodInvoker(() => UpdateSearchEngineProgress(_status))); + + return UpdateProgressResponse.normal; + } + + + + + public override void RunSearch() + { + // ImportPeptideSearch.SearchEngine.SearchProgressChanged += SearchEngine_MessageNotificationEvent; + txtSearchProgress.Text = string.Empty; + _progressTextItems.Clear(); + btnCancel.Enabled = progressBar.Visible = true; + + _cancelToken = new CancellationTokenSource(); + FeatureFinder.SetCancelToken(_cancelToken); + + ActionUtil.RunAsync(RunSearchAsync, @"Feature Finding Search thread"); + } + + private IProgressStatus _status; + private void RunSearchAsync() + { + _totalSteps = (ImportPeptideSearch.SearchEngine.SpectrumFileNames.Length * 4) + 2; // Per-file: msconvert, Hardklor, Bullseye, RTAlign prep. All-files: RT alignment, combine features + _currentStep = 0; + + _status = new ProgressStatus(); + + bool success = true; + + if (!_cancelToken.IsCancellationRequested) + { + UpdateProgress(_status = _status.ChangeMessage(PeptideSearchResources.DDASearchControl_SearchProgress_Starting_search)); + + success = Search(_settings, _cancelToken); + + Invoke(new MethodInvoker(() => UpdateSearchEngineProgressMilestone(_status, success, _status.SegmentCount, + Resources.DDASearchControl_SearchProgress_Search_canceled, + PeptideSearchResources.DDASearchControl_SearchProgress_Search_failed, + Resources.DDASearchControl_SearchProgress_Search_done))); + } + + + Invoke(new MethodInvoker(() => + { + UpdateTaskbarProgress(TaskbarProgress.TaskbarStates.NoProgress, 0); + btnCancel.Enabled = false; + OnSearchFinished(success); + // ImportPeptideSearch.SearchEngine.SearchProgressChanged -= SearchEngine_MessageNotificationEvent; + })); + } + + + } +} diff --git a/pwiz_tools/Skyline/FileUI/PeptideSearch/ImportPeptideSearchDlg.cs b/pwiz_tools/Skyline/FileUI/PeptideSearch/ImportPeptideSearchDlg.cs index 4e18e14c51..f8c93896a1 100644 --- a/pwiz_tools/Skyline/FileUI/PeptideSearch/ImportPeptideSearchDlg.cs +++ b/pwiz_tools/Skyline/FileUI/PeptideSearch/ImportPeptideSearchDlg.cs @@ -162,7 +162,14 @@ public ImportPeptideSearchDlg(SkylineWindow skylineWindow, LibraryManager librar AddPageControl(SearchSettingsControl, ddaSearchSettingsPage, 18, isFeatureDetection ? this.buildSpectralLibraryTitlePanel.Bottom : 50); } - SearchControl = new DDASearchControl(ImportPeptideSearch); + if (isFeatureDetection) + { + SearchControl = new HardklorSearchControl(ImportPeptideSearch); + } + else + { + SearchControl = new DDASearchControl(ImportPeptideSearch); + } AddPageControl(SearchControl, ddaSearchPage, isFeatureDetection ? 3 : 18, 50); if (isFeatureDetection) { @@ -453,7 +460,7 @@ public void AdjustHeightForFullScanSettings() public MatchModificationsControl MatchModificationsControl { get; private set; } public ConverterSettingsControl ConverterSettingsControl { get; private set; } public SearchSettingsControl SearchSettingsControl { get; private set; } - public DDASearchControl SearchControl { get; private set; } + public SearchControl SearchControl { get; private set; } public ImportResultsControl ImportResultsDDAControl { get; private set; } public ImportResultsDIAControl ImportResultsDIAControl { get; private set; } diff --git a/pwiz_tools/Skyline/FileUI/PeptideSearch/SearchControl.cs b/pwiz_tools/Skyline/FileUI/PeptideSearch/SearchControl.cs index ab0a5b1128..063fd603de 100644 --- a/pwiz_tools/Skyline/FileUI/PeptideSearch/SearchControl.cs +++ b/pwiz_tools/Skyline/FileUI/PeptideSearch/SearchControl.cs @@ -18,13 +18,16 @@ */ using System; using System.Collections.Generic; +using System.Drawing; using System.Linq; +using System.Text.RegularExpressions; using System.Threading; using System.Windows.Forms; using pwiz.Common.Collections; using pwiz.Common.Controls; using pwiz.Common.SystemUtil; using pwiz.Skyline.Alerts; +using pwiz.Skyline.Util; namespace pwiz.Skyline.FileUI.PeptideSearch { @@ -56,6 +59,12 @@ public string ToString(bool showTimestamp) return showTimestamp ? $"[{Timestamp.ToString("yyyy/MM/dd HH:mm:ss")}] {Message}" : Message; // ReSharper restore LocalizableElement } + + public override string ToString() + { + return ToString(true); // For debugging convenience + } + } protected List _progressTextItems = new List(); @@ -230,7 +239,7 @@ private void RefreshProgressTextbox() public bool IsCanceled => _cancelToken.IsCancellationRequested; /// progress updates from AbstractDdaConverter (should be prefixed by the file currently being processed) - public UpdateProgressResponse UpdateProgress(IProgressStatus status) + public virtual UpdateProgressResponse UpdateProgress(IProgressStatus status) { if (IsCanceled) return UpdateProgressResponse.cancel; @@ -242,5 +251,52 @@ public UpdateProgressResponse UpdateProgress(IProgressStatus status) return UpdateProgressResponse.normal; } + + public class ParallelRunnerProgressControl : MultiProgressControl, IProgressMonitor + { + private readonly SearchControl _hostControl; + + public ParallelRunnerProgressControl(SearchControl hostControl) + { + _hostControl = hostControl; + ProgressSplit.Panel2Collapsed = true; + } + + // ReSharper disable once InconsistentlySynchronizedField + public bool IsCanceled => _hostControl.IsCanceled; + + public UpdateProgressResponse UpdateProgress(IProgressStatus status) + { + if (IsCanceled || status.IsCanceled) + return UpdateProgressResponse.cancel; + + var match = Regex.Match(status.Message, @"(.*)\:\:(.*)"); + Assume.IsTrue(match.Success && match.Groups.Count == 3, + @"ParallelRunnerProgressDlg requires a message like file::message to indicate which file's progress is being updated"); + + lock (this) + { + // only make the MultiProgressControl visible if it's actually used + if (RowCount == 0) + { + var hostDialog = _hostControl.Parent; + hostDialog.BeginInvoke(new MethodInvoker(() => + { + _hostControl.progressSplitContainer.Panel1Collapsed = false; + hostDialog.Size = new Size(Math.Min( + Screen.FromControl(hostDialog).Bounds.Width * 90 / 100, + hostDialog.Width * 2), hostDialog.Height); + })); + } + + string name = match.Groups[1].Value; + string message = match.Groups[2].Value; + Update(name, status.PercentComplete, message, status.ErrorException != null); + return IsCanceled ? UpdateProgressResponse.cancel : UpdateProgressResponse.normal; + } + } + + public bool HasUI => true; + } } } diff --git a/pwiz_tools/Skyline/Model/DdaSearch/DdaSearchResources.designer.cs b/pwiz_tools/Skyline/Model/DdaSearch/DdaSearchResources.designer.cs index a04ab8775f..e381afb41e 100644 --- a/pwiz_tools/Skyline/Model/DdaSearch/DdaSearchResources.designer.cs +++ b/pwiz_tools/Skyline/Model/DdaSearch/DdaSearchResources.designer.cs @@ -237,6 +237,51 @@ public static string HardklorSearchEngine_FindSimilarFeatures_Looking_for_featur } } + /// + /// Looks up a localized string similar to Align replicates. + /// + public static string HardklorSearchEngine_Generate_Align_replicates { + get { + return ResourceManager.GetString("HardklorSearchEngine_Generate_Align_replicates", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Preparing {0} for RT alignment. + /// + public static string HardklorSearchEngine_Generate_Preparing__0__for_RT_alignment { + get { + return ResourceManager.GetString("HardklorSearchEngine_Generate_Preparing__0__for_RT_alignment", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Reading {0}. + /// + public static string HardklorSearchEngine_Generate_Reading__0_ { + get { + return ResourceManager.GetString("HardklorSearchEngine_Generate_Reading__0_", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Searching for common features across replicates. + /// + public static string HardklorSearchEngine_Generate_Searching_for_common_features_across_replicates { + get { + return ResourceManager.GetString("HardklorSearchEngine_Generate_Searching_for_common_features_across_replicates", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Waiting for Hardklor/Bullseye completion. + /// + public static string HardklorSearchEngine_Generate_Waiting_for_Hardklor_Bullseye_completion { + get { + return ResourceManager.GetString("HardklorSearchEngine_Generate_Waiting_for_Hardklor_Bullseye_completion", resourceCulture); + } + } + /// /// Looks up a localized string similar to Error performing alignment between {0} and {1}: {2}. /// @@ -248,7 +293,7 @@ public static string HardklorSearchEngine_PerformAllAlignments_Error_performing_ } /// - /// Looks up a localized string similar to Retention time alignment: {0} vs {1}. + /// Looks up a localized string similar to RT alignment: {0} vs {1}. /// public static string HardklorSearchEngine_PerformAllAlignments_Performing_retention_time_alignment__0__vs__1_ { get { @@ -268,21 +313,29 @@ public static string HardklorSearchEngine_PerformAllAlignments_Preparing_for_ret } /// - /// Looks up a localized string similar to Searching for peptide-like features with {0}. + /// Looks up a localized string similar to Waiting for next file. + /// + public static string HardklorSearchEngine_PerformAllAlignments_Waiting_for_next_file { + get { + return ResourceManager.GetString("HardklorSearchEngine_PerformAllAlignments_Waiting_for_next_file", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Searching for peptide-like features. /// - public static string HardklorSearchEngine_Run_Searching_for_peptide_like_features_with__0_ { + public static string HardklorSearchEngine_Run_Searching_for_peptide_like_features { get { - return ResourceManager.GetString("HardklorSearchEngine_Run_Searching_for_peptide_like_features_with__0_", resourceCulture); + return ResourceManager.GetString("HardklorSearchEngine_Run_Searching_for_peptide_like_features", resourceCulture); } } /// - /// Looks up a localized string similar to Searching for persistent features in Hardklor results with {0}. + /// Looks up a localized string similar to Searching for persistent features in Hardklor results. /// - public static string HardklorSearchEngine_Run_Searching_for_persistent_features_in_Hardklor_results_with__0_ { + public static string HardklorSearchEngine_Run_Searching_for_persistent_features_in_Hardklor_results { get { - return ResourceManager.GetString("HardklorSearchEngine_Run_Searching_for_persistent_features_in_Hardklor_results_wi" + - "th__0_", resourceCulture); + return ResourceManager.GetString("HardklorSearchEngine_Run_Searching_for_persistent_features_in_Hardklor_results", resourceCulture); } } diff --git a/pwiz_tools/Skyline/Model/DdaSearch/DdaSearchResources.resx b/pwiz_tools/Skyline/Model/DdaSearch/DdaSearchResources.resx index cfd7969897..4e0dd665f8 100644 --- a/pwiz_tools/Skyline/Model/DdaSearch/DdaSearchResources.resx +++ b/pwiz_tools/Skyline/Model/DdaSearch/DdaSearchResources.resx @@ -173,7 +173,7 @@ Starting msconvert conversion. - Retention time alignment: {0} vs {1} + RT alignment: {0} vs {1} Looking for features occurring in multiple replicates @@ -187,11 +187,11 @@ Preparing for retention time alignment on "{0}" - - Searching for peptide-like features with {0} + + Searching for peptide-like features - - Searching for persistent features in Hardklor results with {0} + + Searching for persistent features in Hardklor results Converting file "{0}" to {1} @@ -202,4 +202,22 @@ Hardklor searches for peptide-like features in MS1 scans using an averagine model. Features found by Hardklor are represented in Skyline as small molecules with the chemical formula that Hardklor used to generate the isotope distribution it matched to the feature, plus a mass offset to match the high-res peak mass to charge ratio. This is not the actual formula of the molecule responsible for the MS1 peaks in the mass spectrometer, but rather an approximation with a matching isotope distribution. + + Waiting for Hardklor/Bullseye completion + + + Preparing {0} for RT alignment + + + Reading {0} + + + Align replicates + + + Searching for common features across replicates + + + Waiting for next file + \ No newline at end of file diff --git a/pwiz_tools/Skyline/Model/DdaSearch/HardklorSearchEngine.cs b/pwiz_tools/Skyline/Model/DdaSearch/HardklorSearchEngine.cs index e894652239..a85d8903e7 100644 --- a/pwiz_tools/Skyline/Model/DdaSearch/HardklorSearchEngine.cs +++ b/pwiz_tools/Skyline/Model/DdaSearch/HardklorSearchEngine.cs @@ -23,6 +23,7 @@ using pwiz.Skyline.Properties; using pwiz.Skyline.Util; using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.ComponentModel; using System.Diagnostics; @@ -38,19 +39,20 @@ using pwiz.Skyline.Model.RetentionTimes; using Enzyme = pwiz.Skyline.Model.DocSettings.Enzyme; using pwiz.Common.Collections; +using System.Text.RegularExpressions; namespace pwiz.Skyline.Model.DdaSearch { public class HardklorSearchEngine : AbstractDdaSearchEngine, IProgressMonitor { private ImportPeptideSearch _searchSettings; + private List _convertedFileNames; // The mzML files Hardklor operates on + private List _completedSearches; // The mzML files we've searched - private bool _keepIntermediateFiles; + internal bool _keepIntermediateFiles; // Temp files we'll need to clean up at the end the end if !_keepIntermediateFiles - private SortedDictionary _inputsAndOutputs; // .hk.bs.kro results files - private string _isotopesFilename; - private string _paramsFilename; - private Dictionary _spectrumSummaryLists = new Dictionary(); + private ConcurrentDictionary _inputsAndOutputs; // Maps mzml to .hk.bs.kro results files + public Dictionary AlignmentSpectrumSummaryLists = new Dictionary(); private Dictionary, KdeAligner> _alignments = new Dictionary, KdeAligner>(); @@ -58,18 +60,31 @@ public class HardklorSearchEngine : AbstractDdaSearchEngine, IProgressMonitor public override void SetSpectrumFiles(MsDataFileUri[] searchFilenames) { SpectrumFileNames = searchFilenames; - _paramsFilename = null; - _searchSettings.RemainingStepsInSearch = searchFilenames.Length + 2; // One step for Hardklor, and one Bullseye per file, then unify the bullseye results + _convertedFileNames = SpectrumFileNames.Select(HardklorSearchEngine.GetMzmlFilePath).ToList(); + + _searchSettings.RemainingStepsInSearch = 1; // Everything runs in a parallel step + + var nThreads = ParallelEx.GetThreadCount(Environment.ProcessorCount); + _rawFileThreadCount = Math.Max(1, nThreads/3); // Allocate twice as many threads to RT alignment than to msconvert->hardklor->bullseye + _alignerThreadCount = Math.Max(1, nThreads-_rawFileThreadCount); + _completedSearches = new List(); } public HardklorSearchEngine(ImportPeptideSearch searchSettings) { _searchSettings = searchSettings; + _keepIntermediateFiles = true; + _inputsAndOutputs = new ConcurrentDictionary(); } + public void SetCancelToken(CancellationTokenSource cancelToken) => _cancelToken = cancelToken; + private CancellationTokenSource _cancelToken; - private IProgressStatus _progressStatus; private bool _success; + private int _rawFileThreadCount; // Number of threads to use for per-file msconvert->Hardklor->Bullseye + private int _alignerThreadCount; // Number of threads to use for RT alignment + private int CountCompletedSearches => _completedSearches.Count; // Number of per-file msconvert->Hardklor->Bullseye runs completed + public override string[] FragmentIons => Array.Empty(); public override string[] Ms2Analyzers => Array.Empty(); public override string EngineName => @"Hardklor"; @@ -82,130 +97,90 @@ public HardklorSearchEngine(ImportPeptideSearch searchSettings) public override event NotificationEventHandler SearchProgressChanged; - public override bool Run(CancellationTokenSource cancelToken, IProgressStatus status) + public override bool Run(CancellationTokenSource cancelToken, IProgressStatus status) // Single threaded version { - using var tmpDir = new TempDir(); // Set TMP to a new directory that we'll destroy on exit - _cancelToken = cancelToken; - _progressStatus = status.ChangePercentComplete(0); - _success = true; - _isotopesFilename = null; + _success = false; + return _success; + } - var skylineWorkingDirectory = Settings.Default.ActiveDirectory; - _keepIntermediateFiles = !string.IsNullOrEmpty(skylineWorkingDirectory); + public void Generate(IProgressMonitor parallelProgressMonitor, + IProgressMonitor masterProgressMonitor, + CancellationToken cancelToken) + { + // parallel converter that starts all files converting + var runner = new ParallelFeatureFinder(this, masterProgressMonitor, parallelProgressMonitor, Settings.Default.ActiveDirectory, cancelToken); - try + runner.Generate(); + } + + internal IProgressStatus RunFeatureFinderStep(IProgressMonitor progressMonitor, string workingDirectory, IProgressMonitor searchControl, IProgressStatus status, MsDataFilePath input, bool bullseye) + { + // Hardklor is not L10N ready, so take care to run its process under InvariantCulture + Func RunHardklorStep = () => { - if (_searchSettings.RemainingStepsInSearch == 2) + string exeName; + string args; + string stepDescription; + if (!bullseye) { - // Final step - try to unify similar features across the various Bullseye result files - _searchSettings.RemainingStepsInSearch--; // More to do after this? - _success = AlignReplicates(); - if (_success) - { - _success = FindSimilarFeatures(); - if (!_success) - { - _progressStatus = _progressStatus.ChangeMessage(string.Format(DdaSearchResources.DdaSearch_Search_failed__0, DdaSearchResources.HardklorSearchEngine_Run_See_Hardklor_Bullseye_log_for_details)); - } - } + // First pass - run Hardklor + exeName = PrepareHardklorProcess(workingDirectory, input, out args, out stepDescription); } else { - // Hardklor is not L10N ready, so take care to run its process under InvariantCulture - Func RunHardklor = () => - { - string exeName; - string args; - string message; - if (string.IsNullOrEmpty(_paramsFilename)) - { - // First pass - run Hardklor - var paramsFileText = GenerateHardklorConfigFile(skylineWorkingDirectory); + // Refine the Hardklor results with Bullseye + exeName = PrepareBullseyeProcess(input, out args, out stepDescription); + } - RunNumber = 0; - void SetHardklorParamsFileName() - { - var versionString = RunNumber == 0 ? string.Empty : $@"_{RunNumber:000}"; - _paramsFilename = string.IsNullOrEmpty(skylineWorkingDirectory) - ? Path.GetTempFileName() - : Path.Combine(skylineWorkingDirectory, $@"Hardklor{versionString}.conf"); - } + searchControl.UpdateProgress(status = status.ChangeMessage($@"{stepDescription}: {exeName} {string.Join(@" ", args)}")); // Update master log + progressMonitor.UpdateProgress(status = status.ChangeSegmentName(exeName)); // Update progress bar - for (SetHardklorParamsFileName(); File.Exists(_paramsFilename);) - { - RunNumber++; - SetHardklorParamsFileName(); // Avoid stomping previous runs - } - File.WriteAllText(_paramsFilename, paramsFileText.ToString()); - exeName = @"Hardklor"; - args = $@"""{_paramsFilename}"""; - message = string.Format(DdaSearchResources.HardklorSearchEngine_Run_Searching_for_peptide_like_features_with__0_, exeName); - } - else - { - // Refine the Hardklor results with Bullseye - _searchSettings.RemainingStepsInSearch--; // More to do after this? - var pair = _inputsAndOutputs.ElementAt(_searchSettings.RemainingStepsInSearch - 2); // Last 2 steps are Bullseye then cleanup for blibbuild - var mzFile = pair.Key; - var hkFile = pair.Value; - var matchFile = GetBullseyeMatchFilename(hkFile); - var noMatchFile = GetBullseyeNoMatchFilename(hkFile); - exeName = @"BullseyeSharp"; - var ppm = GetPPM(); - args = $@"-c 0 " + // Don't eliminate long elutions - $@"-r {ppm.ToString(CultureInfo.InvariantCulture)} " + - @"--timer " + // Show performance info - $@"""{hkFile}"" ""{mzFile}"""; - // + " ""{matchFile}"" ""{noMatchFile}"""; // We're not messing with MS2 (yet?) - message = string.Format(DdaSearchResources.HardklorSearchEngine_Run_Searching_for_persistent_features_in_Hardklor_results_with__0_, exeName); - } - UpdateProgress(_progressStatus = _progressStatus.ChangeSegmentName(message)); - var pr = new ProcessRunner(); - var psi = new ProcessStartInfo(exeName, args) - { - CreateNoWindow = true, - UseShellExecute = false, - RedirectStandardOutput = true, - RedirectStandardError = true, - RedirectStandardInput = false, - StandardOutputEncoding = Encoding.UTF8, - StandardErrorEncoding = Encoding.UTF8 - }; - pr.ShowCommandAndArgs = true; // Show the commandline - pr.Run(psi, string.Empty, this, ref _progressStatus, ProcessPriorityClass.BelowNormal); - return _paramsFilename; - }; - LocalizationHelper.CallWithCulture(CultureInfo.InvariantCulture, RunHardklor); - } - _progressStatus = _progressStatus.NextSegment(); - } - catch (Exception ex) - { - _progressStatus = _progressStatus.ChangeErrorException(ex).ChangeMessage(string.Format(DdaSearchResources.DdaSearch_Search_failed__0, ex.Message)); - _success = false; - } - if (IsCanceled && !_progressStatus.IsCanceled) - { - _progressStatus = _progressStatus.Cancel().ChangeMessage(Resources.DDASearchControl_SearchProgress_Search_canceled); - _success = false; - } + var pr = new ProcessRunner(); + var psi = new ProcessStartInfo(exeName, args) + { + CreateNoWindow = true, + UseShellExecute = false, + RedirectStandardOutput = true, + RedirectStandardError = true, + RedirectStandardInput = false, + StandardOutputEncoding = Encoding.UTF8, + StandardErrorEncoding = Encoding.UTF8 + }; + pr.ShowCommandAndArgs = true; // Show the commandline + pr.Run(psi, string.Empty, progressMonitor, ref status, ProcessPriorityClass.BelowNormal); + return exeName; + }; + LocalizationHelper.CallWithCulture(CultureInfo.InvariantCulture, RunHardklorStep); + return status; + } - if (!_success) + private string PrepareBullseyeProcess(MsDataFilePath input, out string args, out string description) { - _cancelToken.Cancel(); + string exeName; + var mzFile = input.GetFilePath(); + var hkFile = _inputsAndOutputs[input]; + // var matchFile = GetBullseyeMatchFilename(hkFile); MS2 stuff + // var noMatchFile = GetBullseyeNoMatchFilename(hkFile); MS2 stuff + exeName = @"BullseyeSharp"; + var ppm = GetPPM(); + args = $@"-c 0 " + // Don't eliminate long elutions + $@"-r {ppm.ToString(CultureInfo.InvariantCulture)} " + + @"--timer " + // Show performance info + $@"""{hkFile}"" ""{mzFile}"""; + description = DdaSearchResources.HardklorSearchEngine_Run_Searching_for_persistent_features_in_Hardklor_results; + return exeName; } - if (_success) - _progressStatus = _progressStatus.Complete().ChangeMessage(Resources.DDASearchControl_SearchProgress_Search_done); - UpdateProgress(_progressStatus); - - if (!_keepIntermediateFiles) + private string PrepareHardklorProcess(string skylineWorkingDirectory, MsDataFilePath input, out string args, out string description) { - FileEx.SafeDelete(_paramsFilename, true); - } + string exeName; + var paramsFilename = GenerateHardklorConfigFile(skylineWorkingDirectory, input); - return _success; + exeName = @"Hardklor"; + args = $@"""{paramsFilename}"""; + description = DdaSearchResources.HardklorSearchEngine_Run_Searching_for_peptide_like_features; + return exeName; } private double GetPPM() @@ -287,17 +262,15 @@ public override int GetHashCode() private double[] _summedIntensityPerFile; private double _ppm; - private bool AlignReplicates() + internal bool AlignReplicates(IProgressMonitor progressMonitor) { - // Final step - try to unify similar features in the various Bullseye result files - _bfiles = SpectrumFileNames.Select(GetSearchResultFilepath).ToArray(); - if (_bfiles.Length == 0) + if (AlignmentSpectrumSummaryLists.Count == 0) { - return false; + return false; // Nothing to do } // Do retention time alignment on the raw data - if (!PerformAllAlignments()) + if (!PerformAllAlignments(progressMonitor)) { return false; } @@ -305,9 +278,13 @@ private bool AlignReplicates() return true; } - private bool FindSimilarFeatures() + public bool FindSimilarFeatures() { - UpdateProgress(_progressStatus = _progressStatus.ChangeSegmentName(string.Format(DdaSearchResources.HardklorSearchEngine_FindSimilarFeatures_Looking_for_features_occurring_in_multiple_replicates))); + if (SpectrumFileNames.Length == 0) + { + return false; + } + // Parse all the Bullseye output files ReadFeatures(); @@ -428,7 +405,9 @@ private void CreateLookupByCNOS() private void ReadFeatures() { _contents = new List(); + _bfiles = _inputsAndOutputs.Keys.Select(GetSearchResultFilepath).ToArray(); _summedIntensityPerFile = new double[_bfiles.Length]; + for (var fileIndex = 0; fileIndex < _bfiles.Length; fileIndex++) { var file = _bfiles[fileIndex]; @@ -555,8 +534,8 @@ private void CombineSimilarFeaturesAcrossReplicates() // Isotope envelopes agree, masses are similar - does RT agree? if (hkFeatureDetailI.fileIndex != hkFeatureDetailJ.fileIndex) { - var fileI = SpectrumFileNames[hkFeatureDetailI.fileIndex]; - var fileJ = SpectrumFileNames[hkFeatureDetailJ.fileIndex]; + var fileI = _convertedFileNames[hkFeatureDetailI.fileIndex]; + var fileJ = _convertedFileNames[hkFeatureDetailJ.fileIndex]; if (_alignments.TryGetValue(Tuple.Create(fileI, fileJ), out var alignmentI) && _alignments.TryGetValue(Tuple.Create(fileJ, fileI), out var alignmentJ)) { @@ -702,6 +681,16 @@ private void TrimMassDigits() } + private static string GetHardklorIsotopesFilename(string hkFile) + { + return hkFile + @".isotopes"; + } + + private static string GetHardklorConfigurationFilename(string hkFile) + { + return hkFile + @".conf"; + } + private static string GetBullseyeKronikFilename(string hkFile) { return hkFile + @".bs.kro"; @@ -740,7 +729,7 @@ public override void SetFragmentIons(string ions) public override void SetMs2Analyzer(string ms2Analyzer) { - // not used by Hardklor + // Not used by Hardklor } public override void SetPrecursorMassTolerance(MzTolerance mzTolerance) @@ -755,7 +744,11 @@ public override void SetCutoffScore(double cutoffScore) public override string GetSearchResultFilepath(MsDataFileUri searchFilepath) { - return GetBullseyeKronikFilename(_inputsAndOutputs[searchFilepath]); + if (!_inputsAndOutputs.TryGetValue(searchFilepath, out var output)) + { + output = _inputsAndOutputs[GetMzmlFilePath(searchFilepath)]; // Probably that was the raw file name + } + return GetBullseyeKronikFilename(output); } private string[] SupportedExtensions = { @".mzml", @".mzxml" }; // TODO - build Hardklor+MSToolkit to use pwiz so we don't have to convert to mzML @@ -770,7 +763,7 @@ public override bool GetSearchFileNeedsConversion(MsDataFileUri searchFilepath, public bool IsCanceled => _cancelToken.IsCancellationRequested; public UpdateProgressResponse UpdateProgress(IProgressStatus status) { - SearchProgressChanged?.Invoke(this, status); + return _cancelToken.IsCancellationRequested ? UpdateProgressResponse.cancel : UpdateProgressResponse.normal; } @@ -785,28 +778,31 @@ public override void Dispose() { if (!_keepIntermediateFiles) { - FileEx.SafeDelete(_paramsFilename, true); - FileEx.SafeDelete(_isotopesFilename, true); - if (_inputsAndOutputs != null) - { - foreach (var hkFile in _inputsAndOutputs.Values) - { - FileEx.SafeDelete(hkFile, true); // The hardklor .hk file - var bullseyeKronikFilename = GetBullseyeKronikFilename(hkFile); - FileEx.SafeDelete(bullseyeKronikFilename, true); // The Bullseye result file - FileEx.SafeDelete(GetBullseyeKronikUnalignedFilename(bullseyeKronikFilename), true); // The Bullseye result file before we aligned it - FileEx.SafeDelete(GetBullseyeMatchFilename(hkFile), true); - FileEx.SafeDelete(GetBullseyeNoMatchFilename(hkFile), true); - } - } + DeleteIntermediateFiles(); + } + } + + public void DeleteIntermediateFiles() + { + foreach (var hkFile in _inputsAndOutputs.Values) + { + FileEx.SafeDelete(hkFile, true); // The hardklor .hk file + FileEx.SafeDelete(GetHardklorConfigurationFilename(hkFile)); + FileEx.SafeDelete(GetHardklorIsotopesFilename(hkFile)); + var bullseyeKronikFilename = GetBullseyeKronikFilename(hkFile); + FileEx.SafeDelete(bullseyeKronikFilename, true); // The Bullseye result file + FileEx.SafeDelete(GetBullseyeKronikUnalignedFilename(bullseyeKronikFilename), true); // The Bullseye result file before we aligned it + FileEx.SafeDelete(GetBullseyeMatchFilename(hkFile), true); // MS2 stuff + FileEx.SafeDelete(GetBullseyeNoMatchFilename(hkFile), true); // MS2 stuff } } [Localizable(false)] - private void InitializeIsotopes() + private string InitializeIsotopes(string hkFile) { // Make sure Hardklor is working with the same isotope information as Skyline - _isotopesFilename = Path.GetTempFileName(); + + var isotopesFilename = GetHardklorIsotopesFilename(hkFile); var isotopeValues = new List { // First few lines are particular to Hardklor @@ -832,66 +828,61 @@ private void InitializeIsotopes() isotopeValues.Add(string.Empty); } - File.AppendAllLines(_isotopesFilename, isotopeValues); + File.WriteAllLines(isotopesFilename, isotopeValues); + return isotopesFilename; } - private static int RunNumber { get; set; } // Used in filename creation to avoid stomping previous results - private string GenerateHardklorConfigFile(string skylineWorkingDirectory) + private string GenerateHardklorConfigFile(string skylineWorkingDirectory, MsDataFileUri input) { - _inputsAndOutputs = new SortedDictionary(); var workingDirectory = string.IsNullOrEmpty(skylineWorkingDirectory) ? Path.GetTempPath() : skylineWorkingDirectory; int? isCentroided = null; - foreach (var input in SpectrumFileNames) + string outputHardklorFile; + var runNumber = 0; // Used in filename creation to avoid stomping previous results + + void SetHardklorOutputFilename() { - string outputHardklorFile; + var version = runNumber == 0 ? string.Empty : $@"_{runNumber:000}"; + outputHardklorFile = $@"{Path.Combine(workingDirectory, input.GetFileName())}{version}.hk"; + } - void SetHardklorOutputFilename() + for (SetHardklorOutputFilename(); File.Exists(outputHardklorFile);) + { + runNumber++; + SetHardklorOutputFilename(); // Don't stomp existing results + } + + _inputsAndOutputs.GetOrAdd(input, outputHardklorFile); + // Hardklor wants to know if the data is centroided, we should + // find a clue within the first few hundred lines of mzML. + using var reader = new StreamReader(input.GetFilePath()); + for (var lineNum = 0; lineNum < 500; lineNum++) + { + var line = reader.ReadLine(); + if (line == null) { - var version = RunNumber == 0 ? string.Empty : $@"_{RunNumber:000}"; - outputHardklorFile = $@"{Path.Combine(workingDirectory, input.GetFileName())}{version}.hk"; + break; // EOF } - - for (SetHardklorOutputFilename(); File.Exists(outputHardklorFile);) + if (line.Contains(@"MS:1000127") || line.Contains(@"centroid spectrum")) { - RunNumber++; - SetHardklorOutputFilename(); // Don't stomp existing results + isCentroided = 1; + break; } - _inputsAndOutputs.Add(input, outputHardklorFile); - if (!isCentroided.HasValue) + else if (line.Contains(@"MS:1000128") || line.Contains(@"profile spectrum")) { - // Hardklor wants to know if the data is centroided, we should - // find a clue within the first few hundred lines of mzML. - using var reader = new StreamReader(input.GetFilePath()); - for (var lineNum = 0; lineNum < 500; lineNum++) - { - var line = reader.ReadLine(); - if (line == null) - { - break; // EOF - } - if (line.Contains(@"MS:1000127") || line.Contains(@"centroid spectrum")) - { - isCentroided = 1; - break; - } - else if (line.Contains(@"MS:1000128") || line.Contains(@"profile spectrum")) - { - isCentroided = 0; - break; - } - } + isCentroided = 0; + break; } } // Make sure Hardklor is working with the same isotope information as Skyline - InitializeIsotopes(); + var isotopesFilename = InitializeIsotopes(outputHardklorFile); var instrument = _searchSettings.SettingsHardklor.Instrument; var resolution = GetResolution(); - return TextUtil.LineSeparate( + var conf = TextUtil.LineSeparate( $@"# comments in ALL CAPS are from a discussion with Danielle Faivre about Skyline integration", $@"", $@"# Please see online documentation for detailed explanations: ", @@ -953,14 +944,17 @@ void SetHardklorOutputFilename() $@"distribution_area = 1 #Report sum of distribution peaks instead of highest peak only. 0=off, 1=on", $@"xml = 0 #Output results as XML. 0=off, 1=on #MAY NEED UI IN FUTURE", $@"", - $@"isotope_data = ""{_isotopesFilename}"" # Using Skyline's isotope abundance values", + $@"isotope_data = ""{isotopesFilename}"" # Using Skyline's isotope abundance values", $@"", $@"# Below this point is where files to be analyzed should go. They should be listed contain ", $@"# both the input file name, and the output file name. Each file to be analyzed should begin ", $@"# on a new line. By convention Hardklor output should have this extension: .hk", $@"", - TextUtil.LineSeparate(_inputsAndOutputs.Select(kvp => ($@"""{kvp.Key}"" ""{kvp.Value}"""))) + $@"""{input}"" ""{outputHardklorFile}""" ); + var hardklorConfigFile = GetHardklorConfigurationFilename(outputHardklorFile); + File.WriteAllText(hardklorConfigFile, conf); + return hardklorConfigFile; } private double GetResolution() @@ -978,6 +972,7 @@ public static SpectrumSummaryList LoadSpectrumSummaries(MsDataFileUri msDataFile { var summaries = new List(); MsDataFileImpl dataFile; + try { // Only need MS1 for our purposes, and centroided data if possible @@ -997,49 +992,389 @@ public static SpectrumSummaryList LoadSpectrumSummaries(MsDataFileUri msDataFile summaries.Add(SpectrumSummary.FromSpectrum(spectrum)); } } - return new SpectrumSummaryList(summaries); } - public KdeAligner PerformAlignment(SpectrumSummaryList spectra1, SpectrumSummaryList spectra2) + public KdeAligner PerformAlignment(SpectrumSummaryList spectra1, SpectrumSummaryList spectra2, IProgressMonitor progressMonitor, int? threadCount) { - return spectra1.PerformAlignment(this, _progressStatus, spectra2); + return spectra1.PerformAlignment(progressMonitor, spectra2, threadCount); } - public bool PerformAllAlignments() - { - foreach (var path in SpectrumFileNames) - { - UpdateProgress(_progressStatus = _progressStatus.ChangeSegmentName(string.Format(DdaSearchResources.HardklorSearchEngine_PerformAllAlignments_Preparing_for_retention_time_alignment_on___0__, path.GetFileName()))); - _spectrumSummaryLists[path] = LoadSpectrumSummaries(path); - } + public static int TotalAlignmentSteps(int count) => count + // Read each mzml + ((count - 1) * count) + // Compare each A,B and B,A but not A,A + 1; // And the final feature combining step + + private int CompletedAlignmentSteps => AlignmentSpectrumSummaryLists.Count + _alignments.Count; // Steps already taken to read the files and align available + private int AlignmentsPercentDone => 100 * CompletedAlignmentSteps / TotalAlignmentSteps(SpectrumFileNames.Length); - foreach (var entry1 in _spectrumSummaryLists) + private bool PerformAllAlignments(IProgressMonitor progressMonitor) + { + IProgressStatus progressStatus = new ProgressStatus(); + foreach (var entry1 in AlignmentSpectrumSummaryLists) { - foreach (var entry2 in _spectrumSummaryLists) + foreach (var entry2 in AlignmentSpectrumSummaryLists) { if (Equals(entry1.Key, entry2.Key)) { continue; } - UpdateProgress(_progressStatus = _progressStatus.NextSegment().ChangeSegmentName( - string.Format(DdaSearchResources.HardklorSearchEngine_PerformAllAlignments_Performing_retention_time_alignment__0__vs__1_, entry1.Key.GetFileName(), entry2.Key.GetFileName()))); + var tuple = Tuple.Create(entry1.Key, entry2.Key); + if (_alignments.ContainsKey(tuple)) + { + continue; // Already processed + } + + progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangePercentComplete(AlignmentsPercentDone). + ChangeMessage(string.Format(DdaSearchResources.HardklorSearchEngine_PerformAllAlignments_Performing_retention_time_alignment__0__vs__1_, entry1.Key.GetFileNameWithoutExtension(), entry2.Key.GetFileNameWithoutExtension()))); + + + // We can claw back some worker threads if most Hardklor/Bullseye jobs are done + var remainingHardklorJobs = SpectrumFileNames.Length - CountCompletedSearches; + while (remainingHardklorJobs < _rawFileThreadCount) + { + _rawFileThreadCount--; + _alignerThreadCount++; + } try { - _alignments[Tuple.Create(entry1.Key, entry2.Key)] = PerformAlignment(entry1.Value, entry2.Value); + _alignments[tuple] = PerformAlignment(entry1.Value, entry2.Value, progressMonitor, _alignerThreadCount); } catch (Exception x) { - _progressStatus = _progressStatus.ChangeMessage(string.Format(DdaSearchResources.HardklorSearchEngine_PerformAllAlignments_Error_performing_alignment_between__0__and__1____2_, entry1.Key, entry2.Key, x)); + progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangeMessage(string.Format(DdaSearchResources.HardklorSearchEngine_PerformAllAlignments_Error_performing_alignment_between__0__and__1____2_, entry1.Key, entry2.Key, x))); return false; } } } + progressMonitor.UpdateProgress(progressStatus = progressStatus.ChangeMessage(DdaSearchResources.HardklorSearchEngine_PerformAllAlignments_Waiting_for_next_file)); return true; } + + public static MsDataFileUri GetMzmlFilePath(MsDataFileUri rawFile) + { + return MsDataFileUri.Parse(Path.Combine(Path.GetDirectoryName(rawFile.GetFilePath()) ?? string.Empty, + MsconvertDdaConverter.OUTPUT_SUBDIRECTORY, (Path.GetFileNameWithoutExtension(rawFile.GetFilePath()) + @".mzML"))); + } + + + public class ParallelFeatureFinder + { + private readonly CancellationToken _cancelToken; + private readonly HardklorSearchEngine _featureFinder; + private string _workingDirectory; + private IProgressMonitor _masterProgressMonitor { get; } + private IProgressMonitor _parallelProgressMonitor { get; } + public IList RawDataFiles { get; } + + public ParallelFeatureFinder(HardklorSearchEngine featureFinder, + IProgressMonitor masterProgressMonitor, + IProgressMonitor parallelProgressMonitor, + string workingDirectory, + CancellationToken cancelToken) + { + _featureFinder = featureFinder; + _cancelToken = cancelToken; + _parallelProgressMonitor = parallelProgressMonitor; + _masterProgressMonitor = masterProgressMonitor; + _workingDirectory = workingDirectory; + RawDataFiles = _featureFinder.SpectrumFileNames.ToList(); + } + + private bool IsCanceled => _parallelProgressMonitor.IsCanceled || _masterProgressMonitor.IsCanceled; + + public bool KeepIntermediateResults + { + get => _featureFinder?._keepIntermediateFiles ?? false; + set + { + if (_featureFinder != null) _featureFinder._keepIntermediateFiles = value; + } + } + + public void Generate() + { + var rawFileQueue = new ConcurrentQueue(RawDataFiles); + + // QueueWorkers convert input raw files (in parallel) to feed them to hardklor (in parallel), and to the RT alignment (serial) + var searchedFiles = 0; + var allFilesSearched = new ManualResetEventSlim(false); + var allFilesAligned = new ManualResetEventSlim(false); + + var progressMonitorForAlignment = new ProgressMonitorForFile(DdaSearchResources.HardklorSearchEngine_Generate_Align_replicates, _parallelProgressMonitor); + var totalAlignmentSteps = HardklorSearchEngine.TotalAlignmentSteps(RawDataFiles.Count); // Each file load is one step, then it's n by n comparison, and final combination step + + void ConsumeAlignmentFile(MsDataFileUri rawFile, int i) + { + if (IsCanceled) + { + return; + } + + var consumeStatus = new ProgressStatus(); + + var mzmlFile = new MsDataFilePath(HardklorSearchEngine.GetMzmlFilePath(rawFile).GetFilePath()); + _masterProgressMonitor.UpdateProgress(consumeStatus.ChangeMessage(string.Format(DdaSearchResources.HardklorSearchEngine_Generate_Preparing__0__for_RT_alignment, mzmlFile.GetFileNameWithoutExtension()))); // Update the master progress leb + + // Load for alignment + progressMonitorForAlignment.UpdateProgress(consumeStatus = (ProgressStatus)consumeStatus.ChangePercentComplete(_featureFinder.AlignmentsPercentDone).ChangeMessage(string.Format(DdaSearchResources.HardklorSearchEngine_Generate_Reading__0_, mzmlFile.GetFileName()))); + + var summary = HardklorSearchEngine.LoadSpectrumSummaries(mzmlFile); + + lock (_featureFinder.AlignmentSpectrumSummaryLists) + { + _featureFinder.AlignmentSpectrumSummaryLists.Add(mzmlFile, summary); + progressMonitorForAlignment.UpdateProgress(consumeStatus = (ProgressStatus)consumeStatus.ChangePercentComplete(_featureFinder.AlignmentsPercentDone)); + + _featureFinder.AlignReplicates(progressMonitorForAlignment); + + if (_featureFinder.AlignmentSpectrumSummaryLists.Count == RawDataFiles.Count) + { + // That was the last one to load, do the alignment amongst them all + allFilesAligned.Set(); + progressMonitorForAlignment.UpdateProgress(consumeStatus = (ProgressStatus)consumeStatus.ChangeMessage(DdaSearchResources.HardklorSearchEngine_Generate_Waiting_for_Hardklor_Bullseye_completion)); + } + } + } + + // Start alignments as soon as all mzML are available + using var aligner = new QueueWorker(null, ConsumeAlignmentFile); + aligner.RunAsync(1, @"FeatureFindingAlignmentConsumer", 0, null); + + void ConsumeRawFile(MsDataFileUri rawFile, int i) + { + ProcessRawDataFileAsync(rawFile, aligner); + lock (rawFileQueue) + { + ++searchedFiles; + if (searchedFiles == RawDataFiles.Count) + allFilesSearched.Set(); + } + } + + MsDataFileUri ProduceRawFile(int i) + { + if (!rawFileQueue.TryDequeue(out var rawFile)) + { + return null; + } + return rawFile; + } + + using var rawFileProcessor = new QueueWorker(ProduceRawFile, ConsumeRawFile); + rawFileProcessor.RunAsync(_featureFinder._rawFileThreadCount, @"FeatureFindingConsume", 1, @"FeatureFindingProduce"); + + + // Wait for all Hardklor/Bullseye jobs to finish + while (!allFilesSearched.Wait(1000, _cancelToken) && !IsCanceled) + { + lock (rawFileProcessor) + { + var exception = rawFileProcessor.Exception; + if (exception != null) + throw new OperationCanceledException(exception.Message, exception); + } + } + + if (IsCanceled) + { + return; + } + + // Wait for all RT alignments to complete + while (!allFilesAligned.Wait(1000, _cancelToken) && !IsCanceled) + { + lock (aligner) + { + var exception = aligner.Exception; + if (exception != null) + throw new OperationCanceledException(exception.Message, exception); + } + } + + if (IsCanceled) + { + return; + } + + // Now look for common features + var alignmentStatus = new ProgressStatus(); + _masterProgressMonitor.UpdateProgress(alignmentStatus.ChangeMessage(DdaSearchResources.HardklorSearchEngine_Generate_Searching_for_common_features_across_replicates)); + progressMonitorForAlignment.UpdateProgress(alignmentStatus = (ProgressStatus)alignmentStatus.ChangePercentComplete((_featureFinder.AlignmentSpectrumSummaryLists.Count * 100) / totalAlignmentSteps). + ChangeMessage((DdaSearchResources.HardklorSearchEngine_FindSimilarFeatures_Looking_for_features_occurring_in_multiple_replicates))); + + _featureFinder.FindSimilarFeatures(); + + _masterProgressMonitor.UpdateProgress(alignmentStatus.ChangePercentComplete(100)); + } + + private static string MsconvertOutputExtension => @".mzML"; + + + private void ProcessRawDataFileAsync(MsDataFileUri rawFile, QueueWorker aligner) + { + var progressMonitorForFile = new ProgressMonitorForFile(rawFile.GetFileName(), _parallelProgressMonitor); + var mzmlFilePath = HardklorSearchEngine.GetMzmlFilePath(rawFile).GetFilePath(); + IProgressStatus status = new ProgressStatus(); + const string MSCONVERT_EXE = @"msconvert"; + status = status.ChangeSegmentName(MSCONVERT_EXE); + + string convertMessage; + if ((string.Compare(mzmlFilePath, rawFile.GetFilePath(), StringComparison.OrdinalIgnoreCase) == 0) || + (File.Exists(mzmlFilePath) && + File.GetLastWriteTime(mzmlFilePath) > File.GetLastWriteTime(rawFile.GetFilePath()) && + MsDataFileImpl.IsValidFile(mzmlFilePath))) + { + // No need for mzML conversion + convertMessage = string.Format( + Resources.MsconvertDdaConverter_Run_Re_using_existing_converted__0__file_for__1__, + MsconvertOutputExtension, rawFile.GetSampleOrFileName()); + status = status.ChangeMessage(convertMessage); + progressMonitorForFile.UpdateProgress(status); + _masterProgressMonitor.UpdateProgress(status); // Update main window log + } + else + { + convertMessage = string.Format(DdaSearchResources.MsconvertDdaConverter_Run_Converting_file___0___to__1_, rawFile.GetSampleOrFileName(), @"mzML"); + status = status.ChangeMessage(convertMessage); + progressMonitorForFile.UpdateProgress(status); // Update main window log + var pr = new ProcessRunner(); + var psi = new ProcessStartInfo(MSCONVERT_EXE) + { + CreateNoWindow = true, + UseShellExecute = false, + Arguments = + "-v -z --mzML " + + $"-o {Path.GetDirectoryName(mzmlFilePath).Quote()} " + + $"--outfile {Path.GetFileName(mzmlFilePath).Quote()} " + + " --acceptZeroLengthSpectra --simAsSpectra --combineIonMobilitySpectra" + + " --filter \"peakPicking true 1-\" " + + " --filter \"msLevel 1\" " + + rawFile.GetFilePath().Quote() + }; + + try + { + var cmd = $@"{psi.FileName} {psi.Arguments}"; + _masterProgressMonitor.UpdateProgress(status.ChangeMessage($@"{convertMessage}: {cmd}")); // Update main window log + progressMonitorForFile.UpdateProgress(status = status.ChangeMessage(cmd)); // Update local progress bar + pr.Run(psi, null, progressMonitorForFile, ref status, null, ProcessPriorityClass.BelowNormal); + } + catch (Exception e) + { + progressMonitorForFile.UpdateProgress(status.ChangeMessage(e.Message)); + } + } + if (progressMonitorForFile.IsCanceled) + { + _featureFinder.DeleteIntermediateFiles(); // Delete .conf etc + FileEx.SafeDelete(mzmlFilePath, true); + return; + } + + lock (aligner) + { + aligner.Add(rawFile); // Let aligner thread know this mzML file is ready to be loaded for alignment + } + + var mzml = new MsDataFilePath(HardklorSearchEngine.GetMzmlFilePath(rawFile).GetFilePath()); + + // Run Hardklor + status = _featureFinder.RunFeatureFinderStep(progressMonitorForFile, _workingDirectory, _masterProgressMonitor, status, mzml, false); + if (progressMonitorForFile.IsCanceled) + { + _featureFinder.DeleteIntermediateFiles(); // Delete .conf etc + FileEx.SafeDelete(mzmlFilePath, true); + return; + } + + // Run Bullseye + status = _featureFinder.RunFeatureFinderStep(progressMonitorForFile, _workingDirectory, _masterProgressMonitor, status, mzml, true); + if (progressMonitorForFile.IsCanceled) + { + _featureFinder.DeleteIntermediateFiles(); // Delete .conf etc + FileEx.SafeDelete(mzmlFilePath, true); + return; + } + + // Note completion so RT aligner can reclaim threads + lock (_featureFinder._completedSearches) + { + _featureFinder._completedSearches.Add(rawFile); + } + + } + + public class ProgressMonitorForFile : IProgressMonitor + { + private readonly string _filename; + private readonly IProgressMonitor _multiProgressMonitor; + private int _maxPercentComplete; + private StringBuilder _logText = new StringBuilder(); + + public string LogText => _logText.ToString(); + + public ProgressMonitorForFile(string filename, IProgressMonitor multiProgressMonitor) + { + _filename = filename; + _multiProgressMonitor = multiProgressMonitor; + } + + public bool IsCanceled => _multiProgressMonitor.IsCanceled; + + private Regex _msconvert = new Regex(@"writing spectra: (\d+)/(\d+)", + RegexOptions.Compiled | RegexOptions.CultureInvariant); // e.g. "Orbi3_SA_IP_SMC1_01.RAW::msconvert: writing spectra: 2202/4528" + private Regex _hardklor = new Regex(@"(\d+)%", + RegexOptions.Compiled | RegexOptions.CultureInvariant); + + public UpdateProgressResponse UpdateProgress(IProgressStatus status) + { + var message = status.Message.Trim(); + var displayMessage = $@"{_filename}::{status.SegmentName}: {message}"; + + if (status.IsCanceled || status.ErrorException != null) + { + _logText.AppendLine(message); + status = status.ChangePercentComplete(100); + _multiProgressMonitor.UpdateProgress(status.ChangeMessage(displayMessage)); + return UpdateProgressResponse.cancel; + } + + if (string.IsNullOrEmpty(message)) + { + return UpdateProgressResponse.normal; // Don't update + } + + var match = _msconvert.Match(status.Message); // MSConvert output? + if (match.Success && match.Groups.Count == 3) + { + // e.g. "Orbi3_SA_IP_SMC1_01.RAW::msconvert: writing spectra: 2202/4528" + _maxPercentComplete = Math.Max(_maxPercentComplete, + Convert.ToInt32(match.Groups[1].Value) * 100 / + Convert.ToInt32(match.Groups[2].Value)); + status = status.ChangePercentComplete(_maxPercentComplete); + } + else + { + match = _hardklor.Match(status.Message); // Hardklor output? + if (match.Success) + { + _maxPercentComplete = Math.Max(_maxPercentComplete, Convert.ToInt32(match.Groups[1].Value)); + status = status.ChangePercentComplete(_maxPercentComplete); + } + } + + _logText.AppendLine(message); + + return _multiProgressMonitor.UpdateProgress(status.ChangeMessage(displayMessage)); + } + + public bool HasUI => _multiProgressMonitor.HasUI; + } + } + } } \ No newline at end of file diff --git a/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SimilarityGrid.cs b/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SimilarityGrid.cs index 971130d616..085df328c0 100644 --- a/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SimilarityGrid.cs +++ b/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SimilarityGrid.cs @@ -17,6 +17,7 @@ * limitations under the License. */ using System; +using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; using System.Threading; @@ -152,16 +153,19 @@ public IEnumerable EnumerateBestQuadrants() // return quadrants.Where(q => q.MaxScore >= minMedian).Take(3); } - public IEnumerable EnumeratePoints() + public List EnumeratePoints() { + var result = new List(XCount * YCount); for (int x = 0; x < XCount; x++) { for (int y = 0; y < YCount; y++) { var score = CalcScore(XStart + x, YStart + y); - yield return new Point(Grid, x + XStart, y + YStart, score); + result.Add( new Point(Grid, x + XStart, y + YStart, score)); } } + + return result; } } @@ -189,33 +193,33 @@ private IEnumerable ToQuadrants(int levels) /// These points should further be filtered by to get the real list /// that should be given to KdeAligner.Train. /// - public IEnumerable GetBestPointCandidates(IProgressMonitor progressMonitor, IProgressStatus status) + public List GetBestPointCandidates(IProgressMonitor progressMonitor, int ?threadCount) { - var parallelProcessor = new ParallelProcessor(progressMonitor, status); - var results = parallelProcessor.FindBestPoints(ToQuadrants(3)); + var parallelProcessor = new ParallelProcessor(progressMonitor); + var results = parallelProcessor.FindBestPoints(ToQuadrants(3), threadCount); return results; } class ParallelProcessor { private IProgressMonitor _progressMonitor; - private List _results = new List(); + private ConcurrentBag _results = new ConcurrentBag(); private int _totalItemCount; private int _completedItemCount; private QueueWorker _queue; private List _exceptions = new List(); - public ParallelProcessor(IProgressMonitor progressMonitor, IProgressStatus status) + public ParallelProcessor(IProgressMonitor progressMonitor) { _progressMonitor = progressMonitor; } - public List FindBestPoints(IEnumerable startingQuadrants) + public List FindBestPoints(IEnumerable startingQuadrants, int ? threadCount) { _queue = new QueueWorker(null, Consume); try { - _queue.RunAsync(ParallelEx.GetThreadCount(), @"SimilarityGrid"); + _queue.RunAsync(threadCount ?? ParallelEx.GetThreadCount(), @"SimilarityGrid"); foreach (var q in startingQuadrants) { Enqueue(q); @@ -231,7 +235,7 @@ public List FindBestPoints(IEnumerable startingQuadrants) if (_completedItemCount == _totalItemCount) { - return _results; + return _results.ToList(); } if (true == _progressMonitor?.IsCanceled) @@ -280,10 +284,10 @@ private void Enqueue(Quadrant quadrant) { if (quadrant.XCount <= 4 || quadrant.YCount <= 4) { - var pointsToAdd = quadrant.EnumeratePoints().ToList(); - lock (this) + var pointsToAdd = quadrant.EnumeratePoints(); + foreach (var p in pointsToAdd) { - _results.AddRange(pointsToAdd); + _results.Add(p); } return; @@ -324,25 +328,51 @@ public double YRetentionTime public double Score { get; } } + private class BestPointIndex + { + private Point[] _valuesX; + private Point[] _valuesY; + + public BestPointIndex(int capacityX, int capacityY) + { + _valuesX = new Point[capacityX]; + _valuesY = new Point[capacityY]; + } + + public void Consider(Point p) + { + if (p.Score > ((_valuesX[p.X]?.Score)??0.0)) + { + _valuesX[p.X] = p; + } + + if (p.Score > ((_valuesY[p.Y]?.Score) ?? 0.0)) + { + _valuesY[p.Y] = p; + } + } + + public bool Contains(Point p) => ReferenceEquals(p, _valuesX[p.X]) || ReferenceEquals(p, _valuesY[p.Y]); + } + + /// /// Returns a subset such that each point has the highest score in either its row /// or column. /// - public static IEnumerable FilterBestPoints(IEnumerable allPoints) + public static List FilterBestPoints(List allPoints) { - HashSet xIndexes = new HashSet(); - HashSet yIndexes = new HashSet(); + var bestPointPerXY = new BestPointIndex(allPoints.Max(p=> p.X) + 1, allPoints.Max(p => p.Y) + 1); + var result = new List(allPoints.Count); // Almost certainly way more capacity than needed, but it's not retained for long - foreach (Point point in allPoints.OrderByDescending(pt=>pt.Score)) + foreach (var p in allPoints) { - if (xIndexes.Contains(point.X) && yIndexes.Contains(point.Y)) - { - continue; - } - xIndexes.Add(point.X); - yIndexes.Add(point.Y); - yield return point; + bestPointPerXY.Consider(p); } + + result.AddRange(allPoints.Where(p => bestPointPerXY.Contains(p))); + + return result; } } } diff --git a/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SpectrumSummary.cs b/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SpectrumSummary.cs index be0276ac89..81922e88ed 100644 --- a/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SpectrumSummary.cs +++ b/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SpectrumSummary.cs @@ -33,6 +33,7 @@ namespace pwiz.Skyline.Model.Results.Spectra.Alignment public class SpectrumSummary { private float[] _summaryValue; + private int _length; /// /// The length of the vector which will represent the spectrum intensity data. /// A longer vector would be more accurate, but shorter vectors take up less @@ -55,6 +56,7 @@ public SpectrumSummary(SpectrumMetadata spectrumMetadata, IEnumerable sum { SpectrumMetadata = spectrumMetadata; _summaryValue = summaryValue?.ToArray() ?? Array.Empty(); + _length = _summaryValue.Length; } public static SpectrumSummary FromSpectrum(MsDataSpectrum spectrum) @@ -115,17 +117,13 @@ public IEnumerable SummaryValue } } + public float[] SummaryValueArray => _summaryValue; + public IEnumerable SummaryValueFloats { get { return _summaryValue.AsEnumerable(); } } - public int SummaryValueLength - { - get - { - return _summaryValue.Length; - } - } + public int SummaryValueLength => _length; public double? SimilarityScore(SpectrumSummary other) { @@ -134,20 +132,22 @@ public int SummaryValueLength return null; } - return CalculateSimilarityScore(SummaryValue, other.SummaryValue); + return CalculateSimilarityScore(SummaryValueArray, other.SummaryValueArray); } - public static double? CalculateSimilarityScore(IEnumerable values1, IEnumerable values2) + public static double? CalculateSimilarityScore(float[] values1, float[] values2) { double sumXX = 0; double sumXY = 0; double sumYY = 0; - foreach (var pair in values1.Zip(values2, Tuple.Create)) + var i = 0; + foreach (var value1 in values1) { - sumXX += pair.Item1 * pair.Item1; - sumXY += pair.Item1 * pair.Item2; - sumYY += pair.Item2 * pair.Item2; + var value2 = values2[i++]; + sumXX += value1 * value1; + sumXY += value1 * value2; + sumYY += value2 * value2; } if (sumXX <= 0 || sumYY <= 0) { diff --git a/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SpectrumSummaryList.cs b/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SpectrumSummaryList.cs index 455cc9ebcb..be11b24771 100644 --- a/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SpectrumSummaryList.cs +++ b/pwiz_tools/Skyline/Model/Results/Spectra/Alignment/SpectrumSummaryList.cs @@ -53,7 +53,7 @@ public IEnumerator GetEnumerator() private static DigestKey GetSpectrumDigestKey( SpectrumSummary spectrumSummary) { - if (spectrumSummary.SummaryValueLength == 0 || spectrumSummary.SummaryValue.All(v=>0 == v)) + if (spectrumSummary.SummaryValueLength == 0 || spectrumSummary.SummaryValueArray.All(v=>0 == v)) { return null; } @@ -190,16 +190,16 @@ public SimilarityGrid GetSimilarityGrid(SpectrumSummaryList that) return new SimilarityGrid(thisByDigestKey[bestDigestKey], thatByDigestKey[bestDigestKey]); } - public KdeAligner PerformAlignment(IProgressMonitor progressMonitor, IProgressStatus status, SpectrumSummaryList spectra2) + public KdeAligner PerformAlignment(IProgressMonitor progressMonitor, SpectrumSummaryList spectra2, int? threadCount) { var similarityGrid = GetSimilarityGrid(spectra2); - var candidatePoints = similarityGrid.GetBestPointCandidates(progressMonitor, status); + var candidatePoints = similarityGrid.GetBestPointCandidates(progressMonitor, threadCount); if (candidatePoints == null) { return null; } - var bestPoints = SimilarityGrid.FilterBestPoints(candidatePoints).ToList(); + var bestPoints = SimilarityGrid.FilterBestPoints(candidatePoints); var kdeAligner = new KdeAligner(); kdeAligner.Train(bestPoints.Select(pt => pt.XRetentionTime).ToArray(), bestPoints.Select(pt=>pt.YRetentionTime).ToArray(), CancellationToken.None); return kdeAligner; diff --git a/pwiz_tools/Skyline/Model/RetentionTimes/KdeAligner.cs b/pwiz_tools/Skyline/Model/RetentionTimes/KdeAligner.cs index 9d56a2b72e..bf1370f665 100644 --- a/pwiz_tools/Skyline/Model/RetentionTimes/KdeAligner.cs +++ b/pwiz_tools/Skyline/Model/RetentionTimes/KdeAligner.cs @@ -271,19 +271,24 @@ private void StampOutHistogram(float[,] histogram, float[,] stamp, double[] xArr private void Stamp(float[,] histogram, float[,] stamp, int x, int y) { - for (int i = x - stamp.GetLength(0)/2; i <= x + stamp.GetLength(0)/2; i++) + var stampLength0 = stamp.GetLength(0); // GetLength is more expensive than you'd think + var stampLength1 = stamp.GetLength(1); + var histogramLength0 = histogram.GetLength(0); + var histogramLength1 = histogram.GetLength(1); + + for (int i = x - stampLength0/2; i <= x + stampLength0 / 2; i++) { if(i < 0) continue; - if (i >= histogram.GetLength(0)) + if (i >= histogramLength0) break; - for (int j = y - stamp.GetLength(1)/2; j <= y + stamp.GetLength(1)/2; j++) + for (int j = y - stampLength1/2; j <= y + stampLength1/2; j++) { if(j < 0) continue; - if (j >= histogram.GetLength(1)) + if (j >= histogramLength1) break; - histogram[i, j] += stamp[i - x + stamp.GetLength(0)/2, j - y + stamp.GetLength(1)/2]; + histogram[i, j] += stamp[i - x + stampLength0 / 2, j - y + stampLength1/2]; } } } diff --git a/pwiz_tools/Skyline/Skyline.csproj b/pwiz_tools/Skyline/Skyline.csproj index 54f4951f99..d2e805a6f9 100644 --- a/pwiz_tools/Skyline/Skyline.csproj +++ b/pwiz_tools/Skyline/Skyline.csproj @@ -1,4 +1,4 @@ - + Debug @@ -484,6 +484,9 @@ Component + + UserControl + True True diff --git a/pwiz_tools/Skyline/TestFunctional/SpectrumRetentionTimeAlignmentTest.cs b/pwiz_tools/Skyline/TestFunctional/SpectrumRetentionTimeAlignmentTest.cs index d85aeb55d3..6f60e98e54 100644 --- a/pwiz_tools/Skyline/TestFunctional/SpectrumRetentionTimeAlignmentTest.cs +++ b/pwiz_tools/Skyline/TestFunctional/SpectrumRetentionTimeAlignmentTest.cs @@ -46,7 +46,7 @@ protected override void DoTest() var spectrumSummaries2 = LoadSpectrumSummaryList(TestFilesDir.GetTestPath("8fmol.mzML")); var similarityMatrix = spectrumSummaries1.GetSimilarityGrid(spectrumSummaries2); Assert.IsNotNull(similarityMatrix); - var pointsToAlign = SimilarityGrid.FilterBestPoints(similarityMatrix.GetBestPointCandidates(null, null)).ToList(); + var pointsToAlign = SimilarityGrid.FilterBestPoints(similarityMatrix.GetBestPointCandidates(null, null)); Assert.AreNotEqual(0, pointsToAlign.Count); var kdeAligner = new KdeAligner(); kdeAligner.Train(pointsToAlign.Select(pt=>pt.XRetentionTime).ToArray(), pointsToAlign.Select(pt=>pt.YRetentionTime).ToArray(), CancellationToken.None); diff --git a/pwiz_tools/Skyline/TestPerf/FeatureDetectionTest.cs b/pwiz_tools/Skyline/TestPerf/FeatureDetectionTest.cs index 9901210846..19046e9e6e 100644 --- a/pwiz_tools/Skyline/TestPerf/FeatureDetectionTest.cs +++ b/pwiz_tools/Skyline/TestPerf/FeatureDetectionTest.cs @@ -172,6 +172,7 @@ private void PerformSearchTest(int pass) } } + var testingForCancelability = pass == 1; var expectedPeptideGroups = 1490; var expectedPeptides = 11510; var expectedPeptideTransitionGroups = 13456; @@ -222,18 +223,21 @@ private void PerformSearchTest(int pass) PauseForScreenShot("Common prefix form"); OkDialog(importResultsNameDlg, importResultsNameDlg.YesDialog); - // Test back/next buttons - PauseForScreenShot("Testing back button"); - RunUI(() => + if (testingForCancelability) { - Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); - Assert.IsTrue(importPeptideSearchDlg.CurrentPage == ImportPeptideSearchDlg.Pages.spectra_page); - }); - PauseForScreenShot("and forward again"); - importResultsNameDlg = ShowDialog(() => importPeptideSearchDlg.ClickNextButton()); - RunUI(() => importResultsNameDlg.Suffix = string.Empty); - PauseForScreenShot("Common prefix form again"); - OkDialog(importResultsNameDlg, importResultsNameDlg.YesDialog); + // Test back/next buttons + PauseForScreenShot("Testing back button"); + RunUI(() => + { + Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); + Assert.IsTrue(importPeptideSearchDlg.CurrentPage == ImportPeptideSearchDlg.Pages.spectra_page); + }); + PauseForScreenShot("and forward again"); + importResultsNameDlg = ShowDialog(() => importPeptideSearchDlg.ClickNextButton()); + RunUI(() => importResultsNameDlg.Suffix = string.Empty); + PauseForScreenShot("Common prefix form again"); + OkDialog(importResultsNameDlg, importResultsNameDlg.YesDialog); + } RunUI(() => { @@ -242,6 +246,7 @@ private void PerformSearchTest(int pass) importPeptideSearchDlg.FullScanSettingsControl.PrecursorCharges = new[] { 2, 3 }; importPeptideSearchDlg.FullScanSettingsControl.PrecursorMassAnalyzer = FullScanMassAnalyzerType.centroided; importPeptideSearchDlg.FullScanSettingsControl.PrecursorRes = 20; + importPeptideSearchDlg.FullScanSettingsControl.SetRetentionTimeFilter(RetentionTimeFilterType.ms2_ids, testingForCancelability ? 3 : 5); }); PauseForScreenShot(" MS1 full scan settings page - next we'll tweak the search settings"); RunUI(() => @@ -252,91 +257,101 @@ private void PerformSearchTest(int pass) importPeptideSearchDlg.SearchSettingsControl.HardklorMinIntensityPPM = 12.37; // Just a random value // The instrument values should be settable since we set "centroided" in Full Scan. AssertEx.IsTrue(importPeptideSearchDlg.SearchSettingsControl.HardklorInstrumentSettingsAreEditable); - importPeptideSearchDlg.SearchSettingsControl.HardklorInstrument = FullScanMassAnalyzerType.orbitrap; - importPeptideSearchDlg.SearchSettingsControl.HardklorResolution = 60000; + importPeptideSearchDlg.SearchSettingsControl.HardklorInstrument = FullScanMassAnalyzerType.tof; + importPeptideSearchDlg.SearchSettingsControl.HardklorResolution = testingForCancelability ? 60000 : 10000; // 10000 per MS1 filtering tutorial }); - PauseForScreenShot(" Search settings page - next we'll start the mzML conversion if needed then cancel the search"); - RunUI(() => + if (!testingForCancelability) { - // Run the search - Assert.IsTrue(importPeptideSearchDlg.ClickNextButton()); - }); - bool? searchSucceeded = null; - TryWaitForOpenForm(typeof(ImportPeptideSearchDlg.DDASearchPage)); // Stop to show this form during form testing - - // Wait for the mzML conversion to complete before canceling - foreach (var searchFile in SearchFiles) - { - var converted = Path.Combine(Path.GetDirectoryName(searchFile) ?? string.Empty, - @"converted", - Path.ChangeExtension(Path.GetFileName(searchFile), @"mzML")); - WaitForCondition(() => File.Exists(converted)); + PauseForScreenShot(" Search settings page"); } - + bool? searchSucceeded = null; RunUI(() => { importPeptideSearchDlg.SearchControl.SearchFinished += (success) => searchSucceeded = success; importPeptideSearchDlg.BuildPepSearchLibControl.IncludeAmbiguousMatches = true; - - // Cancel search - importPeptideSearchDlg.SearchControl.Cancel(); }); - WaitForConditionUI(60000, () => searchSucceeded.HasValue); - Assert.IsFalse(searchSucceeded.Value); - searchSucceeded = null; - PauseForScreenShot("search cancelled, now go back and test 2 input files with the same name in different directories"); - TidyBetweenPasses(0); // For consistent audit log, remove any previous artifacts - // Go back and test 2 input files with the same name in different directories - RunUI(() => + if (testingForCancelability) { - Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); - Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); - Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); + PauseForScreenShot(" Search settings page - next we'll start the mzML conversion if needed then cancel the search"); + RunUI(() => + { + // Run the search + Assert.IsTrue(importPeptideSearchDlg.ClickNextButton()); + }); + TryWaitForOpenForm(typeof(ImportPeptideSearchDlg.DDASearchPage)); // Stop to show this form during form testing - importPeptideSearchDlg.BuildPepSearchLibControl.DdaSearchDataSources = SearchFilesSameName.Select(o => (MsDataFileUri) new MsDataFilePath(o)).ToArray(); - }); - PauseForScreenShot("same name, different directories"); - var removeSuffix = ShowDialog(() => importPeptideSearchDlg.ClickNextButton()); - RunUI(() => removeSuffix.Suffix = string.Empty); - PauseForScreenShot("expected dialog for name reduction - we'll cancel and go back to try unique names"); - OkDialog(removeSuffix, removeSuffix.CancelDialog); + // Wait for the mzML conversion to complete before canceling + foreach (var searchFile in SearchFiles) + { + var converted = Path.Combine(Path.GetDirectoryName(searchFile) ?? string.Empty, + @"converted", + Path.ChangeExtension(Path.GetFileName(searchFile), @"mzML")); + WaitForCondition(() => File.Exists(converted)); + } - // Test with 2 files - RunUI(() => - { - Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); - importPeptideSearchDlg.BuildPepSearchLibControl.DdaSearchDataSources = SearchFiles.Select(o => (MsDataFileUri)new MsDataFilePath(o)).ToArray(); - }); + RunUI(() => + { + // Cancel search + importPeptideSearchDlg.SearchControl.Cancel(); + }); + WaitForConditionUI(60000, () => searchSucceeded.HasValue); + Assert.IsFalse(searchSucceeded.Value); + searchSucceeded = null; + PauseForScreenShot("search cancelled, now go back and test 2 input files with the same name in different directories"); + + // Go back and test 2 input files with the same name in different directories + RunUI(() => + { + Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); + Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); + Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); - // With 2 sources, we get the remove prefix/suffix dialog; accept default behavior - var removeSuffix2 = ShowDialog(() => importPeptideSearchDlg.ClickNextButton()); - RunUI(() => removeSuffix2.Suffix = string.Empty); - PauseForScreenShot("expected dialog for name reduction "); - OkDialog(removeSuffix, () => removeSuffix2.YesDialog()); + importPeptideSearchDlg.BuildPepSearchLibControl.DdaSearchDataSources = SearchFilesSameName.Select(o => (MsDataFileUri) new MsDataFilePath(o)).ToArray(); + }); + PauseForScreenShot("same name, different directories"); + var removeSuffix = ShowDialog(() => importPeptideSearchDlg.ClickNextButton()); + RunUI(() => removeSuffix.Suffix = string.Empty); + PauseForScreenShot("expected dialog for name reduction - we'll cancel and go back to try unique names"); + OkDialog(removeSuffix, removeSuffix.CancelDialog); - RunUI(() => - { - Assert.IsTrue(importPeptideSearchDlg.CurrentPage == ImportPeptideSearchDlg.Pages.full_scan_settings_page); - importPeptideSearchDlg.FullScanSettingsControl.PrecursorCharges = new[] { 2, 3 }; - importPeptideSearchDlg.FullScanSettingsControl.PrecursorMassAnalyzer = FullScanMassAnalyzerType.tof; // Per MS1 filtering tutorial - importPeptideSearchDlg.FullScanSettingsControl.PrecursorRes = 10000; // Per MS1 filtering tutorial - importPeptideSearchDlg.FullScanSettingsControl.SetRetentionTimeFilter(RetentionTimeFilterType.ms2_ids, 5); - }); - PauseForScreenShot("Full scan settings - not set Centroided, so instrument settings on next page should not be operable"); - RunUI(() => - { - Assert.IsTrue(importPeptideSearchDlg.ClickNextButton()); - AssertEx.IsFalse(importPeptideSearchDlg.SearchSettingsControl.HardklorInstrumentSettingsAreEditable); - }); - RunUI(() => - { - Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); - importPeptideSearchDlg.FullScanSettingsControl.PrecursorMassAnalyzer = FullScanMassAnalyzerType.centroided; - importPeptideSearchDlg.FullScanSettingsControl.PrecursorRes = 20; - Assert.IsTrue(importPeptideSearchDlg.ClickNextButton()); - }); - PauseForScreenShot("Full scan settings - set Centroided, so instrument setting should be operable"); + // Test with 2 files + RunUI(() => + { + Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); + importPeptideSearchDlg.BuildPepSearchLibControl.DdaSearchDataSources = SearchFiles.Select(o => (MsDataFileUri)new MsDataFilePath(o)).ToArray(); + }); + + // With 2 sources, we get the remove prefix/suffix dialog; accept default behavior + var removeSuffix2 = ShowDialog(() => importPeptideSearchDlg.ClickNextButton()); + RunUI(() => removeSuffix2.Suffix = string.Empty); + PauseForScreenShot("expected dialog for name reduction "); + OkDialog(removeSuffix2, () => removeSuffix2.YesDialog()); + + RunUI(() => + { + Assert.IsTrue(importPeptideSearchDlg.CurrentPage == ImportPeptideSearchDlg.Pages.full_scan_settings_page); + importPeptideSearchDlg.FullScanSettingsControl.PrecursorCharges = new[] { 2, 3 }; + importPeptideSearchDlg.FullScanSettingsControl.PrecursorMassAnalyzer = FullScanMassAnalyzerType.tof; // Per MS1 filtering tutorial + importPeptideSearchDlg.FullScanSettingsControl.PrecursorRes = 10000; // Per MS1 filtering tutorial + importPeptideSearchDlg.FullScanSettingsControl.SetRetentionTimeFilter(RetentionTimeFilterType.ms2_ids, 5); + }); + PauseForScreenShot("Full scan settings - not set Centroided, so instrument settings on next page should not be operable"); + RunUI(() => + { + Assert.IsTrue(importPeptideSearchDlg.ClickNextButton()); + AssertEx.IsFalse(importPeptideSearchDlg.SearchSettingsControl.HardklorInstrumentSettingsAreEditable); + Assert.IsTrue(importPeptideSearchDlg.ClickBackButton()); + }); + RunUI(() => + { + importPeptideSearchDlg.FullScanSettingsControl.PrecursorMassAnalyzer = FullScanMassAnalyzerType.centroided; + importPeptideSearchDlg.FullScanSettingsControl.PrecursorRes = 20; + Assert.IsTrue(importPeptideSearchDlg.ClickNextButton()); + }); + } // End if testing cancelability + + PauseForScreenShot("Search Settings page -Full scan settings are set Centroided, so instrument setting should be operable"); RunUI(() => { // We're on the "Search Settings" page. These values should be settable since we set "Centroided" in Full Scan. @@ -438,6 +453,7 @@ void ExpectError(Action act) AssertEx.IsDocumentState(doc, null, expectedPeptideGroups + 1, expectedPeptides + expectedFeaturesMolecules, expectedPeptideTransitionGroups + expectedFeaturesTransitionGroups, expectedPeptideTransitions + expectedFeaturesTransitions); + /* TODO update this for current test data set // Verify that we found every known peptide var colName = FindDocumentGridColumn(documentGrid, "Precursor.Peptide").Index; var colReplicate = FindDocumentGridColumn(documentGrid, "Results!*.Value.PrecursorResult.PeptideResult.ResultFile.Replicate").Index; @@ -514,12 +530,9 @@ void ExpectError(Action act) var threshold = hits.Select(h => h.area).Max() * .1; var missedHits = hits.Where(h => !expectedMisses.Any(miss => Equals(h.name, miss.Item1) && Equals(h.z, miss.Item2) && h.area >= threshold)).ToArray(); - /* TODO update this for current test data set AssertEx.IsFalse(missedHits.Any(), $"Hardklor did not find features for fairly strong peptides\n{string.Join("\n", misses.Select(u => u.ToString()))}"); - */ - /* TODO update this for current test data set var unexpectedMisses = unmatched.Where(um => !expectedMisses.Contains((um.name, um.z))).ToArray(); var unexpectedMatches = matched.Where(um => expectedMisses.Contains((um.name, um.z))).ToArray(); AssertEx.IsFalse(unexpectedMisses.Any(), @@ -531,6 +544,7 @@ void ExpectError(Action act) } + /* TODO uncomment for hit check private HashSet ReduceToBestHits(List hitSet) { var bestHits = new HashSet(); @@ -597,6 +611,7 @@ public override int GetHashCode() } } } + */ private void TidyBetweenPasses(int pass) {