diff --git a/.github/actions/spelling/excludes.txt b/.github/actions/spelling/excludes.txt index 9be208676e..2081fc232e 100644 --- a/.github/actions/spelling/excludes.txt +++ b/.github/actions/spelling/excludes.txt @@ -28,6 +28,7 @@ ignore$ ^Localization/ ^NOTICE$ ^src/AppInstallerCLICore/Commands/ExperimentalCommand\.cpp$ +^src/AppInstallerCLITests/TestData/InputARPData.txt$ ^src/AppInstallerCLITests/TestData/InputNames.txt$ ^src/AppInstallerCLITests/TestData/InputPublishers.txt$ ^src/AppInstallerCLITests/TestData/NormalizationInitialIds.txt$ diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 2ba11ce311..bc17dad3d1 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -159,6 +159,7 @@ hre hresults htm IAttachment +IARP IConfiguration idx IFACEMETHODIMP @@ -379,6 +380,7 @@ TStatus UCase ucasemap UChars +ucnv uec uild uintptr diff --git a/src/AppInstallerCLICore/ExecutionContextData.h b/src/AppInstallerCLICore/ExecutionContextData.h index 9e26d2b191..3498e7513c 100644 --- a/src/AppInstallerCLICore/ExecutionContextData.h +++ b/src/AppInstallerCLICore/ExecutionContextData.h @@ -3,6 +3,7 @@ #pragma once #include #include +#include #include "CompletionData.h" #include "PackageCollection.h" #include "Workflows/WorkflowBase.h" @@ -47,6 +48,7 @@ namespace AppInstaller::CLI::Execution // On import: Sources for the imported packages Sources, ARPSnapshot, + CorrelatedAppsAndFeaturesEntries, Dependencies, DependencySource, AllowedArchitectures, @@ -186,8 +188,13 @@ namespace AppInstaller::CLI::Execution template <> struct DataMapping { - // Contains the { Id, Version, Channel } - using value_t = std::vector>; + using value_t = std::vector; + }; + + template <> + struct DataMapping + { + using value_t = std::vector; }; template <> diff --git a/src/AppInstallerCLICore/Workflows/InstallFlow.cpp b/src/AppInstallerCLICore/Workflows/InstallFlow.cpp index 1c4d2ca26a..d04d06531e 100644 --- a/src/AppInstallerCLICore/Workflows/InstallFlow.cpp +++ b/src/AppInstallerCLICore/Workflows/InstallFlow.cpp @@ -12,6 +12,7 @@ #include "WorkflowBase.h" #include "Workflows/DependenciesFlow.h" #include +#include using namespace winrt::Windows::ApplicationModel::Store::Preview::InstallControl; using namespace winrt::Windows::Foundation; @@ -506,173 +507,78 @@ namespace AppInstaller::CLI::Workflow void ReportARPChanges(Execution::Context& context) try { - if (context.Contains(Execution::Data::ARPSnapshot)) + if (!context.Contains(Execution::Data::ARPSnapshot)) { - const auto& entries = context.Get(); - - // Open it again to get the (potentially) changed ARP entries - Source arpSource = context.Reporter.ExecuteWithProgress( - [](IProgressCallback& progress) - { - Repository::Source result = Repository::Source(PredefinedSource::ARP); - result.Open(progress); - return result; - }, true); - - std::vector changes; - - for (auto& entry : arpSource.Search({}).Matches) - { - auto installed = entry.Package->GetInstalledVersion(); - - if (installed) - { - auto entryKey = std::make_tuple( - entry.Package->GetProperty(PackageProperty::Id), - installed->GetProperty(PackageVersionProperty::Version), - installed->GetProperty(PackageVersionProperty::Channel)); - - auto itr = std::lower_bound(entries.begin(), entries.end(), entryKey); - if (itr == entries.end() || *itr != entryKey) - { - changes.emplace_back(std::move(entry)); - } - } - } - - // Also attempt to find the entry based on the manifest data - const auto& manifest = context.Get(); - - SearchRequest manifestSearchRequest; - AppInstaller::Manifest::Manifest::string_t defaultPublisher; - if (manifest.DefaultLocalization.Contains(Localization::Publisher)) - { - defaultPublisher = manifest.DefaultLocalization.Get(); - } - - // The default localization must contain the name or we cannot do this lookup - if (manifest.DefaultLocalization.Contains(Localization::PackageName)) - { - AppInstaller::Manifest::Manifest::string_t defaultName = manifest.DefaultLocalization.Get(); - manifestSearchRequest.Inclusions.emplace_back(PackageMatchFilter(PackageMatchField::NormalizedNameAndPublisher, MatchType::Exact, defaultName, defaultPublisher)); + return; + } - for (const auto& loc : manifest.Localizations) - { - if (loc.Contains(Localization::PackageName) || loc.Contains(Localization::Publisher)) - { - manifestSearchRequest.Inclusions.emplace_back(PackageMatchFilter(PackageMatchField::NormalizedNameAndPublisher, MatchType::Exact, - loc.Contains(Localization::PackageName) ? loc.Get() : defaultName, - loc.Contains(Localization::Publisher) ? loc.Get() : defaultPublisher)); - } - } - } + const auto& manifest = context.Get(); + const auto& arpSnapshot = context.Get(); - std::vector productCodes; - for (const auto& installer : manifest.Installers) + // Open the ARP source again to get the (potentially) changed ARP entries + Source arpSource = context.Reporter.ExecuteWithProgress( + [](IProgressCallback& progress) { - if (!installer.ProductCode.empty()) - { - if (std::find(productCodes.begin(), productCodes.end(), installer.ProductCode) == productCodes.end()) - { - manifestSearchRequest.Inclusions.emplace_back(PackageMatchFilter(PackageMatchField::ProductCode, MatchType::Exact, installer.ProductCode)); - productCodes.emplace_back(installer.ProductCode); - } - } + Repository::Source result = Repository::Source(PredefinedSource::ARP); + result.Open(progress); + return result; + }, true); - for (const auto& appsAndFeaturesEntry : installer.AppsAndFeaturesEntries) - { - if (!appsAndFeaturesEntry.DisplayName.empty()) - { - manifestSearchRequest.Inclusions.emplace_back(PackageMatchFilter(PackageMatchField::NormalizedNameAndPublisher, MatchType::Exact, - appsAndFeaturesEntry.DisplayName, - appsAndFeaturesEntry.Publisher.empty() ? defaultPublisher : appsAndFeaturesEntry.Publisher)); - } - } - } + auto correlationResult = Correlation::FindARPEntryForNewlyInstalledPackage(manifest, arpSnapshot, arpSource); - SearchResult findByManifest; + // Store the ARP entry found to match the package to record it in the tracking catalog later + if (correlationResult.Package) + { + std::vector entries; - // Don't execute this search if it would just find everything - if (!manifestSearchRequest.IsForEverything()) - { - findByManifest = arpSource.Search(manifestSearchRequest); - } + auto metadata = correlationResult.Package->GetMetadata(); - // Cross reference the changes with the search results - std::vector> packagesInBoth; + AppsAndFeaturesEntry baseEntry; - for (const auto& change : changes) - { - for (const auto& byManifest : findByManifest.Matches) - { - if (change.Package->IsSame(byManifest.Package.get())) - { - packagesInBoth.emplace_back(change.Package); - break; - } - } - } + // Display name and publisher are also available as multi properties, but + // for ARP there will always be only 0 or 1 values. + baseEntry.DisplayName = correlationResult.Package->GetProperty(PackageVersionProperty::Name).get(); + baseEntry.Publisher = correlationResult.Package->GetProperty(PackageVersionProperty::Publisher).get(); + baseEntry.DisplayVersion = correlationResult.Package->GetProperty(PackageVersionProperty::Version).get(); + baseEntry.InstallerType = Manifest::ConvertToInstallerTypeEnum(metadata[PackageVersionMetadata::InstalledType]); - // We now have all of the package changes; time to report them. - // The set of cases we could have for changes to ARP: - // 0 packages :: No changes were detected to ARP, which could mean that the installer - // did not write an entry. It could also be a forced reinstall. - // 1 package :: Golden path; this should be what we installed. - // 2+ packages :: We need to determine which package actually matches the one that we - // were installing. - // - // The set of cases we could have for finding packages based on the manifest: - // 0 packages :: The manifest data does not match the ARP information. - // 1 package :: Golden path; this should be what we installed. - // 2+ packages :: The data in the manifest is either too broad or we have - // a problem with our name normalization. - - // Find the package that we are going to log - std::shared_ptr toLog; - - // If there is only a single common package (changed and matches), it is almost certainly the correct one. - if (packagesInBoth.size() == 1) - { - toLog = packagesInBoth[0]->GetInstalledVersion(); - } - // If it wasn't changed but we still find a match, that is the best thing to report. - else if (findByManifest.Matches.size() == 1) + auto productCodes = correlationResult.Package->GetMultiProperty(PackageVersionMultiProperty::ProductCode); + for (auto&& productCode : productCodes) { - toLog = findByManifest.Matches[0].Package->GetInstalledVersion(); - } - // If only a single ARP entry was changed and we found no matches, report that. - else if (findByManifest.Matches.empty() && changes.size() == 1) - { - toLog = changes[0].Package->GetInstalledVersion(); + AppsAndFeaturesEntry entry = baseEntry; + entry.ProductCode = std::move(productCode).get(); + entries.push_back(std::move(entry)); } - IPackageVersion::Metadata toLogMetadata; - if (toLog) - { - toLogMetadata = toLog->GetMetadata(); - } + context.Add(std::move(entries)); + } - // We can only get the source identifier from an active source - std::string sourceIdentifier; - if (context.Contains(Execution::Data::PackageVersion)) - { - sourceIdentifier = context.Get()->GetProperty(PackageVersionProperty::SourceIdentifier); - } + // We can only get the source identifier from an active source + std::string sourceIdentifier; + if (context.Contains(Execution::Data::PackageVersion)) + { + sourceIdentifier = context.Get()->GetProperty(PackageVersionProperty::SourceIdentifier); + } - Logging::Telemetry().LogSuccessfulInstallARPChange( - sourceIdentifier, - manifest.Id, - manifest.Version, - manifest.Channel, - changes.size(), - findByManifest.Matches.size(), - packagesInBoth.size(), - toLog ? static_cast(toLog->GetProperty(PackageVersionProperty::Name)) : "", - toLog ? static_cast(toLog->GetProperty(PackageVersionProperty::Version)) : "", - toLog ? static_cast(toLogMetadata[PackageVersionMetadata::Publisher]) : "", - toLog ? static_cast(toLogMetadata[PackageVersionMetadata::InstalledLocale]) : "" - ); + IPackageVersion::Metadata arpEntryMetadata; + if (correlationResult.Package) + { + arpEntryMetadata = correlationResult.Package->GetMetadata(); } + + Logging::Telemetry().LogSuccessfulInstallARPChange( + sourceIdentifier, + manifest.Id, + manifest.Version, + manifest.Channel, + correlationResult.ChangesToARP, + correlationResult.MatchesInARP, + correlationResult.CountOfIntersectionOfChangesAndMatches, + correlationResult.Package ? static_cast(correlationResult.Package->GetProperty(PackageVersionProperty::Name)) : "", + correlationResult.Package ? static_cast(correlationResult.Package->GetProperty(PackageVersionProperty::Version)) : "", + correlationResult.Package ? static_cast(correlationResult.Package->GetProperty(PackageVersionProperty::Publisher)) : "", + correlationResult.Package ? static_cast(arpEntryMetadata[PackageVersionMetadata::InstalledLocale]) : "" + ); } CATCH_LOG(); @@ -686,10 +592,23 @@ namespace AppInstaller::CLI::Workflow return; } + auto manifest = context.Get(); + + // If we have determined an ARP entry matches the installed package, + // we set its product code in the manifest we record to ensure we can + // find it in the future. + // Note that this may overwrite existing information. + if (context.Contains(Data::CorrelatedAppsAndFeaturesEntries)) + { + // Use a new Installer entry + manifest.Installers.emplace_back(); + manifest.Installers.back().AppsAndFeaturesEntries = context.Get(); + } + auto trackingCatalog = context.Get()->GetSource().GetTrackingCatalog(); trackingCatalog.RecordInstall( - context.Get(), + manifest, context.Get().value(), WI_IsFlagSet(context.GetFlags(), ContextFlag::InstallerExecutionUseUpdate)); } diff --git a/src/AppInstallerCLICore/Workflows/InstallFlow.h b/src/AppInstallerCLICore/Workflows/InstallFlow.h index 80c7ef745f..34582d28b1 100644 --- a/src/AppInstallerCLICore/Workflows/InstallFlow.h +++ b/src/AppInstallerCLICore/Workflows/InstallFlow.h @@ -167,15 +167,16 @@ namespace AppInstaller::CLI::Workflow // Outputs: ARPSnapshot void SnapshotARPEntries(Execution::Context& context); - // Reports on the changes between the stored ARPSnapshot and the current values. + // Reports on the changes between the stored ARPSnapshot and the current values, + // and stores the product code of the ARP entry found for the package. // Required Args: None // Inputs: ARPSnapshot?, Manifest, PackageVersion - // Outputs: None + // Outputs: CorrelatedAppsAndFeaturesEntries? void ReportARPChanges(Execution::Context& context); // Records the installation to the tracking catalog. // Required Args: None - // Inputs: PackageVersion?, Manifest, Installer + // Inputs: PackageVersion?, Manifest, Installer, CorrelatedAppsAndFeaturesEntries? // Outputs: None void RecordInstall(Execution::Context& context); } diff --git a/src/AppInstallerCLITests/ARPChanges.cpp b/src/AppInstallerCLITests/ARPChanges.cpp index a9f0d6ae35..6df059f11b 100644 --- a/src/AppInstallerCLITests/ARPChanges.cpp +++ b/src/AppInstallerCLITests/ARPChanges.cpp @@ -16,6 +16,7 @@ using namespace AppInstaller::CLI::Execution; using namespace AppInstaller::CLI::Workflow; using namespace AppInstaller::Logging; using namespace AppInstaller::Repository; +using namespace AppInstaller::Repository::Correlation; struct TestTelemetry : public TelemetryTraceLogger { @@ -204,9 +205,27 @@ struct TestContext : public Context } }; +// Override the correlation heuristic by an empty one to ensure that these tests +// consider only the exact matching. +struct TestHeuristicOverride +{ + TestHeuristicOverride() + { + IARPMatchConfidenceAlgorithm::OverrideInstance(&m_algorithm); + } + + ~TestHeuristicOverride() + { + IARPMatchConfidenceAlgorithm::ResetInstance(); + } + +private: + EmptyMatchConfidenceAlgorithm m_algorithm; +}; TEST_CASE("ARPChanges_MSIX_Ignored", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context(Manifest::InstallerTypeEnum::Msix); context << SnapshotARPEntries; @@ -220,6 +239,7 @@ TEST_CASE("ARPChanges_MSIX_Ignored", "[ARPChanges][workflow]") TEST_CASE("ARPChanges_CheckSnapshot", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -254,6 +274,7 @@ TEST_CASE("ARPChanges_CheckSnapshot", "[ARPChanges][workflow]") TEST_CASE("ARPChanges_NoChange_NoMatch", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -265,6 +286,7 @@ TEST_CASE("ARPChanges_NoChange_NoMatch", "[ARPChanges][workflow]") TEST_CASE("ARPChanges_NoChange_SingleMatch", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -278,6 +300,7 @@ TEST_CASE("ARPChanges_NoChange_SingleMatch", "[ARPChanges][workflow]") TEST_CASE("ARPChanges_NoChange_MultiMatch", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -292,6 +315,7 @@ TEST_CASE("ARPChanges_NoChange_MultiMatch", "[ARPChanges][workflow]") TEST_CASE("ARPChanges_SingleChange_NoMatch", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -305,6 +329,7 @@ TEST_CASE("ARPChanges_SingleChange_NoMatch", "[ARPChanges][workflow]") TEST_CASE("ARPChanges_SingleChange_SingleMatch", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -319,6 +344,7 @@ TEST_CASE("ARPChanges_SingleChange_SingleMatch", "[ARPChanges][workflow]") TEST_CASE("ARPChanges_SingleChange_MultiMatch", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -334,6 +360,7 @@ TEST_CASE("ARPChanges_SingleChange_MultiMatch", "[ARPChanges][workflow]") TEST_CASE("ARPChanges_MultiChange_NoMatch", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -348,6 +375,7 @@ TEST_CASE("ARPChanges_MultiChange_NoMatch", "[ARPChanges][workflow]") TEST_CASE("ARPChanges_MultiChange_SingleMatch_NoOverlap", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -363,6 +391,7 @@ TEST_CASE("ARPChanges_MultiChange_SingleMatch_NoOverlap", "[ARPChanges][workflow TEST_CASE("ARPChanges_MultiChange_SingleMatch_Overlap", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -378,6 +407,7 @@ TEST_CASE("ARPChanges_MultiChange_SingleMatch_Overlap", "[ARPChanges][workflow]" TEST_CASE("ARPChanges_MultiChange_MultiMatch_NoOverlap", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -394,6 +424,7 @@ TEST_CASE("ARPChanges_MultiChange_MultiMatch_NoOverlap", "[ARPChanges][workflow] TEST_CASE("ARPChanges_MultiChange_MultiMatch_SingleOverlap", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; @@ -410,6 +441,7 @@ TEST_CASE("ARPChanges_MultiChange_MultiMatch_SingleOverlap", "[ARPChanges][workf TEST_CASE("ARPChanges_MultiChange_MultiMatch_MultiOverlap", "[ARPChanges][workflow]") { + TestHeuristicOverride heuristicOverride; TestContext context; context << SnapshotARPEntries; diff --git a/src/AppInstallerCLITests/AppInstallerCLITests.vcxproj b/src/AppInstallerCLITests/AppInstallerCLITests.vcxproj index 18adf844b0..7fa93306a1 100644 --- a/src/AppInstallerCLITests/AppInstallerCLITests.vcxproj +++ b/src/AppInstallerCLITests/AppInstallerCLITests.vcxproj @@ -188,6 +188,7 @@ + @@ -551,6 +552,9 @@ true + + true + true diff --git a/src/AppInstallerCLITests/AppInstallerCLITests.vcxproj.filters b/src/AppInstallerCLITests/AppInstallerCLITests.vcxproj.filters index 082ea229e5..c1dbf3402e 100644 --- a/src/AppInstallerCLITests/AppInstallerCLITests.vcxproj.filters +++ b/src/AppInstallerCLITests/AppInstallerCLITests.vcxproj.filters @@ -188,6 +188,9 @@ Source Files + + Source Files + @@ -564,5 +567,8 @@ TestData + + TestData + \ No newline at end of file diff --git a/src/AppInstallerCLITests/Correlation.cpp b/src/AppInstallerCLITests/Correlation.cpp new file mode 100644 index 0000000000..9686c376ce --- /dev/null +++ b/src/AppInstallerCLITests/Correlation.cpp @@ -0,0 +1,296 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +#include "pch.h" +#include "TestCommon.h" +#include "TestSource.h" + +#include +#include +#include + +using namespace AppInstaller::Manifest; +using namespace AppInstaller::Repository; +using namespace AppInstaller::Repository::Correlation; + +using namespace TestCommon; + +// Data for defining a test case +struct TestCase +{ + // Actual app data + std::string AppName; + std::string AppPublisher; + + // Data in ARP + std::string ARPName; + std::string ARPPublisher; + + bool IsMatch; +}; + +// Definition of a collection of test cases that we evaluate +// together to get a single aggregate result +struct DataSet +{ + // Details about the apps we are trying to correlate + std::vector TestCases; + + // Additional ARP entries to use as "noise" for the correlation + std::vector ARPNoise; + + // Thresholds for considering a run of an heuristic against + // this data set "good". + // Values are ratios to the total number of test cases + double RequiredTrueMatchRatio; + double RequiredTrueMismatchRatio; + double RequiredFalseMatchRatio; + double RequiredFalseMismatchRatio; +}; + +// Aggregate result of running an heuristic against a data set. +struct ResultSummary +{ + size_t TrueMatches; + size_t TrueMismatches; + size_t FalseMatches; + size_t FalseMismatches; + std::chrono::milliseconds TotalTime; + + size_t TotalCases() const + { + return TrueMatches + TrueMismatches + FalseMatches + FalseMismatches; + } + + auto AverageMatchingTime() const + { + return TotalTime / TotalCases(); + } +}; + +Manifest GetManifestFromTestCase(const TestCase& testCase) +{ + Manifest manifest; + manifest.DefaultLocalization.Add(testCase.AppName); + manifest.DefaultLocalization.Add(testCase.AppPublisher); + manifest.Localizations.push_back(manifest.DefaultLocalization); + return manifest; +} + +ARPEntry GetARPEntryFromTestCase(const TestCase& testCase) +{ + Manifest arpManifest; + arpManifest.DefaultLocalization.Add(testCase.ARPName); + arpManifest.DefaultLocalization.Add(testCase.ARPPublisher); + arpManifest.Localizations.push_back(arpManifest.DefaultLocalization); + return ARPEntry{ TestPackage::Make(arpManifest, TestPackage::MetadataMap{}), false }; +} + +void ReportMatch(std::string_view label, std::string_view appName, std::string_view appPublisher, std::string_view arpName, std::string_view arpPublisher) +{ + WARN(label << '\n' << + "\tApp name = " << appName << '\n' << + "\tApp publisher = " << appPublisher << '\n' << + "\tARP name = " << arpName << '\n' << + "\tARP publisher = " << arpPublisher); +} + +ResultSummary EvaluateDataSetWithHeuristic(const DataSet& dataSet, IARPMatchConfidenceAlgorithm& correlationAlgorithm, bool reportErrors = false) +{ + ResultSummary result{}; + auto startTime = std::chrono::steady_clock::now(); + + // Each entry under test will be pushed at the end of this + // and removed at the end. + auto arpEntries = dataSet.ARPNoise; + + for (const auto& testCase : dataSet.TestCases) + { + arpEntries.push_back(GetARPEntryFromTestCase(testCase)); + auto match = FindARPEntryForNewlyInstalledPackageWithHeuristics(GetManifestFromTestCase(testCase), arpEntries, correlationAlgorithm); + arpEntries.pop_back(); + + if (match) + { + auto matchName = match->GetProperty(PackageVersionProperty::Name); + auto matchPublisher = match->GetProperty(PackageVersionProperty::Publisher); + + if (matchName == testCase.ARPName && matchPublisher == testCase.ARPPublisher) + { + ++result.TrueMatches; + } + else + { + ++result.FalseMatches; + + if (reportErrors) + { + ReportMatch("False match", testCase.AppName, testCase.AppPublisher, matchName, matchPublisher); + } + } + } + else + { + if (testCase.IsMatch) + { + ++result.FalseMismatches; + + if (reportErrors) + { + ReportMatch("False mismatch", testCase.AppName, testCase.AppPublisher, testCase.ARPName, testCase.ARPPublisher); + } + } + else + { + ++result.TrueMismatches; + } + } + } + + auto endTime = std::chrono::steady_clock::now(); + result.TotalTime = std::chrono::duration_cast(endTime - startTime); + + return result; +} + +void ReportResults(ResultSummary results) +{ + // This uses WARN to report as that is always shown regardless of the test result. + // We may want to re-consider reporting in some other way + WARN("Total cases: " << results.TotalCases() << '\n' << + "True matches: " << results.TrueMatches << '\n' << + "False matches: " << results.FalseMatches << '\n' << + "True mismatches: " << results.TrueMismatches << '\n' << + "False mismatches: " << results.FalseMismatches << '\n' << + "Total matching time: " << results.TotalTime.count() << "ms\n" << + "Average matching time: " << results.AverageMatchingTime().count() << "ms"); +} + +void ReportAndEvaluateResults(ResultSummary results, const DataSet& dataSet) +{ + ReportResults(results); + + // Required True ratio is a lower limit. The more results we get right, the better. + // Required False ratio is an upper limit. The fewer results we get wrong, the better. + REQUIRE(results.TrueMatches >= results.TotalCases() * dataSet.RequiredTrueMatchRatio); + REQUIRE(results.TrueMismatches >= results.TotalCases() * dataSet.RequiredTrueMismatchRatio); + REQUIRE(results.FalseMatches <= results.TotalCases() * dataSet.RequiredFalseMatchRatio); + REQUIRE(results.FalseMismatches <= results.TotalCases()* dataSet.RequiredFalseMismatchRatio); +} + +// TODO: Define multiple data sets +// - Data set with many apps. +// - Data set with popular apps. The match requirements should be higher +// - Data set(s) in other languages. +// - Data set where not everything has a match + +std::vector LoadTestData() +{ + // Creates test cases from the test data file. + // The format of the file is one case per line, each with pipe (|) separated values. + // Each row contains: AppId, AppName, AppPublisher, ARPDisplayName, ARPDisplayVersion, ARPPublisherName, ARPProductCode + // TODO: Add more test cases; particularly for non-matches + std::ifstream testDataStream(TestCommon::TestDataFile("InputARPData.txt").GetPath()); + REQUIRE(testDataStream); + + std::vector testCases; + + std::string line; + while (std::getline(testDataStream, line)) + { + std::stringstream ss{ line }; + + TestCase testCase; + std::string appId; + std::string arpDisplayVersion; + std::string arpProductCode; + std::getline(ss, appId, '|'); + std::getline(ss, testCase.AppName, '|'); + std::getline(ss, testCase.AppPublisher, '|'); + std::getline(ss, testCase.ARPName, '|'); + std::getline(ss, arpDisplayVersion, '|'); + std::getline(ss, testCase.ARPPublisher, '|'); + std::getline(ss, arpProductCode, '|'); + + testCase.IsMatch = true; + + testCases.push_back(std::move(testCase)); + } + + return testCases; +} + +DataSet GetDataSet_NoNoise() +{ + DataSet dataSet; + dataSet.TestCases = LoadTestData(); + + // Arbitrary values. We should refine them as the algorithm gets better. + dataSet.RequiredTrueMatchRatio = 0.5; + dataSet.RequiredFalseMatchRatio = 0.1; + dataSet.RequiredTrueMismatchRatio = 0; // There are no expected mismatches in this data set + dataSet.RequiredFalseMismatchRatio = 0.5; + + return dataSet; +} + +DataSet GetDataSet_WithNoise() +{ + DataSet dataSet; + auto baseTestCases = LoadTestData(); + + std::transform(baseTestCases.begin(), baseTestCases.end(), std::back_inserter(dataSet.ARPNoise), GetARPEntryFromTestCase); + dataSet.TestCases = std::move(baseTestCases); + + // Arbitrary values. We should refine them as the algorithm gets better. + dataSet.RequiredTrueMatchRatio = 0.5; + dataSet.RequiredFalseMatchRatio = 0.1; + dataSet.RequiredTrueMismatchRatio = 0; // There are no expected mismatches in this data set + dataSet.RequiredFalseMismatchRatio = 0.5; + + return dataSet; +} + +// Hide this test as it takes too long to run. +// It is useful for comparing multiple algorithms, but for +// regular testing we need only check that the chosen algorithm +// performs well. +TEMPLATE_TEST_CASE("Correlation_MeasureAlgorithmPerformance", "[correlation][.]", + EmptyMatchConfidenceAlgorithm, + EditDistanceMatchConfidenceAlgorithm) +{ + // Each section loads a different data set, + // and then they are all handled the same + DataSet dataSet; + SECTION("No ARP noise") + { + dataSet = GetDataSet_NoNoise(); + } + SECTION("With ARP noise") + { + dataSet = GetDataSet_WithNoise(); + } + + TestType measure; + auto results = EvaluateDataSetWithHeuristic(dataSet, measure); + ReportResults(results); +} + +TEST_CASE("Correlation_ChosenHeuristicIsGood", "[correlation]") +{ + // Each section loads a different data set, + // and then they are all handled the same + DataSet dataSet; + SECTION("No ARP noise") + { + dataSet = GetDataSet_NoNoise(); + } + SECTION("With ARP noise") + { + dataSet = GetDataSet_WithNoise(); + } + + // Use only the measure we ultimately pick + auto& algorithm = IARPMatchConfidenceAlgorithm::Instance(); + auto results = EvaluateDataSetWithHeuristic(dataSet, algorithm, /* reportErrors */ true); + ReportAndEvaluateResults(results, dataSet); +} diff --git a/src/AppInstallerCLITests/TestData/InputARPData.txt b/src/AppInstallerCLITests/TestData/InputARPData.txt new file mode 100644 index 0000000000..fd5cb8f417 --- /dev/null +++ b/src/AppInstallerCLITests/TestData/InputARPData.txt @@ -0,0 +1,111 @@ +XP890ZFCZZR294|Studio Fisioterapico Pro|Esposito Software di M. G. Caputo|Studio Fisioterapico Pro Demo||Copyright Esposito Software|Studio Fisioterapico Pro Demo_is1 +XP89DCGQ3K6VLD|Microsoft PowerToys|Microsoft Corporation|PowerToys (Preview) x64|0.57.0|Microsoft Corporation|{582f7a19-045d-43d4-89bf-7f8e9479311c} +XP89HZ8SVWTT0M|ElevenClock|Martí Climent|ElevenClock version 3.3.2|3.3.2|SomePythonThings|{D62480B8-71F1-48CE-BEEC-9D3E172C87B5}_is1 +XP89HZKG342W76|POWER-KI GUI Client|XPLAB - Research in Automation|POWER-KI GUI|33.11|XPLAB - Research in Automation - Brescia - Italy|{0760E097-F794-4836-9941-8846EA43BE06} +XP89J5462CMGJD|Apache OpenOffice|The Apache Software Foundation|OpenOffice 4.1.11|4.111.9808|Apache Software Foundation|{D2F124FC-5373-4A4A-8C5A-61052A3D34CA} +XP8BTFNM0T53BJ|PolypopLive|Simmetri, Inc.|PolyPop 0.98.222.0|0.98.222.0|Simmetri, Inc.|{75454996-E72B-480E-BB8C-CD743A54C362}_is1 +XP8BX12N1KK2QJ|MyLifeOrganized - To-Do List|Andriy Tkachuk|MyLifeOrganized v. 5.1.3|5.1.3|MyLifeOrganized.net|MyLife Organized +XP8CD7JST163BL|BPM Counter|Abyssmedia.com|BPM Counter 3.8.0.0|3.8.0.0|AbyssMedia.com|BPM Counter_is1 +XP8CDF4CV9XP5Q|Archivio Esami Clinici|Esposito Software di M. G. Caputo|Archivio Esami Clinici 3.0 Demo||Copyright Esposito Software|Archivio Esami Clinici 3.0 Demo_is1 +XP8CF6SB8MX31V|Ashampoo Photo Optimizer 8|Ashampoo|Ashampoo Photo Optimizer 8|8.2.3|Ashampoo GmbH & Co. KG|{91B33C97-5FC6-8971-3444-C57BBE022215}_is1 +XP8JJ8VX6VL0Q5|Cleaner One Pro - Free PC Cleaner|Trend Micro Inc.|Cleaner One Pro 6.6.0|6.6.0|Trend Micro, Inc.|99388cc2-2782-5495-bbd2-525df2487901 +XP8JJRV6TV79LG|DiskZIP|ZIPmagic Software|DiskZIP|2022.3.1415.932|DiskZIP Computing, Inc.|DiskZIP +XP8JJVZXG23JLN|WorldClock.Classic.ScreenSaver|Fulvio Castelli|WorldClock Screen Saver (Trial)|7.0.12.0|Fulvio Castelli|{EF3BC641-89A9-4703-9DED-19CEE72CEF07}_is1 +XP8JK4HZBVF435|Auto Dark Mode|Armin Osaj|Auto Dark Mode|10.1.0.10|Armin Osaj & Samuel Schiegg|{470BC918-3740-4A97-9797-8570A7961130}_is1 +XP8JMKMC3GVX23|Wondershare EdrawMax|WONDERSHARE GLOBAL LIMITED|Wondershare EdrawMax(Build 11.1.2.870)|11.1.2.870|EdrawSoft Co.,Ltd.|{037BAB81-3DF7-4381-A72C-A26B57C03548}_is1 +XP8JNNTH0LT9F1|ApowerEdit|网旭科技|ApowerEdit V1.7.7.22|1.7.7.22|Apowersoft LIMITED|{3089CCCD-BC5F-4309-A3C1-45B5ACA7A5E7}_is1 +XP8K17KD2T7W8V|Ashampoo WinOptimizer 19|Ashampoo|Ashampoo WinOptimizer 19|19.00.23|Ashampoo GmbH & Co. KG|{4209F371-A9E3-7DD2-C1E5-04BB2B081219}_is1 +XP8K1F4KDP9DSJ|Autonoleggio N.S.C.|Esposito Software di M. G. Caputo|Autonoleggio NSC 3.0 Demo||Copyright Esposito Software|Autonoleggio NSC 3.0 Demo_is1 +XP8K43JX54F7FL|Cute Cursors|Cute Cursors|CuteCursors|1.0.0|Apollo One|{6683BBFB-B899-4755-B260-DF0387D9F872} +XP8K513CFB5K58|Archivio Dipendenti con Foto|Esposito Software di Maria Grazia Caputo|Archivio Dipendenti con Foto Demo||Copyright Esposito Software|Archivio Dipendenti con Foto Demo_is1 +XP8LFCZM790F6B|Visual Studio Code - Insiders|Microsoft Corporation|Microsoft Visual Studio Code Insiders (User)|1.67.0|Microsoft Corporation|{217B4C08-948D-4276-BFBB-BEE930AE5A2C}_is1 +XP8LFD92C0T8P0|Stampa Tessere Associazioni|Esposito Software di Maria Grazia Caputo|Stampa Tessere Associazioni 5.0 Demo||Copyright Esposito Software|Stampa Tessere Associazioni 5.0 Demo_is1 +XP8LG1VTM0XW03|Gestione Protocollo e Pratiche|Esposito Software di Maria Grazia Caputo|Gestione Protocollo e Pratiche Demo||Copyright Esposito Software|Gestione Protocollo e Pratiche Demo_is1 +XP8LG2X182JTJ9|Wondershare Dr.Fone - Mobile Device Management|WONDERSHARE GLOBAL LIMITED|Wondershare Dr.Fone (Version 10.9.6)|10.9.6.398|Wondershare Technology Co.,Ltd.|{E8F86DA8-B8E4-42C7-AFD4-EBB692AC43FD}_is1 +XP8LG65GV4C7C8|GitMind Mind Map|网旭科技|GitMind 1.0.8|1.0.8|Apowersoft|a0e10d84-6512-552f-a0ec-5dd2e61ffe64 +XP8LKPZT4X0Z0P|GOM Player|Gom and Company|GOM Player|2.3.67.5331|GOM & Company|GOM Player +XP8LKWQ22DX3TF|JYL Visitor Windows|JYL Software|JYL Visitor 1.94|1.94|JYL Software|{02ADFF54-7D56-42F1-B517-FDA35F55D2CC} +XP99J3KP4XZ4VV|ZOOM Cloud Meetings|Zoom Video Communications, Inc.|Zoom|5.10.0 (4306)|Zoom Video Communications, Inc.|ZoomUMX +XP99J7FXZD0JDM|Emjysoft eSanté|Emjysoft|Suivi des soins et des remboursements de Santé|3.11|Emjysoft|{6CC28634-D98C-4DE1-9EE7-E121277996F6}_is1 +XP99JXDBM4XKFP|Parallels Toolbox|Corel Corporation|Parallels Toolbox|5.1.0.3170|Parallels International GmbH|{5145E2CF-E9FC-48E6-A2B4-E409FC84D059} +XP99K41V2P36RQ|MSIX Editor|InstallAware Software Corporation|InstallAware Msix Editor 1.0|1.0.0.2703|InstallAware Software|InstallAware Msix Editor 1.0 +XP99VR1BPSBQJ2|Epic Games Store|Epic Games Inc.|Epic Games Launcher|1.3.23.0|Epic Games, Inc.|{FAC47927-1A6A-4C6E-AD7D-E9756794A4BC} +XP99WSCKQSH7SW|Emjysoft Sauvegarde Facile|Emjysoft|Easy Backup|VersionApplication|Emjysoft|{37215B1A-1990-4F55-936E-C9BA1634EF75}}_is1 +XP99WT9NMGB1PN|蜜蜂剪辑|网旭科技|BeeCut V1.7.7.22|1.7.7.22|Apowersoft LIMITED|{CA76BFA8-1862-49D7-B2C7-AE3D6CF40E53}_is1 +XP9B0HTG55KTCH|Free Hex Editor Neo|HHD Software Ltd.|HHD Software Free Hex Editor Neo 6.54|6.54.02.6790|HHD Software, Ltd.|{8EB85C0E-DE7D-4A53-BD66-708B8F2C80B0} +XP9B16C2TFN8P1|GOM Mix Pro|Gom and Company|GOM Mix Pro|2.0.4.8|GOM & Company|GOMMixPro +XP9CFZ9PKV0DWS|Automation Workshop|Febooti, SIA|Febooti Automation Workshop|5.1.1.0|Febooti Software|{6114DD12-2516-4465-9275-FB9A8E1A583C} +XP9CRZD7D219NK|FolderSizes|Key Metric Software|FolderSizes 9|9.3.362|Key Metric Software|{587D3069-EFE1-4FC2-B917-01496D5ABF8A} +XP9CRZQDCJ0CC6|LetsView|网旭科技|LetsView V1.1.2.5|1.1.2.5|LetsView LIMITED|{6AA74BE4-9506-4D81-A07C-A40F883C2EA7}_is1 +XP9CSP03RV8BX9|Audials One 2022|Audials AG|Audials 2022|22.0.177.0|Audials AG|{3F273072-3D14-479E-B4CD-AC8B1F436DA1} +XP9K4SR87H227Q|VisualNEO Win|SinLios Soluciones Digitales|VisualNEO Win|21.9.9|SinLios|{57147D4D-2492-41EC-A552-FB37C1C7FF3E}_is1 +XP9K5VRXFHVP75|Database Creator|Esposito Software di Maria Grazia Caputo|Database Creator Demo||Copyright Esposito Software|Database Creator Demo_is1 +XP9K5XN9BRN466|Housecall Free Virus - Malware Scanner|Trend Micro Inc.|HouseCall|1.62|Trend Micro Inc.|{A114E34B-AA5C-4DD8-98A9-3130ACA19491} +XP9KHKZS1M19ZP|x-studio|Simdsoft Limited|x-studio 2022|2022.1.4|Simdsoft Limited|{2F7387D3-EB5F-4CA5-8C42-04C59F922740} +XP9KHM4BK9FZ7Q|Visual Studio Code|Microsoft Corporation|Microsoft Visual Studio Code (User)|1.66.0|Microsoft Corporation|{771FD6B0-FA20-440A-A002-3B3BAC16DC50}_is1 +XP9KHPQ5C9MSN2|ZIPmagic|ZIPmagic Software|ZIPmagic|19.19.21|Simon King|ZIPmagic +XP9KHPXMW6RQLL|Gestione Studio Tecnico|Esposito Software di M. G. Caputo|Gestione Studio Tecnico Demo||Copyright Esposito Software|Gestione Studio Tecnico Demo_is1 +XP9KHQZV691PF9|PTZ Link|AVer Information|AVer PTZ Link|1.1.1013.0|AVer Information Inc|{AC08D179-14D5-4B93-9684-20DBE0848637} +XP9KM2X7H10448|PCmover Reconfigurator|Laplink Software Inc|Laplink Reconfigurator|1.0.0.1|Laplink Software, Inc.|{BBB86720-65BA-452A-A14D-B152CB506DD8} +XP9M20CZB2C5W8|Powder - Gaming Recorder|Unique Entertainment Experience SAS|Powder 2.5.0|2.5.0|powder-team|2b39bc52-9c37-5fcd-ab25-906727f7c690 +XP9MFNDJM19N0G|Gestione Affitti Pro|Esposito Software di M. G. Caputo|Gestione Affitti Pro 4.0 Demo||Copyright Esposito Software|Gestione Affitti Pro 4.0 Demo_is1 +XPDBZ0BW87BCTV|POWER-KI Executor|XPLAB - Research in Automation|POWER-KI Executor|33.11|XPLAB - Research in Automation - Brescia - Italy|{B2B40FB5-0B60-4B47-A1F1-F0254CD0BE04} +XPDBZ4MPRKNN30|Opera GX|Opera Norway AS|Opera GX Stable 82.0.4227.44|82.0.4227.44|Opera Software|Opera GX 82.0.4227.44 +XPDC1LX9VNW7Z7|VirtualDJ|Atomix International, S.A.|VirtualDJ 2021|8.5.6747.0|Atomix Productions|{97CFEA35-98EF-4EBC-8AF1-4F161CFCAE86} +XPDC2KHD93HVJW|Stampa Ricevute Generiche|Esposito Software di Maria Grazia Caputo|Stampa Ricevute Generiche Demo||Copyright Esposito Software|Stampa Ricevute Generiche Demo_is1 +XPDCFJD1GFFDXD|WorldClock.Classic|Fulvio Castelli|WorldClock (Trial)|7.0.12.0|Fulvio Castelli|{E32193B9-8870-40be-B88A-B302251B8AA7}_is1 +XPDCJ80KGNRVSS|TeamSpeak|TeamSpeak Systems GmbH|TeamSpeak|5.0.0|TeamSpeak|{C9D97E1E-B188-4500-A87D-902530E0D1E0} +XPDCK0XGHVWNBK|Trend Micro Antivirus Plus Security|Trend Micro Inc.|Trend Micro Antivirus+|17.7|Trend Micro Inc.|{ABBD4BA8-6703-40D2-AB1E-5BB1F7DB49A4} +XPDDZ434WT2M5Z|SOLARWATT Pro experience|SOLARWATT GmbH|SOLARWATT Experience|2.1.0.4|SOLARWATT|{40CF234F-1D35-4ED8-AAFC-E07EA2FD8B3B} +XPDF9J69VVFMX3|Apowersoft Background Eraser|网旭科技|Apowersoft background eraser V2.3.13|2.3.13|Apowersoft LIMITED|{98EC0F66-C563-40FA-A77A-F2FC558F5DAA}_is1 +XPDFF6P40P0M5Q|星愿浏览器|Twinkstar|Twinkstar Browser|7.12.1000.2112|Twinkstar Limited|Twinkstar +XPDLNG5248Q7NC|HttpMaster Express|Borvid, Informacijske storitve, Janez Čas s.p.|HttpMaster Express Edition 5.4.1|5.4.1|Borvid|{B61241AA-F5FC-42C9-A1F9-F6D72D654349} +XPDM19SX6D8V40|JYL Orders Suppliers Windows|JYL Software|JYL Order Suppliers 1.70|1.70|JYL Software|{57DF6E60-F6E4-498F-9637-18D6C0FA08B9} +XPDM4ZR5KJ9JN9|PowerDirector 365 Free - Video Editor, Movie Maker|CyberLink Corp.|CyberLink PowerDirector 365|20.1.2519.0|CyberLink Corp.|{278A8296-12A6-4CD0-8A8E-6947948477C5} +XPDM5Q9J9SFCX9|Stampa Ricevute Pagamento|Esposito Software di M. G. Caputo|Stampa Ricevute Pagamento Demo||Copyright Esposito Software|Stampa Ricevute Pagamento Demo_is1 +XPDNG54ZDC79K0|JYL Time Clock Windows|JYL Software|JYL Time Clock 2.22|2.22|JYL Software|{839FD23A-EFE9-4252-AF1A-B8B56ED925F4} +XPDNH1FMW7NB40|火绒安全软件|Beijing Huorong Network Technology Co., Ltd.|Huorong Internet Security|5.0|Beijing Huorong Network Technology Co., Ltd.|HuorongSysdiag +XPDNLQK867NNXF|Ashampoo ZIP Pro 4|Ashampoo|Ashampoo ZIP Pro 4|4.10.22|Ashampoo GmbH & Co. KG|{0A11EA01-1F01-7AF6-20A2-E6F8131AD29C}_is1 +XPDNXDPXBRSVXT|WinZip 26|WinZip Computing|WinZip 26.0|26.0.15033|Corel Corporation|{CD95F661-A5C4-44F5-A6AA-ECDD91C2413F} +XPDNXG5333CSVK|Hard Disk Sentinel Professional|Janos Mathe|Hard Disk Sentinel PRO|6.01|Janos Mathe|Hard Disk Sentinel_is1 +XPDNZ9TPLKW6TB|Fy Slideshow|Guutara's Notebook|Fy Slideshow|5.6.0|Guutara|{5A4DEC47-8784-4591-983F-A3A6C3C89A46} +XPDNZJFNCR1B07|Avast Free Antivirus|AVAST Software|Avast Free Antivirus|22.2.6003|Avast Software|Avast Antivirus +XPDP1XPZR8NL28|Studio Medico Pro|Esposito Software di M. G. Caputo|Studio Medico Pro 3.0 Demo||Copyright Esposito Software|Studio Medico Pro 3.0 Demo_is1 +XPDP255TRF9WP8|Logspire|Anfibia Software|Logspire 1.0.0.51|1.0.0.51|Anfibia|Logspire_is1 +XPDP2X1MMZ4KR8|Ashampoo Burning Studio 23|Ashampoo|Ashampoo Burning Studio 23|23.0.5|Ashampoo GmbH & Co. KG|{91B33C97-2A56-F111-077E-E591CE9D7DE7}_is1 +XPFCFBB4FB3D6D|Emjysoft Cleaner|Emjysoft|Emjysoft Cleaner 2022 v4.1|4.1|Emjysoft|{167B1302-A739-42DE-BBD2-4C2F13D1FF51}_is1 +XPFCFKCNNTXGQD|Yandex Browser|Yandex|Yandex|21.9.1.686|ООО «ЯНДЕКС»|YandexBrowser +XPFCFL5ZTNFGD7|Wondershare Anireel|WONDERSHARE GLOBAL LIMITED|Wondershare Anireel(Build 1.6.2)||Wondershare Software|Wondershare Anireel_is1 +XPFCG86X2PGLDJ|Christmas Elf by Pothos|Pothos|Christmas Elf|||ChristmasElf +XPFCGHHXNH4WBW|Biblioteca e Prestiti Librari|Esposito Software di M. G. Caputo|Gestione Biblioteca e Prestiti Librari 3.0 Demo||Copyright Esposito Software|Gestione Biblioteca e Prestiti Librari 3.0 Demo_is1 +XPFCWP0SQWXM3V|CCleaner|Piriform Software Ltd|CCleaner|5.89|Piriform|CCleaner +XPFCXFRDJ8VGPT|Домашняя Бухгалтерия|Keepsoft|Äîìàøíÿÿ áóõãàëòåðèÿ Lite|7.2|Keepsoft|Äîìàøíÿÿ áóõãàëòåðèÿ Lite +XPFCXPF18WNKP6|Total Defense Essential Anti-Virus|Total Defense, Inc.|Total Defense|13.0.0.572|Total Defense, Inc.|TotalDefense +XPFCXS0QVTHDC9|Active@ Disk Editor|LSoft Technologies Inc.|Active@ Disk Editor 7|7|LSoft Technologies Inc|{F40165C8-BD5B-4E42-A40D-396BB707E5B7}_is1 +XPFD27PCFQJQ68|TextSeek|Xiamen Zesite Company|TextSeek|2.12.3060|Zesite Company|TextSeek +XPFD28MTCS0GXJ|VisualNEO Web|SinLios Soluciones Digitales|VisualNeoWeb||SinLios|{EEF9B1C5-7E35-4972-A79A-44B2B2C72D3D}_is1 +XPFFBRXVQ2L6JN|Coolnew PDF|CoolNewPDF|CoolNew PDF|3.0.0.1|CoolNew Software Corporation|coolnewpdf +XPFFC9N4PVM9N8|Prenotazione Tavoli OK|Esposito Software di Maria Grazia Caputo|Prenotazione Tavoli OK Demo||Copyright Esposito Software|Prenotazione Tavoli OK Demo_is1 +XPFFCCM235X204|Fy Memo|Guutara's Notebook|Fy Memo|6.5.0|Guutara|{4BDAE26E-3414-4516-89F9-B6C277029CA5} +XPFFCM599XXT5P|傲软录屏|网旭科技|ApowerREC V1.5.5.18|1.5.5.18|Apowersoft LIMITED|{6F2998B2-21F7-4CEF-94B2-C3919D939CF9}_is1 +XPFFH5S3C4Q1CB|傲软抠图|网旭科技|Apowersoft background eraser V2.3.13|2.3.13|Apowersoft LIMITED|{98EC0F66-C563-40FA-A77A-F2FC558F5DAA}_is1 +XPFFSV0VCDKTM5|PolicyApplicator Conversion Kit|Hauke Hasselberg|PolicyApplicator Conversion Kit|1.0.11.0|Hauke Götze|{C918DB43-6B86-4364-BEAC-1184D3EE3C07} +XPFFT29L5QQ7RL|SRPG Studio|SapphireSoft|SRPG Studio Trial version 1.251|1.251|SapphireSoft|{FBC98908-FD84-4C92-A539-5DA61EDD7F9F}_is1 +XPFFT3RD5FMWX2|Emjysoft Comptabilité Personnelle|Emjysoft|Personal Finance|20.5|Emjysoft|{2369DC9E-11A7-4BAE-A43E-7A4CB477574F}_is1 +XPFFTPNN0NNHVQ|Auto Print Order|NAMTUK|AutoPrintOrder 1.10.1215|1.10.1215|Namtuk|{B26EF0DD-2375-4E88-9991-4652AC89FE3F} +XPFM2BJ3RPZ9XB|轻闪PDF编辑|网旭科技|LightPDF Editor V1.2.6.0|1.2.6.0|Apowersoft LIMITED|{161C8BF4-DB06-49A7-B6AC-7CAB7DAF136F}_is1 +XPFM306TS4PHH5|Ashampoo Burning Studio FREE|Ashampoo|Ashampoo Burning Studio FREE|1.21.5|Ashampoo GmbH & Co. KG|{91B33C97-91F8-FFB3-581B-BC952C901685}_is1 +XPFM5W1J84KQZX|ndCurveMaster|SigmaLab Tomasz Cepowski|ndCurveMaster Trial x64 version 8.2.0.1|8.2.0.1|SigmaLab|{5FB2948C-B95A-49CD-A2ED-62D0A38D7B1C}_is1 +XPFMJGWHHCNL5P|傲软投屏—手机/电脑/电视高清投屏神器|网旭科技|ApowerMirror V1.6.5.1|1.6.5.1|APOWERSOFT LIMITED|{a9482532-9c34-478c-80c3-85bdccbb981f}_is1 +XPFMKKKLHMMK6Q|Videoteca OK|Esposito Software di Maria Grazia Caputo|Videoteca OK 5.0 Demo||Copyright Esposito Software|Videoteca OK 5.0 Demo_is1 +XPFNZJKG6100L4|ASM Visual|gri-software|ASM Visual version 1.1.7|1.1.7|gri-software|{7416EF27-89A5-4819-9996-36C16F49BAEC}_is1 +XPFNZKDRP1SXM6|视频转换王|网旭科技|Apowersoft Video Converter Studio V4.8.6.7|4.8.6.7|APOWERSOFT LIMITED|{195E8D7F-292B-4B04-A6E7-E96CAF04C767}_is1 +XPFP0G0V147H6D|Wondershare PDFelement|WONDERSHARE GLOBAL LIMITED|Wondershare PDFelement ( Version 8.3.0 )|8.3.0|Wondershare|{343A530C-4726-4091-87E0-F9CC41792CE2}_is1 +XPFP2VCXM8D2DB|傲软PDF编辑——一键编辑&转化&压缩&签名PDF文件|网旭科技|ApowerPDF V5.4.2.3|5.4.2.3|Apowersoft LIMITED|{8691C793-7B2C-46C5-9AB2-AB80D129A5EC}_is1 +XPFP30KL61D4SC|Wondershare UniConverter|WONDERSHARE GLOBAL LIMITED|Wondershare UniConverter 13(Build 13.5.1.116)|13.5.1.116|Wondershare Software|UniConverter 13_is1 +XPFP42D8L456SK|X-VPN - Best VPN Proxy and Wifi Security|Free Connected Limited.|X-VPN|71.0|Free Connected Limited|X-VPN +XPFP42J061BPC1|Documenti Lavori Cantiere|Esposito Software di M. G. Caputo|Documenti Lavori Cantiere Demo||Copyright Esposito Software|Documenti Lavori Cantiere Demo_is1 +XPFPFN4LT21PZJ|Studio Dentistico Pro|Esposito Software di M. G. Caputo|Studio Dentistico Pro Demo||Copyright Esposito Software|Studio Dentistico Pro Demo_is1 +XPFPFWMVTR0WHP|Ashampoo UnInstaller 11|Ashampoo|Ashampoo UnInstaller 11|11.00.12|Ashampoo GmbH & Co. KG|{4209F371-B84B-F321-6BD3-1D91E2505732}_is1 +XPFPFWV5JD80K2|BeeCut|网旭科技|BeeCut V1.7.7.22|1.7.7.22|Apowersoft LIMITED|{CA76BFA8-1862-49D7-B2C7-AE3D6CF40E53}_is1 +XPFPLCB36G8V8J|HttpMaster Professional|Borvid, Informacijske storitve, Janez Čas s.p.|HttpMaster Professional Edition 5.4.1|5.4.1|Borvid|{B61241AA-F5FC-42C9-A1F9-F6D72D654349} \ No newline at end of file diff --git a/src/AppInstallerCLITests/TestSource.cpp b/src/AppInstallerCLITests/TestSource.cpp index 77c6d6322f..9b42173dc3 100644 --- a/src/AppInstallerCLITests/TestSource.cpp +++ b/src/AppInstallerCLITests/TestSource.cpp @@ -50,6 +50,8 @@ namespace TestCommon return LocIndString{ VersionManifest.Channel }; case PackageVersionProperty::SourceIdentifier: return LocIndString{ Source.lock()->GetIdentifier() }; + case PackageVersionProperty::Publisher: + return LocIndString{ VersionManifest.DefaultLocalization.Get() }; default: return {}; } diff --git a/src/AppInstallerCommonCore/AppInstallerStrings.cpp b/src/AppInstallerCommonCore/AppInstallerStrings.cpp index d5d295ee49..984bfdb27c 100644 --- a/src/AppInstallerCommonCore/AppInstallerStrings.cpp +++ b/src/AppInstallerCommonCore/AppInstallerStrings.cpp @@ -23,7 +23,7 @@ namespace AppInstaller::Utility { ICUBreakIterator(std::string_view input, UBreakIteratorType type) { - UErrorCode err = U_ZERO_ERROR; + UErrorCode err = U_ZERO_ERROR; m_text.reset(utext_openUTF8(nullptr, input.data(), wil::safe_cast(input.length()), &err)); if (U_FAILURE(err)) @@ -154,6 +154,43 @@ namespace AppInstaller::Utility return result; } + std::u32string ConvertToUTF32(std::string_view input) + { + if (input.empty()) + { + return {}; + } + + UErrorCode errorCode = UErrorCode::U_ZERO_ERROR; + auto utf32ByteCount= ucnv_convert("UTF-32", "UTF-8", nullptr, 0, input.data(), static_cast(input.size()), &errorCode); + + if (errorCode != U_BUFFER_OVERFLOW_ERROR) + { + AICLI_LOG(Core, Error, << "ucnv_convert returned " << errorCode); + THROW_HR(APPINSTALLER_CLI_ERROR_ICU_CONVERSION_ERROR); + } + + FAIL_FAST_HR_IF(E_UNEXPECTED, utf32ByteCount % sizeof(char32_t) != 0); + auto utf32CharCount = utf32ByteCount / sizeof(char32_t); + std::u32string result(utf32CharCount, U'\0'); + + errorCode = UErrorCode::U_ZERO_ERROR; + + auto utf32BytesWritten = ucnv_convert("UTF-32", "UTF-8", (char*)(result.data()), utf32ByteCount, input.data(), static_cast(input.size()), &errorCode); + + // The size we pass to ucnv_convert is not enough for it to put in the null terminator, + // which wouldn't work anyways as it puts a single byte. + if (errorCode != U_STRING_NOT_TERMINATED_WARNING) + { + AICLI_LOG(Core, Error, << "ucnv_convert returned " << errorCode); + THROW_HR(APPINSTALLER_CLI_ERROR_ICU_CONVERSION_ERROR); + } + + FAIL_FAST_HR_IF(E_UNEXPECTED, utf32ByteCount != utf32BytesWritten); + + return result; + } + size_t UTF8Length(std::string_view input) { ICUBreakIterator itr{ input, UBRK_CHARACTER }; diff --git a/src/AppInstallerCommonCore/Errors.cpp b/src/AppInstallerCommonCore/Errors.cpp index c65027b980..ca15f06bd1 100644 --- a/src/AppInstallerCommonCore/Errors.cpp +++ b/src/AppInstallerCommonCore/Errors.cpp @@ -172,6 +172,8 @@ namespace AppInstaller return "The upgrade version is not newer than the installed version"; case APPINSTALLER_CLI_ERROR_UPGRADE_VERSION_UNKNOWN: return "Upgrade version is unknown and override is not specified"; + case APPINSTALLER_CLI_ERROR_ICU_CONVERSION_ERROR: + return "ICU conversion error"; case APPINSTALLER_CLI_ERROR_INSTALL_PACKAGE_IN_USE: return "Application is currently running.Exit the application then try again."; case APPINSTALLER_CLI_ERROR_INSTALL_INSTALL_IN_PROGRESS: diff --git a/src/AppInstallerCommonCore/Public/AppInstallerErrors.h b/src/AppInstallerCommonCore/Public/AppInstallerErrors.h index a87207f743..1ac01522b4 100644 --- a/src/AppInstallerCommonCore/Public/AppInstallerErrors.h +++ b/src/AppInstallerCommonCore/Public/AppInstallerErrors.h @@ -93,6 +93,7 @@ #define APPINSTALLER_CLI_ERROR_INVALID_TABLE_COLUMN ((HRESULT)0x8A15004E) #define APPINSTALLER_CLI_ERROR_UPGRADE_VERSION_NOT_NEWER ((HRESULT)0x8A15004F) #define APPINSTALLER_CLI_ERROR_UPGRADE_VERSION_UNKNOWN ((HRESULT)0x8A150050) +#define APPINSTALLER_CLI_ERROR_ICU_CONVERSION_ERROR ((HRESULT)0x8A150051) // Install errors. #define APPINSTALLER_CLI_ERROR_INSTALL_PACKAGE_IN_USE ((HRESULT)0x8A150101) diff --git a/src/AppInstallerCommonCore/Public/AppInstallerStrings.h b/src/AppInstallerCommonCore/Public/AppInstallerStrings.h index 01b44f9f9e..dd2f09c6e1 100644 --- a/src/AppInstallerCommonCore/Public/AppInstallerStrings.h +++ b/src/AppInstallerCommonCore/Public/AppInstallerStrings.h @@ -15,6 +15,9 @@ namespace AppInstaller::Utility // Converts the given UTF8 string to UTF16 std::wstring ConvertToUTF16(std::string_view input, UINT codePage = CP_UTF8); + // Converts the given UTF8 string to UTF32 + std::u32string ConvertToUTF32(std::string_view input); + // Normalizes a UTF8 string to the given form. std::string Normalize(std::string_view input, NORM_FORM form = NORM_FORM::NormalizationKC); diff --git a/src/AppInstallerRepositoryCore/ARPCorrelation.cpp b/src/AppInstallerRepositoryCore/ARPCorrelation.cpp new file mode 100644 index 0000000000..639b2b73e8 --- /dev/null +++ b/src/AppInstallerRepositoryCore/ARPCorrelation.cpp @@ -0,0 +1,398 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +#include "pch.h" +#include "winget/ARPCorrelation.h" +#include "winget/Manifest.h" +#include "winget/NameNormalization.h" +#include "winget/RepositorySearch.h" +#include "winget/RepositorySource.h" + +using namespace AppInstaller::Manifest; +using namespace AppInstaller::Repository; +using namespace AppInstaller::Utility; + +namespace AppInstaller::Repository::Correlation +{ + namespace + { + constexpr double MatchingThreshold = 0.5; + + IARPMatchConfidenceAlgorithm& InstanceInternal(std::optional algorithmOverride = {}) + { + static EditDistanceMatchConfidenceAlgorithm s_algorithm; + static IARPMatchConfidenceAlgorithm* s_override = nullptr; + + if (algorithmOverride.has_value()) + { + s_override = algorithmOverride.value(); + } + + if (s_override) + { + return *s_override; + } + else + { + return s_algorithm; + } + } + + // A simple matrix class to hold the edit distance table without having to allocate multiple arrays. + struct Matrix + { + Matrix(size_t rows, size_t columns) : m_rows(rows), m_columns(columns), m_data(rows * columns) {} + + double& At(size_t i, size_t j) + { + return m_data[i * m_columns + j]; + } + + private: + size_t m_rows; + size_t m_columns; + std::vector m_data; + }; + + double EditDistanceScore(std::u32string_view sv1, std::u32string_view sv2) + { + // Naive implementation of edit distance (scaled over the string size) + + // We may have empty values coming from the ARP + if (sv1.empty() || sv2.empty()) + { + return 0; + } + + // distance[i, j] = distance between sv1[0:i] and sv2[0:j] + // We don't need to hold more than two rows at a time, but it's simpler to keep the whole table. + Matrix distance(sv1.size(), sv2.size()); + + for (size_t i = 0; i < sv1.size(); ++i) + { + for (size_t j = 0; j < sv2.size(); ++j) + { + double& d = distance.At(i, j); + if (i == 0) + { + d = static_cast(j); + } + else if (j == 0) + { + d = static_cast(i); + } + else if (sv1[i] == sv2[j]) + { + d = distance.At(i - 1, j - 1); + } + else + { + d = std::min( + 1 + distance.At(i - 1, j - 1), + 1 + std::min(distance.At(i, j - 1), distance.At(i - 1, j))); + } + } + } + + // Maximum distance is equal to the length of the longest string. + // We use that to scale to [0,1]. + // A smaller distance represents a higher match, so we subtract from 1 for the final score + double editDistance = distance.At(sv1.size() - 1, sv2.size() - 1); + return 1 - editDistance / std::max(sv1.size(), sv2.size()); + } + } + + IARPMatchConfidenceAlgorithm& IARPMatchConfidenceAlgorithm::Instance() + { + return InstanceInternal(); + } + +#ifndef AICLI_DISABLE_TEST_HOOKS + void IARPMatchConfidenceAlgorithm::OverrideInstance(IARPMatchConfidenceAlgorithm* algorithmOverride) + { + InstanceInternal(algorithmOverride); + } + + void IARPMatchConfidenceAlgorithm::ResetInstance() + { + InstanceInternal(nullptr); + } +#endif + + std::u32string EditDistanceMatchConfidenceAlgorithm::PrepareString(std::string_view s) const + { + return Utility::ConvertToUTF32(Utility::FoldCase(s)); + } + + std::u32string EditDistanceMatchConfidenceAlgorithm::NormalizeAndPrepareName(std::string_view name) const + { + return PrepareString(m_normalizer.NormalizeName(name).Name()); + } + + std::u32string EditDistanceMatchConfidenceAlgorithm::NormalizeAndPreparePublisher(std::string_view publisher) const + { + return PrepareString(m_normalizer.NormalizePublisher(publisher)); + } + + void EditDistanceMatchConfidenceAlgorithm::Init(const Manifest::Manifest& manifest) + { + // We will use the name and publisher from each localization. + m_namesAndPublishers.clear(); + + std::u32string defaultPublisher; + if (manifest.DefaultLocalization.Contains(Localization::Publisher)) + { + defaultPublisher = NormalizeAndPreparePublisher(manifest.DefaultLocalization.Get()); + } + + if (manifest.DefaultLocalization.Contains(Localization::PackageName)) + { + std::u32string defaultName = NormalizeAndPrepareName(manifest.DefaultLocalization.Get()); + m_namesAndPublishers.emplace_back(defaultName, defaultPublisher); + + for (const auto& loc : manifest.Localizations) + { + if (loc.Contains(Localization::PackageName) || loc.Contains(Localization::Publisher)) + { + m_namesAndPublishers.emplace_back( + loc.Contains(Localization::PackageName) ? NormalizeAndPrepareName(loc.Get()) : defaultName, + loc.Contains(Localization::Publisher) ? NormalizeAndPreparePublisher(loc.Get()) : defaultPublisher); + } + } + } + } + + double EditDistanceMatchConfidenceAlgorithm::ComputeConfidence(const ARPEntry& arpEntry) const + { + // Get the best score across all localizations + double bestMatchingScore = 0; + for (const auto& manifestNameAndPublisher : m_namesAndPublishers) + { + // Name and Publisher are available as multi properties, but for ARP entries there will only be 0 or 1 values. + auto arpName = arpEntry.Entry->GetInstalledVersion()->GetProperty(PackageVersionProperty::Name); + auto arpPublisher = arpEntry.Entry->GetInstalledVersion()->GetProperty(PackageVersionProperty::Publisher); + + auto nameDistance = EditDistanceScore(manifestNameAndPublisher.first, NormalizeAndPrepareName(arpName.get())); + auto publisherDistance = EditDistanceScore(manifestNameAndPublisher.second, NormalizeAndPreparePublisher(arpPublisher.get())); + + // TODO: Consider other ways of merging the two values + auto score = (2 * nameDistance + publisherDistance) / 3; + bestMatchingScore = std::max(bestMatchingScore, score); + } + + return bestMatchingScore; + } + + ARPCorrelationResult FindARPEntryForNewlyInstalledPackage( + const Manifest::Manifest& manifest, + const std::vector& arpSnapshot, + Source& arpSource) + { + AICLI_LOG(Repo, Verbose, << "Finding ARP entry matching newly installed package"); + + std::vector changedArpEntries; + std::vector existingArpEntries; + + for (auto& entry : arpSource.Search({}).Matches) + { + auto installed = entry.Package->GetInstalledVersion(); + + if (installed) + { + auto entryKey = std::make_tuple( + entry.Package->GetProperty(PackageProperty::Id), + installed->GetProperty(PackageVersionProperty::Version), + installed->GetProperty(PackageVersionProperty::Channel)); + + auto itr = std::lower_bound(arpSnapshot.begin(), arpSnapshot.end(), entryKey); + if (itr == arpSnapshot.end() || *itr != entryKey) + { + changedArpEntries.emplace_back(entry.Package, true); + } + else + { + existingArpEntries.emplace_back(entry.Package, false); + } + } + } + + // Also attempt to find the entry based on the manifest data + + SearchRequest manifestSearchRequest; + AppInstaller::Manifest::Manifest::string_t defaultPublisher; + if (manifest.DefaultLocalization.Contains(Localization::Publisher)) + { + defaultPublisher = manifest.DefaultLocalization.Get(); + } + + // The default localization must contain the name or we cannot do this lookup + if (manifest.DefaultLocalization.Contains(Localization::PackageName)) + { + AppInstaller::Manifest::Manifest::string_t defaultName = manifest.DefaultLocalization.Get(); + manifestSearchRequest.Inclusions.emplace_back(PackageMatchFilter(PackageMatchField::NormalizedNameAndPublisher, MatchType::Exact, defaultName, defaultPublisher)); + + for (const auto& loc : manifest.Localizations) + { + if (loc.Contains(Localization::PackageName) || loc.Contains(Localization::Publisher)) + { + manifestSearchRequest.Inclusions.emplace_back(PackageMatchFilter(PackageMatchField::NormalizedNameAndPublisher, MatchType::Exact, + loc.Contains(Localization::PackageName) ? loc.Get() : defaultName, + loc.Contains(Localization::Publisher) ? loc.Get() : defaultPublisher)); + } + } + } + + std::vector productCodes; + for (const auto& installer : manifest.Installers) + { + if (!installer.ProductCode.empty()) + { + if (std::find(productCodes.begin(), productCodes.end(), installer.ProductCode) == productCodes.end()) + { + manifestSearchRequest.Inclusions.emplace_back(PackageMatchFilter(PackageMatchField::ProductCode, MatchType::Exact, installer.ProductCode)); + productCodes.emplace_back(installer.ProductCode); + } + } + + for (const auto& appsAndFeaturesEntry : installer.AppsAndFeaturesEntries) + { + if (!appsAndFeaturesEntry.DisplayName.empty()) + { + manifestSearchRequest.Inclusions.emplace_back(PackageMatchFilter(PackageMatchField::NormalizedNameAndPublisher, MatchType::Exact, + appsAndFeaturesEntry.DisplayName, + appsAndFeaturesEntry.Publisher.empty() ? defaultPublisher : appsAndFeaturesEntry.Publisher)); + } + } + } + + SearchResult findByManifest; + + // Don't execute this search if it would just find everything + if (!manifestSearchRequest.IsForEverything()) + { + findByManifest = arpSource.Search(manifestSearchRequest); + } + + // Cross reference the changes with the search results + std::vector> packagesInBoth; + + for (const auto& change : changedArpEntries) + { + for (const auto& byManifest : findByManifest.Matches) + { + if (change.Entry->IsSame(byManifest.Package.get())) + { + packagesInBoth.emplace_back(change.Entry); + break; + } + } + } + + // We now have all of the package changes; time to report them. + // The set of cases we could have for changes to ARP: + // 0 packages :: No changes were detected to ARP, which could mean that the installer + // did not write an entry. It could also be a forced reinstall. + // 1 package :: Golden path; this should be what we installed. + // 2+ packages :: We need to determine which package actually matches the one that we + // were installing. + // + // The set of cases we could have for finding packages based on the manifest: + // 0 packages :: The manifest data does not match the ARP information. + // 1 package :: Golden path; this should be what we installed. + // 2+ packages :: The data in the manifest is either too broad or we have + // a problem with our name normalization. + + // Find the package that we are going to log + ARPCorrelationResult result; + // TODO: Find a good way to consider the other heuristics in these stats. + result.ChangesToARP = changedArpEntries.size(); + result.MatchesInARP = findByManifest.Matches.size(); + result.CountOfIntersectionOfChangesAndMatches = packagesInBoth.size(); + + // If there is only a single common package (changed and matches), it is almost certainly the correct one. + if (packagesInBoth.size() == 1) + { + result.Package = packagesInBoth[0]->GetInstalledVersion(); + } + // If it wasn't changed but we still find a match, that is the best thing to report. + else if (findByManifest.Matches.size() == 1) + { + result.Package = findByManifest.Matches[0].Package->GetInstalledVersion(); + } + // If only a single ARP entry was changed and we found no matches, report that. + else if (findByManifest.Matches.empty() && changedArpEntries.size() == 1) + { + result.Package = changedArpEntries[0].Entry->GetInstalledVersion(); + } + else + { + // We were not able to find an exact match, so we now run some heuristics + // to try and match the package with some ARP entry by assigning them scores. + AICLI_LOG(Repo, Verbose, << "No exact ARP match found. Trying to find one with heuristics"); + + std::vector arpEntries; + for (auto&& entry : changedArpEntries) + { + arpEntries.push_back(std::move(entry)); + } + for (auto&& entry : existingArpEntries) + { + arpEntries.push_back(std::move(entry)); + } + + result.Package = FindARPEntryForNewlyInstalledPackageWithHeuristics(manifest, arpEntries); + } + + return result; + } + + // Find the best match using heuristics + std::shared_ptr FindARPEntryForNewlyInstalledPackageWithHeuristics( + const Manifest::Manifest& manifest, + const std::vector& arpEntries) + { + // TODO: In the future we can make different passes with different algorithms until we find a match + return FindARPEntryForNewlyInstalledPackageWithHeuristics(manifest, arpEntries, IARPMatchConfidenceAlgorithm::Instance()); + } + + std::shared_ptr FindARPEntryForNewlyInstalledPackageWithHeuristics( + const AppInstaller::Manifest::Manifest& manifest, + const std::vector& arpEntries, + IARPMatchConfidenceAlgorithm& algorithm) + { + AICLI_LOG(Repo, Verbose, << "Looking for best match in ARP for manifest " << manifest.Id); + + algorithm.Init(manifest); + + std::optional bestMatch; + double bestScore = 0; + + for (const auto& arpEntry : arpEntries) + { + auto score = algorithm.ComputeConfidence(arpEntry); + AICLI_LOG(Repo, Verbose, << "Match confidence for " << arpEntry.Entry->GetProperty(PackageProperty::Id) << ": " << score); + + if (score < MatchingThreshold) + { + AICLI_LOG(Repo, Verbose, << "Score is lower than threshold"); + continue; + } + + if (!bestMatch || bestScore < score) + { + bestMatch = arpEntry; + bestScore = score; + } + } + + if (bestMatch) + { + AICLI_LOG(Repo, Verbose, << "Best match is " << bestMatch->Entry->GetProperty(PackageProperty::Id)); + } + else + { + AICLI_LOG(Repo, Verbose, << "No ARP entry had a correlation score surpassing the required threshold"); + } + + return bestMatch ? bestMatch->Entry->GetInstalledVersion() : nullptr; + } +} \ No newline at end of file diff --git a/src/AppInstallerRepositoryCore/AppInstallerRepositoryCore.vcxproj b/src/AppInstallerRepositoryCore/AppInstallerRepositoryCore.vcxproj index 5e325d9949..0f0df2c4b5 100644 --- a/src/AppInstallerRepositoryCore/AppInstallerRepositoryCore.vcxproj +++ b/src/AppInstallerRepositoryCore/AppInstallerRepositoryCore.vcxproj @@ -272,6 +272,7 @@ + @@ -299,6 +300,7 @@ + NotUsing diff --git a/src/AppInstallerRepositoryCore/AppInstallerRepositoryCore.vcxproj.filters b/src/AppInstallerRepositoryCore/AppInstallerRepositoryCore.vcxproj.filters index c848a95ee1..78d9b98747 100644 --- a/src/AppInstallerRepositoryCore/AppInstallerRepositoryCore.vcxproj.filters +++ b/src/AppInstallerRepositoryCore/AppInstallerRepositoryCore.vcxproj.filters @@ -264,6 +264,9 @@ Header Files + + Public\winget + @@ -413,6 +416,9 @@ Source Files + + Source Files + diff --git a/src/AppInstallerRepositoryCore/Microsoft/SQLiteIndexSource.cpp b/src/AppInstallerRepositoryCore/Microsoft/SQLiteIndexSource.cpp index 3362a11bcf..d457a59df5 100644 --- a/src/AppInstallerRepositoryCore/Microsoft/SQLiteIndexSource.cpp +++ b/src/AppInstallerRepositoryCore/Microsoft/SQLiteIndexSource.cpp @@ -48,7 +48,8 @@ namespace AppInstaller::Repository::Microsoft return LocIndString{ GetReferenceSource()->GetDetails().Name }; default: // Values coming from the index will always be localized/independent. - return LocIndString{ GetReferenceSource()->GetIndex().GetPropertyByManifestId(m_manifestId, property).value() }; + std::optional optValue = GetReferenceSource()->GetIndex().GetPropertyByManifestId(m_manifestId, property); + return LocIndString{ optValue ? optValue.value() :std::string{} }; } } diff --git a/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/Interface.h b/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/Interface.h index e21536ea13..2e8bc227fc 100644 --- a/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/Interface.h +++ b/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/Interface.h @@ -30,5 +30,8 @@ namespace AppInstaller::Repository::Microsoft::Schema::V1_1 void PerformQuerySearch(V1_0::SearchResultsTable& resultsTable, const RequestMatch& query) const override; virtual SearchResult SearchInternal(const SQLite::Connection& connection, SearchRequest& request) const; virtual void PrepareForPackaging(SQLite::Connection& connection, bool vacuum); + + // Gets a property already knowing that the manifest id is valid. + virtual std::optional GetPropertyByManifestIdInternal(const SQLite::Connection& connection, SQLite::rowid_t manifestId, PackageVersionProperty property) const; }; } diff --git a/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/Interface_1_1.cpp b/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/Interface_1_1.cpp index 7254e20591..543db5001b 100644 --- a/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/Interface_1_1.cpp +++ b/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/Interface_1_1.cpp @@ -292,4 +292,24 @@ namespace AppInstaller::Repository::Microsoft::Schema::V1_1 builder.Execute(connection); } } + + std::optional Interface::GetPropertyByManifestIdInternal(const SQLite::Connection& connection, SQLite::rowid_t manifestId, PackageVersionProperty property) const + { + switch (property) + { + case AppInstaller::Repository::PackageVersionProperty::Publisher: + { + // Publisher is not a primary data member in this version, but it may be stored in the metadata + if (ManifestMetadataTable::Exists(connection)) + { + return ManifestMetadataTable::GetMetadataByManifestIdAndMetadata(connection, manifestId, PackageVersionMetadata::Publisher); + } + + // No metadata, so no publisher + return {}; + } + default: + return V1_0::Interface::GetPropertyByManifestIdInternal(connection, manifestId, property); + } + } } diff --git a/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/ManifestMetadataTable.cpp b/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/ManifestMetadataTable.cpp index a11ae0f612..8f57310ab9 100644 --- a/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/ManifestMetadataTable.cpp +++ b/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/ManifestMetadataTable.cpp @@ -68,6 +68,25 @@ namespace AppInstaller::Repository::Microsoft::Schema::V1_1 return result; } + std::optional ManifestMetadataTable::GetMetadataByManifestIdAndMetadata(const SQLite::Connection& connection, SQLite::rowid_t manifestId, PackageVersionMetadata metadata) + { + using namespace Builder; + + StatementBuilder builder; + builder.Select(s_ManifestMetadataTable_Value_Column).From(s_ManifestMetadataTable_Table_Name). + Where(s_ManifestMetadataTable_Manifest_Column).Equals(manifestId). + And(s_ManifestMetadataTable_Metadata_Column).Equals(metadata); + + Statement statement = builder.Prepare(connection); + + if (statement.Step()) + { + return statement.GetColumn(0); + } + + return {}; + } + void ManifestMetadataTable::SetMetadataByManifestId(SQLite::Connection& connection, SQLite::rowid_t manifestId, PackageVersionMetadata metadata, std::string_view value) { using namespace Builder; diff --git a/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/ManifestMetadataTable.h b/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/ManifestMetadataTable.h index eea2ff1b72..37f14df3ac 100644 --- a/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/ManifestMetadataTable.h +++ b/src/AppInstallerRepositoryCore/Microsoft/Schema/1_1/ManifestMetadataTable.h @@ -26,6 +26,10 @@ namespace AppInstaller::Repository::Microsoft::Schema::V1_1 // The table must exist. static ISQLiteIndex::MetadataResult GetMetadataByManifestId(const SQLite::Connection& connection, SQLite::rowid_t manifestId); + // Gets the specific metadata value for the manifest, if it exists. + // The table must exist. + static std::optional GetMetadataByManifestIdAndMetadata(const SQLite::Connection& connection, SQLite::rowid_t manifestId, PackageVersionMetadata metadata); + // Sets the metadata value for the given manifest. // The table must exist. static void SetMetadataByManifestId(SQLite::Connection& connection, SQLite::rowid_t manifestId, PackageVersionMetadata metadata, std::string_view value); diff --git a/src/AppInstallerRepositoryCore/Public/winget/ARPCorrelation.h b/src/AppInstallerRepositoryCore/Public/winget/ARPCorrelation.h new file mode 100644 index 0000000000..706a2273cd --- /dev/null +++ b/src/AppInstallerRepositoryCore/Public/winget/ARPCorrelation.h @@ -0,0 +1,105 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +#pragma once + +#include + +namespace AppInstaller +{ + namespace Manifest + { + struct Manifest; + struct ManifestLocalization; + } + + namespace Repository + { + struct IPackage; + struct IPackageVersion; + struct Source; + } +} + +namespace AppInstaller::Repository::Correlation +{ + // Contains the { Id, Version, Channel } + using ARPEntrySnapshot = std::tuple; + + // Struct holding all the data from an ARP entry we use for the correlation + struct ARPEntry + { + ARPEntry(std::shared_ptr entry, bool isNewOrUpdated) : Entry(entry), IsNewOrUpdated(isNewOrUpdated) {} + + // Data found in the ARP entry + std::shared_ptr Entry; + + // Whether this entry changed with the current installation + bool IsNewOrUpdated; + }; + + struct ARPCorrelationResult + { + // Correlated package from ARP + std::shared_ptr Package{}; + // Number of ARP entries that are new or updated + size_t ChangesToARP{}; + // Number of ARP entries that match with the installed package + size_t MatchesInARP{}; + // Number of changed ARP entries that match the installed package + size_t CountOfIntersectionOfChangesAndMatches{}; + }; + + struct IARPMatchConfidenceAlgorithm + { + virtual ~IARPMatchConfidenceAlgorithm() = default; + virtual void Init(const AppInstaller::Manifest::Manifest& manifest) = 0; + virtual double ComputeConfidence(const ARPEntry& arpEntry) const = 0; + + // Returns an instance of the algorithm we will actually use. + // We may use multiple instances/specializations for testing and experimentation. + static IARPMatchConfidenceAlgorithm& Instance(); + +#ifndef AICLI_DISABLE_TEST_HOOKS + static void OverrideInstance(IARPMatchConfidenceAlgorithm* algorithmOverride); + static void ResetInstance(); +#endif + }; + + struct EmptyMatchConfidenceAlgorithm : public IARPMatchConfidenceAlgorithm + { + void Init(const AppInstaller::Manifest::Manifest&) override {} + double ComputeConfidence(const ARPEntry&) const override { return 0; } + }; + + // Measures the correlation with the edit distance between the normalized name and publisher strings. + struct EditDistanceMatchConfidenceAlgorithm : public IARPMatchConfidenceAlgorithm + { + void Init(const AppInstaller::Manifest::Manifest& manifest) override; + double ComputeConfidence(const ARPEntry& entry) const override; + + private: + std::u32string PrepareString(std::string_view s) const; + std::u32string NormalizeAndPrepareName(std::string_view name) const; + std::u32string NormalizeAndPreparePublisher(std::string_view publisher) const; + + AppInstaller::Utility::NameNormalizer m_normalizer{ AppInstaller::Utility::NormalizationVersion::Initial }; + std::vector> m_namesAndPublishers; + }; + + // Finds the ARP entry in the ARP source that matches a newly installed package. + // Takes the package manifest, a snapshot of the ARP before the installation, and the current ARP source. + // Returns the entry in the ARP source, or nullptr if there was no match, plus some stats about the correlation. + ARPCorrelationResult FindARPEntryForNewlyInstalledPackage( + const AppInstaller::Manifest::Manifest& manifest, + const std::vector& arpSnapshot, + AppInstaller::Repository::Source& arpSource); + + std::shared_ptr FindARPEntryForNewlyInstalledPackageWithHeuristics( + const AppInstaller::Manifest::Manifest& manifest, + const std::vector& arpEntries); + + std::shared_ptr FindARPEntryForNewlyInstalledPackageWithHeuristics( + const AppInstaller::Manifest::Manifest& manifest, + const std::vector& arpEntries, + IARPMatchConfidenceAlgorithm& algorithm); +} \ No newline at end of file diff --git a/src/AppInstallerRepositoryCore/Public/winget/RepositorySearch.h b/src/AppInstallerRepositoryCore/Public/winget/RepositorySearch.h index df294ba8df..996ded1efa 100644 --- a/src/AppInstallerRepositoryCore/Public/winget/RepositorySearch.h +++ b/src/AppInstallerRepositoryCore/Public/winget/RepositorySearch.h @@ -134,6 +134,7 @@ namespace AppInstaller::Repository RelativePath, // Returned in hexadecimal format ManifestSHA256Hash, + Publisher, }; // A property of a package version that can have multiple values. diff --git a/src/AppInstallerRepositoryCore/Rest/RestSource.cpp b/src/AppInstallerRepositoryCore/Rest/RestSource.cpp index 9dd08c949f..e002b89b2d 100644 --- a/src/AppInstallerRepositoryCore/Rest/RestSource.cpp +++ b/src/AppInstallerRepositoryCore/Rest/RestSource.cpp @@ -201,6 +201,8 @@ namespace AppInstaller::Repository::Rest return Utility::LocIndString{ m_versionInfo.VersionAndChannel.GetVersion().ToString() }; case PackageVersionProperty::Channel: return Utility::LocIndString{ m_versionInfo.VersionAndChannel.GetChannel().ToString() }; + case PackageVersionProperty::Publisher: + return Utility::LocIndString{ m_package->PackageInfo().Publisher }; default: return Utility::LocIndString{}; }