From 312ad6dc130638946a16b36fbddaf343634ff34e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chac=C3=B3n?= Date: Wed, 27 Apr 2022 21:44:21 -0700 Subject: [PATCH] Remove ARP matching for single ARP change, and consider publisher+name for matching (#2119) --- src/AppInstallerCLIE2ETests/ListCommand.cs | 6 +-- src/AppInstallerCLITests/ARPChanges.cpp | 2 +- src/AppInstallerCLITests/Correlation.cpp | 12 +++--- .../ARPCorrelation.cpp | 40 +++++++++---------- .../Public/winget/ARPCorrelation.h | 3 +- 5 files changed, 30 insertions(+), 33 deletions(-) diff --git a/src/AppInstallerCLIE2ETests/ListCommand.cs b/src/AppInstallerCLIE2ETests/ListCommand.cs index a6acc88ba3..bcfccc625f 100644 --- a/src/AppInstallerCLIE2ETests/ListCommand.cs +++ b/src/AppInstallerCLIE2ETests/ListCommand.cs @@ -34,11 +34,9 @@ public void ListAfterInstall() result = TestCommon.RunAICLICommand("list", productCode); Assert.AreEqual(Constants.ErrorCode.S_OK, result.ExitCode); - Assert.True(result.StdOut.Contains(productCode)); + Assert.True(result.StdOut.Contains("AppInstallerTest.TestExeInstaller")); Assert.True(result.StdOut.Contains("1.0.0.0")); - // TODO: Uncomment when install starts tracking packages. Until then this won't be true because our install - // in this test is using a random guid that won't be in the index. - //Assert.True(result.StdOut.Contains("2.0.0.0")); + Assert.True(result.StdOut.Contains("2.0.0.0")); } } } diff --git a/src/AppInstallerCLITests/ARPChanges.cpp b/src/AppInstallerCLITests/ARPChanges.cpp index 6df059f11b..6111bad98a 100644 --- a/src/AppInstallerCLITests/ARPChanges.cpp +++ b/src/AppInstallerCLITests/ARPChanges.cpp @@ -324,7 +324,7 @@ TEST_CASE("ARPChanges_SingleChange_NoMatch", "[ARPChanges][workflow]") context.AddEverythingResult("EverythingId1", "EverythingName1", "EverythingPublisher1", "EverythingVersion1"); context << ReportARPChanges; - context.ExpectEvent(1, 0, 0, context.EverythingResult.Matches.back().Package.get()); + context.ExpectEvent(1, 0, 0); } TEST_CASE("ARPChanges_SingleChange_SingleMatch", "[ARPChanges][workflow]") diff --git a/src/AppInstallerCLITests/Correlation.cpp b/src/AppInstallerCLITests/Correlation.cpp index 9686c376ce..859fb076b8 100644 --- a/src/AppInstallerCLITests/Correlation.cpp +++ b/src/AppInstallerCLITests/Correlation.cpp @@ -225,10 +225,10 @@ DataSet GetDataSet_NoNoise() dataSet.TestCases = LoadTestData(); // Arbitrary values. We should refine them as the algorithm gets better. - dataSet.RequiredTrueMatchRatio = 0.5; - dataSet.RequiredFalseMatchRatio = 0.1; + dataSet.RequiredTrueMatchRatio = 0.7; + dataSet.RequiredFalseMatchRatio = 0.05; dataSet.RequiredTrueMismatchRatio = 0; // There are no expected mismatches in this data set - dataSet.RequiredFalseMismatchRatio = 0.5; + dataSet.RequiredFalseMismatchRatio = 0.3; return dataSet; } @@ -242,10 +242,10 @@ DataSet GetDataSet_WithNoise() dataSet.TestCases = std::move(baseTestCases); // Arbitrary values. We should refine them as the algorithm gets better. - dataSet.RequiredTrueMatchRatio = 0.5; - dataSet.RequiredFalseMatchRatio = 0.1; + dataSet.RequiredTrueMatchRatio = 0.7; + dataSet.RequiredFalseMatchRatio = 0.05; dataSet.RequiredTrueMismatchRatio = 0; // There are no expected mismatches in this data set - dataSet.RequiredFalseMismatchRatio = 0.5; + dataSet.RequiredFalseMismatchRatio = 0.3; return dataSet; } diff --git a/src/AppInstallerRepositoryCore/ARPCorrelation.cpp b/src/AppInstallerRepositoryCore/ARPCorrelation.cpp index 639b2b73e8..7a1e54a7c9 100644 --- a/src/AppInstallerRepositoryCore/ARPCorrelation.cpp +++ b/src/AppInstallerRepositoryCore/ARPCorrelation.cpp @@ -147,15 +147,17 @@ namespace AppInstaller::Repository::Correlation if (manifest.DefaultLocalization.Contains(Localization::PackageName)) { std::u32string defaultName = NormalizeAndPrepareName(manifest.DefaultLocalization.Get()); - m_namesAndPublishers.emplace_back(defaultName, defaultPublisher); + m_namesAndPublishers.emplace_back(defaultName, defaultPublisher, defaultName + defaultPublisher); for (const auto& loc : manifest.Localizations) { if (loc.Contains(Localization::PackageName) || loc.Contains(Localization::Publisher)) { - m_namesAndPublishers.emplace_back( - loc.Contains(Localization::PackageName) ? NormalizeAndPrepareName(loc.Get()) : defaultName, - loc.Contains(Localization::Publisher) ? NormalizeAndPreparePublisher(loc.Get()) : defaultPublisher); + auto name = loc.Contains(Localization::PackageName) ? NormalizeAndPrepareName(loc.Get()) : defaultName; + auto publisher = loc.Contains(Localization::Publisher) ? NormalizeAndPreparePublisher(loc.Get()) : defaultPublisher; + auto nameAndPublisher = publisher + name; + + m_namesAndPublishers.emplace_back(std::move(name), std::move(publisher), std::move(nameAndPublisher)); } } } @@ -163,16 +165,23 @@ namespace AppInstaller::Repository::Correlation double EditDistanceMatchConfidenceAlgorithm::ComputeConfidence(const ARPEntry& arpEntry) const { + // Name and Publisher are available as multi properties, but for ARP entries there will only be 0 or 1 values. + auto arpName = NormalizeAndPrepareName(arpEntry.Entry->GetInstalledVersion()->GetProperty(PackageVersionProperty::Name).get()); + auto arpPublisher = NormalizeAndPreparePublisher(arpEntry.Entry->GetInstalledVersion()->GetProperty(PackageVersionProperty::Publisher).get()); + auto arpNamePublisher = arpPublisher + arpName; + // Get the best score across all localizations double bestMatchingScore = 0; for (const auto& manifestNameAndPublisher : m_namesAndPublishers) { - // Name and Publisher are available as multi properties, but for ARP entries there will only be 0 or 1 values. - auto arpName = arpEntry.Entry->GetInstalledVersion()->GetProperty(PackageVersionProperty::Name); - auto arpPublisher = arpEntry.Entry->GetInstalledVersion()->GetProperty(PackageVersionProperty::Publisher); - - auto nameDistance = EditDistanceScore(manifestNameAndPublisher.first, NormalizeAndPrepareName(arpName.get())); - auto publisherDistance = EditDistanceScore(manifestNameAndPublisher.second, NormalizeAndPreparePublisher(arpPublisher.get())); + // Sometimes the publisher may be included in the name, for example Microsoft PowerToys as opposed to simply PowerToys. + // This may happen both in the ARP entry and the manifest. We try adding it in case it is in one but not in both. + auto nameDistance = std::max( + EditDistanceScore(std::get<0>(manifestNameAndPublisher), arpName), + std::max( + EditDistanceScore(std::get<2>(manifestNameAndPublisher), arpName), + EditDistanceScore(std::get<0>(manifestNameAndPublisher), arpNamePublisher))); + auto publisherDistance = EditDistanceScore(std::get<1>(manifestNameAndPublisher), arpPublisher); // TODO: Consider other ways of merging the two values auto score = (2 * nameDistance + publisherDistance) / 3; @@ -288,12 +297,6 @@ namespace AppInstaller::Repository::Correlation } // We now have all of the package changes; time to report them. - // The set of cases we could have for changes to ARP: - // 0 packages :: No changes were detected to ARP, which could mean that the installer - // did not write an entry. It could also be a forced reinstall. - // 1 package :: Golden path; this should be what we installed. - // 2+ packages :: We need to determine which package actually matches the one that we - // were installing. // // The set of cases we could have for finding packages based on the manifest: // 0 packages :: The manifest data does not match the ARP information. @@ -318,11 +321,6 @@ namespace AppInstaller::Repository::Correlation { result.Package = findByManifest.Matches[0].Package->GetInstalledVersion(); } - // If only a single ARP entry was changed and we found no matches, report that. - else if (findByManifest.Matches.empty() && changedArpEntries.size() == 1) - { - result.Package = changedArpEntries[0].Entry->GetInstalledVersion(); - } else { // We were not able to find an exact match, so we now run some heuristics diff --git a/src/AppInstallerRepositoryCore/Public/winget/ARPCorrelation.h b/src/AppInstallerRepositoryCore/Public/winget/ARPCorrelation.h index 706a2273cd..9696495a04 100644 --- a/src/AppInstallerRepositoryCore/Public/winget/ARPCorrelation.h +++ b/src/AppInstallerRepositoryCore/Public/winget/ARPCorrelation.h @@ -83,7 +83,8 @@ namespace AppInstaller::Repository::Correlation std::u32string NormalizeAndPreparePublisher(std::string_view publisher) const; AppInstaller::Utility::NameNormalizer m_normalizer{ AppInstaller::Utility::NormalizationVersion::Initial }; - std::vector> m_namesAndPublishers; + // Each entry is a tuple { name, publisher, name + publisher } + std::vector> m_namesAndPublishers; }; // Finds the ARP entry in the ARP source that matches a newly installed package.