Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove ARP matching for single ARP change, and consider publisher+name for matching #2119

Merged
merged 4 commits into from
Apr 28, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/AppInstallerCLITests/ARPChanges.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ TEST_CASE("ARPChanges_SingleChange_NoMatch", "[ARPChanges][workflow]")
context.AddEverythingResult("EverythingId1", "EverythingName1", "EverythingPublisher1", "EverythingVersion1");

context << ReportARPChanges;
context.ExpectEvent(1, 0, 0, context.EverythingResult.Matches.back().Package.get());
context.ExpectEvent(1, 0, 0);
}

TEST_CASE("ARPChanges_SingleChange_SingleMatch", "[ARPChanges][workflow]")
Expand Down
12 changes: 6 additions & 6 deletions src/AppInstallerCLITests/Correlation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,10 +225,10 @@ DataSet GetDataSet_NoNoise()
dataSet.TestCases = LoadTestData();

// Arbitrary values. We should refine them as the algorithm gets better.
dataSet.RequiredTrueMatchRatio = 0.5;
dataSet.RequiredFalseMatchRatio = 0.1;
dataSet.RequiredTrueMatchRatio = 0.7;
dataSet.RequiredFalseMatchRatio = 0.05;
dataSet.RequiredTrueMismatchRatio = 0; // There are no expected mismatches in this data set
dataSet.RequiredFalseMismatchRatio = 0.5;
dataSet.RequiredFalseMismatchRatio = 0.3;

return dataSet;
}
Expand All @@ -242,10 +242,10 @@ DataSet GetDataSet_WithNoise()
dataSet.TestCases = std::move(baseTestCases);

// Arbitrary values. We should refine them as the algorithm gets better.
dataSet.RequiredTrueMatchRatio = 0.5;
dataSet.RequiredFalseMatchRatio = 0.1;
dataSet.RequiredTrueMatchRatio = 0.7;
dataSet.RequiredFalseMatchRatio = 0.05;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would really like false positives to be at 0. If the current algorithm is not at 0 in these unit tests, we should reconsider.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll send another PR once I figure out how to reduce the false positives.

dataSet.RequiredTrueMismatchRatio = 0; // There are no expected mismatches in this data set
dataSet.RequiredFalseMismatchRatio = 0.5;
dataSet.RequiredFalseMismatchRatio = 0.3;

return dataSet;
}
Expand Down
40 changes: 19 additions & 21 deletions src/AppInstallerRepositoryCore/ARPCorrelation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,32 +147,41 @@ namespace AppInstaller::Repository::Correlation
if (manifest.DefaultLocalization.Contains(Localization::PackageName))
{
std::u32string defaultName = NormalizeAndPrepareName(manifest.DefaultLocalization.Get<Localization::PackageName>());
m_namesAndPublishers.emplace_back(defaultName, defaultPublisher);
m_namesAndPublishers.emplace_back(defaultName, defaultPublisher, defaultName + defaultPublisher);

for (const auto& loc : manifest.Localizations)
{
if (loc.Contains(Localization::PackageName) || loc.Contains(Localization::Publisher))
{
m_namesAndPublishers.emplace_back(
loc.Contains(Localization::PackageName) ? NormalizeAndPrepareName(loc.Get<Localization::PackageName>()) : defaultName,
loc.Contains(Localization::Publisher) ? NormalizeAndPreparePublisher(loc.Get<Localization::Publisher>()) : defaultPublisher);
auto name = loc.Contains(Localization::PackageName) ? NormalizeAndPrepareName(loc.Get<Localization::PackageName>()) : defaultName;
auto publisher = loc.Contains(Localization::Publisher) ? NormalizeAndPreparePublisher(loc.Get<Localization::Publisher>()) : defaultPublisher;
auto nameAndPublisher = publisher + name;

m_namesAndPublishers.emplace_back(std::move(name), std::move(publisher), std::move(nameAndPublisher));
}
}
}
}

double EditDistanceMatchConfidenceAlgorithm::ComputeConfidence(const ARPEntry& arpEntry) const
{
// Name and Publisher are available as multi properties, but for ARP entries there will only be 0 or 1 values.
auto arpName = NormalizeAndPrepareName(arpEntry.Entry->GetInstalledVersion()->GetProperty(PackageVersionProperty::Name).get());
auto arpPublisher = NormalizeAndPreparePublisher(arpEntry.Entry->GetInstalledVersion()->GetProperty(PackageVersionProperty::Publisher).get());
auto arpNamePublisher = arpPublisher + arpName;

// Get the best score across all localizations
double bestMatchingScore = 0;
for (const auto& manifestNameAndPublisher : m_namesAndPublishers)
{
// Name and Publisher are available as multi properties, but for ARP entries there will only be 0 or 1 values.
auto arpName = arpEntry.Entry->GetInstalledVersion()->GetProperty(PackageVersionProperty::Name);
auto arpPublisher = arpEntry.Entry->GetInstalledVersion()->GetProperty(PackageVersionProperty::Publisher);

auto nameDistance = EditDistanceScore(manifestNameAndPublisher.first, NormalizeAndPrepareName(arpName.get()));
auto publisherDistance = EditDistanceScore(manifestNameAndPublisher.second, NormalizeAndPreparePublisher(arpPublisher.get()));
// Sometimes the publisher may be included in the name, for example Microsoft PowerToys as opposed to simply PowerToys.
// This may happen both in the ARP entry and the manifest. We try adding it in case it is in one but not in both.
auto nameDistance = std::max(
EditDistanceScore(std::get<0>(manifestNameAndPublisher), arpName),
std::max(
EditDistanceScore(std::get<2>(manifestNameAndPublisher), arpName),
EditDistanceScore(std::get<0>(manifestNameAndPublisher), arpNamePublisher)));
auto publisherDistance = EditDistanceScore(std::get<1>(manifestNameAndPublisher), arpPublisher);

// TODO: Consider other ways of merging the two values
auto score = (2 * nameDistance + publisherDistance) / 3;
Expand Down Expand Up @@ -288,12 +297,6 @@ namespace AppInstaller::Repository::Correlation
}

// We now have all of the package changes; time to report them.
// The set of cases we could have for changes to ARP:
// 0 packages :: No changes were detected to ARP, which could mean that the installer
// did not write an entry. It could also be a forced reinstall.
// 1 package :: Golden path; this should be what we installed.
// 2+ packages :: We need to determine which package actually matches the one that we
// were installing.
//
// The set of cases we could have for finding packages based on the manifest:
// 0 packages :: The manifest data does not match the ARP information.
Expand All @@ -318,11 +321,6 @@ namespace AppInstaller::Repository::Correlation
{
result.Package = findByManifest.Matches[0].Package->GetInstalledVersion();
}
// If only a single ARP entry was changed and we found no matches, report that.
else if (findByManifest.Matches.empty() && changedArpEntries.size() == 1)
{
result.Package = changedArpEntries[0].Entry->GetInstalledVersion();
}
else
{
// We were not able to find an exact match, so we now run some heuristics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ namespace AppInstaller::Repository::Correlation
std::u32string NormalizeAndPreparePublisher(std::string_view publisher) const;

AppInstaller::Utility::NameNormalizer m_normalizer{ AppInstaller::Utility::NormalizationVersion::Initial };
std::vector<std::pair<std::u32string, std::u32string>> m_namesAndPublishers;
// Each entry is a tuple { name, publisher, name + publisher }
std::vector<std::tuple<std::u32string, std::u32string, std::u32string>> m_namesAndPublishers;
};

// Finds the ARP entry in the ARP source that matches a newly installed package.
Expand Down