Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve correlation by keeping arch info declared in manifest arp DisplayName entry #3100

Merged
merged 10 commits into from
Apr 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions src/AppInstallerCLIE2ETests/ListCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,35 @@ public void ListWithScopeMsixInstalledAsUser()
TestCommon.RemoveMsix(Constants.MsixInstallerName);
}

/// <summary>
/// Test package correlation with same package name but different architecture is correct.
/// </summary>
[Test]
public void ListWithMappingWithArchitecture()
{
var installDir = TestCommon.GetRandomTestDir();
var result = TestCommon.RunAICLICommand("install", $"AppInstallerTest.TestMappingWithArchitectureX86 -l {installDir}");
Assert.AreEqual(Constants.ErrorCode.S_OK, result.ExitCode);

// List with AppInstallerTest.TestMappingWithArchitectureX64 (from available to installed scenario) will not find the package.
result = TestCommon.RunAICLICommand("list", "AppInstallerTest.TestMappingWithArchitectureX64");
Assert.AreEqual(Constants.ErrorCode.ERROR_NO_APPLICATIONS_FOUND, result.ExitCode);
Assert.False(result.StdOut.Contains("AppInstallerTest.TestMappingWithArchitectureX64"));

// List with AppInstallerTest.TestMappingWithArchitectureX86 (from available to installed scenario) will find the package.
result = TestCommon.RunAICLICommand("list", "AppInstallerTest.TestMappingWithArchitectureX86");
Assert.AreEqual(Constants.ErrorCode.S_OK, result.ExitCode);
Assert.True(result.StdOut.Contains("AppInstallerTest.TestMappingWithArchitectureX86"));

// List with product code (from installed to available scenario) will find the AppInstallerTest.TestMappingWithArchitectureX86 package.
result = TestCommon.RunAICLICommand("list", "{0e426f01-b682-4e67-a357-52f9ecb4590d}");
Assert.AreEqual(Constants.ErrorCode.S_OK, result.ExitCode);
Assert.True(result.StdOut.Contains("AppInstallerTest.TestMappingWithArchitectureX86"));

// best effort clean up
TestCommon.RunCommand(Path.Combine(installDir, Constants.TestExeUninstallerFileName));
}

private void ArpVersionMappingTest(string packageIdentifier, string displayNameOverride, string displayVersionOverride, string expectedListVersion, string notExpectedListVersion = "")
{
System.Guid guid = System.Guid.NewGuid();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
PackageIdentifier: AppInstallerTest.TestMappingWithArchitectureX64
PackageVersion: 1.0
PackageName: TestMappingWithArchitecture
PackageLocale: en-US
Publisher: Microsoft
License: Test
ShortDescription: E2E test for mapping with architecture.
Installers:
- Architecture: x64
InstallerUrl: https://localhost:5001/TestKit/AppInstallerTestExeInstaller/AppInstallerTestExeInstaller.exe
InstallerType: exe
InstallerSha256: <EXEHASH>
AppsAndFeaturesEntries:
- DisplayName: "TestMappingWithArchitecture(X64)"
InstallerSwitches:
Custom: '/DisplayName TestMappingWithArchitecture(X64) /ProductID {0e426f01-b682-4e67-a357-52f9ecb4590d}'
InstallLocation: /InstallDir <INSTALLPATH>
ManifestType: singleton
ManifestVersion: 1.2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
PackageIdentifier: AppInstallerTest.TestMappingWithArchitectureX86
PackageVersion: 1.0
PackageName: TestMappingWithArchitecture
PackageLocale: en-US
Publisher: Microsoft
License: Test
ShortDescription: E2E test for mapping with architecture.
Installers:
- Architecture: x86
InstallerUrl: https://localhost:5001/TestKit/AppInstallerTestExeInstaller/AppInstallerTestExeInstaller.exe
InstallerType: exe
InstallerSha256: <EXEHASH>
AppsAndFeaturesEntries:
- DisplayName: "TestMappingWithArchitecture(X86)"
InstallerSwitches:
Custom: '/DisplayName TestMappingWithArchitecture(X86) /ProductID {0e426f01-b682-4e67-a357-52f9ecb4590d}'
InstallLocation: /InstallDir <INSTALLPATH>
ManifestType: singleton
ManifestVersion: 1.2.0
15 changes: 15 additions & 0 deletions src/AppInstallerCLITests/NameNormalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,18 @@ TEST_CASE("NameNorm_Initial_PreserveWhitespace", "[name_norm]")
REQUIRE(normer.NormalizeName("Some Name").Name() == "Some Name");
REQUIRE(normer.NormalizePublisher("Some Publisher Corp") == "Some Publisher");
}

TEST_CASE("NameNorm_GetNormalizedName_GetNormalizedFields", "[name_norm]")
{
NameNormalizer normer(NormalizationVersion::Initial);

auto normalizedName = normer.NormalizeName("Name(X64)");
REQUIRE(normalizedName.GetNormalizedName(NormalizationField::None) == "Name");
REQUIRE(normalizedName.GetNormalizedName(NormalizationField::Architecture) == "Name(X64)");
REQUIRE(normalizedName.GetNormalizedFields() == NormalizationField::Architecture);

auto normalizedName2 = normer.NormalizeName("Name");
REQUIRE(normalizedName2.GetNormalizedName(NormalizationField::None) == "Name");
REQUIRE(normalizedName2.GetNormalizedName(NormalizationField::Architecture) == "Name");
REQUIRE(normalizedName2.GetNormalizedFields() == NormalizationField::None);
}
24 changes: 24 additions & 0 deletions src/AppInstallerCommonCore/NameNormalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,4 +501,28 @@ namespace AppInstaller::Utility
{
return m_normalizer->NormalizePublisher(publisher);
}

std::string NormalizedName::GetNormalizedName(NormalizationField fieldsToInclude) const
{
std::string result = Name();

if (WI_IsFlagSet(fieldsToInclude, NormalizationField::Architecture) && m_arch != Utility::Architecture::Unknown)
{
result += '(' + std::string(Utility::ToString(m_arch)) + ')';
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about for packages that just include their architecture and not the parens? Or where it isn't just the arch to string? Won't this still cause issues? I feel like where DisplayName is included in the manifest, the normalization shouldn't occur.

  13  AppsAndFeaturesEntries:
  14:   - DisplayName: Bluebeam Revu x64 21
  34: - DisplayName: calibre 64bit
  35    UpgradeCode: '{5DD881FF-756B-4097-9D82-8C0F11D521EA}'
  23    - ProductCode: SyncBackPro64_is1_is1
  24:     DisplayName: SyncBackPro x64
  87:   - DisplayName: 7-Zip 22.01 (x64 edition)
  88      Publisher: Igor Pavlov
  28    - Publisher: Adobe
  29:     DisplayName: Adobe Acrobat DC (64-bit)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our normalization code uses a regex which should take care of different formats of arch representations. x64, (x64), 64bit etc.

The reason we still need to do normalization on DisplayName comes from various considerations:

  1. Including every declared DisplayName explodes the index (which is currently around 13 mb), it will be at least several times bigger. Considering by default winget updates source every 3 mins if new version applicable, it's a lot of data downloading.
  2. If DisplayName contains version (or any other Package Version specific string) and we don't normalize it, our correlation will rely on the repo containing every version of the package (with every package version declaring DisplayName). For example, if DisplayName is Foo v1.0.0 and Foo v2.0.0 (2 versions exist in winget-pkgs repo), and if we don't normalize them in index, a local installed version with Foo v1.5.0 will not be able to get correlated. We cannot guarantee winget-pkgs will have every version of a package listed and client should not rely on this assumption.

According to our brief analysis, the arch is the main source of false correlation. And it makes sense to keep arch info specifically. We could expand to include locale, or even version if we decide to in the future. But each expansion will come with the cost of making index multiple times larger, and less correlation success if the installed version is not in the repo. Unless we redesign the index structure in future winget v2 (if winget v2 is a thing).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see; thanks for the detailed explanation!

}

return result;
}

NormalizationField NormalizedName::GetNormalizedFields() const
{
NormalizationField result = NormalizationField::None;

if (m_arch != Utility::Architecture::Unknown)
{
result |= NormalizationField::Architecture;
}

return result;
}
}
15 changes: 15 additions & 0 deletions src/AppInstallerCommonCore/Public/winget/NameNormalization.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ namespace AppInstaller::Utility
InitialPreserveWhiteSpace,
};

// List of name normalization fields. Architecture, locale, etc.
// Currently only architecture is used.
enum class NormalizationField : uint32_t
{
None = 0x0,
Architecture = 0x1,
};

DEFINE_ENUM_FLAG_OPERATORS(NormalizationField);

struct NameNormalizer;

// A package publisher and name that has been normalized, allowing direct
Expand All @@ -39,6 +49,11 @@ namespace AppInstaller::Utility
void Publisher(std::string&& publisher) { m_publisher = std::move(publisher); }
void Publisher(std::string_view publisher) { m_publisher = publisher; }

// Gets normalized name with additional normalization fields included.
std::string GetNormalizedName(NormalizationField fieldsToInclude) const;
// Gets a flag indicating the list of fields detected in normalization.
NormalizationField GetNormalizedFields() const;

private:
std::string m_name;
Utility::Architecture m_arch = Utility::Architecture::Unknown;
Expand Down
33 changes: 21 additions & 12 deletions src/AppInstallerRepositoryCore/CompositeSource.cpp
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this is part of a larger problem where we are taking the first match rather than treating multiple matches as an issue (and thus resulting in no matches, with some logging of course).

Also, some analysis that I hadn't thought is to look at how many normalized name and publisher values (or any of the system reference string cohort) map onto multiple package ids. Any time that is happening is likely cause for concern.

Copy link
Contributor

@Trenly Trenly Mar 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's 153 unique norm_names that map to more than one package.

ID_Name_Map_DuplicatesOnly_2023-03-29.csv

If you look at the mapping where different package id's have duplicate name AND publisher, there's 339 id's that have duplication.

ID_Name_Publisher_Map_DuplicatesOnly_2023-03-29.csv

Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,8 @@ namespace AppInstaller::Repository
return Field == other.Field && String1 == other.String1 && String2 == other.String2;
}

void AddToFilters(std::vector<PackageMatchFilter>& filters) const
void AddToFilters(
std::vector<PackageMatchFilter>& filters) const
{
switch (Field)
{
Expand Down Expand Up @@ -852,13 +853,14 @@ namespace AppInstaller::Repository
}
}

SearchRequest CreateInclusionsSearchRequest() const
SearchRequest CreateInclusionsSearchRequest(SearchPurpose searchPurpose) const
{
SearchRequest result;
for (const auto& srs : SystemReferenceStrings)
{
srs.AddToFilters(result.Inclusions);
}
result.Purpose = searchPurpose;
return result;
}
};
Expand Down Expand Up @@ -1221,7 +1223,8 @@ namespace AppInstaller::Repository
// Create a search request to run against all available sources
if (!installedPackageData.SystemReferenceStrings.empty())
{
SearchRequest systemReferenceSearch = installedPackageData.CreateInclusionsSearchRequest();
SearchRequest systemReferenceSearch = installedPackageData.CreateInclusionsSearchRequest(SearchPurpose::CorrelationToAvailable);
AICLI_LOG(Repo, Info, << "Finding available package from installed package using system reference search: " << systemReferenceSearch.ToString());

Source trackedSource;
std::shared_ptr<IPackage> trackingPackage;
Expand All @@ -1239,8 +1242,8 @@ namespace AppInstaller::Repository
std::shared_ptr<IPackage> candidatePackage = GetMatchingPackage(trackingResult.Matches,
[&]() {
AICLI_LOG(Repo, Info,
<< "Found multiple matches for installed package [" << installedVersion->GetProperty(PackageVersionProperty::Id) <<
"] in tracking catalog for source [" << source.GetIdentifier() << "] when searching for [" << systemReferenceSearch.ToString() << "]");
<< "Found multiple matches for installed package [" << installedVersion->GetProperty(PackageVersionProperty::Id) <<
"] in tracking catalog for source [" << source.GetIdentifier() << "] when searching for [" << systemReferenceSearch.ToString() << "]");
}, [&] {
AICLI_LOG(Repo, Warning, << " Appropriate tracking package could not be determined");
});
Expand All @@ -1265,8 +1268,12 @@ namespace AppInstaller::Repository
// Directly search for the available package from tracking information.
if (trackingPackage)
{
addedAvailablePackage = true;
compositePackage->AddAvailablePackage(GetTrackedPackageFromAvailableSource(result, trackedSource, trackingPackage->GetProperty(PackageProperty::Id)));
auto availablePackage = GetTrackedPackageFromAvailableSource(result, trackedSource, trackingPackage->GetProperty(PackageProperty::Id));
if (availablePackage)
{
addedAvailablePackage = true;
compositePackage->AddAvailablePackage(std::move(availablePackage));
}
compositePackage->SetTracking(std::move(trackedSource), std::move(trackingPackage), std::move(trackingPackageVersion));
}

Expand Down Expand Up @@ -1297,12 +1304,13 @@ namespace AppInstaller::Repository
auto availablePackage = GetMatchingPackage(availableResult.Matches,
[&]() {
AICLI_LOG(Repo, Info,
<< "Found multiple matches for installed package [" << installedVersion->GetProperty(PackageVersionProperty::Id) <<
"] in source [" << source.GetIdentifier() << "] when searching for [" << systemReferenceSearch.ToString() << "]");
<< "Found multiple matches for installed package [" << installedVersion->GetProperty(PackageVersionProperty::Id) <<
"] in source [" << source.GetIdentifier() << "] when searching for [" << systemReferenceSearch.ToString() << "]");
}, [&] {
AICLI_LOG(Repo, Warning, << " Appropriate available package could not be determined");
});

// For non pinning cases. We found some matching packages here, don't keep going.
addedAvailablePackage = true;
compositePackage->AddAvailablePackage(std::move(availablePackage));
}
Expand Down Expand Up @@ -1342,9 +1350,9 @@ namespace AppInstaller::Repository
// source to create a new composite package entry if we find any packages there.
if (packageData && !packageData->SystemReferenceStrings.empty())
{
// Create a search request to run against the installed source
SearchRequest systemReferenceSearch = packageData->CreateInclusionsSearchRequest();
SearchRequest systemReferenceSearch = packageData->CreateInclusionsSearchRequest(SearchPurpose::CorrelationToInstalled);

AICLI_LOG(Repo, Info, << "Finding installed package from tracking package using system reference search: " << systemReferenceSearch.ToString());
// Correlate against installed (allow exceptions out as we own the installed source)
SearchResult installedCrossRef = m_installedSource.Search(systemReferenceSearch);

Expand Down Expand Up @@ -1391,8 +1399,9 @@ namespace AppInstaller::Repository
if (packageData && !packageData->SystemReferenceStrings.empty())
{
// Create a search request to run against the installed source
SearchRequest systemReferenceSearch = packageData->CreateInclusionsSearchRequest();
SearchRequest systemReferenceSearch = packageData->CreateInclusionsSearchRequest(SearchPurpose::CorrelationToInstalled);

AICLI_LOG(Repo, Info, << "Finding installed package from available package using system reference search: " << systemReferenceSearch.ToString());
// Correlate against installed (allow exceptions out as we own the installed source)
SearchResult installedCrossRef = m_installedSource.Search(systemReferenceSearch);

Expand Down
9 changes: 7 additions & 2 deletions src/AppInstallerRepositoryCore/Microsoft/ARPHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,13 +363,19 @@ namespace AppInstaller::Repository::Microsoft
continue;
}
auto displayNameValue = displayName->GetValue<Registry::Value::Type::String>();
manifest.DefaultLocalization.Add<Manifest::Localization::PackageName>(displayNameValue);
if (displayNameValue.empty())
{
AICLI_LOG(Repo, Verbose, << "Skipping " << productCode << " because DisplayName is empty");
continue;
}

manifest.DefaultLocalization.Add<Manifest::Localization::PackageName>(displayNameValue);
// Add DisplayName to ARP entries too
// This is to help normalized publisher and name correlation where ARP DisplayName matching
// will be getting improved in future iterations.
manifest.Installers[0].AppsAndFeaturesEntries.emplace_back();
manifest.Installers[0].AppsAndFeaturesEntries[0].DisplayName = displayNameValue;

// If no version can be determined, ignore this entry
manifest.Version = DetermineVersion(arpKey);
if (manifest.Version.empty())
Expand Down Expand Up @@ -404,7 +410,6 @@ namespace AppInstaller::Repository::Microsoft
auto upgradeCodeItr = upgradeCodes.find(productCode);
if (upgradeCodeItr != upgradeCodes.end())
{
manifest.Installers[0].AppsAndFeaturesEntries.emplace_back();
manifest.Installers[0].AppsAndFeaturesEntries[0].UpgradeCode = upgradeCodeItr->second;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace AppInstaller::Repository::Microsoft::Schema::V1_1
std::vector<Utility::NormalizedString> GetSystemReferenceStrings(
const Manifest::Manifest& manifest,
std::function<const Utility::NormalizedString&(const Manifest::ManifestInstaller&)> extractStringFromInstaller,
std::function<const Utility::NormalizedString& (const Manifest::AppsAndFeaturesEntry&)> extractStringFromAppsAndFeaturesEntry = {})
std::function<const Utility::NormalizedString&(const Manifest::AppsAndFeaturesEntry&)> extractStringFromAppsAndFeaturesEntry = {})
{
std::set<Utility::NormalizedString> set;

Expand Down
Loading