Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow strings in propertywise tests #911

Merged
merged 5 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,15 @@ private static void propertywiseAlikeLine(
}
}

private static String stringAt(UnicodeSet set, int i) {
int codePoints = set.size() - set.strings().size();
eggrobin marked this conversation as resolved.
Show resolved Hide resolved
if (i < codePoints) {
return Character.toString(set.charAt(i));
} else {
return set.strings().stream().skip(i - codePoints).findFirst().get();
}
}

private static void propertywiseCorrespondenceLine(
Set<String> ignoredProperties,
UnicodeSet firstSet,
Expand All @@ -538,13 +547,12 @@ private static void propertywiseCorrespondenceLine(
final List<UnicodeSet> sets = new ArrayList<>();
sets.add(firstSet);
expectToken(":", pp, source);

// Index of the first set of multi-character strings (and of the first multi-character
// reference string).
int m = -1;
markusicu marked this conversation as resolved.
Show resolved Hide resolved
do {
final var set = parseUnicodeSet(source, pp);
if (set.hasStrings()) {
throw new BackwardParseException(
"Set should contain only single code points for property comparison",
pp.getIndex());
}
if (set.size() != firstSet.size()) {
throw new BackwardParseException(
"Sets should have the same size for property correspondence (got "
Expand All @@ -554,18 +562,40 @@ private static void propertywiseCorrespondenceLine(
+ ")",
pp.getIndex());
}
if (set.hasStrings() && set.strings().size() != set.size()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would also change this to the easier to read:

            if (set.codePointsSize() > 0 && set.strings().size() > 0) {

throw new BackwardParseException(
"Sets should be all strings or all code points for property correspondence",
pp.getIndex());
}
if (m == -1) {
if (set.hasStrings()) {
m = sets.size();
}
} else if (!set.hasStrings()) {
throw new BackwardParseException(
"Code points should come before strings in property correspondence",
pp.getIndex());
}
sets.add(set);
} while (Lookahead.oneToken(pp, source).accept(":"));
final List<Integer> referenceCodePoints = new ArrayList<>();
if (m == -1) {
m = sets.size();
}
final List<String> referenceCodePoints = new ArrayList<>();
expectToken("CorrespondTo", pp, source);
do {
final var referenceSet = parseUnicodeSet(source, pp);
if (referenceSet.hasStrings() || referenceSet.size() != 1) {
if (referenceSet.size() != 1) {
throw new BackwardParseException(
"reference should be a single code point or string for property correspondence",
pp.getIndex());
}
if (referenceSet.hasStrings() != (referenceCodePoints.size() >= m)) {
throw new BackwardParseException(
"reference should be a single code point for property correspondence",
"Strings should correspond to strings for property correspondence",
pp.getIndex());
}
referenceCodePoints.add(referenceSet.charAt(0));
referenceCodePoints.add(referenceSet.iterator().next());
} while (Lookahead.oneToken(pp, source).accept(":"));
if (referenceCodePoints.size() != sets.size()) {
throw new BackwardParseException(
Expand Down Expand Up @@ -608,8 +638,8 @@ public ExpectedPropertyDifference(String actualValueAlias, String referenceValue
expectedDifference = expectedPropertyDifferences.get(alias);
}
if (expectedDifference != null) {
for (int k = 0; k < sets.size(); ++k) {
final int rk = referenceCodePoints.get(k);
for (int k = 0; k < m; ++k) {
final int rk = referenceCodePoints.get(k).codePointAt(0);
final String pRk = property.getValue(rk);
if (!Objects.equals(pRk, expectedDifference.referenceValueAlias)) {
errorMessageLines.add(
Expand Down Expand Up @@ -638,9 +668,9 @@ public ExpectedPropertyDifference(String actualValueAlias, String referenceValue
}
}
} else {
for (int k = 0; k < sets.size(); ++k) {
for (int k = 0; k < m; ++k) {
final UnicodeSet set = sets.get(k);
final int rk = referenceCodePoints.get(k);
final int rk = referenceCodePoints.get(k).codePointAt(0);
final String pRk = property.getValue(rk);
loop_over_set:
for (int i = 0; i < set.size(); ++i) {
Expand All @@ -652,10 +682,9 @@ public ExpectedPropertyDifference(String actualValueAlias, String referenceValue
Integer lMatchingForReference = null;
for (int l = 0; l < sets.size(); ++l) {
final boolean pCkEqualsCl =
Objects.equals(pCk, Character.toString(sets.get(l).charAt(i)));
Objects.equals(pCk, stringAt(sets.get(l), i));
final boolean pRkEqualsRl =
Objects.equals(
pRk, Character.toString(referenceCodePoints.get(l)));
Objects.equals(pRk, referenceCodePoints.get(l));
if (pRkEqualsRl) {
lMatchingForReference = l;
if (pCkEqualsCl) {
Expand Down Expand Up @@ -685,8 +714,7 @@ public ExpectedPropertyDifference(String actualValueAlias, String referenceValue
+ ")\t=\t"
+ pCk
+ "\t≠\t"
+ Character.toString(
sets.get(lMatchingForReference).charAt(i))
+ stringAt(sets.get(lMatchingForReference), i)
+ "\twhereas\t"
+ property.getName()
+ "("
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,16 +147,19 @@
# CorrespondTo <R₁> : ... : <Rₙ>
# [ UpTo: <Property> (<SValue> vs <RValue>) {, <Property> (<SValue> vs <RValue>) }]
#
# The Sₖ must be Unicode sets of equal size with no strings. They are considered in code
# point order for the correspondence check (item 2 below).
# The references Rₖ must be Unicode sets each containing a single code point; by a slight abuse of
# notation we refer to the code point as Rₖ in the explanation below.
# The Sₖ must be Unicode sets of equal size, either with no strings or only strings.
# They are considered in code point order for the correspondence check (item 2 below).
# The references Rₖ must be Unicode sets each containing a single code point or a single string;
# by a slight abuse of notation we refer to the code point or string as Rₖ in the explanation below.
# For some m in 1 .. n, the following must hold:
# a. Rₖ is a code point and Sₖ must contain only code points for k ≤ m, and
# b. Rₖ is a string and Sₖ must contain only strings for m < k ≤ n.
# For every non-ignored property P that does not appear in the optional UpTo clause,
# checks that for each k in 1 .. n, for the ith character C in Sₖ, either:
# checks that for each k in 1 .. m, for the ith character C in Sₖ, either:
# 1. P(C) = P(Rₖ), or
# 2. for some l in 1 .. n, both:
# — P(Rₖ) is equal to Rₗ, and
# — P(C) is equal to the ith character in Sₗ.
# — P(C) is equal to the ith character (or string, if l > m) in Sₗ.
# For every non-ignored property P that appears in the UpTo clause, checks all characters in the
# sets Sₖ have the SValue and all R characters have the RValue.
#
Expand Down Expand Up @@ -1369,6 +1372,13 @@ Ignoring Unicode_1_Name Confusable_MA:
CorrespondTo [ⁱ] : [i] : [I]
end Ignoring;

Propertywise [ゟ] : [{より}]
CorrespondTo [ヿ] : [{コト}]
UpTo: Block (Hiragana vs Katakana),
Script (Hiragana vs Katakana),
Script_Extensions (Hiragana vs Katakana),
Word_Break (Other vs Katakana)

end Ignoring;

end Ignoring;
Loading