Skip to content

Commit

Permalink
make CLDR radical-stroke order = UAX38
Browse files Browse the repository at this point in the history
  • Loading branch information
markusicu committed Aug 14, 2024
1 parent 5845839 commit e073141
Show file tree
Hide file tree
Showing 6 changed files with 195 additions and 206 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# CollationTest_NON_IGNORABLE.txt
# Date: 2024-06-05, 18:49:37 GMT
# Date: 2024-08-14, 00:51:38 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# CollationTest_NON_IGNORABLE_SHORT.txt
# Date: 2024-06-05, 18:49:39 GMT
# Date: 2024-08-14, 00:51:39 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# CollationTest_SHIFTED.txt
# Date: 2024-06-05, 18:49:40 GMT
# Date: 2024-08-14, 00:51:40 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# CollationTest_SHIFTED_SHORT.txt
# Date: 2024-06-05, 18:49:41 GMT
# Date: 2024-08-14, 00:51:41 GMT
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -600,12 +600,12 @@ private static void showOldData(Collator collator, String name, boolean japanese
int strokes = CldrUtility.ifNull(bestStrokesS.get(item), 0);
buffer.append(pad(String.valueOf(strokes), 3)).append(";\t");

int data = getRSShortData(item.codePointAt(0));
long order = getRSOrder(item.codePointAt(0));
String radical = null;
String remainingStrokes = null;
if (data != 0) {
radical = radicalStroke.getRadicalStringFromShortData(data);
remainingStrokes = RadicalStroke.getResidualStrokesFromShortData(data) + "";
if (order != 0) {
radical = radicalStroke.getRadicalString(order);
remainingStrokes = RadicalStroke.getResidualStrokes(order) + "";
}
buffer.append(pad(radical, 4)).append(";\t");
buffer.append(pad(remainingStrokes, 2)).append(";\t");
Expand Down Expand Up @@ -1017,14 +1017,14 @@ public static void addToStrokeInfo(UnicodeMap<Integer> bestStrokesIn, boolean si
for (final String s : bestStrokesIn) {
final int c = s.codePointAt(0);
final Integer bestStrokeInfo = bestStrokesIn.get(c);
int data = getRSShortData(c);
if (data == 0) {
long order = getRSOrder(c);
if (order == 0) {
continue;
}
int radical = RadicalStroke.getRadicalNumberFromShortData(data);
int radical = RadicalStroke.getRadicalNumber(order);
final int radicalsStrokes =
bestStrokeInfo - RadicalStroke.getResidualStrokesFromShortData(data);
if (!RadicalStroke.isSimplifiedFromShortData(data)) {
bestStrokeInfo - RadicalStroke.getResidualStrokes(order);
if (!RadicalStroke.isSimplified(order)) {
mainStrokesTotal.add(radical, radicalsStrokes);
mainCount.add(radical, 1);
} else {
Expand Down Expand Up @@ -1057,11 +1057,11 @@ public static void addToStrokeInfo(UnicodeMap<Integer> bestStrokesIn, boolean si
for (final String s :
new UnicodeSet(kRSUnicode.keySet()).removeAll(bestStrokesIn.keySet())) {
int c = s.codePointAt(0);
int data = getRSShortData(c);
int radical = RadicalStroke.getRadicalNumberFromShortData(data);
long order = getRSOrder(c);
int radical = RadicalStroke.getRadicalNumber(order);
final int computedStrokes =
RadicalStroke.getResidualStrokesFromShortData(data)
+ (RadicalStroke.isSimplifiedFromShortData(data)
RadicalStroke.getResidualStrokes(order)
+ (RadicalStroke.isSimplified(order)
? alternateStrokes[radical]
: mainStrokes[radical]);
bestStrokesIn.put(s, computedStrokes);
Expand All @@ -1079,48 +1079,33 @@ public static void addToStrokeInfo(UnicodeMap<Integer> bestStrokesIn, boolean si
}
}

/**
 * Returns the radical-stroke "short data" value for code point c, as reported by
 * radicalStroke.getShortDataForCodePoint(), trying fallbacks when the direct lookup yields 0.
 *
 * <p>Lookup order:
 *
 * <ol>
 *   <li>c itself;
 *   <li>for c below U+3000: the first code point of radicalMap.get(c);
 *   <li>otherwise: the first code point of the NFD normalization of c.
 * </ol>
 *
 * @param c the code point to look up
 * @return the short data value, or 0 if none was found (callers treat 0 as "no data")
 */
private static int getRSShortData(int c) {
// Direct lookup first; any non-zero value is returned as-is.
int data = radicalStroke.getShortDataForCodePoint(c);
if (data != 0) {
return data;
}
if (c < 0x3000) {
// NOTE(review): presumably these are radical characters that radicalMap maps to an
// ideograph carrying the actual data -- confirm against how radicalMap is built.
String radical = radicalMap.get(c);
if (radical == null) {
// No mapping for this code point: report "no data".
return 0;
}
c = radical.codePointAt(0);
assert radical.length() == Character.charCount(c); // single code point
data = radicalStroke.getShortDataForCodePoint(c);
// A mapped character is expected to always have data (checked only when assertions are on).
assert data != 0;
return data;
}
// Fall back to the first code point of the NFD form of c; the result may still be 0.
// NOTE(review): presumably this targets characters that decompose to a single ideograph
// (e.g. compatibility ideographs) -- confirm.
String decomp = nfd.normalize(c);
c = decomp.codePointAt(0);
data = radicalStroke.getShortDataForCodePoint(c);
return data;
}

private static long getRSLongOrder(int c) {
long order = radicalStroke.getLongOrder(c);
private static long getRSOrder(int c) {
long order = radicalStroke.getOrderForCodePoint(c);
if (order != 0) {
return order;
}
if (c < 0x3000) {
String radical = radicalMap.get(c);
if (radical == null) {
// Not an ideograph, sort higher than any of them.
return ((long) Integer.MAX_VALUE << 32) | c;
return 0;
}
c = radical.codePointAt(0);
assert radical.length() == Character.charCount(c); // single code point
order = radicalStroke.getLongOrder(c);
order = radicalStroke.getOrderForCodePoint(c);
assert order != 0;
return order;
}
String decomp = nfd.normalize(c);
c = decomp.codePointAt(0);
order = radicalStroke.getLongOrder(c);
return radicalStroke.getOrderForCodePoint(c);
}

/**
* Same as getRSOrder() but if c does not have radical-stroke data, then this function returns a
* value higher than that for any ideograph.
*/
private static long getRSOrderOrHigh(int c) {
long order = getRSOrder(c);
if (order == 0) {
// Not an ideograph, sort higher than any of them.
order = ((long) Integer.MAX_VALUE << 32) | c;
Expand Down Expand Up @@ -1850,8 +1835,8 @@ public int compare(String s1, String s2) {
assert Character.charCount(c1) == s1.length();
int c2 = s2.codePointAt(0);
assert Character.charCount(c2) == s2.length();
long order1 = getRSLongOrder(c1);
long order2 = getRSLongOrder(c2);
long order1 = getRSOrderOrHigh(c1);
long order2 = getRSOrderOrHigh(c2);
if (order1 != order2) {
return order1 < order2 ? -1 : 1;
}
Expand Down Expand Up @@ -1940,13 +1925,13 @@ private static String getIndexValue(InfoType infoType, String s, Output<String>
break;
case radicalStroke:
final int codepoint = s.codePointAt(0);
int data = getRSShortData(codepoint);
if (data == 0) {
long order = getRSOrder(codepoint);
if (order == 0) {
throw new IllegalArgumentException(
"Missing R-S data for U+" + Utility.hex(codepoint));
}
rest = radicalStroke.getRadicalCharFromShortData(data);
comment.value = radicalStroke.getRadicalStringFromShortData(data);
rest = radicalStroke.getRadicalChar(order);
comment.value = radicalStroke.getRadicalString(order);
break;
case stroke:
final Integer strokeCount = getStrokeValue(s, bestStrokesT);
Expand Down
Loading

0 comments on commit e073141

Please sign in to comment.