Skip to content

Commit

Permalink
CLDR-15388 check annotations for empty values or keyword entry without name (tts) entry
Browse files Browse the repository at this point in the history
  • Loading branch information
pedberg-icu committed Feb 19, 2025
1 parent 6e94326 commit 54e3b84
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -1,27 +1,73 @@
package org.unicode.cldr.test;

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.XPathParts;

public class CheckAnnotations extends CheckCLDR {
private static final Pattern ANNOTATION_PATH = Pattern.compile("//ldml/annotations/.*");

// Locale/cp combinations for which the tts test is skipped until
// https://unicode-org.atlassian.net/browse/CLDR-18329 is fixed.
// Key: locale ID. Value: the cp= values to skip in that locale, or null when all
// entries in the locale lack a tts version.
private static final Map<String, Set<String>> entriesLackingTts = new HashMap<>();

static {
    entriesLackingTts.put("ak", new HashSet<>(Arrays.asList("🪀")));
    entriesLackingTts.put("br", new HashSet<>(Arrays.asList("'")));
    entriesLackingTts.put("ccp", new HashSet<>(Arrays.asList("🥪")));
    entriesLackingTts.put("ha", new HashSet<>(Arrays.asList("👨‍🦯", "👨‍🦼", "👩‍🦯", "👩‍🦼")));
    entriesLackingTts.put(
            "kab", new HashSet<>(Arrays.asList("⚙", "🀄", "🌤", "🎀", "💊", "💲", "🖲", "🚀")));
    entriesLackingTts.put("om", null); // all entries lack tts version
    entriesLackingTts.put("qu", new HashSet<>(Arrays.asList("✒")));
    entriesLackingTts.put(
            "sat",
            new HashSet<>(
                    Arrays.asList(
                            "🍐", "🍑", "🍒", "🍓", "🐠", "🐡", "🐨", "🐳", "🐹", "👞",
                            "💥", "🤽‍♂", "🥐", "🥸", "🦈")));
}

@Override
public CheckCLDR handleCheck(
String path, String fullPath, String value, Options options, List<CheckStatus> result) {
if (value == null) {
return this;
} else if (!ANNOTATION_PATH.matcher(path).matches()
|| !getCldrFileToCheck().isNotRoot(path)) {
if (!ANNOTATION_PATH.matcher(path).matches()) return this;
if (!accept(result)) return this;

// check whether annotation is empty
if (value == null || value.isEmpty()) {
result.add(
new CheckStatus()
.setCause(this)
.setMainType(CheckStatus.errorType)
.setSubtype(Subtype.nullOrEmptyValue)
.setMessage("The annotation may not be empty"));
return this;
}
if (!accept(result)) return this;
CLDRFile file = getCldrFileToCheck();
final String ecode = hasAnnotationECode(value);

if (ecode != null) {
// check whether annotation value is E-code
if (ecode != null && file.isNotRoot(path)) {
result.add(
new CheckStatus()
.setCause(this)
Expand All @@ -31,6 +77,42 @@ public CheckCLDR handleCheck(
"The annotation must be a translation and not contain the E… code from root, or anything like it. ({0})",
ecode));
}

// check whether name (tts) entry corresponding to keyword entry is missing in top-level
// locales
String localeID = file.getLocaleID();
String parent = LocaleIDParser.getParent(localeID);
boolean isTopLevel = (parent == null) || parent.equals("root");
if (!path.contains("[@type=\"tts\"]") && isTopLevel) {
// this is a keyword path in top-level locale, check that corresponding tts path is
// present
XPathParts parts = XPathParts.getFrozenInstance(path).cloneAsThawed();
String ttsPath = parts.addAttribute("type", "tts").toString();
String ttsValue = file.getWinningValue(ttsPath);
boolean ttsMissing =
ttsValue == null
|| ttsValue.isEmpty()
|| (file.isNotRoot(path)
&& ecode == null
&& hasAnnotationECode(ttsValue) != null);
if (ttsMissing && entriesLackingTts.keySet().contains(localeID)) {
String cpValue = parts.findAttributeValue("annotation", "cp");
Set<String> cpEntriesLackingTts = entriesLackingTts.get(localeID);
if (cpEntriesLackingTts == null || cpEntriesLackingTts.contains(cpValue)) {
ttsMissing = !ttsMissing; // skip the error report, already have CLDR-18329
}
}
if (ttsMissing) {
result.add(
new CheckStatus()
.setCause(this)
.setMainType(CheckStatus.errorType)
.setSubtype(Subtype.ttsAnnotationMissing)
.setMessage(
"Have keywords but missing the corresponding name (tts) entry"));
}
}

return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,8 @@ public enum Subtype {
shortDateFieldInconsistentLength,
illegalParameterValue,
illegalAnnotationCode,
nullOrEmptyValue,
ttsAnnotationMissing,
illegalCharacter;

@Override
Expand Down

0 comments on commit 54e3b84

Please sign in to comment.