From a489c64e27779cca8610d3ad585d24fdea5e6d0d Mon Sep 17 00:00:00 2001 From: Richard Zowalla Date: Tue, 18 Jun 2024 08:49:05 +0200 Subject: [PATCH] OPENNLP-1567 - OpenNLP Models: Provide a Finder / Loader Implementation --- .../simple/SimpleClassPathModelFinder.java | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/opennlp-tools-models/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java b/opennlp-tools-models/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java index 837c8e03c..0d00f3858 100644 --- a/opennlp-tools-models/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java +++ b/opennlp-tools-models/src/main/java/opennlp/tools/models/simple/SimpleClassPathModelFinder.java @@ -33,6 +33,7 @@ import java.util.Locale; import java.util.jar.JarEntry; import java.util.jar.JarFile; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; @@ -63,6 +64,8 @@ public class SimpleClassPathModelFinder extends AbstractClassPathModelFinder imp private static final Logger logger = LoggerFactory.getLogger(SimpleClassPathModelFinder.class); private static final String FILE_PREFIX = "file"; + private static final Pattern CLASSPATH_SEPARATOR_PATTERN = Pattern.compile("[;:]"); + // ; for Windows, : for Linux/OSX /** * By default, it scans for "opennlp-models-*.jar". @@ -99,6 +102,7 @@ protected List getMatchingURIs(String wildcardPattern, Object context) { return Collections.emptyList(); } + final boolean isWindows = isWindows(); final List cp = getClassPathElements(); final List cpu = new ArrayList<>(); final Pattern jarPattern = Pattern.compile(asRegex("*" + getJarModelPrefix())); @@ -107,7 +111,7 @@ protected List getMatchingURIs(String wildcardPattern, Object context) { for (URL url : cp) { if (matchesPattern(url, jarPattern)) { try { - for (URI u : getURIsFromJar(url)) { + for (URI u : getURIsFromJar(url, isWindows)) { if (matchesPattern(u.toURL(), filePattern)) { cpu.add(u); } @@ -138,9 +142,13 @@ private boolean matchesPattern(URL url, Pattern pattern) { return pattern.matcher(url.getFile()).matches(); } - private List getURIsFromJar(URL fileUrl) throws IOException { + private List getURIsFromJar(URL fileUrl, boolean isWindows) throws IOException { final List uris = new ArrayList<>(); - final URL jarUrl = new URL(JAR + ":" + escapeWindowsURL(fileUrl) + "!/"); + final URL jarUrl = new URL(JAR + ":" + + (isWindows + ? fileUrl.toString().replace("\\", "/") + : fileUrl.toString()) + + "!/"); final JarURLConnection jarConnection = (JarURLConnection) jarUrl.openConnection(); try (JarFile jarFile = jarConnection.getJarFile()) { final Enumeration entries = jarFile.entries(); @@ -160,11 +168,6 @@ private List getURIsFromJar(URL fileUrl) throws IOException { return uris; } - - private String escapeWindowsURL(URL url) { - return isWindows() ? url.toString().replace("\\", "/") : url.toString(); - } - private boolean isWindows() { return System.getProperty("os.name", "unknown").toLowerCase(Locale.ROOT).contains("win"); } @@ -189,11 +192,11 @@ private List getClassPathElements() { return Arrays.asList(fromUcp); } else { final String cp = System.getProperty("java.class.path", ""); - final String[] elements = cp.split("[;:]"); // ; for Windows, : for Linux/OSX + final Matcher matcher = CLASSPATH_SEPARATOR_PATTERN.matcher(cp); final List jarUrls = new ArrayList<>(); - for (String element : elements) { + while (matcher.find()) { try { - jarUrls.add(new URL(FILE_PREFIX, "", element)); + jarUrls.add(new URL(FILE_PREFIX, "", matcher.group())); } catch (MalformedURLException ignored) { //if we cannot parse a URL from the system property, just ignore it... //we couldn't load it anyway @@ -210,13 +213,13 @@ private List getClassPathElements() { */ private URL[] getURLs(ClassLoader classLoader) { try { - final Class builtinClazzLoader = Class.forName("jdk.internal.loader.BuiltinClassLoader"); + final Class builtinClazzLoader = Class.forName("jdk.internal.loader.BuiltinClassLoader"); final Field ucpField = builtinClazzLoader.getDeclaredField("ucp"); ucpField.setAccessible(true); final Object ucpObject = ucpField.get(classLoader); - final Class clazz = Class.forName("jdk.internal.loader.URLClassPath"); + final Class clazz = Class.forName("jdk.internal.loader.URLClassPath"); if (ucpObject != null) { final Method getURLs = clazz.getMethod("getURLs");