Skip to content

Commit

Permalink
[GLUTEN-8483][CORE] A stable and universal way to find component files (
Browse files Browse the repository at this point in the history
#8486)

Closes #8483
  • Loading branch information
zhztheplayer authored Jan 10, 2025
1 parent ab5a691 commit bdc0fbf
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 45 deletions.
100 changes: 62 additions & 38 deletions gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,20 @@
*/
package org.apache.gluten.utils;

import org.apache.gluten.exception.GlutenException;

import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
Expand All @@ -37,58 +41,73 @@
* and then modified for Gluten's use.
*/
public class ResourceUtil {

private static final Logger LOG = LoggerFactory.getLogger(ResourceUtil.class);

/**
* Get a collection of resource paths by the input RegEx pattern.
* Get a collection of resource paths by the input RegEx pattern in a certain container folder.
*
* @param pattern The pattern to match.
* @param container The container folder, e.g., `META-INF`. Must not be empty, because the
* ClassLoader requires at least a meaningful file name to search inside the loaded jar files.
* @param pattern The pattern to match on the file names.
* @return The relative resource paths in the order they are found.
*/
public static List<String> getResources(final Pattern pattern) {
public static List<String> getResources(final String container, final Pattern pattern) {
Preconditions.checkArgument(
!container.isEmpty(),
"Resource search should only be used under a certain container folder");
Preconditions.checkArgument(
!container.startsWith("/") && !container.endsWith("/"),
"Resource container should not start or end with\"/\"");
final List<String> buffer = new ArrayList<>();
String classPath = System.getProperty("java.class.path");
processClassPathElements(classPath, pattern, buffer);
return Collections.unmodifiableList(buffer);
}

private static void processClassPathElements(
String classPath, Pattern pattern, List<String> buffer) {
if (classPath == null || classPath.isEmpty()) {
return;
final Enumeration<URL> containerUrls;
try {
containerUrls = Thread.currentThread().getContextClassLoader().getResources(container);
} catch (IOException e) {
throw new GlutenException(e);
}
String[] classPathElements = classPath.split(File.pathSeparator);
Arrays.stream(classPathElements).forEach(element -> getResources(element, pattern, buffer));
// The Gluten project may be wrapped by another service in order to use the Native Engine.
// As a result, java.class.path points to xxx/other.jar instead of xxx/gluten.jar,
// which causes the required Components to fail to load properly.
if (buffer.isEmpty()) {
classPath = ResourceUtil.class.getProtectionDomain().getCodeSource().getLocation().getPath();
classPathElements = classPath.split(File.pathSeparator);
Arrays.stream(classPathElements).forEach(element -> getResources(element, pattern, buffer));
while (containerUrls.hasMoreElements()) {
final URL containerUrl = containerUrls.nextElement();
getResources(containerUrl, pattern, buffer);
}
return Collections.unmodifiableList(buffer);
}

private static void getResources(
final String element, final Pattern pattern, final List<String> buffer) {
final File file = new File(element);
if (!file.exists()) {
LOG.info("Skip non-existing classpath: {}", element);
return;
}
if (file.isDirectory()) {
getResourcesFromDirectory(file, file, pattern, buffer);
} else {
getResourcesFromJarFile(file, pattern, buffer);
final URL containerUrl, final Pattern pattern, final List<String> buffer) {
final String protocol = containerUrl.getProtocol();
switch (protocol) {
case "file":
final File fileContainer = new File(containerUrl.getPath());
Preconditions.checkState(
fileContainer.exists() && fileContainer.isDirectory(),
"Specified file container " + containerUrl + " is not a directory or not a file");
getResourcesFromDirectory(fileContainer, fileContainer, pattern, buffer);
break;
case "jar":
final String jarContainerPath = containerUrl.getPath();
final Pattern jarContainerPattern = Pattern.compile("file:([^!]+)!/(.+)");
final Matcher m = jarContainerPattern.matcher(jarContainerPath);
if (!m.matches()) {
throw new GlutenException("Illegal Jar container URL: " + containerUrl);
}
final String jarPath = m.group(1);
final File jarFile = new File(jarPath);
Preconditions.checkState(
jarFile.exists() && jarFile.isFile(),
"Specified Jar container " + containerUrl + " is not a Jar file");
final String dir = m.group(2);
getResourcesFromJarFile(jarFile, dir, pattern, buffer);
break;
default:
throw new GlutenException("Unrecognizable resource protocol: " + protocol);
}
}

private static void getResourcesFromJarFile(
final File file, final Pattern pattern, final List<String> buffer) {
ZipFile zf;
final File jarFile, final String dir, final Pattern pattern, final List<String> buffer) {
final ZipFile zf;
try {
zf = new ZipFile(file);
zf = new ZipFile(jarFile);
} catch (final ZipException e) {
throw new RuntimeException(e);
} catch (final IOException e) {
Expand All @@ -98,9 +117,14 @@ private static void getResourcesFromJarFile(
while (e.hasMoreElements()) {
final ZipEntry ze = (ZipEntry) e.nextElement();
final String fileName = ze.getName();
final boolean accept = pattern.matcher(fileName).matches();
if (!fileName.startsWith(dir)) {
continue;
}
final String relativeFileName =
new File(dir).toURI().relativize(new File(fileName).toURI()).getPath();
final boolean accept = pattern.matcher(relativeFileName).matches();
if (accept) {
buffer.add(fileName);
buffer.add(relativeFileName);
}
}
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.gluten.component

import org.apache.gluten.exception.GlutenException
Expand All @@ -26,11 +25,8 @@ import org.apache.spark.util.SparkReflectionUtil
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex




// format: off

/**
* Gluten's global discovery to find all [[Component]] definitions in the classpath.
*
Expand All @@ -54,12 +50,12 @@ import scala.util.matching.Regex
// format: on
private object Discovery extends Logging {
private val container: String = "META-INF/gluten-components"
private val componentFilePattern: Regex = s"^$container/(.+)$$".r
private val componentFilePattern: Regex = s"^(.+)$$".r

def discoverAll(): Seq[Component] = {
logInfo("Start discovering components in the current classpath... ")
val prev = System.currentTimeMillis()
val allFiles = ResourceUtil.getResources(componentFilePattern.pattern).asScala
val allFiles = ResourceUtil.getResources(container, componentFilePattern.pattern).asScala
val duration = System.currentTimeMillis() - prev
logInfo(s"Discovered component files: ${allFiles.mkString(", ")}. Duration: $duration ms.")
val deDup = mutable.Set[String]()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.gluten.util;

import org.apache.gluten.utils.ResourceUtil;

import org.junit.Assert;
import org.junit.Test;

import java.util.List;
import java.util.regex.Pattern;

/**
 * Sanity checks for {@link ResourceUtil#getResources(String, Pattern)}.
 *
 * <p>Each case searches under the {@code org} container for exactly one well-known class file and
 * verifies that the returned path is relative to that container.
 */
public class ResourceUtilTest {
  /**
   * Looks up this test's own class file. Judging by the test name, the resource is presumably
   * resolved from a plain directory on the classpath; confirm against the build layout.
   */
  @Test
  public void testFile() {
    assertSingleMatch(
        "org",
        "apache/gluten/util/ResourceUtilTest\\.class",
        "apache/gluten/util/ResourceUtilTest.class");
  }

  /**
   * Looks up Spark's {@code SparkContext} class file. Judging by the test name, the resource is
   * presumably resolved from a jar on the classpath; confirm against the build layout.
   */
  @Test
  public void testJar() {
    assertSingleMatch(
        "org", "apache/spark/SparkContext\\.class", "apache/spark/SparkContext.class");
  }

  /**
   * Runs a resource search and asserts that it yields exactly one path equal to {@code expected}.
   *
   * @param container the container folder passed to {@code ResourceUtil.getResources}
   * @param regex the regular expression compiled and matched against the relative resource paths
   * @param expected the single relative path that the search must return
   */
  private static void assertSingleMatch(
      final String container, final String regex, final String expected) {
    final Pattern filter = Pattern.compile(regex);
    final List<String> matches = ResourceUtil.getResources(container, filter);
    Assert.assertEquals(1, matches.size());
    Assert.assertEquals(expected, matches.get(0));
  }
}

0 comments on commit bdc0fbf

Please sign in to comment.