-
Notifications
You must be signed in to change notification settings - Fork 2.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Spark 3.5: Fix NotSerializableException when migrating Spark tables #11157
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,12 +23,18 @@ | |
import java.io.IOException; | ||
import java.io.Serializable; | ||
import java.net.URI; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.Iterator; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.UUID; | ||
import java.util.concurrent.Callable; | ||
import java.util.concurrent.ExecutionException; | ||
import java.util.concurrent.ExecutorService; | ||
import java.util.concurrent.Future; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.TimeoutException; | ||
import java.util.stream.Collectors; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.Path; | ||
|
@@ -92,6 +98,8 @@ | |
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; | ||
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation; | ||
import org.apache.spark.sql.util.CaseInsensitiveStringMap; | ||
import org.jetbrains.annotations.NotNull; | ||
import org.jetbrains.annotations.Nullable; | ||
manuzhang marked this conversation as resolved.
Show resolved
Hide resolved
|
||
import scala.Function2; | ||
import scala.Option; | ||
import scala.Some; | ||
|
@@ -487,7 +495,7 @@ public static void importSparkTable( | |
stagingDir, | ||
partitionFilter, | ||
checkDuplicateFiles, | ||
TableMigrationUtil.migrationService(parallelism)); | ||
executorService(parallelism)); | ||
} | ||
|
||
/** | ||
|
@@ -711,7 +719,7 @@ public static void importSparkPartitions( | |
spec, | ||
stagingDir, | ||
checkDuplicateFiles, | ||
TableMigrationUtil.migrationService(parallelism)); | ||
executorService(parallelism)); | ||
manuzhang marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
/** | ||
|
@@ -971,4 +979,109 @@ public int hashCode() { | |
return Objects.hashCode(values, uri, format); | ||
} | ||
} | ||
|
||
@Nullable | ||
public static ExecutorService executorService(int parallelism) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this need to be public? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also this specifically makes ExecutorServices with the |
||
return parallelism == 1 ? null : new ExecutorServiceFactory(parallelism); | ||
} | ||
|
||
private static class ExecutorServiceFactory implements ExecutorService, Serializable { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This probably needs a rename, since it doesn't actually create ExecutorServices; perhaps just LazyExecutorService? |
||
|
||
// Parallelism forwarded to TableMigrationUtil.migrationService when the pool is created.
private final int parallelism;

// The backing service is created lazily and is local to the JVM that created it. It is marked
// transient so it never becomes part of this wrapper's serialized form: without that, serializing
// the wrapper after the service has been created would attempt to serialize the pool itself (the
// NotSerializableException this wrapper exists to avoid). A deserialized copy starts with a null
// field and builds its own service on first use.
private transient volatile ExecutorService service;

/**
 * Creates a wrapper that defers building the backing service until it is first needed.
 *
 * @param parallelism parallelism to request from the migration service when it is created
 */
ExecutorServiceFactory(int parallelism) {
  this.parallelism = parallelism;
}
|
||
@Override | ||
public void shutdown() { | ||
getService().shutdown(); | ||
} | ||
|
||
@NotNull | ||
@Override | ||
public List<Runnable> shutdownNow() { | ||
return getService().shutdownNow(); | ||
} | ||
|
||
@Override | ||
public boolean isShutdown() { | ||
return getService().isShutdown(); | ||
} | ||
|
||
@Override | ||
public boolean isTerminated() { | ||
return getService().isTerminated(); | ||
} | ||
|
||
@Override | ||
public boolean awaitTermination(long timeout, @NotNull TimeUnit unit) | ||
throws InterruptedException { | ||
return getService().awaitTermination(timeout, unit); | ||
} | ||
|
||
@NotNull | ||
@Override | ||
public <T> Future<T> submit(@NotNull Callable<T> task) { | ||
return getService().submit(task); | ||
} | ||
|
||
@NotNull | ||
@Override | ||
public <T> Future<T> submit(@NotNull Runnable task, T result) { | ||
return getService().submit(task, result); | ||
} | ||
|
||
@NotNull | ||
@Override | ||
public Future<?> submit(@NotNull Runnable task) { | ||
return getService().submit(task); | ||
} | ||
|
||
@NotNull | ||
@Override | ||
public <T> List<Future<T>> invokeAll(@NotNull Collection<? extends Callable<T>> tasks) | ||
throws InterruptedException { | ||
return getService().invokeAll(tasks); | ||
} | ||
|
||
@NotNull | ||
@Override | ||
public <T> List<Future<T>> invokeAll( | ||
@NotNull Collection<? extends Callable<T>> tasks, long timeout, @NotNull TimeUnit unit) | ||
throws InterruptedException { | ||
return getService().invokeAll(tasks, timeout, unit); | ||
} | ||
|
||
@NotNull | ||
@Override | ||
public <T> T invokeAny(@NotNull Collection<? extends Callable<T>> tasks) | ||
throws InterruptedException, ExecutionException { | ||
return getService().invokeAny(tasks); | ||
} | ||
|
||
@Override | ||
public <T> T invokeAny( | ||
@NotNull Collection<? extends Callable<T>> tasks, long timeout, @NotNull TimeUnit unit) | ||
throws InterruptedException, ExecutionException, TimeoutException { | ||
return getService().invokeAny(tasks, timeout, unit); | ||
} | ||
|
||
@Override | ||
public void execute(@NotNull Runnable command) { | ||
getService().execute(command); | ||
} | ||
|
||
private ExecutorService getService() { | ||
if (service == null) { | ||
synchronized (this) { | ||
if (service == null) { | ||
service = TableMigrationUtil.migrationService(parallelism); | ||
} | ||
} | ||
} | ||
return service; | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Quick check: do these tests fail without this patch? I just want to make sure, because I'm pretty sure we are running this code in local mode, and I want to confirm that the serializers break without this patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes