From be55d63e6dc6ac0e631ae59c5eb5b254db63607f Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Mon, 18 Sep 2023 07:20:38 +0000 Subject: [PATCH 1/7] update spark to 3.3.3 Signed-off-by: minmingzhu --- README.md | 1 + examples/scala/pom-parent.xml | 2 +- mllib-dal/pom.xml | 2 +- .../com/intel/oap/mllib/classification/NaiveBayesShim.scala | 5 +++-- .../mllib/classification/RandomForestClassifierShim.scala | 6 +++--- .../scala/com/intel/oap/mllib/clustering/KMeansShim.scala | 5 +++-- .../main/scala/com/intel/oap/mllib/feature/PCAShim.scala | 5 +++-- .../scala/com/intel/oap/mllib/recommendation/ALSShim.scala | 4 ++-- .../intel/oap/mllib/regression/LinearRegressionShim.scala | 4 ++-- .../oap/mllib/regression/RandomForestRegressorShim.scala | 5 +++-- .../scala/com/intel/oap/mllib/stat/CorrelationShim.scala | 5 +++-- .../scala/com/intel/oap/mllib/stat/SummarizerShim.scala | 5 +++-- .../classification/{spark322 => spark333}/NaiveBayes.scala | 2 +- .../{spark322 => spark333}/RandomForestClassifier.scala | 2 +- .../spark/ml/clustering/{spark322 => spark333}/KMeans.scala | 2 +- .../spark/ml/feature/{spark322 => spark333}/PCA.scala | 2 +- .../ml/recommendation/{spark322 => spark333}/ALS.scala | 2 +- .../{spark322 => spark333}/LinearRegression.scala | 2 +- .../{spark322 => spark333}/RandomForestRegressor.scala | 2 +- .../spark/ml/stat/{spark322 => spark333}/Correlation.scala | 2 +- .../mllib/stat/{spark322 => spark333}/Statistics.scala | 2 +- 21 files changed, 37 insertions(+), 30 deletions(-) rename mllib-dal/src/main/scala/org/apache/spark/ml/classification/{spark322 => spark333}/NaiveBayes.scala (99%) rename mllib-dal/src/main/scala/org/apache/spark/ml/classification/{spark322 => spark333}/RandomForestClassifier.scala (99%) rename mllib-dal/src/main/scala/org/apache/spark/ml/clustering/{spark322 => spark333}/KMeans.scala (99%) rename mllib-dal/src/main/scala/org/apache/spark/ml/feature/{spark322 => spark333}/PCA.scala (98%) rename mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/{spark322 => spark333}/ALS.scala (99%) rename mllib-dal/src/main/scala/org/apache/spark/ml/regression/{spark322 => spark333}/LinearRegression.scala (99%) rename mllib-dal/src/main/scala/org/apache/spark/ml/regression/{spark322 => spark333}/RandomForestRegressor.scala (99%) rename mllib-dal/src/main/scala/org/apache/spark/ml/stat/{spark322 => spark333}/Correlation.scala (99%) rename mllib-dal/src/main/scala/org/apache/spark/mllib/stat/{spark322 => spark333}/Statistics.scala (98%) diff --git a/README.md b/README.md index 5e2e0724d..55afbf855 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,7 @@ The following runtime packages with all their dependencies should be installed i * Apache Spark 3.2.0 * Apache Spark 3.2.1 * Apache Spark 3.2.2 +* Apache Spark 3.3.3 ### Supported IntelĀ® oneAPI Toolkits diff --git a/examples/scala/pom-parent.xml b/examples/scala/pom-parent.xml index d1fec21cc..7b20fa05b 100644 --- a/examples/scala/pom-parent.xml +++ b/examples/scala/pom-parent.xml @@ -29,7 +29,7 @@ 1.6.0 2.12.15 2.12 - 3.2.2 + 3.3.3 diff --git a/mllib-dal/pom.xml b/mllib-dal/pom.xml index caedfa050..f88829542 100644 --- a/mllib-dal/pom.xml +++ b/mllib-dal/pom.xml @@ -31,7 +31,7 @@ 2.12.15 2.12 3.2.9 - 3.2.2 + 3.3.3 2023.1.0.31217 src/assembly/assembly.xml diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/NaiveBayesShim.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/NaiveBayesShim.scala index dff679027..522a7f6af 100644 --- 
a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/NaiveBayesShim.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/NaiveBayesShim.scala @@ -20,7 +20,7 @@ import com.intel.oap.mllib.Utils import org.apache.spark.internal.Logging import org.apache.spark.ml.classification.NaiveBayesModel -import org.apache.spark.ml.classification.spark322.{NaiveBayes => NaiveBayesSpark322} +import org.apache.spark.ml.classification.spark333.{NaiveBayes => NaiveBayesSpark333} import org.apache.spark.ml.param.ParamMap import org.apache.spark.sql.Dataset import org.apache.spark.{SPARK_VERSION, SparkException} @@ -35,7 +35,8 @@ object NaiveBayesShim extends Logging { logInfo(s"Loading NaiveBayes for Spark $SPARK_VERSION") val shim = Utils.getSparkVersion() match { - case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" => new NaiveBayesSpark322(uid) + case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" | "3.3.3" => + new NaiveBayesSpark333(uid) case _ => throw new SparkException(s"Unsupported Spark version $SPARK_VERSION") } shim diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierShim.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierShim.scala index 94831c8d7..66c39f9d3 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierShim.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierShim.scala @@ -19,7 +19,7 @@ import com.intel.oap.mllib.Utils import org.apache.spark.internal.Logging import org.apache.spark.ml.classification.RandomForestClassificationModel import org.apache.spark.{SPARK_VERSION, SparkException} -import org.apache.spark.ml.classification.spark322.{RandomForestClassifier => RandomForestClassifier322} +import org.apache.spark.ml.classification.spark333.{RandomForestClassifier => RandomForestClassifier333} import org.apache.spark.ml.param.ParamMap import org.apache.spark.sql.Dataset @@ -33,8 +33,8 @@ object RandomForestClassifierShim extends Logging { logInfo(s"Loading RandomForestClassifier for Spark $SPARK_VERSION") val shim = Utils.getSparkVersion() match { - case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" => - new RandomForestClassifier322(uid) + case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" | "3.3.3" => + new RandomForestClassifier333(uid) case _ => throw new SparkException(s"Unsupported Spark version $SPARK_VERSION") } shim diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansShim.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansShim.scala index 66e16c754..8c969142b 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansShim.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansShim.scala @@ -20,7 +20,7 @@ import com.intel.oap.mllib.Utils import org.apache.spark.internal.Logging import org.apache.spark.ml.clustering.{KMeans, KMeansModel} -import org.apache.spark.ml.clustering.spark322.{KMeans => KMeansSpark322} +import org.apache.spark.ml.clustering.spark333.{KMeans => KMeansSpark333} import org.apache.spark.ml.param.ParamMap import org.apache.spark.sql.Dataset import org.apache.spark.{SPARK_VERSION, SparkException} @@ -34,7 +34,8 @@ object KMeansShim extends Logging { def create(uid: String): KMeansShim = { logInfo(s"Loading KMeans for Spark $SPARK_VERSION") val kmeans = Utils.getSparkVersion() match { - case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" 
=> new KMeansSpark322(uid) + case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" | "3.3.3" => + new KMeansSpark333(uid) case _ => throw new SparkException(s"Unsupported Spark version $SPARK_VERSION") } kmeans diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCAShim.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCAShim.scala index 0f2df5e10..4b656bf6e 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCAShim.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCAShim.scala @@ -20,7 +20,7 @@ import com.intel.oap.mllib.Utils import org.apache.spark.internal.Logging import org.apache.spark.ml.feature.PCAModel -import org.apache.spark.ml.feature.spark322.{PCA => PCASpark322} +import org.apache.spark.ml.feature.spark333.{PCA => PCASpark333} import org.apache.spark.ml.param.ParamMap import org.apache.spark.sql.Dataset import org.apache.spark.{SPARK_VERSION, SparkException} @@ -34,7 +34,8 @@ object PCAShim extends Logging { def create(uid: String): PCAShim = { logInfo(s"Loading PCA for Spark $SPARK_VERSION") val pca = Utils.getSparkVersion() match { - case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" => new PCASpark322(uid) + case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" | "3.3.3" => + new PCASpark333(uid) case _ => throw new SparkException(s"Unsupported Spark version $SPARK_VERSION") } pca diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/recommendation/ALSShim.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/recommendation/ALSShim.scala index f754df8b9..e2c50f41e 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/recommendation/ALSShim.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/recommendation/ALSShim.scala @@ -21,7 +21,7 @@ import com.intel.oap.mllib.Utils import org.apache.spark.internal.Logging import org.apache.spark.ml.recommendation.ALS.Rating import org.apache.spark.ml.recommendation.spark313.{ALS => ALSSpark313} -import org.apache.spark.ml.recommendation.spark322.{ALS => ALSSpark322} +import org.apache.spark.ml.recommendation.spark333.{ALS => ALSSpark333} import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.{SPARK_VERSION, SparkException} @@ -50,7 +50,7 @@ object ALSShim extends Logging { logInfo(s"Loading ALS for Spark $SPARK_VERSION") val als = Utils.getSparkVersion() match { case "3.1.1" | "3.1.2" | "3.1.3" => new ALSSpark313() - case "3.2.0" | "3.2.1" | "3.2.2" => new ALSSpark322() + case "3.2.0" | "3.2.1" | "3.2.2" | "3.3.3" => new ALSSpark333() case _ => throw new SparkException(s"Unsupported Spark version $SPARK_VERSION") } als diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionShim.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionShim.scala index 8782d7095..fc9aea021 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionShim.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionShim.scala @@ -22,7 +22,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.regression.LinearRegressionModel import org.apache.spark.ml.regression.spark313.{LinearRegression => LinearRegressionSpark313} -import org.apache.spark.ml.regression.spark322.{LinearRegression => LinearRegressionSpark322} +import org.apache.spark.ml.regression.spark333.{LinearRegression => LinearRegressionSpark333} import org.apache.spark.sql.Dataset import 
org.apache.spark.{SPARK_VERSION, SparkException} @@ -36,7 +36,7 @@ object LinearRegressionShim extends Logging { logInfo(s"Loading ALS for Spark $SPARK_VERSION") val linearRegression = Utils.getSparkVersion() match { case "3.1.1" | "3.1.2" | "3.1.3" => new LinearRegressionSpark313(uid) - case "3.2.0" | "3.2.1" | "3.2.2" => new LinearRegressionSpark322(uid) + case "3.2.0" | "3.2.1" | "3.2.2" | "3.3.3" => new LinearRegressionSpark333(uid) case _ => throw new SparkException(s"Unsupported Spark version $SPARK_VERSION") } linearRegression diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorShim.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorShim.scala index 6cffe1e11..a056fd08e 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorShim.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorShim.scala @@ -20,7 +20,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.{SPARK_VERSION, SparkException} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.regression.RandomForestRegressionModel -import org.apache.spark.ml.regression.spark322.{RandomForestRegressor => RandomForestRegressor322} +import org.apache.spark.ml.regression.spark333.{RandomForestRegressor => RandomForestRegressor333} import org.apache.spark.sql.Dataset trait RandomForestRegressorShim extends Logging { @@ -33,7 +33,8 @@ object RandomForestRegressorShim extends Logging { logInfo(s"Loading RandomForestClassifier for Spark $SPARK_VERSION") val shim = Utils.getSparkVersion() match { - case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" => new RandomForestRegressor322(uid) + case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" | "3.3.3" => + new RandomForestRegressor333(uid) case _ => throw new SparkException(s"Unsupported Spark version $SPARK_VERSION") } shim diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationShim.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationShim.scala index 90b9f6d29..036955223 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationShim.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationShim.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.storage.StorageLevel import scala.reflect.ClassTag -import org.apache.spark.ml.stat.spark322.{Correlation => CorrelationSpark322} +import org.apache.spark.ml.stat.spark333.{Correlation => CorrelationSpark333} trait CorrelationShim extends Serializable with Logging { def corr(dataset: Dataset[_], column: String, method: String): DataFrame @@ -35,7 +35,8 @@ object CorrelationShim extends Logging { def create(): CorrelationShim = { logInfo(s"Loading Correlation for Spark $SPARK_VERSION") val als = Utils.getSparkVersion() match { - case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" => new CorrelationSpark322() + case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" | "3.3.3" => + new CorrelationSpark333() case _ => throw new SparkException(s"Unsupported Spark version $SPARK_VERSION") } als diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerShim.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerShim.scala index 5f3ff92df..38efa04a0 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerShim.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerShim.scala @@ 
-24,7 +24,7 @@ import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.stat.MultivariateStatisticalSummary import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Dataset} -import org.apache.spark.mllib.stat.spark322.{Statistics => SummarizerSpark322} +import org.apache.spark.mllib.stat.spark333.{Statistics => SummarizerSpark333} trait SummarizerShim extends Serializable with Logging { def colStats(X: RDD[Vector]): MultivariateStatisticalSummary @@ -35,7 +35,8 @@ object SummarizerShim extends Logging { def create(): SummarizerShim = { logInfo(s"Loading Summarizer for Spark $SPARK_VERSION") val summarizer = Utils.getSparkVersion() match { - case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" => new SummarizerSpark322() + case "3.1.1" | "3.1.2" | "3.1.3" | "3.2.0" | "3.2.1" | "3.2.2" | "3.3.3" => + new SummarizerSpark333() case _ => throw new SparkException(s"Unsupported Spark version $SPARK_VERSION") } summarizer diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark322/NaiveBayes.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark333/NaiveBayes.scala similarity index 99% rename from mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark322/NaiveBayes.scala rename to mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark333/NaiveBayes.scala index d59654305..12936022e 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark322/NaiveBayes.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark333/NaiveBayes.scala @@ -17,7 +17,7 @@ */ // scalastyle:on -package org.apache.spark.ml.classification.spark322 +package org.apache.spark.ml.classification.spark333 import com.intel.oap.mllib.Utils import com.intel.oap.mllib.classification.{NaiveBayesDALImpl, NaiveBayesShim} diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark322/RandomForestClassifier.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark333/RandomForestClassifier.scala similarity index 99% rename from mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark322/RandomForestClassifier.scala rename to mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark333/RandomForestClassifier.scala index 6ce3e8261..9777b1413 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark322/RandomForestClassifier.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/classification/spark333/RandomForestClassifier.scala @@ -17,7 +17,7 @@ */ // scalastyle:on -package org.apache.spark.ml.classification.spark322 +package org.apache.spark.ml.classification.spark333 import com.intel.oap.mllib.Utils import com.intel.oap.mllib.classification.{LearningNode => LearningNodeDAL, RandomForestClassifierDALImpl, RandomForestClassifierShim} diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/clustering/spark322/KMeans.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/clustering/spark333/KMeans.scala similarity index 99% rename from mllib-dal/src/main/scala/org/apache/spark/ml/clustering/spark322/KMeans.scala rename to mllib-dal/src/main/scala/org/apache/spark/ml/clustering/spark333/KMeans.scala index 8d47377ac..07b56a837 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/clustering/spark322/KMeans.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/clustering/spark333/KMeans.scala @@ -17,7 +17,7 @@ */ // scalastyle:on -package org.apache.spark.ml.clustering.spark322 +package 
org.apache.spark.ml.clustering.spark333 import com.intel.oap.mllib.Utils import com.intel.oap.mllib.clustering.{KMeansDALImpl, KMeansShim} diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/feature/spark322/PCA.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/feature/spark333/PCA.scala similarity index 98% rename from mllib-dal/src/main/scala/org/apache/spark/ml/feature/spark322/PCA.scala rename to mllib-dal/src/main/scala/org/apache/spark/ml/feature/spark333/PCA.scala index cc99b1779..e0b32fc22 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/feature/spark322/PCA.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/feature/spark333/PCA.scala @@ -17,7 +17,7 @@ */ // scalastyle:on -package org.apache.spark.ml.feature.spark322 +package org.apache.spark.ml.feature.spark333 import com.intel.oap.mllib.Utils import com.intel.oap.mllib.feature.{PCADALImpl, PCAShim} diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/spark322/ALS.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/spark333/ALS.scala similarity index 99% rename from mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/spark322/ALS.scala rename to mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/spark333/ALS.scala index 81e2d8300..2fe5cd29f 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/spark322/ALS.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/recommendation/spark333/ALS.scala @@ -17,7 +17,7 @@ */ // scalastyle:on -package org.apache.spark.ml.recommendation.spark322 +package org.apache.spark.ml.recommendation.spark333 import com.github.fommil.netlib.BLAS.{getInstance => blas} import com.intel.oap.mllib.{Utils => DALUtils} diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark322/LinearRegression.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark333/LinearRegression.scala similarity index 99% rename from mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark322/LinearRegression.scala rename to mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark333/LinearRegression.scala index ac861b2a6..a921dfbfc 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark322/LinearRegression.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark333/LinearRegression.scala @@ -17,7 +17,7 @@ */ // scalastyle:on -package org.apache.spark.ml.regression.spark322 +package org.apache.spark.ml.regression.spark333 import breeze.linalg.{DenseVector => BDV} import breeze.optimize.{ diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark322/RandomForestRegressor.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark333/RandomForestRegressor.scala similarity index 99% rename from mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark322/RandomForestRegressor.scala rename to mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark333/RandomForestRegressor.scala index 2b1557fc2..479e7fdd1 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark322/RandomForestRegressor.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/regression/spark333/RandomForestRegressor.scala @@ -17,7 +17,7 @@ */ // scalastyle:on -package org.apache.spark.ml.regression.spark322 +package org.apache.spark.ml.regression.spark333 import com.intel.oap.mllib.Utils import com.intel.oap.mllib.classification.{LearningNode => LearningNodeDAL} diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/stat/spark322/Correlation.scala 
b/mllib-dal/src/main/scala/org/apache/spark/ml/stat/spark333/Correlation.scala similarity index 99% rename from mllib-dal/src/main/scala/org/apache/spark/ml/stat/spark322/Correlation.scala rename to mllib-dal/src/main/scala/org/apache/spark/ml/stat/spark333/Correlation.scala index 1d456ea62..2fe66fe85 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/stat/spark322/Correlation.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/stat/spark333/Correlation.scala @@ -17,7 +17,7 @@ */ // scalastyle:on -package org.apache.spark.ml.stat.spark322 +package org.apache.spark.ml.stat.spark333 import com.intel.oap.mllib.Utils import com.intel.oap.mllib.stat.{CorrelationDALImpl, CorrelationShim} diff --git a/mllib-dal/src/main/scala/org/apache/spark/mllib/stat/spark322/Statistics.scala b/mllib-dal/src/main/scala/org/apache/spark/mllib/stat/spark333/Statistics.scala similarity index 98% rename from mllib-dal/src/main/scala/org/apache/spark/mllib/stat/spark322/Statistics.scala rename to mllib-dal/src/main/scala/org/apache/spark/mllib/stat/spark333/Statistics.scala index c36fd18bd..222c1139e 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/mllib/stat/spark322/Statistics.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/mllib/stat/spark333/Statistics.scala @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.apache.spark.mllib.stat.spark322 +package org.apache.spark.mllib.stat.spark333 import com.intel.oap.mllib.Utils import com.intel.oap.mllib.stat.{SummarizerDALImpl, SummarizerShim} From 75c3a8cc1e18f4dcdec9844d4eb188a4c22fbb69 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Thu, 25 Jul 2024 10:56:02 +0800 Subject: [PATCH 2/7] remove oneccl communicator --- mllib-dal/src/main/native/CorrelationImpl.cpp | 15 ++--- .../native/DecisionForestClassifierImpl.cpp | 12 ++-- .../native/DecisionForestRegressorImpl.cpp | 13 ++-- mllib-dal/src/main/native/GPU.cpp | 16 +---- mllib-dal/src/main/native/GPU.h | 3 +- mllib-dal/src/main/native/KMeansImpl.cpp | 16 +++-- .../src/main/native/LinearRegressionImpl.cpp | 15 ++--- mllib-dal/src/main/native/OneCCL.cpp | 63 ++++++++++++++----- mllib-dal/src/main/native/PCAImpl.cpp | 16 ++--- mllib-dal/src/main/native/SummarizerImpl.cpp | 19 +++--- .../javah/com_intel_oap_mllib_OneCCL__.h | 2 +- ...sification_RandomForestClassifierDALImpl.h | 2 +- ...intel_oap_mllib_clustering_KMeansDALImpl.h | 2 +- .../com_intel_oap_mllib_feature_PCADALImpl.h | 2 +- ...mllib_regression_LinearRegressionDALImpl.h | 2 +- ..._regression_RandomForestRegressorDALImpl.h | 2 +- ..._intel_oap_mllib_stat_CorrelationDALImpl.h | 2 +- ...m_intel_oap_mllib_stat_SummarizerDALImpl.h | 2 +- .../scala/com/intel/oap/mllib/OneCCL.scala | 7 ++- .../RandomForestClassifierDALImpl.scala | 6 +- .../oap/mllib/clustering/KMeansDALImpl.scala | 6 +- .../intel/oap/mllib/feature/PCADALImpl.scala | 6 +- .../regression/LinearRegressionDALImpl.scala | 6 +- .../RandomForestRegressorDALImpl.scala | 6 +- .../oap/mllib/stat/CorrelationDALImpl.scala | 6 +- .../oap/mllib/stat/SummarizerDALImpl.scala | 6 +- 26 files changed, 129 insertions(+), 124 deletions(-) diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index f2dcac75b..a9103102f 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -197,19 +197,19 @@ static void doCorrelationOneAPICompute( JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( - JNIEnv *env, jobject obj, jlong pNumTabData, 
jlong numRows, jlong numCols, + JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); - ccl::communicator &cclComm = getComm(); - int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { + ccl::communicator &cclComm = getComm(); + int rankId = cclComm.rank(); NumericTablePtr pData = *((NumericTablePtr *)pNumTabData); // Set number of threads for oneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executorCores); @@ -229,19 +229,16 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( logger::println( logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rankId); + rank); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); - int size = cclComm.size(); - - auto queue = - getAssignedGPU(device, cclComm, size, rankId, gpuIndices, nGpu); + auto queue = getAssignedGPU(device, gpuIndices); ccl::shared_ptr_class &kvs = getKvs(); auto comm = preview::spmd::make_communicator( - queue, size, rankId, kvs); + queue, executorNum, rank, kvs); doCorrelationOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj); env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0); diff --git a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp index c1d064d1b..aad8d9048 100644 --- a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp @@ -300,7 +300,7 @@ static jobject doRFClassifierOneAPICompute( */ JNIEXPORT jobject JNICALL Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassifierTrainDAL( - JNIEnv *env, jobject obj, jlong pNumTabFeature, jlong featureRows, + JNIEnv *env, jobject obj, jint rank, jlong pNumTabFeature, jlong featureRows, jlong featureCols, jlong pNumTabLabel, jlong labelCols, jint executorNum, jint computeDeviceOrdinal, jint classCount, jint treeCount, jint numFeaturesPerNode, jint minObservationsLeafNode, @@ -310,8 +310,6 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels"); - ccl::communicator &cclComm = getComm(); - int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::gpu: { @@ -319,20 +317,18 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif logger::println( logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rankId); + rank); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); - int size = cclComm.size(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); - auto queue = - getAssignedGPU(device, cclComm, size, rankId, gpuIndices, nGpu); + auto queue = getAssignedGPU(device, gpuIndices); ccl::shared_ptr_class &kvs = getKvs(); auto comm = preview::spmd::make_communicator( - queue, size, rankId, kvs); + queue, executorNum, rank, kvs); jobject hashmapObj = doRFClassifierOneAPICompute( env, pNumTabFeature, featureRows, featureCols, pNumTabLabel, labelCols, 
executorNum, computeDeviceOrdinal, classCount, treeCount, diff --git a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp index 7619c2879..853f736de 100644 --- a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp @@ -292,7 +292,7 @@ static jobject doRFRegressorOneAPICompute( JNIEXPORT jobject JNICALL Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTrainDAL( - JNIEnv *env, jobject obj, jlong pNumTabFeature, jlong featureRows, + JNIEnv *env, jobject obj, jint rank, jlong pNumTabFeature, jlong featureRows, jlong featureCols, jlong pNumTabLabel, jlong labelCols, jint executorNum, jint computeDeviceOrdinal, jint treeCount, jint numFeaturesPerNode, jint minObservationsLeafNode, jint maxTreeDepth, jlong seed, jint maxbins, @@ -301,8 +301,6 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra "OneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); - ccl::communicator &cclComm = getComm(); - int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::gpu: { @@ -310,19 +308,16 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra logger::println( logger::INFO, "OneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rankId); + rank); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); - int size = cclComm.size(); - - auto queue = - getAssignedGPU(device, cclComm, size, rankId, gpuIndices, nGpu); + auto queue = getAssignedGPU(device, gpuIndices); ccl::shared_ptr_class &kvs = getKvs(); auto comm = preview::spmd::make_communicator( - queue, size, rankId, kvs); + queue, executorNum, rank, kvs); jobject hashmapObj = doRFRegressorOneAPICompute( env, pNumTabFeature, featureRows, featureCols, pNumTabLabel, labelCols, executorNum, computeDeviceOrdinal, treeCount, diff --git a/mllib-dal/src/main/native/GPU.cpp b/mllib-dal/src/main/native/GPU.cpp index 4d60f9d78..9dbba24f4 100644 --- a/mllib-dal/src/main/native/GPU.cpp +++ b/mllib-dal/src/main/native/GPU.cpp @@ -66,8 +66,7 @@ static sycl::queue getSyclQueue(const sycl::device device) { } } -sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, - int size, int rankId, jint *gpu_indices, int n_gpu) { +sycl::queue getAssignedGPU(const ComputeDevice device, int *gpu_indices) { switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { @@ -78,19 +77,8 @@ sycl::queue getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, } case ComputeDevice::gpu: { logger::println(logger::INFO, "selector GPU"); - auto local_rank = getLocalRank(comm, size, rankId); auto gpus = get_gpus(); - - logger::println(logger::INFO, - "rank: %d size: %d local_rank: %d n_gpu: %d", rankId, - size, local_rank, n_gpu); - - auto gpu_selected = gpu_indices[local_rank % n_gpu]; - logger::println(logger::INFO, "GPU selected for current rank: %d", - gpu_selected); - - // In case gpu_selected index is larger than number of GPU SYCL devices - auto rank_gpu = gpus[gpu_selected % gpus.size()]; + auto rank_gpu = gpus[0]; sycl::queue q{rank_gpu}; return q; } diff --git a/mllib-dal/src/main/native/GPU.h b/mllib-dal/src/main/native/GPU.h index 818d3ddb4..f8d7c25a9 100644 --- a/mllib-dal/src/main/native/GPU.h +++ b/mllib-dal/src/main/native/GPU.h @@ -7,7 +7,6 @@ #include #include -sycl::queue 
getAssignedGPU(const ComputeDevice device, ccl::communicator &comm, - int size, int rankId, jint *gpu_indices, int n_gpu); +sycl::queue getAssignedGPU(const ComputeDevice device, jint *gpu_indices); sycl::queue getQueue(const ComputeDevice device); diff --git a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index a1c629612..f690c1c45 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -305,7 +305,7 @@ static jlong doKMeansOneAPICompute( */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCenters( - JNIEnv *env, jobject obj, jlong pNumTabData, jlong numRows, jlong numCols, + JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, jlong pNumTabCenters, jint clusterNum, jdouble tolerance, jint iterationNum, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { @@ -314,12 +314,13 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe ComputeDeviceString[computeDeviceOrdinal].c_str()); jlong ret = 0L; - ccl::communicator &cclComm = getComm(); - int rankId = cclComm.rank(); + ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { + ccl::communicator &cclComm = getComm(); + int rankId = cclComm.rank(); NumericTablePtr pData = *((NumericTablePtr *)pNumTabData); NumericTablePtr centroids = *((NumericTablePtr *)pNumTabCenters); // Set number of threads for OneDAL to use for each rank @@ -341,19 +342,16 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe logger::println( logger::INFO, "OneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rankId); + rank); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); - int size = cclComm.size(); - - auto queue = - getAssignedGPU(device, cclComm, size, rankId, gpuIndices, nGpu); + auto queue = getAssignedGPU(device, gpuIndices); ccl::shared_ptr_class &kvs = getKvs(); auto comm = preview::spmd::make_communicator( - queue, size, rankId, kvs); + queue, executorNum, rank, kvs); ret = doKMeansOneAPICompute(env, pNumTabData, numRows, numCols, pNumTabCenters, clusterNum, tolerance, iterationNum, comm, resultObj); diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index 017b7706f..ca94b54c5 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -225,10 +225,9 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, const bool isRoot = (rankId == ccl_root); bool fitIntercept = bool(jfitIntercept); - int size = cclComm.size(); ccl::shared_ptr_class &kvs = getKvs(); auto comm = preview::spmd::make_communicator( - queue, size, rankId, kvs); + queue, executorNum, rankId, kvs); homogen_table xtrain = *reinterpret_cast( createHomogenTableWithArrayPtr(pNumTabFeature, featureRows, featureCols, comm.get_queue()) @@ -262,7 +261,7 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTrainDAL( - JNIEnv *env, jobject obj, jlong feature, jlong featureRows, + JNIEnv *env, jobject obj, jint rank, jlong feature, jlong featureRows, jlong featureCols, jlong label, jlong labelCols, jboolean fitIntercept, jdouble regParam, jdouble elasticNetParam, 
jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, @@ -272,9 +271,6 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra "oneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); - ccl::communicator &cclComm = getComm(); - size_t rankId = cclComm.rank(); - ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); bool useGPU = false; if (device == ComputeDevice::gpu && regParam == 0) { @@ -288,15 +284,14 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra logger::println( logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rankId); + rank); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); int size = cclComm.size(); - auto queue = - getAssignedGPU(device, cclComm, size, rankId, gpuIndices, nGpu); + auto queue = getAssignedGPU(device, gpuIndices); resultptr = doLROneAPICompute( - env, rankId, cclComm, queue, feature, featureRows, featureCols, + env, rank, cclComm, queue, feature, featureRows, featureCols, label, labelCols, fitIntercept, executorNum, resultObj); env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0); #endif diff --git a/mllib-dal/src/main/native/OneCCL.cpp b/mllib-dal/src/main/native/OneCCL.cpp index 7d147d1a8..79fbeebed 100644 --- a/mllib-dal/src/main/native/OneCCL.cpp +++ b/mllib-dal/src/main/native/OneCCL.cpp @@ -32,6 +32,7 @@ #include "Logger.h" #include "OneCCL.h" #include "com_intel_oap_mllib_OneCCL__.h" +#include "service.h" extern const size_t ccl_root = 0; @@ -46,7 +47,7 @@ ccl::communicator &getComm() { return g_comms[0]; } ccl::shared_ptr_class &getKvs() { return g_kvs[0]; } JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( - JNIEnv *env, jobject obj, jint size, jint rank, jstring ip_port, + JNIEnv *env, jobject obj, jint size, jint rank, jstring ip_port, jint computeDeviceOrdinal, jobject param) { logger::println(logger::INFO, "OneCCL (native): init"); @@ -57,29 +58,34 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( const char *str = env->GetStringUTFChars(ip_port, 0); ccl::string ccl_ip_port(str); + const char *device = env->GetStringUTFChars(use_device, 0); + ccl::string ccl_ip_port(str); auto &singletonCCLInit = CCLInitSingleton::get(size, rank, ccl_ip_port); g_kvs.push_back(singletonCCLInit.kvs); - g_comms.push_back( - ccl::create_communicator(size, rank, singletonCCLInit.kvs)); - - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = - (float)std::chrono::duration_cast(t2 - t1) - .count(); - logger::println(logger::INFO, "OneCCL (native): init took %f secs", - duration / 1000); - - rank_id = getComm().rank(); - comm_size = getComm().size(); + ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); + switch (device) { + case ComputeDevice::host: + case ComputeDevice::cpu: { + g_comms.push_back( + ccl::create_communicator(size, rank, singletonCCLInit.kvs)); + + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + (float)std::chrono::duration_cast(t2 - t1) + .count(); + logger::println(logger::INFO, "OneCCL (native): init took %f secs", + duration / 1000); + break; + } jclass cls = env->GetObjectClass(param); jfieldID fid_comm_size = env->GetFieldID(cls, "commSize", "J"); jfieldID fid_rank_id = env->GetFieldID(cls, "rankId", "J"); - env->SetLongField(param, fid_comm_size, comm_size); - env->SetLongField(param, fid_rank_id, rank_id); + env->SetLongField(param, size, 
comm_size); + env->SetLongField(param, rank, rank_id); env->ReleaseStringUTFChars(ip_port, str); return 1; @@ -91,10 +97,35 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( * Signature: ()I */ JNIEXPORT jint JNICALL -Java_com_intel_oap_mllib_OneCCL_00024_c_1initDpcpp(JNIEnv *env, jobject) { +Java_com_intel_oap_mllib_OneCCL_00024_c_1initDpcpp(JNIEnv *env, jobject, jint size, jint rank, jobject param) { logger::printerrln(logger::INFO, "OneCCL (native): init dpcpp"); + auto t1 = std::chrono::high_resolution_clock::now(); + ccl::init(); + const char *str = env->GetStringUTFChars(ip_port, 0); + ccl::string ccl_ip_port(str); + + auto &singletonCCLInit = CCLInitSingleton::get(size, rank, ccl_ip_port); + + g_kvs.push_back(singletonCCLInit.kvs); + + + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + (float)std::chrono::duration_cast(t2 - t1) + .count(); + logger::println(logger::INFO, "OneCCL (native): init took %f secs", + duration / 1000); + + jclass cls = env->GetObjectClass(param); + jfieldID fid_comm_size = env->GetFieldID(cls, "commSize", "J"); + jfieldID fid_rank_id = env->GetFieldID(cls, "rankId", "J"); + + env->SetLongField(param, size, comm_size); + env->SetLongField(param, rank, rank_id); + env->ReleaseStringUTFChars(ip_port, str); + return 1; } diff --git a/mllib-dal/src/main/native/PCAImpl.cpp b/mllib-dal/src/main/native/PCAImpl.cpp index 0600b47d9..f2821d558 100644 --- a/mllib-dal/src/main/native/PCAImpl.cpp +++ b/mllib-dal/src/main/native/PCAImpl.cpp @@ -250,19 +250,18 @@ static void doPCAOneAPICompute( JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( - JNIEnv *env, jobject obj, jlong pNumTabData, jlong numRows, jlong numCols, + JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); - - ccl::communicator &cclComm = getComm(); - size_t rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { + ccl::communicator &cclComm = getComm(); + size_t rankId = cclComm.rank(); NumericTablePtr pData = *((NumericTablePtr *)pNumTabData); // Set number of threads for oneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executorCores); @@ -282,19 +281,16 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( logger::println( logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rankId); + rank); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); - int size = cclComm.size(); - - auto queue = - getAssignedGPU(device, cclComm, size, rankId, gpuIndices, nGpu); + auto queue = getAssignedGPU(device, gpuIndices); ccl::shared_ptr_class &kvs = getKvs(); auto comm = preview::spmd::make_communicator( - queue, size, rankId, kvs); + queue, executorNum, rank, kvs); doPCAOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj); env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0); break; diff --git a/mllib-dal/src/main/native/SummarizerImpl.cpp b/mllib-dal/src/main/native/SummarizerImpl.cpp index 52b585dc2..9af30d939 100644 --- a/mllib-dal/src/main/native/SummarizerImpl.cpp +++ b/mllib-dal/src/main/native/SummarizerImpl.cpp @@ -268,19 +268,18 @@ static void 
doSummarizerOneAPICompute( JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( - JNIEnv *env, jobject obj, jlong pNumTabData, jlong numRows, jlong numCols, + JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); - - ccl::communicator &cclComm = getComm(); - int rankId = cclComm.rank(); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::host: case ComputeDevice::cpu: { + ccl::communicator &cclComm = getComm(); + int rankId = cclComm.rank(); NumericTablePtr pData = *((NumericTablePtr *)pNumTabData); // Set number of threads for oneDAL to use for each rank services::Environment::getInstance()->setNumberOfThreads(executorCores); @@ -300,19 +299,15 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( logger::println( logger::INFO, "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rankId); + rank); jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); - - int size = cclComm.size(); - - auto queue = - getAssignedGPU(device, cclComm, size, rankId, gpuIndices, nGpu); + auto queue = getAssignedGPU(device, gpuIndices); ccl::shared_ptr_class &kvs = getKvs(); auto comm = preview::spmd::make_communicator( - queue, size, rankId, kvs); + queue, executorNum, rank, kvs); doSummarizerOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj); env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0); @@ -320,7 +315,7 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( } #endif default: { - deviceError("PCA", ComputeDeviceString[computeDeviceOrdinal].c_str()); + deviceError("Summarizer", ComputeDeviceString[computeDeviceOrdinal].c_str()); } } return 0; diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneCCL__.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneCCL__.h index a89b7d214..4bfa1d0c3 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneCCL__.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneCCL__.h @@ -45,7 +45,7 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1getAvailPort * Signature: (IILjava/lang/String;Lcom/intel/oap/mllib/CCLParam;)I */ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init - (JNIEnv *, jobject, jint, jint, jstring, jobject); + (JNIEnv *, jobject, jint, jint, jstring, jstring, jobject); /* * Class: com_intel_oap_mllib_OneCCL__ diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_classification_RandomForestClassifierDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_classification_RandomForestClassifierDALImpl.h index 8c0c4ecdd..79bd6f16f 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_classification_RandomForestClassifierDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_classification_RandomForestClassifierDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JJIIIIIIIDDIJIZ[ILcom/intel/oap/mllib/classification/RandomForestResult;)Ljava/util/HashMap; */ JNIEXPORT jobject JNICALL Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassifierTrainDAL - (JNIEnv *, jobject, jlong, jlong, jlong, jlong, jlong, jint, jint, jint, jint, jint, jint, jint, jdouble, jdouble, jint, jlong, 
jint, jboolean, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jlong, jlong, jint, jint, jint, jint, jint, jint, jint, jdouble, jdouble, jint, jlong, jint, jboolean, jintArray, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_clustering_KMeansDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_clustering_KMeansDALImpl.h index 595f69fb5..a0fc24dde 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_clustering_KMeansDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_clustering_KMeansDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JJIDIIII[ILcom/intel/oap/mllib/clustering/KMeansResult;)J */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCenters - (JNIEnv *, jobject, jlong, jlong, jlong, jlong, jint, jdouble, jint, jint, jint, jint, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jlong, jint, jdouble, jint, jint, jint, jint, jintArray, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_feature_PCADALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_feature_PCADALImpl.h index 2ac220860..34646da95 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_feature_PCADALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_feature_PCADALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JIII[ILcom/intel/oap/mllib/feature/PCAResult;)J */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL - (JNIEnv *, jobject, jlong, jlong, jlong, jint, jint, jint, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jint, jint, jint, jintArray, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_LinearRegressionDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_LinearRegressionDALImpl.h index 28c7e8f42..0dc6f4e79 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_LinearRegressionDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_LinearRegressionDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JJZDDIII[ILcom/intel/oap/mllib/regression/LiRResult;)J */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTrainDAL - (JNIEnv *, jobject, jlong, jlong, jlong, jlong, jlong, jboolean, jdouble, jdouble, jint, jint, jint, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jlong, jlong, jboolean, jdouble, jdouble, jint, jint, jint, jintArray, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h index ac457b3bf..1350d8268 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JJIIIIIIJIZ[ILcom/intel/oap/mllib/classification/RandomForestResult;)Ljava/util/HashMap; */ JNIEXPORT jobject JNICALL Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTrainDAL - (JNIEnv *, jobject, jlong, jlong, jlong, jlong, jlong, jint, jint, jint, jint, jint, jint, jlong, jint, jboolean, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, 
jlong, jlong, jlong, jlong, jint, jint, jint, jint, jint, jint, jlong, jint, jboolean, jintArray, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_CorrelationDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_CorrelationDALImpl.h index 96219ae4f..494b89658 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_CorrelationDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_CorrelationDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JIII[ILcom/intel/oap/mllib/stat/CorrelationResult;)J */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL - (JNIEnv *, jobject, jlong, jlong, jlong, jint, jint, jint, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jint, jint, jint, jintArray, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_SummarizerDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_SummarizerDALImpl.h index 754a5b645..7db45743f 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_SummarizerDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_SummarizerDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JIII[ILcom/intel/oap/mllib/stat/SummarizerResult;)J */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL - (JNIEnv *, jobject, jlong, jlong, jlong, jint, jint, jint, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jint, jint, jint, jintArray, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala index 48caebe1b..70ddef079 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala @@ -24,14 +24,14 @@ object OneCCL extends Logging { var cclParam = new CCLParam() - def init(executor_num: Int, rank: Int, ip_port: String): Unit = { + def init(executor_num: Int, rank: Int, ip_port: String, computeDeviceOrdinal: Int): Unit = { setExecutorEnv() logInfo(s"Initializing with IP_PORT: ${ip_port}") // cclParam is output from native code - c_init(executor_num, rank, ip_port, cclParam) + c_init(executor_num, rank, ip_port, computeDeviceOrdinal, cclParam) // executor number should equal to oneCCL world size assert(executor_num == cclParam.getCommSize, @@ -67,7 +67,8 @@ object OneCCL extends Logging { @native def c_getAvailPort(localIP: String): Int - @native private def c_init(size: Int, rank: Int, ip_port: String, param: CCLParam): Int + @native private def c_init(size: Int, rank: Int, ip_port: String, + computeDeviceOrdinal: Int, param: CCLParam): Int @native private def c_cleanup(): Unit } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala index 8e4d27160..d0cfa42e4 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala @@ -76,7 +76,7 @@ class RandomForestClassifierDALImpl(val uid: String, val kvsIPPort = getOneCCLIPPort(labeledPointsTables) labeledPointsTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) + OneCCL.init(executorNum, 
rank, kvsIPPort, computeDevice.ordinal()) Iterator.empty }.count() rfcTimer.record("OneCCL Init") @@ -96,6 +96,7 @@ class RandomForestClassifierDALImpl(val uid: String, val computeStartTime = System.nanoTime() val result = new RandomForestResult val hashmap = cRFClassifierTrainDAL( + rank, feature._1, feature._2, feature._3, @@ -140,7 +141,8 @@ class RandomForestClassifierDALImpl(val uid: String, results(0) } - @native private[mllib] def cRFClassifierTrainDAL(featureTabAddr: Long, + @native private[mllib] def cRFClassifierTrainDAL(rank: Int, + featureTabAddr: Long, numRows: Long, numCols: Long, lableTabAddr: Long, diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala index e194e9d22..d8752fcd3 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala @@ -53,7 +53,7 @@ class KMeansDALImpl(var nClusters: Int, val kvsIPPort = getOneCCLIPPort(coalescedTables) coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) + OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) Iterator.empty }.count() kmeansTimer.record("OneCCL Init") @@ -81,6 +81,7 @@ class KMeansDALImpl(var nClusters: Int, } cCentroids = cKMeansOneapiComputeWithInitCenters( + rank, tableArr, rows, columns, @@ -136,7 +137,8 @@ class KMeansDALImpl(var nClusters: Int, parentModel } - @native private[mllib] def cKMeansOneapiComputeWithInitCenters(data: Long, + @native private[mllib] def cKMeansOneapiComputeWithInitCenters( rank: Int, + data: Long, numRows: Long, numCols: Long, centers: Long, diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala index 0410c18a7..b9df1f6c3 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala @@ -60,7 +60,7 @@ class PCADALImpl(val k: Int, pcaTimer.record("Data Convertion") coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) + OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) Iterator.empty }.count() pcaTimer.record("OneCCL Init") @@ -79,6 +79,7 @@ class PCADALImpl(val k: Int, null } cPCATrainDAL( + rank, tableArr, rows, columns, @@ -214,7 +215,8 @@ class PCADALImpl(val k: Int, // Single entry to call Correlation PCA DAL backend with parameter K - @native private[mllib] def cPCATrainDAL(data: Long, + @native private[mllib] def cPCATrainDAL(rank: Int, + data: Long, numRows: Long, numCols: Long, executorNum: Int, diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala index 806fdb40c..a0ed680d6 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala @@ -121,7 +121,7 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, (label.toString.toLong, 0L, 0L) } - OneCCL.init(executorNum, rank, kvsIPPort) + OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) val result = new LiRResult() val gpuIndices = if (useDevice == "GPU") { @@ -138,6 +138,7 @@ class LinearRegressionDALImpl( val 
fitIntercept: Boolean, } val cbeta = cLinearRegressionTrainDAL( + rank, featureTabAddr, featureRows, featureColumns, @@ -183,7 +184,8 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, } // Single entry to call Linear Regression DAL backend with parameters - @native private def cLinearRegressionTrainDAL(data: Long, + @native private def cLinearRegressionTrainDAL(rank: Int, + data: Long, numRows: Long, numCols: Long, label: Long, diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala index 16fd17cdb..11e924cd6 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala @@ -70,7 +70,7 @@ class RandomForestRegressorDALImpl(val uid: String, val kvsIPPort = getOneCCLIPPort(labeledPointsTables) labeledPointsTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) + OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) Iterator.empty }.count() rfrTimer.record("OneCCL Init") @@ -91,6 +91,7 @@ class RandomForestRegressorDALImpl(val uid: String, val computeStartTime = System.nanoTime() val result = new RandomForestResult val hashmap = cRFRegressorTrainDAL( + rank, feature._1, feature._2, feature._3, @@ -141,7 +142,8 @@ class RandomForestRegressorDALImpl(val uid: String, results(0)._2 } - @native private[mllib] def cRFRegressorTrainDAL(featureTabAddr: Long, + @native private[mllib] def cRFRegressorTrainDAL(rank: Int, + featureTabAddr: Long, numRows: Long, numCols: Long, lableTabAddr: Long, diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala index 21465e1af..73a172bb0 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala @@ -47,7 +47,7 @@ class CorrelationDALImpl( val kvsIPPort = getOneCCLIPPort(coalescedTables) coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) + OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) Iterator.empty }.count() corTimer.record("OneCCL Init") @@ -69,6 +69,7 @@ class CorrelationDALImpl( null } cCorrelationTrainDAL( + rank, tableArr, rows, columns, @@ -118,7 +119,8 @@ class CorrelationDALImpl( } - @native private[mllib] def cCorrelationTrainDAL(data: Long, + @native private[mllib] def cCorrelationTrainDAL(rank: Int, + data: Long, numRows: Long, numCols: Long, executorNum: Int, diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala index a516962c3..3828674f2 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala @@ -48,7 +48,7 @@ class SummarizerDALImpl(val executorNum: Int, val kvsIPPort = getOneCCLIPPort(data) coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) + OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) Iterator.empty }.count() sumTimer.record("OneCCL Init") @@ -70,6 +70,7 @@ class SummarizerDALImpl(val executorNum: Int, null } cSummarizerTrainDAL( + rank, 
tableArr, rows, columns, @@ -150,7 +151,8 @@ class SummarizerDALImpl(val executorNum: Int, summary } - @native private[mllib] def cSummarizerTrainDAL(data: Long, + @native private[mllib] def cSummarizerTrainDAL(rank: Int, + data: Long, numRows: Long, numCols: Long, executorNum: Int, From 407535a988253daf19048404c8d4283c786a6992 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Thu, 15 Aug 2024 15:03:38 +0800 Subject: [PATCH 3/7] update --- mllib-dal/src/main/native/OneCCL.cpp | 29 +++++++++---------- .../scala/com/intel/oap/mllib/OneCCL.scala | 7 ++--- .../RandomForestClassifierDALImpl.scala | 2 +- .../oap/mllib/clustering/KMeansDALImpl.scala | 2 +- .../intel/oap/mllib/feature/PCADALImpl.scala | 2 +- .../regression/LinearRegressionDALImpl.scala | 2 +- .../RandomForestRegressorDALImpl.scala | 2 +- .../oap/mllib/stat/CorrelationDALImpl.scala | 2 +- .../oap/mllib/stat/SummarizerDALImpl.scala | 2 +- 9 files changed, 23 insertions(+), 27 deletions(-) diff --git a/mllib-dal/src/main/native/OneCCL.cpp b/mllib-dal/src/main/native/OneCCL.cpp index 79fbeebed..b924c6987 100644 --- a/mllib-dal/src/main/native/OneCCL.cpp +++ b/mllib-dal/src/main/native/OneCCL.cpp @@ -47,7 +47,7 @@ ccl::communicator &getComm() { return g_comms[0]; } ccl::shared_ptr_class &getKvs() { return g_kvs[0]; } JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( - JNIEnv *env, jobject obj, jint size, jint rank, jstring ip_port, jint computeDeviceOrdinal, + JNIEnv *env, jobject obj, jint size, jint rank, jstring ip_port, jobject param) { logger::println(logger::INFO, "OneCCL (native): init"); @@ -64,21 +64,18 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( auto &singletonCCLInit = CCLInitSingleton::get(size, rank, ccl_ip_port); g_kvs.push_back(singletonCCLInit.kvs); - ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); - switch (device) { - case ComputeDevice::host: - case ComputeDevice::cpu: { - g_comms.push_back( - ccl::create_communicator(size, rank, singletonCCLInit.kvs)); - - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = - (float)std::chrono::duration_cast(t2 - t1) - .count(); - logger::println(logger::INFO, "OneCCL (native): init took %f secs", - duration / 1000); - break; - } + +#ifdef CPU_ONLY_PROFILE + g_comms.push_back( + ccl::create_communicator(size, rank, singletonCCLInit.kvs)); + + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + (float)std::chrono::duration_cast(t2 - t1) + .count(); + logger::println(logger::INFO, "OneCCL (native): init took %f secs", + duration / 1000); +#endif jclass cls = env->GetObjectClass(param); jfieldID fid_comm_size = env->GetFieldID(cls, "commSize", "J"); diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala index 70ddef079..48caebe1b 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala @@ -24,14 +24,14 @@ object OneCCL extends Logging { var cclParam = new CCLParam() - def init(executor_num: Int, rank: Int, ip_port: String, computeDeviceOrdinal: Int): Unit = { + def init(executor_num: Int, rank: Int, ip_port: String): Unit = { setExecutorEnv() logInfo(s"Initializing with IP_PORT: ${ip_port}") // cclParam is output from native code - c_init(executor_num, rank, ip_port, computeDeviceOrdinal, cclParam) + c_init(executor_num, rank, ip_port, cclParam) // executor number should equal to oneCCL world size assert(executor_num == 
cclParam.getCommSize, @@ -67,8 +67,7 @@ object OneCCL extends Logging { @native def c_getAvailPort(localIP: String): Int - @native private def c_init(size: Int, rank: Int, ip_port: String, - computeDeviceOrdinal: Int, param: CCLParam): Int + @native private def c_init(size: Int, rank: Int, ip_port: String, param: CCLParam): Int @native private def c_cleanup(): Unit } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala index d0cfa42e4..6aca49f14 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala @@ -76,7 +76,7 @@ class RandomForestClassifierDALImpl(val uid: String, val kvsIPPort = getOneCCLIPPort(labeledPointsTables) labeledPointsTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) + OneCCL.init(executorNum, rank, kvsIPPort) Iterator.empty }.count() rfcTimer.record("OneCCL Init") diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala index d8752fcd3..64b2f6c7f 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala @@ -53,7 +53,7 @@ class KMeansDALImpl(var nClusters: Int, val kvsIPPort = getOneCCLIPPort(coalescedTables) coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) + OneCCL.init(executorNum, rank, kvsIPPort) Iterator.empty }.count() kmeansTimer.record("OneCCL Init") diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala index b9df1f6c3..8eb9554a1 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala @@ -60,7 +60,7 @@ class PCADALImpl(val k: Int, pcaTimer.record("Data Convertion") coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) + OneCCL.init(executorNum, rank, kvsIPPort) Iterator.empty }.count() pcaTimer.record("OneCCL Init") diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala index a0ed680d6..f95bc0846 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala @@ -121,7 +121,7 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, (label.toString.toLong, 0L, 0L) } - OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) + OneCCL.init(executorNum, rank, kvsIPPort) val result = new LiRResult() val gpuIndices = if (useDevice == "GPU") { diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala index 11e924cd6..018473a61 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala 
+++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala @@ -70,7 +70,7 @@ class RandomForestRegressorDALImpl(val uid: String, val kvsIPPort = getOneCCLIPPort(labeledPointsTables) labeledPointsTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) + OneCCL.init(executorNum, rank, kvsIPPort) Iterator.empty }.count() rfrTimer.record("OneCCL Init") diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala index 73a172bb0..e521aefe7 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala @@ -47,7 +47,7 @@ class CorrelationDALImpl( val kvsIPPort = getOneCCLIPPort(coalescedTables) coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) + OneCCL.init(executorNum, rank, kvsIPPort) Iterator.empty }.count() corTimer.record("OneCCL Init") diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala index 3828674f2..277039ab1 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala @@ -48,7 +48,7 @@ class SummarizerDALImpl(val executorNum: Int, val kvsIPPort = getOneCCLIPPort(data) coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort, computeDevice.ordinal()) + OneCCL.init(executorNum, rank, kvsIPPort) Iterator.empty }.count() sumTimer.record("OneCCL Init") From 330b3d87a85cee6f1710d9eadc6f1d4d0eb0e900 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Fri, 16 Aug 2024 11:16:42 +0800 Subject: [PATCH 4/7] set ZE_AFFINITY_MASK=rankId --- .../src/main/scala/com/intel/oap/mllib/OneCCL.scala | 8 ++------ .../RandomForestClassifierDALImpl.scala | 11 +++++++++++ .../intel/oap/mllib/clustering/KMeansDALImpl.scala | 10 ++++++++++ .../com/intel/oap/mllib/feature/PCADALImpl.scala | 11 +++++++++++ .../mllib/regression/LinearRegressionDALImpl.scala | 11 +++++++++++ .../regression/RandomForestRegressorDALImpl.scala | 11 +++++++++++ .../com/intel/oap/mllib/stat/CorrelationDALImpl.scala | 11 +++++++++++ .../com/intel/oap/mllib/stat/SummarizerDALImpl.scala | 11 +++++++++++ 8 files changed, 78 insertions(+), 6 deletions(-) diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala index 48caebe1b..c89c9ffd2 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/OneCCL.scala @@ -42,12 +42,8 @@ object OneCCL extends Logging { } // Run on Executor - def setExecutorEnv(): Unit = { - setEnv("CCL_ATL_TRANSPORT", "ofi") - // Set CCL_ROOT to workaround CCL_ROOT env read bug, should remove when upstream fix this - setEnv("CCL_ROOT", "/opt/intel/oneapi/ccl/latest") - // Uncomment this if you whant to debug oneCCL - // setEnv("CCL_LOG_LEVEL", "debug") + def setExecutorEnv(key: String, value: String): Unit = { + setEnv(key, value) } // Run on Executor diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala 
index 6aca49f14..70479c79a 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala @@ -75,6 +75,17 @@ class RandomForestClassifierDALImpl(val uid: String, rfcTimer.record("Data Convertion") val kvsIPPort = getOneCCLIPPort(labeledPointsTables) + labeledPointsTables.mapPartitionsWithIndex { (rank, iter) => + val gpuIndices = if (useDevice == "GPU") { + val resources = TaskContext.get().resources() + resources("gpu").addresses.map(_.toInt) + } else { + null + } + OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) + Iterator.empty + }.count() + labeledPointsTables.mapPartitionsWithIndex { (rank, table) => OneCCL.init(executorNum, rank, kvsIPPort) Iterator.empty diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala index 64b2f6c7f..14eb16800 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala @@ -51,6 +51,16 @@ class KMeansDALImpl(var nClusters: Int, kmeansTimer.record("Data Convertion") val kvsIPPort = getOneCCLIPPort(coalescedTables) + coalescedTables.mapPartitionsWithIndex { (rank, iter) => + val gpuIndices = if (useDevice == "GPU") { + val resources = TaskContext.get().resources() + resources("gpu").addresses.map(_.toInt) + } else { + null + } + OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) + Iterator.empty + }.count() coalescedTables.mapPartitionsWithIndex { (rank, table) => OneCCL.init(executorNum, rank, kvsIPPort) diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala index 8eb9554a1..7190ade3f 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala @@ -59,6 +59,17 @@ class PCADALImpl(val k: Int, val kvsIPPort = getOneCCLIPPort(coalescedTables) pcaTimer.record("Data Convertion") + coalescedTables.mapPartitionsWithIndex { (rank, iter) => + val gpuIndices = if (useDevice == "GPU") { + val resources = TaskContext.get().resources() + resources("gpu").addresses.map(_.toInt) + } else { + null + } + OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) + Iterator.empty + }.count() + coalescedTables.mapPartitionsWithIndex { (rank, table) => OneCCL.init(executorNum, rank, kvsIPPort) Iterator.empty diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala index f95bc0846..40b2f4423 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala @@ -106,6 +106,17 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, } lrTimer.record("Data Convertion") + labeledPointsTables.mapPartitionsWithIndex { (rank, iter) => + val gpuIndices = if (useDevice == "GPU") { + val resources = TaskContext.get().resources() + resources("gpu").addresses.map(_.toInt) + } else { + null + } + OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) + Iterator.empty + }.count() + val results = 
labeledPointsTables.mapPartitionsWithIndex { (rank, tables) => val (feature, label) = tables.next() val (featureTabAddr : Long, featureRows : Long, featureColumns : Long) = diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala index 018473a61..77ea4c656 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala @@ -69,6 +69,17 @@ class RandomForestRegressorDALImpl(val uid: String, val kvsIPPort = getOneCCLIPPort(labeledPointsTables) + labeledPointsTables.mapPartitionsWithIndex { (rank, iter) => + val gpuIndices = if (useDevice == "GPU") { + val resources = TaskContext.get().resources() + resources("gpu").addresses.map(_.toInt) + } else { + null + } + OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) + Iterator.empty + }.count() + labeledPointsTables.mapPartitionsWithIndex { (rank, table) => OneCCL.init(executorNum, rank, kvsIPPort) Iterator.empty diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala index e521aefe7..fff2d4ac5 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala @@ -52,6 +52,17 @@ class CorrelationDALImpl( }.count() corTimer.record("OneCCL Init") + coalescedTables.mapPartitionsWithIndex { (rank, iter) => + val gpuIndices = if (useDevice == "GPU") { + val resources = TaskContext.get().resources() + resources("gpu").addresses.map(_.toInt) + } else { + null + } + OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) + Iterator.empty + }.count() + val results = coalescedTables.mapPartitionsWithIndex { (rank, iter) => val (tableArr : Long, rows : Long, columns : Long) = if (useDevice == "GPU") { iter.next() diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala index 277039ab1..dcde7ef91 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala @@ -53,6 +53,17 @@ class SummarizerDALImpl(val executorNum: Int, }.count() sumTimer.record("OneCCL Init") + coalescedTables.mapPartitionsWithIndex { (rank, iter) => + val gpuIndices = if (useDevice == "GPU") { + val resources = TaskContext.get().resources() + resources("gpu").addresses.map(_.toInt) + } else { + null + } + OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) + Iterator.empty + }.count() + val results = coalescedTables.mapPartitionsWithIndex { (rank, iter) => val (tableArr : Long, rows : Long, columns : Long) = if (useDevice == "GPU") { iter.next() From 9c509a1c06cf85b120533c53ca913581d5a6f36c Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Sun, 18 Aug 2024 09:52:43 +0800 Subject: [PATCH 5/7] optimize code --- mllib-dal/src/main/native/GPU.cpp | 46 +++++++++++++++++++ mllib-dal/src/main/native/GPU.h | 2 + mllib-dal/src/main/native/KMeansImpl.cpp | 16 ++----- .../scala/com/intel/oap/mllib/CommonJob.scala | 46 +++++++++++++++++++ .../RandomForestClassifierDALImpl.scala | 19 ++------ .../oap/mllib/clustering/KMeansDALImpl.scala | 24 +++------- 
.../intel/oap/mllib/feature/PCADALImpl.scala | 19 ++------ .../regression/LinearRegressionDALImpl.scala | 16 ++----- .../RandomForestRegressorDALImpl.scala | 19 ++------ .../oap/mllib/stat/CorrelationDALImpl.scala | 19 ++------ .../oap/mllib/stat/SummarizerDALImpl.scala | 19 ++------ .../oap/mllib/ConvertHomogenTableSuite.scala | 6 +-- .../mllib/CorrelationHomogenTableSuite.scala | 2 +- .../mllib/SummarizerHomogenTableSuite.scala | 12 ++--- .../com/intel/oap/mllib/TestCommon.scala | 12 ++--- 15 files changed, 141 insertions(+), 136 deletions(-) create mode 100644 mllib-dal/src/main/scala/com/intel/oap/mllib/CommonJob.scala diff --git a/mllib-dal/src/main/native/GPU.cpp b/mllib-dal/src/main/native/GPU.cpp index 9dbba24f4..5be0223a4 100644 --- a/mllib-dal/src/main/native/GPU.cpp +++ b/mllib-dal/src/main/native/GPU.cpp @@ -113,3 +113,49 @@ sycl::queue getQueue(const ComputeDevice device) { } } } + + +preview::spmd::communicator createDalCommunicator(const jint executorNum, const jint rank, const ccl::string ccl_ip_port){ + auto gpus = get_gpus(); + + auto t1 = std::chrono::high_resolution_clock::now(); + + ccl::init(); + + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + (float)std::chrono::duration_cast(t2 - t1).count(); + + logger::println(logger::INFO, "OneCCL singleton init took %f secs", + duration / 1000); + logger::Logger::getInstance(c_breakdown_name).printLogToFile("rankID was %d, OneCCL singleton init took %f secs.", rank, duration / 1000 ); + + + t1 = std::chrono::high_resolution_clock::now(); + + auto kvs_attr = ccl::create_kvs_attr(); + + kvs_attr.set(ccl_ip_port); + + ccl::shared_ptr_class kvs = ccl::create_main_kvs(kvs_attr); + + t2 = std::chrono::high_resolution_clock::now(); + duration = + (float)std::chrono::duration_cast(t2 - t1) + .count(); + logger::println(logger::INFO, "OneCCL (native): create kvs took %f secs", + duration / 1000); + logger::Logger::getInstance(c_breakdown_name).printLogToFile("rankID was %d, OneCCL create communicator took %f secs.", rank, duration / 1000 ); + sycl::queue queue{gpus[0]}; + t1 = std::chrono::high_resolution_clock::now(); + auto comm = + preview::spmd::make_communicator( + queue, executorNum, rank, kvs); + t2 = std::chrono::high_resolution_clock::now(); + duration = + (float)std::chrono::duration_cast(t2 - t1) + .count(); + logger::Logger::getInstance(c_breakdown_name).printLogToFile("rankID was %d, create communicator took %f secs.", rank, duration / 1000 ); + return comm; +} + diff --git a/mllib-dal/src/main/native/GPU.h b/mllib-dal/src/main/native/GPU.h index f8d7c25a9..83b3272f0 100644 --- a/mllib-dal/src/main/native/GPU.h +++ b/mllib-dal/src/main/native/GPU.h @@ -6,7 +6,9 @@ #include #include #include +#include "Communicator.hpp" sycl::queue getAssignedGPU(const ComputeDevice device, jint *gpu_indices); sycl::queue getQueue(const ComputeDevice device); +preview::spmd::communicator createDalCommunicator(jint executorNum, jint rank, ccl::string ccl_ip_port); diff --git a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index f690c1c45..21ef5e218 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -338,25 +338,19 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe } #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { - int nGpu = env->GetArrayLength(gpuIdxArray); logger::println( logger::INFO, - "OneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rank); + "OneDAL (native): use GPU 
kernels with rankid %d", rank); - jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); + const char *str = env->GetStringUTFChars(ip_port, nullptr); + ccl::string ccl_ip_port(str); + auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); - auto queue = getAssignedGPU(device, gpuIndices); - - ccl::shared_ptr_class &kvs = getKvs(); - auto comm = - preview::spmd::make_communicator( - queue, executorNum, rank, kvs); ret = doKMeansOneAPICompute(env, pNumTabData, numRows, numCols, pNumTabCenters, clusterNum, tolerance, iterationNum, comm, resultObj); - env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0); + env->ReleaseStringUTFChars(ip_port, str); break; } #endif diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/CommonJob.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/CommonJob.scala new file mode 100644 index 000000000..e3e0aab58 --- /dev/null +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/CommonJob.scala @@ -0,0 +1,46 @@ +/* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.oap.mllib + +import org.apache.spark.TaskContext +import org.apache.spark.rdd.RDD + +object CommonJob { + + def setAffinityMask(data: RDD[_], useDevice: String): Unit = { + data.mapPartitionsWithIndex { (rank, iter) => + val gpuIndices = if (useDevice == "GPU") { + val resources = TaskContext.get().resources() + resources("gpu").addresses.map(_.toInt) + } else { + null + } + OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) + Iterator.empty + }.count() + } + + def createCCLInit(data: RDD[_], executorNum: Int, kvsIPPort: String, useDevice: String): Unit = { + if (useDevice == "CPU") { + data.mapPartitionsWithIndex { (rank, table) => + OneCCL.init(executorNum, rank, kvsIPPort) + Iterator.empty + }.count() + } + } + +} diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala index 70479c79a..6a2da4ac7 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala @@ -16,7 +16,7 @@ package com.intel.oap.mllib.classification import com.intel.oap.mllib.Utils.getOneCCLIPPort -import com.intel.oap.mllib.{OneCCL, OneDAL, Utils} +import com.intel.oap.mllib.{CommonJob, OneCCL, OneDAL, Utils} import com.intel.oneapi.dal.table.Common import org.apache.spark.annotation.Since import org.apache.spark.TaskContext @@ -75,21 +75,8 @@ class RandomForestClassifierDALImpl(val uid: String, rfcTimer.record("Data Convertion") val kvsIPPort = getOneCCLIPPort(labeledPointsTables) - labeledPointsTables.mapPartitionsWithIndex { (rank, iter) => - val gpuIndices = if (useDevice == "GPU") { - val resources = TaskContext.get().resources() - resources("gpu").addresses.map(_.toInt) - } else { - null - } - 
OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) - Iterator.empty - }.count() - - labeledPointsTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) - Iterator.empty - }.count() + CommonJob.setAffinityMask(labeledPointsTables, useDevice) + CommonJob.createCCLInit(labeledPointsTables, executorNum, kvsIPPort, useDevice) rfcTimer.record("OneCCL Init") val results = labeledPointsTables.mapPartitionsWithIndex { (rank, tables) => diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala index 14eb16800..61dd1ef80 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala @@ -17,7 +17,7 @@ package com.intel.oap.mllib.clustering import com.intel.oap.mllib.Utils.getOneCCLIPPort -import com.intel.oap.mllib.{OneCCL, OneDAL, Utils} +import com.intel.oap.mllib.{CommonJob, OneCCL, OneDAL, Utils} import com.intel.oneapi.dal.table.Common import org.apache.spark.TaskContext import org.apache.spark.internal.Logging @@ -51,21 +51,9 @@ class KMeansDALImpl(var nClusters: Int, kmeansTimer.record("Data Convertion") val kvsIPPort = getOneCCLIPPort(coalescedTables) - coalescedTables.mapPartitionsWithIndex { (rank, iter) => - val gpuIndices = if (useDevice == "GPU") { - val resources = TaskContext.get().resources() - resources("gpu").addresses.map(_.toInt) - } else { - null - } - OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) - Iterator.empty - }.count() - - coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) - Iterator.empty - }.count() + + CommonJob.setAffinityMask(coalescedTables, useDevice) + CommonJob.createCCLInit(coalescedTables, executorNum, kvsIPPort, useDevice) kmeansTimer.record("OneCCL Init") val results = coalescedTables.mapPartitionsWithIndex { (rank, iter) => @@ -118,7 +106,9 @@ class KMeansDALImpl(var nClusters: Int, } else { Iterator.empty } - OneCCL.cleanup() + if (useDevice == "CPU") { + OneCCL.cleanup() + } ret }.collect() diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala index 7190ade3f..071117cc0 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala @@ -19,7 +19,7 @@ package com.intel.oap.mllib.feature import java.nio.DoubleBuffer import com.intel.daal.data_management.data.{HomogenNumericTable, NumericTable} import com.intel.oap.mllib.Utils.getOneCCLIPPort -import com.intel.oap.mllib.{OneCCL, OneDAL, Service, Utils} +import com.intel.oap.mllib.{CommonJob, OneCCL, OneDAL, Service, Utils} import org.apache.spark.TaskContext import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging @@ -59,21 +59,8 @@ class PCADALImpl(val k: Int, val kvsIPPort = getOneCCLIPPort(coalescedTables) pcaTimer.record("Data Convertion") - coalescedTables.mapPartitionsWithIndex { (rank, iter) => - val gpuIndices = if (useDevice == "GPU") { - val resources = TaskContext.get().resources() - resources("gpu").addresses.map(_.toInt) - } else { - null - } - OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) - Iterator.empty - }.count() - - coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) 
- Iterator.empty - }.count() + CommonJob.setAffinityMask(coalescedTables, useDevice) + CommonJob.createCCLInit(coalescedTables, executorNum, kvsIPPort, useDevice) pcaTimer.record("OneCCL Init") val results = coalescedTables.mapPartitionsWithIndex { (rank, iter) => diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala index 40b2f4423..79243f988 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala @@ -17,7 +17,7 @@ package com.intel.oap.mllib.regression import com.intel.oap.mllib.Utils.getOneCCLIPPort -import com.intel.oap.mllib.{OneCCL, OneDAL, Utils} +import com.intel.oap.mllib.{CommonJob, OneCCL, OneDAL, Utils} import com.intel.oneapi.dal.table.Common import org.apache.spark.SparkException import org.apache.spark.TaskContext @@ -106,16 +106,9 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, } lrTimer.record("Data Convertion") - labeledPointsTables.mapPartitionsWithIndex { (rank, iter) => - val gpuIndices = if (useDevice == "GPU") { - val resources = TaskContext.get().resources() - resources("gpu").addresses.map(_.toInt) - } else { - null - } - OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) - Iterator.empty - }.count() + CommonJob.setAffinityMask(labeledPointsTables, useDevice) + CommonJob.createCCLInit(labeledPointsTables, executorNum, kvsIPPort, useDevice) + lrTimer.record("OneCCL Init") val results = labeledPointsTables.mapPartitionsWithIndex { (rank, tables) => val (feature, label) = tables.next() @@ -132,7 +125,6 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, (label.toString.toLong, 0L, 0L) } - OneCCL.init(executorNum, rank, kvsIPPort) val result = new LiRResult() val gpuIndices = if (useDevice == "GPU") { diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala index 77ea4c656..100be8823 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala @@ -17,7 +17,7 @@ package com.intel.oap.mllib.regression import com.intel.oap.mllib.Utils.getOneCCLIPPort import com.intel.oap.mllib.classification.{LearningNode, RandomForestResult} -import com.intel.oap.mllib.{OneCCL, OneDAL, Utils} +import com.intel.oap.mllib.{CommonJob, OneCCL, OneDAL, Utils} import com.intel.oneapi.dal.table.Common import org.apache.spark.TaskContext import org.apache.spark.internal.Logging @@ -69,21 +69,8 @@ class RandomForestRegressorDALImpl(val uid: String, val kvsIPPort = getOneCCLIPPort(labeledPointsTables) - labeledPointsTables.mapPartitionsWithIndex { (rank, iter) => - val gpuIndices = if (useDevice == "GPU") { - val resources = TaskContext.get().resources() - resources("gpu").addresses.map(_.toInt) - } else { - null - } - OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) - Iterator.empty - }.count() - - labeledPointsTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) - Iterator.empty - }.count() + CommonJob.setAffinityMask(labeledPointsTables, useDevice) + CommonJob.createCCLInit(labeledPointsTables, executorNum, kvsIPPort, useDevice) rfrTimer.record("OneCCL Init") 
val results = labeledPointsTables.mapPartitionsWithIndex { (rank, tables) => diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala index fff2d4ac5..04a3760bb 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala @@ -17,7 +17,7 @@ package com.intel.oap.mllib.stat import com.intel.oap.mllib.Utils.getOneCCLIPPort -import com.intel.oap.mllib.{OneCCL, OneDAL, Utils} +import com.intel.oap.mllib.{CommonJob, OneCCL, OneDAL, Utils} import com.intel.oneapi.dal.table.Common import org.apache.spark.TaskContext import org.apache.spark.internal.Logging @@ -46,23 +46,10 @@ class CorrelationDALImpl( val kvsIPPort = getOneCCLIPPort(coalescedTables) - coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) - Iterator.empty - }.count() + CommonJob.setAffinityMask(coalescedTables, useDevice) + CommonJob.createCCLInit(coalescedTables, executorNum, kvsIPPort, useDevice) corTimer.record("OneCCL Init") - coalescedTables.mapPartitionsWithIndex { (rank, iter) => - val gpuIndices = if (useDevice == "GPU") { - val resources = TaskContext.get().resources() - resources("gpu").addresses.map(_.toInt) - } else { - null - } - OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) - Iterator.empty - }.count() - val results = coalescedTables.mapPartitionsWithIndex { (rank, iter) => val (tableArr : Long, rows : Long, columns : Long) = if (useDevice == "GPU") { iter.next() diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala index dcde7ef91..c8422b097 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala @@ -16,7 +16,7 @@ package com.intel.oap.mllib.stat -import com.intel.oap.mllib.{OneCCL, OneDAL, Utils} +import com.intel.oap.mllib.{CommonJob, OneCCL, OneDAL, Utils} import org.apache.spark.TaskContext import org.apache.spark.internal.Logging import org.apache.spark.ml.linalg.Vector @@ -47,23 +47,10 @@ class SummarizerDALImpl(val executorNum: Int, val kvsIPPort = getOneCCLIPPort(data) - coalescedTables.mapPartitionsWithIndex { (rank, table) => - OneCCL.init(executorNum, rank, kvsIPPort) - Iterator.empty - }.count() + CommonJob.setAffinityMask(coalescedTables, useDevice) + CommonJob.createCCLInit(coalescedTables, executorNum, kvsIPPort, useDevice) sumTimer.record("OneCCL Init") - coalescedTables.mapPartitionsWithIndex { (rank, iter) => - val gpuIndices = if (useDevice == "GPU") { - val resources = TaskContext.get().resources() - resources("gpu").addresses.map(_.toInt) - } else { - null - } - OneCCL.setExecutorEnv("ZE_AFFINITY_MASK", gpuIndices(0).toString()) - Iterator.empty - }.count() - val results = coalescedTables.mapPartitionsWithIndex { (rank, iter) => val (tableArr : Long, rows : Long, columns : Long) = if (useDevice == "GPU") { iter.next() diff --git a/mllib-dal/src/test/scala/com/intel/oap/mllib/ConvertHomogenTableSuite.scala b/mllib-dal/src/test/scala/com/intel/oap/mllib/ConvertHomogenTableSuite.scala index bbb6bbe7e..3246387b3 100644 --- a/mllib-dal/src/test/scala/com/intel/oap/mllib/ConvertHomogenTableSuite.scala +++ b/mllib-dal/src/test/scala/com/intel/oap/mllib/ConvertHomogenTableSuite.scala @@ -57,7 +57,7 @@ class 
ConvertHomogenTableSuite extends FunctionsSuite with Logging { val metadata = table.getMetaData for (i <- 0 until 10) { assert(metadata.getDataType(i) == FLOAT64) - assert(metadata.getFeatureType(i) == Common.FeatureType.RATIO) + assert(metadata.getFeatureType(i) == CommonJob.FeatureType.RATIO) } assertArrayEquals(table.getDoubleData, TestCommon.convertArray(data)) @@ -75,7 +75,7 @@ class ConvertHomogenTableSuite extends FunctionsSuite with Logging { val metadata = table.getMetaData for (i <- 0 until 10) { assert(metadata.getDataType(i) == FLOAT64) - assert(metadata.getFeatureType(i) == Common.FeatureType.RATIO) + assert(metadata.getFeatureType(i) == CommonJob.FeatureType.RATIO) } assertArrayEquals(table.getDoubleData, data) @@ -105,7 +105,7 @@ class ConvertHomogenTableSuite extends FunctionsSuite with Logging { val metadata = table.getMetaData for (i <- 0 until 10) { assert(metadata.getDataType(i) == FLOAT64) - assert(metadata.getFeatureType(i) == Common.FeatureType.RATIO) + assert(metadata.getFeatureType(i) == CommonJob.FeatureType.RATIO) } assertArrayEquals(table.getDoubleData, TestCommon.convertArray(data)) diff --git a/mllib-dal/src/test/scala/com/intel/oap/mllib/CorrelationHomogenTableSuite.scala b/mllib-dal/src/test/scala/com/intel/oap/mllib/CorrelationHomogenTableSuite.scala index 34361766d..98d37a338 100644 --- a/mllib-dal/src/test/scala/com/intel/oap/mllib/CorrelationHomogenTableSuite.scala +++ b/mllib-dal/src/test/scala/com/intel/oap/mllib/CorrelationHomogenTableSuite.scala @@ -45,7 +45,7 @@ class CorrelationHomogenTableSuite extends FunctionsSuite with Logging { val correlationDAL = new CorrelationDALImpl(1, 1) val gpuIndices = Array(0) val result = new CorrelationResult() - correlationDAL.cCorrelationTrainDAL(dataTable.getcObejct(), sourceData.length, sourceData(0).length, 1, 1, Common.ComputeDevice.HOST.ordinal(), gpuIndices, result); + correlationDAL.cCorrelationTrainDAL(dataTable.getcObejct(), sourceData.length, sourceData(0).length, 1, 1, CommonJob.ComputeDevice.HOST.ordinal(), gpuIndices, result); val correlationMatrix = TestCommon.getMatrixFromTable(OneDAL.makeHomogenTable( result.getCorrelationNumericTable), TestCommon.getComputeDevice) diff --git a/mllib-dal/src/test/scala/com/intel/oap/mllib/SummarizerHomogenTableSuite.scala b/mllib-dal/src/test/scala/com/intel/oap/mllib/SummarizerHomogenTableSuite.scala index 712cccbfa..5917af2e1 100644 --- a/mllib-dal/src/test/scala/com/intel/oap/mllib/SummarizerHomogenTableSuite.scala +++ b/mllib-dal/src/test/scala/com/intel/oap/mllib/SummarizerHomogenTableSuite.scala @@ -31,15 +31,15 @@ class SummarizerHomogenTableSuite extends FunctionsSuite with Logging{ val sourceData = TestCommon.readCSV("src/test/resources/data/covcormoments_dense.csv") - val dataTable = new HomogenTable(sourceData.length, sourceData(0).length, TestCommon.convertArray(sourceData), Common.ComputeDevice.HOST); + val dataTable = new HomogenTable(sourceData.length, sourceData(0).length, TestCommon.convertArray(sourceData), CommonJob.ComputeDevice.HOST); val summarizerDAL = new SummarizerDALImpl(1, 1) val gpuIndices = Array(0) val result = new SummarizerResult() - summarizerDAL.cSummarizerTrainDAL(dataTable.getcObejct(), sourceData.length, sourceData(0).length, 1, 1, Common.ComputeDevice.HOST.ordinal(), gpuIndices, result) - val meanTable = OneDAL.homogenTable1xNToVector(OneDAL.makeHomogenTable(result.getMeanNumericTable), Common.ComputeDevice.HOST) - val varianceTable = OneDAL.homogenTable1xNToVector(OneDAL.makeHomogenTable(result.getVarianceNumericTable), 
Common.ComputeDevice.HOST) - val minimumTable = OneDAL.homogenTable1xNToVector(OneDAL.makeHomogenTable(result.getMinimumNumericTable), Common.ComputeDevice.HOST) - val maximumTable = OneDAL.homogenTable1xNToVector(OneDAL.makeHomogenTable(result.getMaximumNumericTable), Common.ComputeDevice.HOST) + summarizerDAL.cSummarizerTrainDAL(dataTable.getcObejct(), sourceData.length, sourceData(0).length, 1, 1, CommonJob.ComputeDevice.HOST.ordinal(), gpuIndices, result) + val meanTable = OneDAL.homogenTable1xNToVector(OneDAL.makeHomogenTable(result.getMeanNumericTable), CommonJob.ComputeDevice.HOST) + val varianceTable = OneDAL.homogenTable1xNToVector(OneDAL.makeHomogenTable(result.getVarianceNumericTable), CommonJob.ComputeDevice.HOST) + val minimumTable = OneDAL.homogenTable1xNToVector(OneDAL.makeHomogenTable(result.getMinimumNumericTable), CommonJob.ComputeDevice.HOST) + val maximumTable = OneDAL.homogenTable1xNToVector(OneDAL.makeHomogenTable(result.getMaximumNumericTable), CommonJob.ComputeDevice.HOST) assertArrayEquals(expectMean , meanTable.toArray, 0.000001) assertArrayEquals(expectVariance, varianceTable.toDense.values, 0.000001) diff --git a/mllib-dal/src/test/scala/com/intel/oap/mllib/TestCommon.scala b/mllib-dal/src/test/scala/com/intel/oap/mllib/TestCommon.scala index 5a2ecef27..9ae20cec4 100644 --- a/mllib-dal/src/test/scala/com/intel/oap/mllib/TestCommon.scala +++ b/mllib-dal/src/test/scala/com/intel/oap/mllib/TestCommon.scala @@ -84,7 +84,7 @@ object TestCommon { arrayDouble } def getMatrixFromTable(table: HomogenTable, - device: Common.ComputeDevice): DenseMatrix = { + device: CommonJob.ComputeDevice): DenseMatrix = { val numRows = table.getRowCount.toInt val numCols = table.getColumnCount.toInt // returned DoubleBuffer is ByteByffer, need to copy as double array @@ -97,14 +97,14 @@ object TestCommon { matrix } - def getComputeDevice: Common.ComputeDevice = { + def getComputeDevice: CommonJob.ComputeDevice = { val device = System.getProperty("computeDevice") - var computeDevice: Common.ComputeDevice = Common.ComputeDevice.HOST + var computeDevice: CommonJob.ComputeDevice = CommonJob.ComputeDevice.HOST if(device != null) { device.toUpperCase match { - case "HOST" => computeDevice = Common.ComputeDevice.HOST - case "CPU" => computeDevice = Common.ComputeDevice.CPU - case "GPU" => computeDevice = Common.ComputeDevice.GPU + case "HOST" => computeDevice = CommonJob.ComputeDevice.HOST + case "CPU" => computeDevice = CommonJob.ComputeDevice.CPU + case "GPU" => computeDevice = CommonJob.ComputeDevice.GPU case _ => "Invalid Device" } } From 00e411a94e3cfbce8287bfbecaf9d06bc3739455 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Mon, 19 Aug 2024 13:50:57 +0800 Subject: [PATCH 6/7] update optimize code --- mllib-dal/src/main/native/CorrelationImpl.cpp | 18 +++---- .../native/DecisionForestClassifierImpl.cpp | 19 +++---- .../native/DecisionForestRegressorImpl.cpp | 17 +++--- mllib-dal/src/main/native/GPU.cpp | 6 --- mllib-dal/src/main/native/KMeansImpl.cpp | 2 +- .../src/main/native/LinearRegressionImpl.cpp | 50 ++++++++---------- mllib-dal/src/main/native/OneCCL.cpp | 52 +++---------------- mllib-dal/src/main/native/PCAImpl.cpp | 18 +++---- mllib-dal/src/main/native/SummarizerImpl.cpp | 18 +++---- .../javah/com_intel_oap_mllib_OneCCL__.h | 2 +- ...intel_oap_mllib_clustering_KMeansDALImpl.h | 2 +- .../com_intel_oap_mllib_feature_PCADALImpl.h | 2 +- ...mllib_regression_LinearRegressionDALImpl.h | 2 +- ..._regression_RandomForestRegressorDALImpl.h | 2 +- 
..._intel_oap_mllib_stat_CorrelationDALImpl.h | 2 +- ...m_intel_oap_mllib_stat_SummarizerDALImpl.h | 2 +- .../RandomForestClassifierDALImpl.scala | 2 + .../oap/mllib/clustering/KMeansDALImpl.scala | 2 + .../intel/oap/mllib/feature/PCADALImpl.scala | 2 + .../regression/LinearRegressionDALImpl.scala | 2 + .../RandomForestRegressorDALImpl.scala | 2 + .../oap/mllib/stat/CorrelationDALImpl.scala | 2 + .../oap/mllib/stat/SummarizerDALImpl.scala | 2 + 23 files changed, 80 insertions(+), 148 deletions(-) diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index a9103102f..dd21890f4 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -199,7 +199,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, jint executorNum, jint executorCores, jint computeDeviceOrdinal, - jintArray gpuIdxArray, jobject resultObj) { + jintArray gpuIdxArray, jstring ip_port, jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); @@ -225,23 +225,17 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( } #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { - int nGpu = env->GetArrayLength(gpuIdxArray); logger::println( logger::INFO, - "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rank); + "oneDAL (native): use GPU kernels with rankid %d", rank); - jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); + const char *str = env->GetStringUTFChars(ip_port, nullptr); + ccl::string ccl_ip_port(str); + auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); - auto queue = getAssignedGPU(device, gpuIndices); - - ccl::shared_ptr_class &kvs = getKvs(); - auto comm = - preview::spmd::make_communicator( - queue, executorNum, rank, kvs); doCorrelationOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj); - env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0); + env->ReleaseStringUTFChars(ip_port, str); break; } #endif diff --git a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp index aad8d9048..64a0782ca 100644 --- a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp @@ -307,28 +307,20 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif jint minObservationsSplitNode, jdouble minWeightFractionLeafNode, jdouble minImpurityDecreaseSplitNode, jint maxTreeDepth, jlong seed, jint maxBins, jboolean bootstrap, jintArray gpuIdxArray, - jobject resultObj) { + jstring ip_port, jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels"); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::gpu: { - int nGpu = env->GetArrayLength(gpuIdxArray); logger::println( logger::INFO, - "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rank); - - jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); - - ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); + "oneDAL (native): use GPU kernels with rankid %d", rank); - auto queue = getAssignedGPU(device, gpuIndices); + const char *str = env->GetStringUTFChars(ip_port, nullptr); + ccl::string 
ccl_ip_port(str); + auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); - ccl::shared_ptr_class &kvs = getKvs(); - auto comm = - preview::spmd::make_communicator( - queue, executorNum, rank, kvs); jobject hashmapObj = doRFClassifierOneAPICompute( env, pNumTabFeature, featureRows, featureCols, pNumTabLabel, labelCols, executorNum, computeDeviceOrdinal, classCount, treeCount, @@ -336,6 +328,7 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif minObservationsSplitNode, minWeightFractionLeafNode, minImpurityDecreaseSplitNode, maxTreeDepth, seed, maxBins, bootstrap, comm, resultObj); + env->ReleaseStringUTFChars(ip_port, str); return hashmapObj; } default: { diff --git a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp index 853f736de..c757d12a1 100644 --- a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp @@ -296,7 +296,7 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra jlong featureCols, jlong pNumTabLabel, jlong labelCols, jint executorNum, jint computeDeviceOrdinal, jint treeCount, jint numFeaturesPerNode, jint minObservationsLeafNode, jint maxTreeDepth, jlong seed, jint maxbins, - jboolean bootstrap, jintArray gpuIdxArray, jobject resultObj) { + jboolean bootstrap, jintArray gpuIdxArray, jstring ip_port, jobject resultObj) { logger::println(logger::INFO, "OneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); @@ -304,25 +304,20 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::gpu: { - int nGpu = env->GetArrayLength(gpuIdxArray); logger::println( logger::INFO, - "OneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rank); + "OneDAL (native): use GPU kernels with rankid %d", rank); - jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); + const char *str = env->GetStringUTFChars(ip_port, nullptr); + ccl::string ccl_ip_port(str); + auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); - auto queue = getAssignedGPU(device, gpuIndices); - - ccl::shared_ptr_class &kvs = getKvs(); - auto comm = - preview::spmd::make_communicator( - queue, executorNum, rank, kvs); jobject hashmapObj = doRFRegressorOneAPICompute( env, pNumTabFeature, featureRows, featureCols, pNumTabLabel, labelCols, executorNum, computeDeviceOrdinal, treeCount, numFeaturesPerNode, minObservationsLeafNode, maxTreeDepth, seed, maxbins, bootstrap, comm, resultObj); + env->ReleaseStringUTFChars(ip_port, str); return hashmapObj; } default: { diff --git a/mllib-dal/src/main/native/GPU.cpp b/mllib-dal/src/main/native/GPU.cpp index 5be0223a4..2454fadef 100644 --- a/mllib-dal/src/main/native/GPU.cpp +++ b/mllib-dal/src/main/native/GPU.cpp @@ -25,7 +25,6 @@ static std::vector get_gpus() { } static int getLocalRank(ccl::communicator &comm, int size, int rank) { - const int MPI_MAX_PROCESSOR_NAME = 128; /* Obtain local rank among nodes sharing the same host name */ char zero = static_cast(0); std::vector name(MPI_MAX_PROCESSOR_NAME + 1, zero); @@ -128,8 +127,6 @@ preview::spmd::communicator createDalC logger::println(logger::INFO, "OneCCL singleton init took %f secs", duration / 1000); - logger::Logger::getInstance(c_breakdown_name).printLogToFile("rankID was %d, OneCCL singleton init took %f secs.", 
rank, duration / 1000 ); - t1 = std::chrono::high_resolution_clock::now(); @@ -145,7 +142,6 @@ preview::spmd::communicator createDalC .count(); logger::println(logger::INFO, "OneCCL (native): create kvs took %f secs", duration / 1000); - logger::Logger::getInstance(c_breakdown_name).printLogToFile("rankID was %d, OneCCL create communicator took %f secs.", rank, duration / 1000 ); sycl::queue queue{gpus[0]}; t1 = std::chrono::high_resolution_clock::now(); auto comm = @@ -155,7 +151,5 @@ preview::spmd::communicator createDalC duration = (float)std::chrono::duration_cast(t2 - t1) .count(); - logger::Logger::getInstance(c_breakdown_name).printLogToFile("rankID was %d, create communicator took %f secs.", rank, duration / 1000 ); return comm; } - diff --git a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index 21ef5e218..b868fd475 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -308,7 +308,7 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, jlong pNumTabCenters, jint clusterNum, jdouble tolerance, jint iterationNum, jint executorNum, jint executorCores, jint computeDeviceOrdinal, - jintArray gpuIdxArray, jobject resultObj) { + jintArray gpuIdxArray, jstring ip_port, jobject resultObj) { logger::println(logger::INFO, "OneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index ca94b54c5..7518a23b6 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -215,7 +215,7 @@ ridge_regression_compute(size_t rankId, ccl::communicator &comm, #ifdef CPU_GPU_PROFILE static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, - ccl::communicator &cclComm, sycl::queue &queue, + preview::spmd::communicator comm, jlong pNumTabFeature, jlong featureRows, jlong featureCols, jlong pNumTabLabel, jlong labelCols, jboolean jfitIntercept, @@ -225,9 +225,6 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, const bool isRoot = (rankId == ccl_root); bool fitIntercept = bool(jfitIntercept); - ccl::shared_ptr_class &kvs = getKvs(); - auto comm = preview::spmd::make_communicator( - queue, executorNum, rankId, kvs); homogen_table xtrain = *reinterpret_cast( createHomogenTableWithArrayPtr(pNumTabFeature, featureRows, featureCols, comm.get_queue()) @@ -265,7 +262,7 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra jlong featureCols, jlong label, jlong labelCols, jboolean fitIntercept, jdouble regParam, jdouble elasticNetParam, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, - jobject resultObj) { + jstring ip_port, jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", @@ -280,22 +277,23 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra jlong resultptr = 0L; if (useGPU) { #ifdef CPU_GPU_PROFILE - int nGpu = env->GetArrayLength(gpuIdxArray); logger::println( logger::INFO, - "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rank); + "oneDAL (native): use GPU kernels with rankid %d", rank); - jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); - int size = cclComm.size(); - auto queue = 
getAssignedGPU(device, gpuIndices); + const char *str = env->GetStringUTFChars(ip_port, nullptr); + ccl::string ccl_ip_port(str); + auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); resultptr = doLROneAPICompute( - env, rank, cclComm, queue, feature, featureRows, featureCols, + env, rank, comm, feature, featureRows, featureCols, label, labelCols, fitIntercept, executorNum, resultObj); - env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0); + env->ReleaseStringUTFChars(ip_port, str); #endif } else { + ccl::communicator &cclComm = getComm(); + size_t rankId = cclComm.rank(); + NumericTablePtr pLabel = *((NumericTablePtr *)label); NumericTablePtr pData = *((NumericTablePtr *)feature); @@ -318,22 +316,18 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra NumericTablePtr *coeffvectors = new NumericTablePtr(resultTable); resultptr = (jlong)coeffvectors; - } - - jlong ret = 0L; - if (rankId == ccl_root) { - // Get the class of the result object - jclass clazz = env->GetObjectClass(resultObj); - // Get Field references - jfieldID coeffNumericTableField = - env->GetFieldID(clazz, "coeffNumericTable", "J"); + if (rankId == ccl_root) { + // Get the class of the result object + jclass clazz = env->GetObjectClass(resultObj); + // Get Field references + jfieldID coeffNumericTableField = + env->GetFieldID(clazz, "coeffNumericTable", "J"); - env->SetLongField(resultObj, coeffNumericTableField, resultptr); + env->SetLongField(resultObj, coeffNumericTableField, resultptr); - // intercept is already in first column of coeffvectors - ret = resultptr; - } else { - ret = (jlong)0; + // intercept is already in first column of coeffvectors + resultptr = (jlong)coeffvectors; + } } - return ret; + return resultptr; } diff --git a/mllib-dal/src/main/native/OneCCL.cpp b/mllib-dal/src/main/native/OneCCL.cpp index b924c6987..988dd844a 100644 --- a/mllib-dal/src/main/native/OneCCL.cpp +++ b/mllib-dal/src/main/native/OneCCL.cpp @@ -55,72 +55,32 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( auto t1 = std::chrono::high_resolution_clock::now(); ccl::init(); + auto t2 = std::chrono::high_resolution_clock::now(); + const char *str = env->GetStringUTFChars(ip_port, 0); ccl::string ccl_ip_port(str); - const char *device = env->GetStringUTFChars(use_device, 0); - ccl::string ccl_ip_port(str); auto &singletonCCLInit = CCLInitSingleton::get(size, rank, ccl_ip_port); g_kvs.push_back(singletonCCLInit.kvs); - -#ifdef CPU_ONLY_PROFILE g_comms.push_back( ccl::create_communicator(size, rank, singletonCCLInit.kvs)); - - auto t2 = std::chrono::high_resolution_clock::now(); auto duration = (float)std::chrono::duration_cast(t2 - t1) .count(); logger::println(logger::INFO, "OneCCL (native): init took %f secs", duration / 1000); -#endif - - jclass cls = env->GetObjectClass(param); - jfieldID fid_comm_size = env->GetFieldID(cls, "commSize", "J"); - jfieldID fid_rank_id = env->GetFieldID(cls, "rankId", "J"); - - env->SetLongField(param, size, comm_size); - env->SetLongField(param, rank, rank_id); - env->ReleaseStringUTFChars(ip_port, str); - - return 1; -} - -/* - * Class: com_intel_oap_mllib_OneCCL__ - * Method: c_init - * Signature: ()I - */ -JNIEXPORT jint JNICALL -Java_com_intel_oap_mllib_OneCCL_00024_c_1initDpcpp(JNIEnv *env, jobject, jint size, jint rank, jobject param) { - logger::printerrln(logger::INFO, "OneCCL (native): init dpcpp"); - auto t1 = std::chrono::high_resolution_clock::now(); - ccl::init(); - - const char *str = env->GetStringUTFChars(ip_port, 
0); - ccl::string ccl_ip_port(str); - - auto &singletonCCLInit = CCLInitSingleton::get(size, rank, ccl_ip_port); - - g_kvs.push_back(singletonCCLInit.kvs); - - - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = - (float)std::chrono::duration_cast(t2 - t1) - .count(); - logger::println(logger::INFO, "OneCCL (native): init took %f secs", - duration / 1000); + rank_id = getComm().rank(); + comm_size = getComm().size(); jclass cls = env->GetObjectClass(param); jfieldID fid_comm_size = env->GetFieldID(cls, "commSize", "J"); jfieldID fid_rank_id = env->GetFieldID(cls, "rankId", "J"); - env->SetLongField(param, size, comm_size); - env->SetLongField(param, rank, rank_id); + env->SetLongField(param, fid_comm_size, comm_size); + env->SetLongField(param, fid_rank_id, rank_id); env->ReleaseStringUTFChars(ip_port, str); return 1; diff --git a/mllib-dal/src/main/native/PCAImpl.cpp b/mllib-dal/src/main/native/PCAImpl.cpp index f2821d558..67949341d 100644 --- a/mllib-dal/src/main/native/PCAImpl.cpp +++ b/mllib-dal/src/main/native/PCAImpl.cpp @@ -252,7 +252,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, jint executorNum, jint executorCores, jint computeDeviceOrdinal, - jintArray gpuIdxArray, jobject resultObj) { + jintArray gpuIdxArray, jstring ip_port, jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); @@ -277,22 +277,16 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( } #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { - int nGpu = env->GetArrayLength(gpuIdxArray); logger::println( logger::INFO, - "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rank); + "oneDAL (native): use GPU kernels with rankid %d", rank); - jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); + const char *str = env->GetStringUTFChars(ip_port, nullptr); + ccl::string ccl_ip_port(str); + auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); - auto queue = getAssignedGPU(device, gpuIndices); - - ccl::shared_ptr_class &kvs = getKvs(); - auto comm = - preview::spmd::make_communicator( - queue, executorNum, rank, kvs); doPCAOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj); - env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0); + env->ReleaseStringUTFChars(ip_port, str); break; } #endif diff --git a/mllib-dal/src/main/native/SummarizerImpl.cpp b/mllib-dal/src/main/native/SummarizerImpl.cpp index 9af30d939..852db8b03 100644 --- a/mllib-dal/src/main/native/SummarizerImpl.cpp +++ b/mllib-dal/src/main/native/SummarizerImpl.cpp @@ -270,7 +270,7 @@ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, jint executorNum, jint executorCores, jint computeDeviceOrdinal, - jintArray gpuIdxArray, jobject resultObj) { + jintArray gpuIdxArray, jstring ip_port, jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); @@ -295,22 +295,16 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( } #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { - int nGpu = env->GetArrayLength(gpuIdxArray); logger::println( logger::INFO, - "oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu, - rank); + 
"oneDAL (native): use GPU kernels with rankid %d", rank); + const char *str = env->GetStringUTFChars(ip_port, nullptr); + ccl::string ccl_ip_port(str); + auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); - jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0); - auto queue = getAssignedGPU(device, gpuIndices); - - ccl::shared_ptr_class &kvs = getKvs(); - auto comm = - preview::spmd::make_communicator( - queue, executorNum, rank, kvs); doSummarizerOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj); - env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0); + env->ReleaseStringUTFChars(ip_port, str); break; } #endif diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneCCL__.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneCCL__.h index 4bfa1d0c3..a89b7d214 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneCCL__.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_OneCCL__.h @@ -45,7 +45,7 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1getAvailPort * Signature: (IILjava/lang/String;Lcom/intel/oap/mllib/CCLParam;)I */ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init - (JNIEnv *, jobject, jint, jint, jstring, jstring, jobject); + (JNIEnv *, jobject, jint, jint, jstring, jobject); /* * Class: com_intel_oap_mllib_OneCCL__ diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_clustering_KMeansDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_clustering_KMeansDALImpl.h index a0fc24dde..9a00db0a2 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_clustering_KMeansDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_clustering_KMeansDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JJIDIIII[ILcom/intel/oap/mllib/clustering/KMeansResult;)J */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCenters - (JNIEnv *, jobject, jint, jlong, jlong, jlong, jlong, jint, jdouble, jint, jint, jint, jint, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jlong, jint, jdouble, jint, jint, jint, jint, jintArray, jstring, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_feature_PCADALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_feature_PCADALImpl.h index 34646da95..3f1875ca9 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_feature_PCADALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_feature_PCADALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JIII[ILcom/intel/oap/mllib/feature/PCAResult;)J */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL - (JNIEnv *, jobject, jint, jlong, jlong, jlong, jint, jint, jint, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jint, jint, jint, jintArray, jstring, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_LinearRegressionDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_LinearRegressionDALImpl.h index 0dc6f4e79..4f90f23f8 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_LinearRegressionDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_LinearRegressionDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JJZDDIII[ILcom/intel/oap/mllib/regression/LiRResult;)J */ JNIEXPORT jlong JNICALL 
Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTrainDAL - (JNIEnv *, jobject, jint, jlong, jlong, jlong, jlong, jlong, jboolean, jdouble, jdouble, jint, jint, jint, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jlong, jlong, jboolean, jdouble, jdouble, jint, jint, jint, jintArray, jstring, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h index 1350d8268..7bf694a19 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_regression_RandomForestRegressorDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JJIIIIIIJIZ[ILcom/intel/oap/mllib/classification/RandomForestResult;)Ljava/util/HashMap; */ JNIEXPORT jobject JNICALL Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTrainDAL - (JNIEnv *, jobject, jint, jlong, jlong, jlong, jlong, jlong, jint, jint, jint, jint, jint, jint, jlong, jint, jboolean, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jlong, jlong, jint, jint, jint, jint, jint, jint, jlong, jint, jboolean, jintArray, jstring, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_CorrelationDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_CorrelationDALImpl.h index 494b89658..4c404b452 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_CorrelationDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_CorrelationDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JIII[ILcom/intel/oap/mllib/stat/CorrelationResult;)J */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL - (JNIEnv *, jobject, jint, jlong, jlong, jlong, jint, jint, jint, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jint, jint, jint, jintArray, jstring, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_SummarizerDALImpl.h b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_SummarizerDALImpl.h index 7db45743f..4261d6fdd 100644 --- a/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_SummarizerDALImpl.h +++ b/mllib-dal/src/main/native/javah/com_intel_oap_mllib_stat_SummarizerDALImpl.h @@ -13,7 +13,7 @@ extern "C" { * Signature: (JIII[ILcom/intel/oap/mllib/stat/SummarizerResult;)J */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL - (JNIEnv *, jobject, jint, jlong, jlong, jlong, jint, jint, jint, jintArray, jobject); + (JNIEnv *, jobject, jint, jlong, jlong, jlong, jint, jint, jint, jintArray, jstring, jobject); #ifdef __cplusplus } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala index 6a2da4ac7..f0ac1f0b5 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala @@ -114,6 +114,7 @@ class RandomForestClassifierDALImpl(val uid: String, maxBins, bootstrap, gpuIndices, + kvsIPPort, result) val computeEndTime = System.nanoTime() @@ -159,6 +160,7 @@ class 
RandomForestClassifierDALImpl(val uid: String, maxBins: Int, bootstrap: Boolean, gpuIndices: Array[Int], + kvsIPPort: String, result: RandomForestResult): java.util.HashMap[java.lang.Integer, java.util.ArrayList[LearningNode]] } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala index 61dd1ef80..be034ca03 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala @@ -91,6 +91,7 @@ class KMeansDALImpl(var nClusters: Int, executorCores, computeDevice.ordinal(), gpuIndices, + kvsIPPort, result ) @@ -149,5 +150,6 @@ class KMeansDALImpl(var nClusters: Int, executorCores: Int, computeDeviceOrdinal: Int, gpuIndices: Array[Int], + kvsIPPort: String, result: KMeansResult): Long } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala index 071117cc0..06f9039ca 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala @@ -85,6 +85,7 @@ class PCADALImpl(val k: Int, executorCores, computeDevice.ordinal(), gpuIndices, + kvsIPPort, result ) @@ -221,5 +222,6 @@ class PCADALImpl(val k: Int, executorCores: Int, computeDeviceOrdinal: Int, gpuIndices: Array[Int], + kvsIPPort: String, result: PCAResult): Long } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala index 79243f988..6c45b5807 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala @@ -154,6 +154,7 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, executorCores, computeDevice.ordinal(), gpuIndices, + kvsIPPort, result ) @@ -200,6 +201,7 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, executorCores: Int, computeDeviceOrdinal: Int, gpuIndices: Array[Int], + kvsIPPort: String, result: LiRResult): Long } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala index 100be8823..e5742923b 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala @@ -105,6 +105,7 @@ class RandomForestRegressorDALImpl(val uid: String, maxbins, bootstrap, gpuIndices, + kvsIPPort, result) val computeEndTime = System.nanoTime() @@ -156,5 +157,6 @@ class RandomForestRegressorDALImpl(val uid: String, maxbins: Int, bootstrap: Boolean, gpuIndices: Array[Int], + kvsIPPort: String, result: RandomForestResult): java.util.HashMap[java.lang.Integer, java.util.ArrayList[LearningNode]] } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala index 04a3760bb..203c00796 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala @@ -75,6 +75,7 @@ class CorrelationDALImpl( executorCores, 
computeDevice.ordinal(), gpuIndices, + kvsIPPort, result ) @@ -125,5 +126,6 @@ class CorrelationDALImpl( executorCores: Int, computeDeviceOrdinal: Int, gpuIndices: Array[Int], + kvsIPPort: String, result: CorrelationResult): Long } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala index c8422b097..a3f65b8fd 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala @@ -76,6 +76,7 @@ class SummarizerDALImpl(val executorNum: Int, executorCores, computeDevice.ordinal(), gpuIndices, + kvsIPPort, result ) @@ -157,5 +158,6 @@ class SummarizerDALImpl(val executorNum: Int, executorCores: Int, computeDeviceOrdinal: Int, gpuIndices: Array[Int], + kvsIPPort: String, result: SummarizerResult): Long } From 12774a8aacdc47a68bdf9bf31b472930dbae25e0 Mon Sep 17 00:00:00 2001 From: minmingzhu Date: Fri, 23 Aug 2024 16:25:26 +0800 Subject: [PATCH 7/7] create kvs by store file --- mllib-dal/src/main/native/CorrelationImpl.cpp | 21 +-- .../native/DecisionForestClassifierImpl.cpp | 26 +-- .../native/DecisionForestRegressorImpl.cpp | 25 +-- mllib-dal/src/main/native/GPU.cpp | 122 +++++++++---- mllib-dal/src/main/native/GPU.h | 7 +- mllib-dal/src/main/native/KMeansImpl.cpp | 23 +-- .../src/main/native/LinearRegressionImpl.cpp | 38 ++-- mllib-dal/src/main/native/OneCCL.cpp | 3 +- mllib-dal/src/main/native/PCAImpl.cpp | 21 +-- mllib-dal/src/main/native/SummarizerImpl.cpp | 24 +-- mllib-dal/src/main/native/store.hpp | 165 ++++++++++++++++++ .../RandomForestClassifierDALImpl.scala | 6 +- .../oap/mllib/clustering/KMeansDALImpl.scala | 7 +- .../intel/oap/mllib/feature/PCADALImpl.scala | 7 +- .../regression/LinearRegressionDALImpl.scala | 7 +- .../RandomForestRegressorDALImpl.scala | 6 +- .../oap/mllib/stat/CorrelationDALImpl.scala | 7 +- .../oap/mllib/stat/SummarizerDALImpl.scala | 7 +- 18 files changed, 383 insertions(+), 139 deletions(-) create mode 100644 mllib-dal/src/main/native/store.hpp diff --git a/mllib-dal/src/main/native/CorrelationImpl.cpp b/mllib-dal/src/main/native/CorrelationImpl.cpp index dd21890f4..7fc481ba8 100644 --- a/mllib-dal/src/main/native/CorrelationImpl.cpp +++ b/mllib-dal/src/main/native/CorrelationImpl.cpp @@ -197,9 +197,10 @@ static void doCorrelationOneAPICompute( JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( - JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, - jint executorNum, jint executorCores, jint computeDeviceOrdinal, - jintArray gpuIdxArray, jstring ip_port, jobject resultObj) { + JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, + jlong numCols, jint executorNum, jint executorCores, + jint computeDeviceOrdinal, jintArray gpuIdxArray, jstring store_path, + jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); @@ -225,17 +226,17 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL( } #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { - logger::println( - logger::INFO, - "oneDAL (native): use GPU kernels with rankid %d", rank); + logger::println(logger::INFO, + "oneDAL (native): use GPU kernels with rankid %d", + rank); - const char *str = env->GetStringUTFChars(ip_port, nullptr); - ccl::string ccl_ip_port(str); - auto comm = 
createDalCommunicator(executorNum, rank, ccl_ip_port); + const char* path = env->GetStringUTFChars(store_path, nullptr); + ccl::string kvs_store_path(path); + auto comm = createDalCommunicator(executorNum, rank, kvs_store_path); doCorrelationOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj); - env->ReleaseStringUTFChars(ip_port, str); + env->ReleaseStringUTFChars(store_path, path); break; } #endif diff --git a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp index 64a0782ca..f56cf23b5 100644 --- a/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp @@ -300,26 +300,26 @@ static jobject doRFClassifierOneAPICompute( */ JNIEXPORT jobject JNICALL Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassifierTrainDAL( - JNIEnv *env, jobject obj, jint rank, jlong pNumTabFeature, jlong featureRows, - jlong featureCols, jlong pNumTabLabel, jlong labelCols, jint executorNum, - jint computeDeviceOrdinal, jint classCount, jint treeCount, - jint numFeaturesPerNode, jint minObservationsLeafNode, + JNIEnv *env, jobject obj, jint rank, jlong pNumTabFeature, + jlong featureRows, jlong featureCols, jlong pNumTabLabel, jlong labelCols, + jint executorNum, jint computeDeviceOrdinal, jint classCount, + jint treeCount, jint numFeaturesPerNode, jint minObservationsLeafNode, jint minObservationsSplitNode, jdouble minWeightFractionLeafNode, jdouble minImpurityDecreaseSplitNode, jint maxTreeDepth, jlong seed, - jint maxBins, jboolean bootstrap, jintArray gpuIdxArray, - jstring ip_port, jobject resultObj) { + jint maxBins, jboolean bootstrap, jintArray gpuIdxArray, jstring store_path, + jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels"); ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::gpu: { - logger::println( - logger::INFO, - "oneDAL (native): use GPU kernels with rankid %d", rank); + logger::println(logger::INFO, + "oneDAL (native): use GPU kernels with rankid %d", + rank); - const char *str = env->GetStringUTFChars(ip_port, nullptr); - ccl::string ccl_ip_port(str); - auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); + const char* path = env->GetStringUTFChars(store_path, nullptr); + ccl::string kvs_store_path(path); + auto comm = createDalCommunicator(executorNum, rank, kvs_store_path); jobject hashmapObj = doRFClassifierOneAPICompute( env, pNumTabFeature, featureRows, featureCols, pNumTabLabel, @@ -328,7 +328,7 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif minObservationsSplitNode, minWeightFractionLeafNode, minImpurityDecreaseSplitNode, maxTreeDepth, seed, maxBins, bootstrap, comm, resultObj); - env->ReleaseStringUTFChars(ip_port, str); + env->ReleaseStringUTFChars(store_path, path); return hashmapObj; } default: { diff --git a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp index c757d12a1..e560cb3ca 100644 --- a/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp +++ b/mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp @@ -292,11 +292,12 @@ static jobject doRFRegressorOneAPICompute( JNIEXPORT jobject JNICALL Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTrainDAL( - JNIEnv *env, jobject obj, jint rank, jlong pNumTabFeature, jlong featureRows, - jlong featureCols, jlong 
pNumTabLabel, jlong labelCols, jint executorNum, - jint computeDeviceOrdinal, jint treeCount, jint numFeaturesPerNode, - jint minObservationsLeafNode, jint maxTreeDepth, jlong seed, jint maxbins, - jboolean bootstrap, jintArray gpuIdxArray, jstring ip_port, jobject resultObj) { + JNIEnv *env, jobject obj, jint rank, jlong pNumTabFeature, + jlong featureRows, jlong featureCols, jlong pNumTabLabel, jlong labelCols, + jint executorNum, jint computeDeviceOrdinal, jint treeCount, + jint numFeaturesPerNode, jint minObservationsLeafNode, jint maxTreeDepth, + jlong seed, jint maxbins, jboolean bootstrap, jintArray gpuIdxArray, + jstring store_path, jobject resultObj) { logger::println(logger::INFO, "OneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); @@ -304,20 +305,20 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal); switch (device) { case ComputeDevice::gpu: { - logger::println( - logger::INFO, - "OneDAL (native): use GPU kernels with rankid %d", rank); + logger::println(logger::INFO, + "OneDAL (native): use GPU kernels with rankid %d", + rank); - const char *str = env->GetStringUTFChars(ip_port, nullptr); - ccl::string ccl_ip_port(str); - auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); + const char* path = env->GetStringUTFChars(store_path, nullptr); + ccl::string kvs_store_path(path); + auto comm = createDalCommunicator(executorNum, rank, kvs_store_path); jobject hashmapObj = doRFRegressorOneAPICompute( env, pNumTabFeature, featureRows, featureCols, pNumTabLabel, labelCols, executorNum, computeDeviceOrdinal, treeCount, numFeaturesPerNode, minObservationsLeafNode, maxTreeDepth, seed, maxbins, bootstrap, comm, resultObj); - env->ReleaseStringUTFChars(ip_port, str); + env->ReleaseStringUTFChars(store_path, path); return hashmapObj; } default: { diff --git a/mllib-dal/src/main/native/GPU.cpp b/mllib-dal/src/main/native/GPU.cpp index 2454fadef..019e574e8 100644 --- a/mllib-dal/src/main/native/GPU.cpp +++ b/mllib-dal/src/main/native/GPU.cpp @@ -4,11 +4,14 @@ #include "GPU.h" #include "Logger.h" +#define STORE_TIMEOUT_SEC 120 +#define KVS_CREATE_SUCCESS 0 +#define KVS_CREATE_FAILURE -1 typedef std::shared_ptr queuePtr; - static std::mutex g_mtx; static std::vector g_queueVector; +std::shared_ptr store; static std::vector get_gpus() { auto platforms = sycl::platform::get_platforms(); @@ -24,6 +27,55 @@ static std::vector get_gpus() { return {}; } +int create_kvs_by_store(std::shared_ptr store, int rank, + ccl::shared_ptr_class &kvs) { + logger::println(logger::INFO, "OneCCL (native): create_kvs_by_store "); + auto t1 = std::chrono::high_resolution_clock::now(); + ccl::kvs::address_type main_addr; + auto start = std::chrono::system_clock::now(); + if (rank == 0) { + kvs = ccl::create_main_kvs(); + main_addr = kvs->get_address(); + if (store->write((void *)main_addr.data(), main_addr.size()) < 0) { + logger::println( + logger::INFO, + "OneCCL (native): error occurred during write attempt"); + kvs.reset(); + return KVS_CREATE_FAILURE; + } + auto end = std::chrono::system_clock::now(); + auto exec_time = + (float)std::chrono::duration_cast(end - + start) + .count(); + logger::println(logger::INFO, + "OneCCL (native): write to store time %f secs", + exec_time / 1000); + } else { + if (store->read((void *)main_addr.data(), main_addr.size()) < 0) { + logger::println( + logger::INFO, + "OneCCL (native): error occurred during read attempt"); 
+ kvs.reset(); + return KVS_CREATE_FAILURE; + } + auto end = std::chrono::system_clock::now(); + auto exec_time = + (float)std::chrono::duration_cast(end - + start) + .count(); + logger::println(logger::INFO, + "OneCCL (native): read from store time %f secs", + exec_time / 1000); + kvs = ccl::create_kvs(main_addr); + } + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + (float)std::chrono::duration_cast(t2 - t1) + .count(); + return KVS_CREATE_SUCCESS; +} + static int getLocalRank(ccl::communicator &comm, int size, int rank) { /* Obtain local rank among nodes sharing the same host name */ char zero = static_cast(0); @@ -113,43 +165,45 @@ sycl::queue getQueue(const ComputeDevice device) { } } +preview::spmd::communicator +createDalCommunicator(const jint executorNum, const jint rank, + const ccl::string kvs_store_path) { + auto gpus = get_gpus(); -preview::spmd::communicator createDalCommunicator(const jint executorNum, const jint rank, const ccl::string ccl_ip_port){ - auto gpus = get_gpus(); - - auto t1 = std::chrono::high_resolution_clock::now(); - - ccl::init(); - - auto t2 = std::chrono::high_resolution_clock::now(); - auto duration = - (float)std::chrono::duration_cast(t2 - t1).count(); + auto t1 = std::chrono::high_resolution_clock::now(); - logger::println(logger::INFO, "OneCCL singleton init took %f secs", - duration / 1000); + ccl::init(); - t1 = std::chrono::high_resolution_clock::now(); + auto t2 = std::chrono::high_resolution_clock::now(); + auto duration = + (float)std::chrono::duration_cast(t2 - t1) + .count(); - auto kvs_attr = ccl::create_kvs_attr(); + logger::println(logger::INFO, "OneCCL singleton init took %f secs", + duration / 1000); - kvs_attr.set(ccl_ip_port); + t1 = std::chrono::high_resolution_clock::now(); + ccl::shared_ptr_class kvs; - ccl::shared_ptr_class kvs = ccl::create_main_kvs(kvs_attr); - - t2 = std::chrono::high_resolution_clock::now(); - duration = - (float)std::chrono::duration_cast(t2 - t1) - .count(); - logger::println(logger::INFO, "OneCCL (native): create kvs took %f secs", - duration / 1000); - sycl::queue queue{gpus[0]}; - t1 = std::chrono::high_resolution_clock::now(); - auto comm = - preview::spmd::make_communicator( - queue, executorNum, rank, kvs); - t2 = std::chrono::high_resolution_clock::now(); - duration = - (float)std::chrono::duration_cast(t2 - t1) - .count(); - return comm; + store = std::make_shared( + kvs_store_path, rank, std::chrono::seconds(STORE_TIMEOUT_SEC)); + if (create_kvs_by_store(store, rank, kvs) != KVS_CREATE_SUCCESS) { + logger::println(logger::INFO, "can not create kvs by store"); + throw std::runtime_error("Failed to create communicator"); + } + t2 = std::chrono::high_resolution_clock::now(); + duration = + (float)std::chrono::duration_cast(t2 - t1) + .count(); + logger::println(logger::INFO, "OneCCL (native): create kvs took %f secs", + duration / 1000); + sycl::queue queue{gpus[0]}; + t1 = std::chrono::high_resolution_clock::now(); + auto comm = preview::spmd::make_communicator( + queue, executorNum, rank, kvs); + t2 = std::chrono::high_resolution_clock::now(); + duration = + (float)std::chrono::duration_cast(t2 - t1) + .count(); + return comm; } diff --git a/mllib-dal/src/main/native/GPU.h b/mllib-dal/src/main/native/GPU.h index 83b3272f0..2798a0574 100644 --- a/mllib-dal/src/main/native/GPU.h +++ b/mllib-dal/src/main/native/GPU.h @@ -1,14 +1,15 @@ #pragma once +#include "Communicator.hpp" #include "service.h" +#include "store.hpp" #include #include #include #include #include -#include 
"Communicator.hpp" - sycl::queue getAssignedGPU(const ComputeDevice device, jint *gpu_indices); sycl::queue getQueue(const ComputeDevice device); -preview::spmd::communicator createDalCommunicator(jint executorNum, jint rank, ccl::string ccl_ip_port); +preview::spmd::communicator +createDalCommunicator(jint executorNum, jint rank, ccl::string ccl_ip_port); diff --git a/mllib-dal/src/main/native/KMeansImpl.cpp b/mllib-dal/src/main/native/KMeansImpl.cpp index b868fd475..3186a184d 100644 --- a/mllib-dal/src/main/native/KMeansImpl.cpp +++ b/mllib-dal/src/main/native/KMeansImpl.cpp @@ -305,10 +305,11 @@ static jlong doKMeansOneAPICompute( */ JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCenters( - JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, - jlong pNumTabCenters, jint clusterNum, jdouble tolerance, jint iterationNum, - jint executorNum, jint executorCores, jint computeDeviceOrdinal, - jintArray gpuIdxArray, jstring ip_port, jobject resultObj) { + JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, + jlong numCols, jlong pNumTabCenters, jint clusterNum, jdouble tolerance, + jint iterationNum, jint executorNum, jint executorCores, + jint computeDeviceOrdinal, jintArray gpuIdxArray, jstring store_path, + jobject resultObj) { logger::println(logger::INFO, "OneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); @@ -338,19 +339,19 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe } #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { - logger::println( - logger::INFO, - "OneDAL (native): use GPU kernels with rankid %d", rank); + logger::println(logger::INFO, + "OneDAL (native): use GPU kernels with rankid %d", + rank); - const char *str = env->GetStringUTFChars(ip_port, nullptr); - ccl::string ccl_ip_port(str); - auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); + const char* path = env->GetStringUTFChars(store_path, nullptr); + ccl::string kvs_store_path(str); + auto comm = createDalCommunicator(executorNum, rank, kvs_store_path); ret = doKMeansOneAPICompute(env, pNumTabData, numRows, numCols, pNumTabCenters, clusterNum, tolerance, iterationNum, comm, resultObj); - env->ReleaseStringUTFChars(ip_port, str); + env->ReleaseStringUTFChars(store_path, path); break; } #endif diff --git a/mllib-dal/src/main/native/LinearRegressionImpl.cpp b/mllib-dal/src/main/native/LinearRegressionImpl.cpp index 7518a23b6..27baf6d1c 100644 --- a/mllib-dal/src/main/native/LinearRegressionImpl.cpp +++ b/mllib-dal/src/main/native/LinearRegressionImpl.cpp @@ -214,12 +214,12 @@ ridge_regression_compute(size_t rankId, ccl::communicator &comm, } #ifdef CPU_GPU_PROFILE -static jlong doLROneAPICompute(JNIEnv *env, size_t rankId, - preview::spmd::communicator comm, - jlong pNumTabFeature, jlong featureRows, - jlong featureCols, jlong pNumTabLabel, - jlong labelCols, jboolean jfitIntercept, - jint executorNum, jobject resultObj) { +static jlong doLROneAPICompute( + JNIEnv *env, size_t rankId, + preview::spmd::communicator comm, + jlong pNumTabFeature, jlong featureRows, jlong featureCols, + jlong pNumTabLabel, jlong labelCols, jboolean jfitIntercept, + jint executorNum, jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): GPU compute start , rankid %d", rankId); const bool isRoot = (rankId == ccl_root); @@ -262,7 +262,7 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra jlong 
featureCols, jlong label, jlong labelCols, jboolean fitIntercept, jdouble regParam, jdouble elasticNetParam, jint executorNum, jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray, - jstring ip_port, jobject resultObj) { + jstring store_path, jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", @@ -277,18 +277,18 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra jlong resultptr = 0L; if (useGPU) { #ifdef CPU_GPU_PROFILE - logger::println( - logger::INFO, - "oneDAL (native): use GPU kernels with rankid %d", rank); - - const char *str = env->GetStringUTFChars(ip_port, nullptr); - ccl::string ccl_ip_port(str); - auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); - - resultptr = doLROneAPICompute( - env, rank, comm, feature, featureRows, featureCols, - label, labelCols, fitIntercept, executorNum, resultObj); - env->ReleaseStringUTFChars(ip_port, str); + logger::println(logger::INFO, + "oneDAL (native): use GPU kernels with rankid %d", + rank); + + const char* path = env->GetStringUTFChars(store_path, nullptr); + ccl::string kvs_store_path(path); + auto comm = createDalCommunicator(executorNum, rank, kvs_store_path); + + resultptr = doLROneAPICompute(env, rank, comm, feature, featureRows, + featureCols, label, labelCols, + fitIntercept, executorNum, resultObj); + env->ReleaseStringUTFChars(store_path, path); #endif } else { ccl::communicator &cclComm = getComm(); diff --git a/mllib-dal/src/main/native/OneCCL.cpp b/mllib-dal/src/main/native/OneCCL.cpp index 988dd844a..e9a164507 100644 --- a/mllib-dal/src/main/native/OneCCL.cpp +++ b/mllib-dal/src/main/native/OneCCL.cpp @@ -55,8 +55,7 @@ JNIEXPORT jint JNICALL Java_com_intel_oap_mllib_OneCCL_00024_c_1init( auto t1 = std::chrono::high_resolution_clock::now(); ccl::init(); - auto t2 = std::chrono::high_resolution_clock::now(); - + auto t2 = std::chrono::high_resolution_clock::now(); const char *str = env->GetStringUTFChars(ip_port, 0); ccl::string ccl_ip_port(str); diff --git a/mllib-dal/src/main/native/PCAImpl.cpp b/mllib-dal/src/main/native/PCAImpl.cpp index 67949341d..640c95477 100644 --- a/mllib-dal/src/main/native/PCAImpl.cpp +++ b/mllib-dal/src/main/native/PCAImpl.cpp @@ -250,9 +250,10 @@ static void doPCAOneAPICompute( JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( - JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, - jint executorNum, jint executorCores, jint computeDeviceOrdinal, - jintArray gpuIdxArray, jstring ip_port, jobject resultObj) { + JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, + jlong numCols, jint executorNum, jint executorCores, + jint computeDeviceOrdinal, jintArray gpuIdxArray, jstring store_path, + jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); @@ -277,16 +278,16 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL( } #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { - logger::println( - logger::INFO, - "oneDAL (native): use GPU kernels with rankid %d", rank); + logger::println(logger::INFO, + "oneDAL (native): use GPU kernels with rankid %d", + rank); - const char *str = env->GetStringUTFChars(ip_port, nullptr); - ccl::string ccl_ip_port(str); - auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); + const char* path = env->GetStringUTFChars(store_path, nullptr); + ccl::string 
kvs_store_path(path); + auto comm = createDalCommunicator(executorNum, rank, kvs_store_path); doPCAOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj); - env->ReleaseStringUTFChars(ip_port, str); + env->ReleaseStringUTFChars(store_path, path); break; } #endif diff --git a/mllib-dal/src/main/native/SummarizerImpl.cpp b/mllib-dal/src/main/native/SummarizerImpl.cpp index 852db8b03..6e89f293f 100644 --- a/mllib-dal/src/main/native/SummarizerImpl.cpp +++ b/mllib-dal/src/main/native/SummarizerImpl.cpp @@ -268,9 +268,10 @@ static void doSummarizerOneAPICompute( JNIEXPORT jlong JNICALL Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( - JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, jlong numCols, - jint executorNum, jint executorCores, jint computeDeviceOrdinal, - jintArray gpuIdxArray, jstring ip_port, jobject resultObj) { + JNIEnv *env, jobject obj, jint rank, jlong pNumTabData, jlong numRows, + jlong numCols, jint executorNum, jint executorCores, + jint computeDeviceOrdinal, jintArray gpuIdxArray, jstring store_path, + jobject resultObj) { logger::println(logger::INFO, "oneDAL (native): use DPC++ kernels; device %s", ComputeDeviceString[computeDeviceOrdinal].c_str()); @@ -295,21 +296,22 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL( } #ifdef CPU_GPU_PROFILE case ComputeDevice::gpu: { - logger::println( - logger::INFO, - "oneDAL (native): use GPU kernels with rankid %d", rank); - const char *str = env->GetStringUTFChars(ip_port, nullptr); - ccl::string ccl_ip_port(str); - auto comm = createDalCommunicator(executorNum, rank, ccl_ip_port); + logger::println(logger::INFO, + "oneDAL (native): use GPU kernels with rankid %d", + rank); + const char* path = env->GetStringUTFChars(store_path, nullptr); + ccl::string kvs_store_path(path); + auto comm = createDalCommunicator(executorNum, rank, kvs_store_path); doSummarizerOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj); - env->ReleaseStringUTFChars(ip_port, str); + env->ReleaseStringUTFChars(store_path, path); break; } #endif default: { - deviceError("Summarizer", ComputeDeviceString[computeDeviceOrdinal].c_str()); + deviceError("Summarizer", + ComputeDeviceString[computeDeviceOrdinal].c_str()); } } return 0; diff --git a/mllib-dal/src/main/native/store.hpp b/mllib-dal/src/main/native/store.hpp new file mode 100644 index 000000000..9a39a4199 --- /dev/null +++ b/mllib-dal/src/main/native/store.hpp @@ -0,0 +1,165 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#define CHECK(ret, msg) \ + if ((ret) < 0) { \ + throw std::system_error(errno, std::system_category(), msg); \ + } + +class base_store { +public: + base_store(){}; + + virtual ~base_store(){}; + + virtual int write(const void* data, size_t size) = 0; + + virtual int read(void* data, size_t size) = 0; +}; + +class file_store : public base_store { +public: + file_store(const file_store& other) = delete; + file_store& operator=(const file_store& other) = delete; + file_store(std::string path, int rank, const std::chrono::seconds& timeout) + : base_store(), + path(path), + rank(rank), + pos(0), + fd(-1), + timeout(timeout){}; + + virtual ~file_store() { + if (rank == 0) + std::remove(path.c_str()); + }; + + void release_resources() { + try { + CHECK(flock(fd, LOCK_UN), "Unlocking file: "); + } + catch (const std::system_error& e) { + fprintf(stderr, "%d\n%s\n", e.code().value(), e.what()); + } + + close(fd); + fd = -1; + } + + int write(const void* data, size_t 
size) override { + int ret = 0; + std::unique_lock locker(mtx); + fd = open(path.c_str(), O_CREAT | O_RDWR, 0644); + CHECK(fd, "Open file to write into (" + path + "): "); + + try { + CHECK(flock(fd, LOCK_EX), "Setting exclusive rights for writing to the file: "); + CHECK(lseek(fd, 0, SEEK_END), "Setting a cursor at the EOF: "); + + // writing into the file + while (size > 0) { + auto wr_v = ::write(fd, data, size); + CHECK(wr_v, "An error occured while writing to the file: "); + data = (uint8_t*)data + wr_v; + size -= wr_v; + } + CHECK(fsync(fd), "Flushing file content: "); + } + catch (const std::system_error& e) { + fprintf(stderr, "%d\n%s\n", e.code().value(), e.what()); + ret = -1; + } + + release_resources(); + return ret; + }; + + int read(void* data, size_t size) override { + const auto time_start = std::chrono::steady_clock::now(); + while (1) { + std::unique_lock locker(mtx); + fd = open(path.c_str(), O_RDONLY); + if (fd < 0 && errno == ENOENT) { + // file might not exist yet + const auto time_passed = std::chrono::duration_cast( + std::chrono::steady_clock::now() - time_start); + if (time_passed > timeout) { + throw std::runtime_error("Timeout " + std::to_string(timeout.count()) + + "s waiting for the file " + path + " to open"); + } + std::this_thread::sleep_for(std::chrono::milliseconds(10 * rank)); + continue; + } + else { + CHECK(fd, "Open the file to read from (" + path + "): "); + } + + try { + CHECK(flock(fd, LOCK_SH), "Setting shared rights for reading the file: "); + + auto start = lseek(fd, 0, SEEK_SET); + CHECK(start, "Setting the cursor at the beginning of the file: "); + + // find the real size of the file + auto len = lseek(fd, 0, SEEK_END); + CHECK(len, "Setting the cursor at the EOF: "); + + if (len == start) { + // nothing has been written yet + release_resources(); + locker.unlock(); + const auto time_passed = std::chrono::duration_cast( + std::chrono::steady_clock::now() - time_start); + if (time_passed > timeout) { + throw std::runtime_error("Timeout " + std::to_string(timeout.count()) + + "s waiting for the file " + path + " to read"); + } + std::this_thread::sleep_for(std::chrono::milliseconds(10 * rank)); + continue; + } + + // start from where we stopped last time + start = lseek(fd, pos, SEEK_SET); + CHECK(start, "Setting the cursor at the last known position: "); + + // if there are still some bytes to read + if (len > start && size > 0) { + size -= len; + while (len > 0) { + auto rd = ::read(fd, data, len); + CHECK(rd, "An error occured while reading the file: ") + data = (uint8_t*)data + rd; + len -= rd; + } + pos = lseek(fd, 0, SEEK_CUR); + CHECK(pos, "Saving the cursor current position: "); + } + else { + release_resources(); + break; + } + } + catch (const std::system_error& e) { + fprintf(stderr, "%d\n%s\n", e.code().value(), e.what()); + release_resources(); + return -1; + } + } + return 0; + }; + +protected: + std::string path; + int rank; + off_t pos; + int fd; + std::chrono::seconds timeout; + std::mutex mtx; +}; \ No newline at end of file diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala index f0ac1f0b5..1636c281d 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/classification/RandomForestClassifierDALImpl.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.Dataset import 
org.apache.spark.ml.tree import org.apache.spark.mllib.tree.model.ImpurityStats +import java.time.Instant import java.util import java.util.{ArrayList, Map} import scala.collection.mutable.HashMap @@ -57,6 +58,7 @@ class RandomForestClassifierDALImpl(val uid: String, val sparkContext = labeledPoints.rdd.sparkContext val rfcTimer = new Utils.AlgoTimeMetrics("RandomForestClassifier", sparkContext) val useDevice = sparkContext.getConf.get("spark.oap.mllib.device", Utils.DefaultComputeDevice) + val storePath = sparkContext.getConf.get("spark.oap.mllib.kvsStorePath") + "/" + Instant.now() // used run Random Forest unit test val isTest = sparkContext.getConf.getBoolean("spark.oap.mllib.isTest", false) val computeDevice = Common.ComputeDevice.getDeviceByName(useDevice) @@ -114,7 +116,7 @@ class RandomForestClassifierDALImpl(val uid: String, maxBins, bootstrap, gpuIndices, - kvsIPPort, + storePath, result) val computeEndTime = System.nanoTime() @@ -160,7 +162,7 @@ class RandomForestClassifierDALImpl(val uid: String, maxBins: Int, bootstrap: Boolean, gpuIndices: Array[Int], - kvsIPPort: String, + storePath: String, result: RandomForestResult): java.util.HashMap[java.lang.Integer, java.util.ArrayList[LearningNode]] } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala index be034ca03..9d829220a 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/clustering/KMeansDALImpl.scala @@ -27,6 +27,8 @@ import org.apache.spark.mllib.clustering.{KMeansModel => MLlibKMeansModel} import org.apache.spark.mllib.linalg.{Vector => OldVector, Vectors => OldVectors} import org.apache.spark.rdd.RDD +import java.time.Instant + class KMeansDALImpl(var nClusters: Int, var maxIterations: Int, var tolerance: Double, @@ -40,6 +42,7 @@ class KMeansDALImpl(var nClusters: Int, val sparkContext = data.sparkContext val kmeansTimer = new Utils.AlgoTimeMetrics("KMeans", sparkContext) val useDevice = sparkContext.getConf.get("spark.oap.mllib.device", Utils.DefaultComputeDevice) + val storePath = sparkContext.getConf.get("spark.oap.mllib.kvsStorePath") + "/" + Instant.now() val computeDevice = Common.ComputeDevice.getDeviceByName(useDevice) kmeansTimer.record("Preprocessing") @@ -91,7 +94,7 @@ class KMeansDALImpl(var nClusters: Int, executorCores, computeDevice.ordinal(), gpuIndices, - kvsIPPort, + storePath, result ) @@ -150,6 +153,6 @@ class KMeansDALImpl(var nClusters: Int, executorCores: Int, computeDeviceOrdinal: Int, gpuIndices: Array[Int], - kvsIPPort: String, + storePath: String, result: KMeansResult): Long } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala index 06f9039ca..042b54d23 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/feature/PCADALImpl.scala @@ -32,6 +32,8 @@ import java.util.Arrays import com.intel.oneapi.dal.table.{Common, HomogenTable, RowAccessor} import org.apache.spark.storage.StorageLevel +import java.time.Instant + class PCADALModel private[mllib] ( val k: Int, val pc: OldDenseMatrix, @@ -47,6 +49,7 @@ class PCADALImpl(val k: Int, val sparkContext = normalizedData.sparkContext val pcaTimer = new Utils.AlgoTimeMetrics("PCA", sparkContext) val useDevice = sparkContext.getConf.get("spark.oap.mllib.device", 
Utils.DefaultComputeDevice) + val storePath = sparkContext.getConf.get("spark.oap.mllib.kvsStorePath") + "/" + Instant.now() val computeDevice = Common.ComputeDevice.getDeviceByName(useDevice) pcaTimer.record("Preprocessing") @@ -85,7 +88,7 @@ class PCADALImpl(val k: Int, executorCores, computeDevice.ordinal(), gpuIndices, - kvsIPPort, + storePath, result ) @@ -222,6 +225,6 @@ class PCADALImpl(val k: Int, executorCores: Int, computeDeviceOrdinal: Int, gpuIndices: Array[Int], - kvsIPPort: String, + storePath: String, result: PCAResult): Long } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala index 6c45b5807..ab5a0aac1 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/LinearRegressionDALImpl.scala @@ -29,6 +29,8 @@ import org.apache.spark.mllib.linalg.{Vector => OldVector, Vectors => OldVectors import org.apache.spark.sql.Dataset import org.apache.spark.rdd.RDD +import java.time.Instant + /** * Model fitted by [[LinearRegressionDALImpl]]. @@ -73,6 +75,7 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, val sparkContext = labeledPoints.sparkSession.sparkContext val lrTimer = new Utils.AlgoTimeMetrics("LinearRegression", sparkContext) val useDevice = sparkContext.getConf.get("spark.oap.mllib.device", Utils.DefaultComputeDevice) + val storePath = sparkContext.getConf.get("spark.oap.mllib.kvsStorePath") + "/" + Instant.now() val computeDevice = Common.ComputeDevice.getDeviceByName(useDevice) val isTest = sparkContext.getConf.getBoolean("spark.oap.mllib.isTest", false) @@ -154,7 +157,7 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, executorCores, computeDevice.ordinal(), gpuIndices, - kvsIPPort, + storePath, result ) @@ -201,7 +204,7 @@ class LinearRegressionDALImpl( val fitIntercept: Boolean, executorCores: Int, computeDeviceOrdinal: Int, gpuIndices: Array[Int], - kvsIPPort: String, + storePath: String, result: LiRResult): Long } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala index e5742923b..1f03d4206 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/regression/RandomForestRegressorDALImpl.scala @@ -25,6 +25,7 @@ import org.apache.spark.ml.classification.DecisionTreeClassificationModel import org.apache.spark.ml.linalg.Matrix import org.apache.spark.sql.Dataset +import java.time.Instant import java.util import scala.collection.JavaConversions._ @@ -49,6 +50,7 @@ class RandomForestRegressorDALImpl(val uid: String, val sparkContext = labeledPoints.rdd.sparkContext val rfrTimer = new Utils.AlgoTimeMetrics("RandomForestRegressor", sparkContext) val useDevice = sparkContext.getConf.get("spark.oap.mllib.device", Utils.DefaultComputeDevice) + val storePath = sparkContext.getConf.get("spark.oap.mllib.kvsStorePath") + "/" + Instant.now() val computeDevice = Common.ComputeDevice.getDeviceByName(useDevice) // used run Random Forest unit test val isTest = sparkContext.getConf.getBoolean("spark.oap.mllib.isTest", false) @@ -105,7 +107,7 @@ class RandomForestRegressorDALImpl(val uid: String, maxbins, bootstrap, gpuIndices, - kvsIPPort, + storePath, result) val computeEndTime = 
System.nanoTime() @@ -157,6 +159,6 @@ class RandomForestRegressorDALImpl(val uid: String, maxbins: Int, bootstrap: Boolean, gpuIndices: Array[Int], - kvsIPPort: String, + storePath: String, result: RandomForestResult): java.util.HashMap[java.lang.Integer, java.util.ArrayList[LearningNode]] } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala index 203c00796..ab40a74d5 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/CorrelationDALImpl.scala @@ -24,6 +24,8 @@ import org.apache.spark.internal.Logging import org.apache.spark.ml.linalg.{Matrix, Vector} import org.apache.spark.rdd.RDD +import java.time.Instant + class CorrelationDALImpl( val executorNum: Int, val executorCores: Int) @@ -33,6 +35,7 @@ class CorrelationDALImpl( val sparkContext = data.sparkContext val corTimer = new Utils.AlgoTimeMetrics("Correlation", sparkContext) val useDevice = sparkContext.getConf.get("spark.oap.mllib.device", Utils.DefaultComputeDevice) + val storePath = sparkContext.getConf.get("spark.oap.mllib.kvsStorePath") + "/" + Instant.now() val computeDevice = Common.ComputeDevice.getDeviceByName(useDevice) corTimer.record("Preprocessing") @@ -75,7 +78,7 @@ class CorrelationDALImpl( executorCores, computeDevice.ordinal(), gpuIndices, - kvsIPPort, + storePath, result ) @@ -126,6 +129,6 @@ class CorrelationDALImpl( executorCores: Int, computeDeviceOrdinal: Int, gpuIndices: Array[Int], - kvsIPPort: String, + storePath: String, result: CorrelationResult): Long } diff --git a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala index a3f65b8fd..294f6ec0e 100644 --- a/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala +++ b/mllib-dal/src/main/scala/com/intel/oap/mllib/stat/SummarizerDALImpl.scala @@ -26,6 +26,8 @@ import org.apache.spark.rdd.RDD import com.intel.oap.mllib.Utils.getOneCCLIPPort import com.intel.oneapi.dal.table.Common +import java.time.Instant + class SummarizerDALImpl(val executorNum: Int, val executorCores: Int) extends Serializable with Logging { @@ -34,6 +36,7 @@ class SummarizerDALImpl(val executorNum: Int, val sparkContext = data.sparkContext val sumTimer = new Utils.AlgoTimeMetrics("Summarizer", sparkContext) val useDevice = sparkContext.getConf.get("spark.oap.mllib.device", Utils.DefaultComputeDevice) + val storePath = sparkContext.getConf.get("spark.oap.mllib.kvsStorePath") + "/" + Instant.now() val computeDevice = Common.ComputeDevice.getDeviceByName(useDevice) sumTimer.record("Preprocessing") @@ -76,7 +79,7 @@ class SummarizerDALImpl(val executorNum: Int, executorCores, computeDevice.ordinal(), gpuIndices, - kvsIPPort, + storePath, result ) @@ -158,6 +161,6 @@ class SummarizerDALImpl(val executorNum: Int, executorCores: Int, computeDeviceOrdinal: Int, gpuIndices: Array[Int], - kvsIPPort: String, + storePath: String, result: SummarizerResult): Long }
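
Usage note: the hunks above replace the ip:port KVS rendezvous with a file-backed store. Rank 0 writes the main KVS address into the file named by spark.oap.mllib.kvsStorePath (suffixed with a timestamp on the Scala side), and the remaining ranks poll that file until the address appears and then attach to the same KVS. Below is only a minimal sketch of how the new entry points fit together; it assumes the oneCCL/oneDAL headers plus the GPU.h and store.hpp introduced by this patch are on the include path, and that rank, executorNum and storePath would normally arrive through the JNI calls shown above. The driver function name is hypothetical.

    #include <string>
    #include "GPU.h"     // createDalCommunicator(), as updated by this patch
    #include "store.hpp" // file_store, added by this patch

    // Hypothetical driver: every rank calls this with the same storePath,
    // e.g. the "<spark.oap.mllib.kvsStorePath>/<Instant.now()>" value built in Scala.
    void trainWithFileStoreKvs(int rank, int executorNum,
                               const std::string &storePath) {
        // createDalCommunicator() opens a file_store over storePath, creates
        // (rank 0) or reads (other ranks) the main KVS address through it, and
        // wraps the resulting KVS in a oneDAL SPMD communicator.
        auto comm = createDalCommunicator(executorNum, rank,
                                          ccl::string(storePath));

        // comm can now be handed to the do*OneAPICompute() helpers above,
        // e.g. doSummarizerOneAPICompute(env, pNumTabData, numRows, numCols,
        //      comm, resultObj).
        (void)comm;
    }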