Skip to content

Commit

Permalink
merging latest changes from master
Browse files Browse the repository at this point in the history
  • Loading branch information
skaarthik authored Oct 11, 2016
2 parents 013278b + fd386b0 commit da69c1f
Show file tree
Hide file tree
Showing 132 changed files with 3,429 additions and 352 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
*.class
*.dll
*.exe
*.pyc

# Packages #
############
Expand Down Expand Up @@ -40,6 +41,10 @@ build/dependencies/
*.log
lib/

# Local databases used for Dataset/frames #
###########################################
scala/metastore_db/

# Generated Files #
############
SparkCLRCodeCoverage.xml
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ Refer to the [docs folder](docs) for design overview and other info on Mobius
|Build & run unit tests |[Build in Windows](notes/windows-instructions.md#building-mobius) |[Build in Linux](notes/linux-instructions.md#building-mobius-in-linux) |
|Run samples (functional tests) in local mode |[Samples in Windows](notes/windows-instructions.md#running-samples) |[Samples in Linux](notes/linux-instructions.md#running-mobius-samples-in-linux) |
|Run examples in local mode |[Examples in Windows](/notes/running-mobius-app.md#running-mobius-examples-in-local-mode) |[Examples in Linux](notes/linux-instructions.md#running-mobius-examples-in-linux) |
|Run Mobius app |<ul><li>[Standalone cluster](notes/running-mobius-app.md#standalone-cluster)</li><li>[YARN cluster](notes/running-mobius-app.md#yarn-cluster)</li></ul> |<ul><li>[Linux cluster](notes/linux-instructions.md#running-mobius-applications-in-linux)</li><li>[Azure HDInsight Spark Cluster](/notes/linux-instructions.md#mobius-in-azure-hdinsight-spark-cluster)</li><li>[AWS EMR Spark Cluster](/notes/linux-instructions.md#mobius-in-amazon-web-services-emr-spark-cluster)</li> |
|Run Mobius app |<ul><li>[Standalone cluster](notes/running-mobius-app.md#standalone-cluster)</li><li>[YARN cluster](notes/running-mobius-app.md#yarn-cluster)</li></ul> |<ul><li>[Linux cluster](notes/linux-instructions.md#running-mobius-applications-in-linux)</li><li>[Azure HDInsight Spark Cluster](/notes/mobius-in-hdinsight.md)</li><li>[AWS EMR Spark Cluster](/notes/linux-instructions.md#mobius-in-amazon-web-services-emr-spark-cluster)</li></ul> |
|Run Mobius Shell |<ul><li>[Local](notes/mobius-shell.md#run-shell)</li><li>[YARN](notes/mobius-shell.md#run-shell)</li></ul> | Not supported yet |

### Useful Links
Expand All @@ -122,7 +122,7 @@ Refer to the [docs folder](docs) for design overview and other info on Mobius

## Supported Spark Versions

Mobius is built and tested with Apache Spark [1.4.1](https://github.com/Microsoft/Mobius/tree/branch-1.4), [1.5.2](https://github.com/Microsoft/Mobius/tree/branch-1.5) and [1.6.*](https://github.com/Microsoft/Mobius/tree/branch-1.6).
Mobius is built and tested with Apache Spark [1.4.1](https://github.com/Microsoft/Mobius/tree/branch-1.4), [1.5.2](https://github.com/Microsoft/Mobius/tree/branch-1.5), [1.6.*](https://github.com/Microsoft/Mobius/tree/branch-1.6) and [2.0](https://github.com/Microsoft/Mobius/tree/branch-2.0).

## Releases

Expand Down
5 changes: 5 additions & 0 deletions build/Build.cmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
@setlocal
@echo OFF

rem
rem Copyright (c) Microsoft. All rights reserved.
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
rem

if "%1" == "csharp" set buildCSharp=true

SET CMDHOME=%~dp0
Expand Down
49 changes: 48 additions & 1 deletion build/build.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,49 @@
#!/bin/bash

#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
#

export FWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

[ ! -d "$FWDIR/dependencies" ] && mkdir "$FWDIR/dependencies"

echo "Download Mobius external dependencies"
pushd "$FWDIR/dependencies"

# Download an external dependency into the current directory unless a
# non-empty copy is already present.
#   $1 - URL to download from
#   $2 - file name to save the download as
# On failure the partial file is removed, the directory pushed by the caller
# is popped, and the script exits with status 1.
download_dependency() {
  # Left as plain (non-local) assignments, as in the original.
  LINK=$1
  JAR=$2

  # -s (exists and is non-empty) rather than -e, so an empty file left behind
  # by an earlier failed download is fetched again instead of being reused.
  if [ ! -s "$JAR" ];
  then
    echo "Downloading $JAR"

    # wget -O creates the output file even when the download fails, so the
    # file's mere existence proves nothing: check wget's exit status and
    # require a non-empty result.
    if ! wget -q "$LINK" -O "$JAR" || [ ! -s "$JAR" ];
    then
      echo "Cannot download external dependency $JAR from $LINK"
      # Do not leave a truncated jar behind for later build steps to pick up.
      rm -f "$JAR"
      popd
      exit 1
    fi
  fi
}

# External jars to download. The arguments are quoted because the URLs contain
# '?', which an unquoted expansion would expose to pathname expansion.
SPARK_CSV_LINK="http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.4.0/spark-csv_2.10-1.4.0.jar"
SPARK_CSV_JAR="spark-csv_2.10-1.4.0.jar"
download_dependency "$SPARK_CSV_LINK" "$SPARK_CSV_JAR"

COMMONS_CSV_LINK="http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.4/commons-csv-1.4.jar"
COMMONS_CSV_JAR="commons-csv-1.4.jar"
download_dependency "$COMMONS_CSV_LINK" "$COMMONS_CSV_JAR"

SPARK_STREAMING_KAFKA_LINK="http://search.maven.org/remotecontent?filepath=org/apache/spark/spark-streaming-kafka-0-8-assembly_2.11/2.0.0/spark-streaming-kafka-0-8-assembly_2.11-2.0.0.jar"
SPARK_STREAMING_KAFKA_JAR="spark-streaming-kafka-0-8-assembly_2.11-2.0.0.jar"
download_dependency "$SPARK_STREAMING_KAFKA_LINK" "$SPARK_STREAMING_KAFKA_JAR"

popd

export SPARKCLR_HOME="$FWDIR/runtime"
echo "SPARKCLR_HOME=$SPARKCLR_HOME"

Expand All @@ -17,6 +59,11 @@ fi
[ ! -d "$SPARKCLR_HOME/lib" ] && mkdir "$SPARKCLR_HOME/lib"
[ ! -d "$SPARKCLR_HOME/samples" ] && mkdir "$SPARKCLR_HOME/samples"
[ ! -d "$SPARKCLR_HOME/scripts" ] && mkdir "$SPARKCLR_HOME/scripts"
[ ! -d "$SPARKCLR_HOME/dependencies" ] && mkdir "$SPARKCLR_HOME/dependencies"

echo "Assemble Mobius external dependencies"
# Quote $FWDIR so a checkout path containing spaces is not word-split; the
# glob stays outside the quotes so it still expands to every downloaded file.
# Stop the build if the copy fails.
cp "$FWDIR"/dependencies/* "$SPARKCLR_HOME/dependencies/" || exit 1

echo "Assemble Mobius Scala components"
pushd "$FWDIR/../scala"
Expand All @@ -31,7 +78,7 @@ mvn clean -q
# build the package
mvn package -Puber-jar -q

if [ $? -ne 0 ]
if [ $? -ne 0 ];
then
echo "Build Mobius Scala components failed, stop building."
popd
Expand Down
5 changes: 5 additions & 0 deletions build/copyjar.ps1
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
#

function Get-ScriptDirectory
{
$Invocation = (Get-Variable MyInvocation -Scope 1).Value;
Expand Down
10 changes: 8 additions & 2 deletions build/localmode/RunSamples.cmd
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
@echo OFF

rem
rem Copyright (c) Microsoft. All rights reserved.
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
rem

setlocal enabledelayedexpansion

SET CMDHOME=%~dp0
Expand Down Expand Up @@ -62,8 +68,8 @@ set SPARKCLR_HOME=%CMDHOME%\..\runtime

@rem spark-csv package and its dependency are required for DataFrame operations in Mobius
set SPARKCLR_EXT_PATH=%SPARKCLR_HOME%\dependencies
set SPARKCSV_JAR1PATH=%SPARKCLR_EXT_PATH%\spark-csv_2.10-1.3.0.jar
set SPARKCSV_JAR2PATH=%SPARKCLR_EXT_PATH%\commons-csv-1.1.jar
set SPARKCSV_JAR1PATH=%SPARKCLR_EXT_PATH%\spark-csv_2.10-1.4.0.jar
set SPARKCSV_JAR2PATH=%SPARKCLR_EXT_PATH%\commons-csv-1.4.jar
set SPARKCLR_EXT_JARS=%SPARKCSV_JAR1PATH%,%SPARKCSV_JAR2PATH%

@rem RunSamples.cmd is in local mode, should not load Hadoop or Yarn cluster config. Disable Hadoop/Yarn conf dir.
Expand Down
13 changes: 9 additions & 4 deletions build/localmode/downloadtools.ps1
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
#

#
# Input -
# (1) "stage" parameter, accepts either "build" or "run"
Expand Down Expand Up @@ -342,14 +347,14 @@ function Download-ExternalDependencies

$readMeStream.WriteLine("------------ Dependencies for CSV parsing in Mobius DataFrame API -----------------------------")
# Downloading spark-csv package and its dependency. These packages are required for DataFrame operations in Mobius
$url = "http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.3.0/spark-csv_2.10-1.3.0.jar"
$output="$scriptDir\..\dependencies\spark-csv_2.10-1.3.0.jar"
$url = "http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.4.0/spark-csv_2.10-1.4.0.jar"
$output="$scriptDir\..\dependencies\spark-csv_2.10-1.4.0.jar"
Download-File $url $output
Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies"
$readMeStream.WriteLine("$url")

$url = "http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.1/commons-csv-1.1.jar"
$output="$scriptDir\..\dependencies\commons-csv-1.1.jar"
$url = "http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.4/commons-csv-1.4.jar"
$output="$scriptDir\..\dependencies\commons-csv-1.4.jar"
Download-File $url $output
Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies"
$readMeStream.WriteLine("$url")
Expand Down
5 changes: 5 additions & 0 deletions build/localmode/dumpsoftware.ps1
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
#

$x64items = @(Get-ChildItem "HKLM:SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall")
$x64items + @(Get-ChildItem "HKLM:SOFTWARE\wow6432node\Microsoft\Windows\CurrentVersion\Uninstall") `
| ForEach-object { Get-ItemProperty Microsoft.PowerShell.Core\Registry::$_ } `
Expand Down
5 changes: 5 additions & 0 deletions build/localmode/nugetpack.ps1
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
#

$root = (split-path -parent $MyInvocation.MyCommand.Definition) + '\..\..'

# expected tagname: v{version-string}. E.g., "v1.5.2-snapshot-2", "v1.5.2-prerelease-1"
Expand Down
5 changes: 5 additions & 0 deletions build/localmode/patchpom.ps1
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
#

#
# Input -
# "targetPom" parameter, target Pom.xml file
Expand Down
5 changes: 5 additions & 0 deletions build/localmode/precheck.cmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
@echo OFF

rem
rem Copyright (c) Microsoft. All rights reserved.
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
rem

set precheck=ok

if not exist "%JAVA_HOME%\bin\java.exe" (
Expand Down
11 changes: 8 additions & 3 deletions build/localmode/run-samples.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#!/bin/bash

#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
#

export verbose=

for param in "$@"
Expand Down Expand Up @@ -68,9 +73,9 @@ fi

export SPARKCLR_HOME="$FWDIR/../runtime"
# spark-csv package and its dependency are required for DataFrame operations in Mobius
export SPARKCLR_EXT_PATH="$SPARKCLR_HOME\dependencies"
export SPARKCSV_JAR1PATH="$SPARKCLR_EXT_PATH\spark-csv_2.10-1.3.0.jar"
export SPARKCSV_JAR2PATH="$SPARKCLR_EXT_PATH\commons-csv-1.1.jar"
export SPARKCLR_EXT_PATH="$SPARKCLR_HOME/dependencies"
export SPARKCSV_JAR1PATH="$SPARKCLR_EXT_PATH/spark-csv_2.10-1.4.0.jar"
export SPARKCSV_JAR2PATH="$SPARKCLR_EXT_PATH/commons-csv-1.4.jar"
export SPARKCLR_EXT_JARS="$SPARKCSV_JAR1PATH,$SPARKCSV_JAR2PATH"

# run-samples.sh is in local mode, should not load Hadoop or Yarn cluster config. Disable Hadoop/Yarn conf dir.
Expand Down
4 changes: 3 additions & 1 deletion build/localmode/zipdir.ps1
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
#

# This script takes in "dir" and "target" parameters, zips all files under dir to the target file
#


Param([string]$dir, [string]$target)

Expand Down
5 changes: 5 additions & 0 deletions cpp/Build.cmd
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
@setlocal
@ECHO off

rem
rem Copyright (c) Microsoft. All rights reserved.
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
rem

SET CMDHOME=%~dp0
@REM Remove trailing backslash \
set CMDHOME=%CMDHOME:~0,-1%
Expand Down
6 changes: 6 additions & 0 deletions cpp/Clean.cmd
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
@ECHO OFF

rem
rem Copyright (c) Microsoft. All rights reserved.
rem Licensed under the MIT license. See LICENSE file in the project root for full license information.
rem

FOR /D /R . %%G IN (bin) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G")
FOR /D /R . %%G IN (obj) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G")
FOR /D /R . %%G IN (x64) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G")
10 changes: 10 additions & 0 deletions csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -102,20 +102,25 @@
<Compile Include="Network\SockDataToken.cs" />
<Compile Include="Network\SocketFactory.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Proxy\ICatalogProxy.cs" />
<Compile Include="Proxy\IDataFrameNaFunctionsProxy.cs" />
<Compile Include="Proxy\IDataFrameProxy.cs" />
<Compile Include="Proxy\IDataFrameReaderProxy.cs" />
<Compile Include="Proxy\IDataFrameWriterProxy.cs" />
<Compile Include="Proxy\IDatasetProxy.cs" />
<Compile Include="Proxy\IDStreamProxy.cs" />
<Compile Include="Proxy\IHadoopConfigurationProxy.cs" />
<Compile Include="Proxy\Ipc\CatalogIpcProxy.cs" />
<Compile Include="Proxy\Ipc\DataFrameIpcProxy.cs" />
<Compile Include="Proxy\Ipc\DataFrameNaFunctionsIpcProxy.cs" />
<Compile Include="Proxy\Ipc\DataFrameReaderIpcProxy.cs" />
<Compile Include="Proxy\Ipc\DataFrameWriterIpcProxy.cs" />
<Compile Include="Proxy\Ipc\DatasetIpcProxy.cs" />
<Compile Include="Proxy\Ipc\DStreamIpcProxy.cs" />
<Compile Include="Proxy\Ipc\HadoopConfigurationIpcProxy.cs" />
<Compile Include="Proxy\Ipc\RDDIpcProxy.cs" />
<Compile Include="Proxy\Ipc\SparkCLRIpcProxy.cs" />
<Compile Include="Proxy\Ipc\SparkSessionIpcProxy.cs" />
<Compile Include="Proxy\Ipc\SqlContextIpcProxy.cs" />
<Compile Include="Proxy\Ipc\StatusTrackerIpcProxy.cs" />
<Compile Include="Proxy\Ipc\StreamingContextIpcProxy.cs" />
Expand All @@ -125,6 +130,7 @@
<Compile Include="Proxy\ISparkConfProxy.cs" />
<Compile Include="Proxy\ISparkContextProxy.cs" />
<Compile Include="Proxy\Ipc\SparkConfIpcProxy.cs" />
<Compile Include="Proxy\ISparkSessionProxy.cs" />
<Compile Include="Proxy\ISqlContextProxy.cs" />
<Compile Include="Proxy\IStatusTrackerProxy.cs" />
<Compile Include="Proxy\IStreamingContextProxy.cs" />
Expand All @@ -134,17 +140,21 @@
<Compile Include="Services\ILoggerService.cs" />
<Compile Include="Services\Log4NetLoggerService.cs" />
<Compile Include="Services\LoggerServiceFactory.cs" />
<Compile Include="Sql\Builder.cs" />
<Compile Include="Sql\Catalog.cs" />
<Compile Include="Sql\Column.cs" />
<Compile Include="Sql\DataFrame.cs" />
<Compile Include="Sql\DataFrameNaFunctions.cs" />
<Compile Include="Sql\DataFrameReader.cs" />
<Compile Include="Sql\DataFrameWriter.cs" />
<Compile Include="Sql\Dataset.cs" />
<Compile Include="Sql\HiveContext.cs" />
<Compile Include="Sql\PythonSerDe.cs" />
<Compile Include="Sql\RowConstructor.cs" />
<Compile Include="Sql\Row.cs" />
<Compile Include="Sql\Functions.cs" />
<Compile Include="Sql\SaveMode.cs" />
<Compile Include="Sql\SparkSession.cs" />
<Compile Include="Sql\SqlContext.cs" />
<Compile Include="Sql\Types.cs" />
<Compile Include="Sql\UserDefinedFunction.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ internal ConfigurationService()
configuration = new SparkCLRConfiguration(appConfig);
runMode = RunMode.CLUSTER;
}
else if (sparkMaster.Equals("yarn-client", StringComparison.OrdinalIgnoreCase) || sparkMaster.Equals("yarn-cluster", StringComparison.OrdinalIgnoreCase))
else if (sparkMaster.Equals("yarn-cluster", StringComparison.OrdinalIgnoreCase) ||
sparkMaster.Equals("yarn-client", StringComparison.OrdinalIgnoreCase) ||
sparkMaster.Equals("yarn", StringComparison.OrdinalIgnoreCase)) //supported in Spark 2.0
{
configuration = new SparkCLRConfiguration(appConfig);
runMode = RunMode.YARN;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ internal class CSharpWorkerFunc
public CSharpWorkerFunc(Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> func)
{
this.func = func;
stackTrace = new StackTrace(true).ToString();
stackTrace = new StackTrace(true).ToString().Replace(" at ", " [STACK] ");
}

public CSharpWorkerFunc(Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> func, string innerStackTrace)
: this(func)
{
this.func = func;
stackTrace = new StackTrace(true).ToString() + "\nInner stack trace ...\n" + innerStackTrace;
stackTrace += string.Format(" [STACK] --- Inner stack trace: ---{0}{1}",
Environment.NewLine, innerStackTrace.Replace(" at ", " [STACK] "));
}

public Func<int, IEnumerable<dynamic>, IEnumerable<dynamic>> Func
Expand Down
Loading

0 comments on commit da69c1f

Please sign in to comment.