From 09d8a2f2153b68c242429d8f3be9c2f41b8207fb Mon Sep 17 00:00:00 2001 From: Kaarthik Sivashanmugam Date: Thu, 25 Aug 2016 09:04:19 -0700 Subject: [PATCH 01/15] updating examples to use Mobius 2.0 preview release and minor updates --- README.md | 2 +- build/Build.cmd | 2 +- csharp/SparkCLR.nuspec | 2 +- dev/scripts/SetSparkClrPackageVersion.ps1 | 8 ++++---- examples/Batch/WordCount/WordCount.csproj | 6 +++--- examples/Batch/WordCount/packages.config | 2 +- examples/Batch/pi/Pi.csproj | 6 +++--- examples/Batch/pi/packages.config | 2 +- examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj | 4 ++-- examples/Sql/CassandraDataFrame/packages.config | 2 +- examples/Sql/HiveDataFrame/HiveDataFrame.csproj | 4 ++-- examples/Sql/HiveDataFrame/packages.config | 2 +- examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj | 6 +++--- examples/Sql/JdbcDataFrame/packages.config | 2 +- examples/Sql/SparkXml/SparkXml.csproj | 6 +++--- examples/Sql/SparkXml/packages.config | 2 +- examples/Streaming/EventHub/EventHub.csproj | 6 +++--- examples/Streaming/EventHub/packages.config | 2 +- examples/Streaming/HdfsWordCount/HdfsWordCount.csproj | 6 +++--- examples/Streaming/HdfsWordCount/packages.config | 2 +- examples/Streaming/Kafka/Kafka.csproj | 4 ++-- examples/Streaming/Kafka/packages.config | 2 +- examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj | 6 +++--- examples/fsharp/JsonDataFrame/packages.config | 2 +- examples/fsharp/WordCount/WordCountFSharp.fsproj | 6 +++--- examples/fsharp/WordCount/packages.config | 2 +- 26 files changed, 48 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index 2dc6d9ab..a1ce8255 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,7 @@ Refer to the [docs folder](docs) for design overview and other info on Mobius ## Supported Spark Versions -Mobius is built and tested with Apache Spark [1.4.1](https://github.com/Microsoft/Mobius/tree/branch-1.4), [1.5.2](https://github.com/Microsoft/Mobius/tree/branch-1.5) and [1.6.*](https://github.com/Microsoft/Mobius/tree/branch-1.6). +Mobius is built and tested with Apache Spark [1.4.1](https://github.com/Microsoft/Mobius/tree/branch-1.4), [1.5.2](https://github.com/Microsoft/Mobius/tree/branch-1.5), [1.6.*](https://github.com/Microsoft/Mobius/tree/branch-1.6) and [2.0](https://github.com/Microsoft/Mobius/tree/branch-2.0). 
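The SetSparkClrPackageVersion.ps1 hunk further down in this patch stamps the release version into every *.csproj and *.fsproj by rewriting the Microsoft.SparkCLR segment of each package hint path. A minimal runnable C# sketch of that same replacement; the regex and replacement string come from the script, while the sample path and version value are only illustrative:

using System;
using System.Text.RegularExpressions;

class SetPackageVersionDemo
{
    static void Main()
    {
        // Any Microsoft.SparkCLR.* hint path is rewritten the same way; this input is a sample.
        const string hintPath = @"..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe";
        const string version = "2.0.0-PREVIEW-1";

        // Same pattern the PowerShell script applies to each project file it finds.
        string updated = Regex.Replace(hintPath, @"\\Microsoft\.SparkCLR.*\\lib",
            @"\Microsoft.SparkCLR." + version + @"\lib");

        // Prints: ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe
        Console.WriteLine(updated);
    }
}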
## Releases diff --git a/build/Build.cmd b/build/Build.cmd index 7a83f5bd..96166fd0 100644 --- a/build/Build.cmd +++ b/build/Build.cmd @@ -225,7 +225,7 @@ if not defined ProjectVersion ( goto :distdone ) -set SPARKCLR_NAME=spark-clr_2.10-%ProjectVersion% +set SPARKCLR_NAME=spark-clr_2.11-%ProjectVersion% @echo "%SPARKCLR_HOME% @rem copy samples to top-level folder before zipping diff --git a/csharp/SparkCLR.nuspec b/csharp/SparkCLR.nuspec index 9c027b5d..d725b2f5 100644 --- a/csharp/SparkCLR.nuspec +++ b/csharp/SparkCLR.nuspec @@ -2,7 +2,7 @@ Microsoft.SparkCLR - 1.6.200-SNAPSHOT + 2.0.000-SNAPSHOT Microsoft Corporation Microsoft Corporation https://github.com/Microsoft/Mobius/blob/master/LICENSE diff --git a/dev/scripts/SetSparkClrPackageVersion.ps1 b/dev/scripts/SetSparkClrPackageVersion.ps1 index b9693b24..3de5b4b4 100644 --- a/dev/scripts/SetSparkClrPackageVersion.ps1 +++ b/dev/scripts/SetSparkClrPackageVersion.ps1 @@ -17,15 +17,15 @@ function Update-Csproj($targetDir, $version) Write-Output "[SetSparkClrPackageVersion.Update-Csproj] Start setting *.csproj under $targetDir to version=$version" # - # Update Mobius package version to this release. Example in *.csproj: + # Update Mobius package version to this release. Example in *.csproj and *.fsproj: # ..\packages\Microsoft.SparkCLR.1.5.2-SNAPSHOT\lib\net45\CSharpWorker.exe # - Get-ChildItem $targetDir -filter "*.csproj" -recurs | % { - Write-Output "[SetSparkClrPackageVersion.Update-Csproj] updating $($_.FullName)" + Get-ChildItem $targetDir -filter "*.*sproj" -recurs | % { + Write-Output "[SetSparkClrPackageVersion.Update-*sproj] updating $($_.FullName)" ((Get-Content $_.FullName) -replace "\\Microsoft\.SparkCLR.*\\lib", "\Microsoft.SparkCLR.$version\lib") | Set-Content -Encoding UTF8 -Path $_.FullName -force } - Write-Output "[SetSparkClrPackageVersion.Update-Csproj] Done setting *.csproj under $targetDir to version=$version" + Write-Output "[SetSparkClrPackageVersion.Update-Csproj] Done setting *.csproj and *.fsproj under $targetDir to version=$version" } function Update-PackageConfig($targetDir, $version) diff --git a/examples/Batch/WordCount/WordCount.csproj b/examples/Batch/WordCount/WordCount.csproj index a9f0dfc8..40b8f52a 100644 --- a/examples/Batch/WordCount/WordCount.csproj +++ b/examples/Batch/WordCount/WordCount.csproj @@ -34,7 +34,7 @@ False - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe False @@ -42,7 +42,7 @@ False - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -67,7 +67,7 @@ - + CSharpWorker.exe.config diff --git a/examples/Batch/WordCount/packages.config b/examples/Batch/WordCount/packages.config index fb0cfe9e..293105d3 100644 --- a/examples/Batch/WordCount/packages.config +++ b/examples/Batch/WordCount/packages.config @@ -4,5 +4,5 @@ - + diff --git a/examples/Batch/pi/Pi.csproj b/examples/Batch/pi/Pi.csproj index 058fa0ca..751852f8 100644 --- a/examples/Batch/pi/Pi.csproj +++ b/examples/Batch/pi/Pi.csproj @@ -37,7 +37,7 @@ False - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe False @@ -45,7 +45,7 @@ False - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + 
..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -66,7 +66,7 @@ - + CSharpWorker.exe.config diff --git a/examples/Batch/pi/packages.config b/examples/Batch/pi/packages.config index 88903cd0..eaa63869 100644 --- a/examples/Batch/pi/packages.config +++ b/examples/Batch/pi/packages.config @@ -1,7 +1,7 @@  - + diff --git a/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj b/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj index 17849686..228764bd 100644 --- a/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj +++ b/examples/Sql/CassandraDataFrame/CassandraDataFrame.csproj @@ -35,13 +35,13 @@ - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False diff --git a/examples/Sql/CassandraDataFrame/packages.config b/examples/Sql/CassandraDataFrame/packages.config index fb0cfe9e..293105d3 100644 --- a/examples/Sql/CassandraDataFrame/packages.config +++ b/examples/Sql/CassandraDataFrame/packages.config @@ -4,5 +4,5 @@ - + diff --git a/examples/Sql/HiveDataFrame/HiveDataFrame.csproj b/examples/Sql/HiveDataFrame/HiveDataFrame.csproj index 00d1ff2d..6c81a1d4 100644 --- a/examples/Sql/HiveDataFrame/HiveDataFrame.csproj +++ b/examples/Sql/HiveDataFrame/HiveDataFrame.csproj @@ -38,11 +38,11 @@ ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe True - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll True diff --git a/examples/Sql/HiveDataFrame/packages.config b/examples/Sql/HiveDataFrame/packages.config index e0de1c95..218d018f 100644 --- a/examples/Sql/HiveDataFrame/packages.config +++ b/examples/Sql/HiveDataFrame/packages.config @@ -1,7 +1,7 @@  - + diff --git a/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj b/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj index 80c1565b..f19954c4 100644 --- a/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj +++ b/examples/Sql/JdbcDataFrame/JdbcDataFrame.csproj @@ -36,7 +36,7 @@ False - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe False @@ -44,7 +44,7 @@ False - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -65,7 +65,7 @@ - + CSharpWorker.exe.config diff --git a/examples/Sql/JdbcDataFrame/packages.config b/examples/Sql/JdbcDataFrame/packages.config index fb0cfe9e..293105d3 100644 --- a/examples/Sql/JdbcDataFrame/packages.config +++ b/examples/Sql/JdbcDataFrame/packages.config @@ -4,5 +4,5 @@ - + diff --git a/examples/Sql/SparkXml/SparkXml.csproj b/examples/Sql/SparkXml/SparkXml.csproj index 22675f61..381ec5db 100644 --- a/examples/Sql/SparkXml/SparkXml.csproj +++ b/examples/Sql/SparkXml/SparkXml.csproj @@ -36,7 +36,7 @@ False - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + 
..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe False @@ -44,7 +44,7 @@ False - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -65,7 +65,7 @@ - + CSharpWorker.exe.config diff --git a/examples/Sql/SparkXml/packages.config b/examples/Sql/SparkXml/packages.config index fb0cfe9e..293105d3 100644 --- a/examples/Sql/SparkXml/packages.config +++ b/examples/Sql/SparkXml/packages.config @@ -4,5 +4,5 @@ - + diff --git a/examples/Streaming/EventHub/EventHub.csproj b/examples/Streaming/EventHub/EventHub.csproj index 057c6f88..631b2c3d 100644 --- a/examples/Streaming/EventHub/EventHub.csproj +++ b/examples/Streaming/EventHub/EventHub.csproj @@ -35,7 +35,7 @@ - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe False @@ -43,7 +43,7 @@ - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -68,7 +68,7 @@ - + CSharpWorker.exe.config diff --git a/examples/Streaming/EventHub/packages.config b/examples/Streaming/EventHub/packages.config index edebda46..abe733c5 100644 --- a/examples/Streaming/EventHub/packages.config +++ b/examples/Streaming/EventHub/packages.config @@ -4,7 +4,7 @@ - + diff --git a/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj b/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj index b38df4b6..277fa405 100644 --- a/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj +++ b/examples/Streaming/HdfsWordCount/HdfsWordCount.csproj @@ -38,7 +38,7 @@ False - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe False @@ -46,7 +46,7 @@ False - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False @@ -64,7 +64,7 @@ - + PreserveNewest diff --git a/examples/Streaming/HdfsWordCount/packages.config b/examples/Streaming/HdfsWordCount/packages.config index 88903cd0..eaa63869 100644 --- a/examples/Streaming/HdfsWordCount/packages.config +++ b/examples/Streaming/HdfsWordCount/packages.config @@ -1,7 +1,7 @@  - + diff --git a/examples/Streaming/Kafka/Kafka.csproj b/examples/Streaming/Kafka/Kafka.csproj index e1608f57..c221fa81 100644 --- a/examples/Streaming/Kafka/Kafka.csproj +++ b/examples/Streaming/Kafka/Kafka.csproj @@ -33,14 +33,14 @@ - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe False ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll False diff --git a/examples/Streaming/Kafka/packages.config b/examples/Streaming/Kafka/packages.config index 88903cd0..eaa63869 100644 --- a/examples/Streaming/Kafka/packages.config +++ b/examples/Streaming/Kafka/packages.config @@ -1,7 +1,7 @@  - + diff --git a/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj b/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj index 5ab9eaa3..b49e9de0 100644 --- 
a/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj +++ b/examples/fsharp/JsonDataFrame/JsonDataFrame.fsproj @@ -66,13 +66,13 @@ - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe ..\..\packages\log4net.2.0.5\lib\net45-full\log4net.dll - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll @@ -98,4 +98,4 @@ --> - \ No newline at end of file + diff --git a/examples/fsharp/JsonDataFrame/packages.config b/examples/fsharp/JsonDataFrame/packages.config index 1b77c005..941d504a 100644 --- a/examples/fsharp/JsonDataFrame/packages.config +++ b/examples/fsharp/JsonDataFrame/packages.config @@ -2,7 +2,7 @@ - + diff --git a/examples/fsharp/WordCount/WordCountFSharp.fsproj b/examples/fsharp/WordCount/WordCountFSharp.fsproj index 4700233f..3b40aad9 100644 --- a/examples/fsharp/WordCount/WordCountFSharp.fsproj +++ b/examples/fsharp/WordCount/WordCountFSharp.fsproj @@ -71,7 +71,7 @@ - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\CSharpWorker.exe + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\CSharpWorker.exe True @@ -83,7 +83,7 @@ True - ..\..\packages\Microsoft.SparkCLR.1.6.200\lib\net45\Microsoft.Spark.CSharp.Adapter.dll + ..\..\packages\Microsoft.SparkCLR.2.0.0-PREVIEW-1\lib\net45\Microsoft.Spark.CSharp.Adapter.dll True @@ -110,4 +110,4 @@ --> - \ No newline at end of file + diff --git a/examples/fsharp/WordCount/packages.config b/examples/fsharp/WordCount/packages.config index 1b77c005..941d504a 100644 --- a/examples/fsharp/WordCount/packages.config +++ b/examples/fsharp/WordCount/packages.config @@ -2,7 +2,7 @@ - + From 9f4276e3386ba372f849fba2be8918f68e2972f4 Mon Sep 17 00:00:00 2001 From: Yun Tang Date: Thu, 1 Sep 2016 02:33:41 +0800 Subject: [PATCH 02/15] [Mobius-102] Add missing license text in batch and powershell scripts (#557) --- build/Build.cmd | 5 +++++ build/build.sh | 5 +++++ build/copyjar.ps1 | 5 +++++ build/localmode/RunSamples.cmd | 6 ++++++ build/localmode/downloadtools.ps1 | 5 +++++ build/localmode/dumpsoftware.ps1 | 5 +++++ build/localmode/nugetpack.ps1 | 5 +++++ build/localmode/patchpom.ps1 | 5 +++++ build/localmode/precheck.cmd | 5 +++++ build/localmode/run-samples.sh | 5 +++++ build/localmode/zipdir.ps1 | 4 +++- cpp/Build.cmd | 5 +++++ cpp/Clean.cmd | 6 ++++++ csharp/Build.cmd | 5 +++++ csharp/Clean.cmd | 6 ++++++ csharp/Test.cmd | 5 +++++ csharp/build.sh | 5 +++++ csharp/clean.sh | 5 +++++ csharp/test.sh | 5 +++++ dev/scripts/SetSparkClrJarVersion.ps1 | 5 +++++ dev/scripts/SetSparkClrNugetPackageVersion.ps1 | 5 +++++ dev/scripts/SetSparkClrPackageVersion.ps1 | 5 +++++ dev/scripts/SetVersion.cmd | 6 ++++++ examples/Build.cmd | 5 +++++ examples/Clean.cmd | 6 ++++++ examples/build.sh | 5 +++++ examples/clean.sh | 5 +++++ scripts/sparkclr-submit.cmd | 6 ++++++ scripts/sparkclr-submit.sh | 5 +++++ 29 files changed, 149 insertions(+), 1 deletion(-) diff --git a/build/Build.cmd b/build/Build.cmd index 96166fd0..05239aca 100644 --- a/build/Build.cmd +++ b/build/Build.cmd @@ -1,6 +1,11 @@ @setlocal @echo OFF +rem +rem Copyright (c) Microsoft. All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+rem + if "%1" == "csharp" set buildCSharp=true SET CMDHOME=%~dp0 diff --git a/build/build.sh b/build/build.sh index 32f17577..5e935cd5 100755 --- a/build/build.sh +++ b/build/build.sh @@ -1,5 +1,10 @@ #!/bin/bash +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + export FWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" export SPARKCLR_HOME="$FWDIR/runtime" diff --git a/build/copyjar.ps1 b/build/copyjar.ps1 index ea7184a4..c67830ef 100755 --- a/build/copyjar.ps1 +++ b/build/copyjar.ps1 @@ -1,3 +1,8 @@ +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + function Get-ScriptDirectory { $Invocation = (Get-Variable MyInvocation -Scope 1).Value; diff --git a/build/localmode/RunSamples.cmd b/build/localmode/RunSamples.cmd index dec79cf9..6ad9094c 100644 --- a/build/localmode/RunSamples.cmd +++ b/build/localmode/RunSamples.cmd @@ -1,4 +1,10 @@ @echo OFF + +rem +rem Copyright (c) Microsoft. All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. +rem + setlocal enabledelayedexpansion SET CMDHOME=%~dp0 diff --git a/build/localmode/downloadtools.ps1 b/build/localmode/downloadtools.ps1 index f363e21a..b71f355d 100644 --- a/build/localmode/downloadtools.ps1 +++ b/build/localmode/downloadtools.ps1 @@ -1,3 +1,8 @@ +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + # # Input - # (1) "stage" parameter, accepts either "build" or "run" diff --git a/build/localmode/dumpsoftware.ps1 b/build/localmode/dumpsoftware.ps1 index 5e5462e3..9943bf57 100644 --- a/build/localmode/dumpsoftware.ps1 +++ b/build/localmode/dumpsoftware.ps1 @@ -1,3 +1,8 @@ +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + $x64items = @(Get-ChildItem "HKLM:SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall") $x64items + @(Get-ChildItem "HKLM:SOFTWARE\wow6432node\Microsoft\Windows\CurrentVersion\Uninstall") ` | ForEach-object { Get-ItemProperty Microsoft.PowerShell.Core\Registry::$_ } ` diff --git a/build/localmode/nugetpack.ps1 b/build/localmode/nugetpack.ps1 index 9724a305..fc603d02 100644 --- a/build/localmode/nugetpack.ps1 +++ b/build/localmode/nugetpack.ps1 @@ -1,3 +1,8 @@ +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + $root = (split-path -parent $MyInvocation.MyCommand.Definition) + '\..\..' # expected tagname: v{version-string}. E.g., "v1.5.2-snapshot-2", "v1.5.2-prerelease-1" diff --git a/build/localmode/patchpom.ps1 b/build/localmode/patchpom.ps1 index 9608635f..cd74daf8 100644 --- a/build/localmode/patchpom.ps1 +++ b/build/localmode/patchpom.ps1 @@ -1,3 +1,8 @@ +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + # # Input - # "targetPom" parameter, target Pom.xml file diff --git a/build/localmode/precheck.cmd b/build/localmode/precheck.cmd index dc0217ed..45511aa7 100644 --- a/build/localmode/precheck.cmd +++ b/build/localmode/precheck.cmd @@ -1,5 +1,10 @@ @echo OFF +rem +rem Copyright (c) Microsoft. 
All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. +rem + set precheck=ok if not exist "%JAVA_HOME%\bin\java.exe" ( diff --git a/build/localmode/run-samples.sh b/build/localmode/run-samples.sh index 48165bf5..5c6486bb 100755 --- a/build/localmode/run-samples.sh +++ b/build/localmode/run-samples.sh @@ -1,5 +1,10 @@ #!/bin/bash +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + export verbose= for param in "$@" diff --git a/build/localmode/zipdir.ps1 b/build/localmode/zipdir.ps1 index 4620c928..1073a1ba 100644 --- a/build/localmode/zipdir.ps1 +++ b/build/localmode/zipdir.ps1 @@ -1,8 +1,10 @@ +# # Copyright (c) Microsoft. All rights reserved. # Licensed under the MIT license. See LICENSE file in the project root for full license information. # + # This script takes in "dir" and "target" parameters, zips all files under dir to the target file -# + Param([string]$dir, [string]$target) diff --git a/cpp/Build.cmd b/cpp/Build.cmd index 1feef374..42d2e639 100644 --- a/cpp/Build.cmd +++ b/cpp/Build.cmd @@ -1,6 +1,11 @@ @setlocal @ECHO off +rem +rem Copyright (c) Microsoft. All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. +rem + SET CMDHOME=%~dp0 @REM Remove trailing backslash \ set CMDHOME=%CMDHOME:~0,-1% diff --git a/cpp/Clean.cmd b/cpp/Clean.cmd index 2a978baa..bf6d5b03 100644 --- a/cpp/Clean.cmd +++ b/cpp/Clean.cmd @@ -1,4 +1,10 @@ @ECHO OFF + +rem +rem Copyright (c) Microsoft. All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. +rem + FOR /D /R . %%G IN (bin) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G") FOR /D /R . %%G IN (obj) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G") FOR /D /R . %%G IN (x64) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G") \ No newline at end of file diff --git a/csharp/Build.cmd b/csharp/Build.cmd index 6c2b36d3..a9499d3b 100644 --- a/csharp/Build.cmd +++ b/csharp/Build.cmd @@ -1,6 +1,11 @@ @setlocal @ECHO off +rem +rem Copyright (c) Microsoft. All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. +rem + SET CMDHOME=%~dp0 @REM Remove trailing backslash \ set CMDHOME=%CMDHOME:~0,-1% diff --git a/csharp/Clean.cmd b/csharp/Clean.cmd index e8454b6b..29a6e3cb 100644 --- a/csharp/Clean.cmd +++ b/csharp/Clean.cmd @@ -1,3 +1,9 @@ @ECHO OFF + +rem +rem Copyright (c) Microsoft. All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. +rem + FOR /D /R . %%G IN (bin) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G") FOR /D /R . %%G IN (obj) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G") \ No newline at end of file diff --git a/csharp/Test.cmd b/csharp/Test.cmd index cd1eafab..aaae4559 100644 --- a/csharp/Test.cmd +++ b/csharp/Test.cmd @@ -1,6 +1,11 @@ @setlocal @ECHO off +rem +rem Copyright (c) Microsoft. All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+rem + SET CMDHOME=%~dp0 @REM Remove trailing backslash \ set CMDHOME=%CMDHOME:~0,-1% diff --git a/csharp/build.sh b/csharp/build.sh index 01364378..769f3226 100755 --- a/csharp/build.sh +++ b/csharp/build.sh @@ -1,5 +1,10 @@ #!/bin/bash +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + export FWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" export CppDll=NoCpp export XBUILDOPT=/verbosity:minimal diff --git a/csharp/clean.sh b/csharp/clean.sh index 33edea39..255e4e6b 100755 --- a/csharp/clean.sh +++ b/csharp/clean.sh @@ -1,5 +1,10 @@ #!/bin/bash +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + for g in `find . -type d -name bin` do rm -r -f "$g" diff --git a/csharp/test.sh b/csharp/test.sh index 614898d7..7aabbad7 100755 --- a/csharp/test.sh +++ b/csharp/test.sh @@ -1,5 +1,10 @@ #!/bin/bash +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + export FWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" if [ "$NUNITCONSOLE" = "" ]; diff --git a/dev/scripts/SetSparkClrJarVersion.ps1 b/dev/scripts/SetSparkClrJarVersion.ps1 index 13536a7c..f8d06ead 100644 --- a/dev/scripts/SetSparkClrJarVersion.ps1 +++ b/dev/scripts/SetSparkClrJarVersion.ps1 @@ -1,3 +1,8 @@ +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + # # This script takes in "version" and "targetDir" (optional) parameters, update Spark-Clr jar # version reference in all scripts under "targetDir". diff --git a/dev/scripts/SetSparkClrNugetPackageVersion.ps1 b/dev/scripts/SetSparkClrNugetPackageVersion.ps1 index 90a4b727..25cfaddf 100644 --- a/dev/scripts/SetSparkClrNugetPackageVersion.ps1 +++ b/dev/scripts/SetSparkClrNugetPackageVersion.ps1 @@ -1,3 +1,8 @@ +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + # # This script takes in and "nuspecDir" and "version" parameters, update Mobius Nuget package # version diff --git a/dev/scripts/SetSparkClrPackageVersion.ps1 b/dev/scripts/SetSparkClrPackageVersion.ps1 index 3de5b4b4..4e53b09f 100644 --- a/dev/scripts/SetSparkClrPackageVersion.ps1 +++ b/dev/scripts/SetSparkClrPackageVersion.ps1 @@ -1,3 +1,8 @@ +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + # # This script takes in "version" and "targetDir" (optional) parameters, update Mobius Nuget package # version reference in all *.csproj and packages.config under "dir". diff --git a/dev/scripts/SetVersion.cmd b/dev/scripts/SetVersion.cmd index 144674a7..52efa44f 100644 --- a/dev/scripts/SetVersion.cmd +++ b/dev/scripts/SetVersion.cmd @@ -1,4 +1,10 @@ @echo OFF + +rem +rem Copyright (c) Microsoft. All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. +rem + setlocal enabledelayedexpansion IF "%1"=="" (goto :usage) diff --git a/examples/Build.cmd b/examples/Build.cmd index c641b0bc..f9b0bcb6 100644 --- a/examples/Build.cmd +++ b/examples/Build.cmd @@ -1,6 +1,11 @@ @setlocal @ECHO off +rem +rem Copyright (c) Microsoft. 
All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. +rem + SET CMDHOME=%~dp0 @REM Remove trailing backslash \ set CMDHOME=%CMDHOME:~0,-1% diff --git a/examples/Clean.cmd b/examples/Clean.cmd index e8454b6b..29a6e3cb 100644 --- a/examples/Clean.cmd +++ b/examples/Clean.cmd @@ -1,3 +1,9 @@ @ECHO OFF + +rem +rem Copyright (c) Microsoft. All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. +rem + FOR /D /R . %%G IN (bin) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G") FOR /D /R . %%G IN (obj) DO @IF EXIST "%%G" (@echo RDMR /S /Q "%%G" & rd /s /q "%%G") \ No newline at end of file diff --git a/examples/build.sh b/examples/build.sh index eed3f7fb..a86e96ae 100755 --- a/examples/build.sh +++ b/examples/build.sh @@ -1,5 +1,10 @@ #!/bin/bash +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + export FWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" export XBUILDOPT=/verbosity:minimal diff --git a/examples/clean.sh b/examples/clean.sh index 33edea39..255e4e6b 100755 --- a/examples/clean.sh +++ b/examples/clean.sh @@ -1,5 +1,10 @@ #!/bin/bash +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. +# + for g in `find . -type d -name bin` do rm -r -f "$g" diff --git a/scripts/sparkclr-submit.cmd b/scripts/sparkclr-submit.cmd index b880a1bf..68fadd80 100644 --- a/scripts/sparkclr-submit.cmd +++ b/scripts/sparkclr-submit.cmd @@ -1,4 +1,10 @@ @echo off + +rem +rem Copyright (c) Microsoft. All rights reserved. +rem Licensed under the MIT license. See LICENSE file in the project root for full license information. +rem + setlocal enabledelayedexpansion set CMDHOME=%~dp0 diff --git a/scripts/sparkclr-submit.sh b/scripts/sparkclr-submit.sh index 72383b5e..9482ec34 100755 --- a/scripts/sparkclr-submit.sh +++ b/scripts/sparkclr-submit.sh @@ -1,5 +1,10 @@ #!/bin/bash +# +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+# + function spark_home_error() { echo "[sparkclr-submit.sh] Error - SPARK_HOME environment variable is not export" echo "[sparkclr-submit.sh] Note that SPARK_HOME environment variable should not have trailing /" From 7a6d6bfa22f616e64bc7bd6f3e095f43373254b9 Mon Sep 17 00:00:00 2001 From: Hebin Huang Date: Fri, 2 Sep 2016 14:06:40 -0700 Subject: [PATCH 03/15] Improve Worker logging to make CSharpWorkerFunc stacktrace clear (#559) --- .../Core/CSharpWorkerFunc.cs | 7 +- .../Services/DefaultLoggerService.cs | 25 +++--- .../Services/ILoggerService.cs | 5 ++ .../Services/Log4NetLoggerService.cs | 14 ++-- .../Microsoft.Spark.CSharp.Adapter.Doc.XML | 16 ++++ csharp/AdapterTest/ByteBufTest.cs | 3 + .../Worker/Microsoft.Spark.CSharp/Worker.cs | 84 ++++++++++--------- 7 files changed, 97 insertions(+), 57 deletions(-) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/CSharpWorkerFunc.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/CSharpWorkerFunc.cs index 7101c19c..69bbf6a6 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/CSharpWorkerFunc.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/CSharpWorkerFunc.cs @@ -22,13 +22,14 @@ internal class CSharpWorkerFunc public CSharpWorkerFunc(Func, IEnumerable> func) { this.func = func; - stackTrace = new StackTrace(true).ToString(); + stackTrace = new StackTrace(true).ToString().Replace(" at ", " [STACK] "); } public CSharpWorkerFunc(Func, IEnumerable> func, string innerStackTrace) + : this(func) { - this.func = func; - stackTrace = new StackTrace(true).ToString() + "\nInner stack trace ...\n" + innerStackTrace; + stackTrace += string.Format(" [STACK] --- Inner stack trace: ---{0}{1}", + Environment.NewLine, innerStackTrace.Replace(" at ", " [STACK] ")); } public Func, IEnumerable> Func diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Services/DefaultLoggerService.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Services/DefaultLoggerService.cs index 8328d517..9588e63a 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Services/DefaultLoggerService.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Services/DefaultLoggerService.cs @@ -1,8 +1,4 @@ using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; namespace Microsoft.Spark.CSharp.Services { @@ -12,7 +8,20 @@ namespace Microsoft.Spark.CSharp.Services /// public class DefaultLoggerService : ILoggerService { - internal readonly static DefaultLoggerService Instance = new DefaultLoggerService(typeof (Type)); + internal static readonly DefaultLoggerService Instance = new DefaultLoggerService(typeof(Type)); + private readonly Type type; + + private DefaultLoggerService(Type t) + { + type = t; + } + + /// + /// Gets a value indicating whether logging is enabled for the Debug level. + /// Always return true for the DefaultLoggerService object. + /// + public bool IsDebugEnabled { get { return true; } } + /// /// Get an instance of ILoggerService by a given type of logger /// @@ -22,12 +31,6 @@ public ILoggerService GetLoggerInstance(Type type) { return new DefaultLoggerService(type); } - - private readonly Type type; - private DefaultLoggerService(Type t) - { - type = t; - } /// /// Logs a message at debug level. 
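// A standalone illustration (not part of this patch) of the trace formatting the
// CSharpWorkerFunc change above introduces: stack frames are re-labelled with " [STACK] "
// and any inner trace is appended under an "--- Inner stack trace: ---" marker, which makes
// the multi-line trace easy to pick out of worker logs. The sample inner frame is illustrative.
using System;
using System.Diagnostics;

class StackTraceFormatDemo
{
    static void Main()
    {
        string outer = new StackTrace(true).ToString().Replace(" at ", " [STACK] ");
        string inner = " at SampleJob.Transform()";   // stands in for a serialized inner trace
        string combined = outer + string.Format(" [STACK] --- Inner stack trace: ---{0}{1}",
            Environment.NewLine, inner.Replace(" at ", " [STACK] "));
        Console.WriteLine(combined);
    }
}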
diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Services/ILoggerService.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Services/ILoggerService.cs index 5560df3e..714a31ac 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Services/ILoggerService.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Services/ILoggerService.cs @@ -7,6 +7,11 @@ namespace Microsoft.Spark.CSharp.Services /// public interface ILoggerService { + /// + /// Gets a value indicating whether logging is enabled for the Debug level. + /// + bool IsDebugEnabled { get; } + /// /// Get an instance of ILoggerService by a given type of logger /// diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Services/Log4NetLoggerService.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Services/Log4NetLoggerService.cs index 1b6ac76d..716c319d 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Services/Log4NetLoggerService.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Services/Log4NetLoggerService.cs @@ -1,10 +1,6 @@ using System; -using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; -using System.Linq; -using System.Text; using System.Diagnostics; -using System.Threading.Tasks; using log4net; using log4net.Config; @@ -35,7 +31,15 @@ static Log4NetLoggerService() public Log4NetLoggerService(Type type) { logger = LogManager.GetLogger(type); - log4net.GlobalContext.Properties["pid"] = Process.GetCurrentProcess().Id; + GlobalContext.Properties["pid"] = Process.GetCurrentProcess().Id; + } + + /// + /// Gets a value indicating whether logging is enabled for the Debug level. + /// + public bool IsDebugEnabled + { + get { return logger.IsDebugEnabled; } } /// diff --git a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML index f7220aa2..5a445d12 100644 --- a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML +++ b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML @@ -4282,6 +4282,12 @@ Right now it just prints out the messages to Console + + + Gets a value indicating whether logging is enabled for the Debug level. + Always return true for the DefaultLoggerService object. + + Get an instance of ILoggerService by a given type of logger @@ -4365,6 +4371,11 @@ Defines a logger what be used in service + + + Gets a value indicating whether logging is enabled for the Debug level. + + Get an instance of ILoggerService by a given type of logger @@ -4459,6 +4470,11 @@ The type of the logger + + + Gets a value indicating whether logging is enabled for the Debug level. + + Logs a message at debug level. diff --git a/csharp/AdapterTest/ByteBufTest.cs b/csharp/AdapterTest/ByteBufTest.cs index 28a7453e..88d8543c 100644 --- a/csharp/AdapterTest/ByteBufTest.cs +++ b/csharp/AdapterTest/ByteBufTest.cs @@ -91,6 +91,9 @@ public void TestWriteReadUnsafeBuf() [Test] public void TestInvalidByteBuf() { + // Test ByteBuf with error status. + var errorByteBuf = ByteBuf.NewErrorStatusByteBuf(10054); + Assert.AreEqual(10054, errorByteBuf.Status); // Test invalid parameter to new ByteBuf. 
Assert.Throws(() => new ByteBuf(null, -1, 1024)); Assert.Throws(() => new ByteBuf(null, 0, -1)); diff --git a/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs b/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs index 3b2d3c0d..9c95d112 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs +++ b/csharp/Worker/Microsoft.Spark.CSharp/Worker.cs @@ -12,6 +12,7 @@ using System.Reflection; using System.Runtime.Serialization; using System.Runtime.Serialization.Formatters.Binary; +using System.Text; using Microsoft.Spark.CSharp.Core; using Microsoft.Spark.CSharp.Interop.Ipc; using Microsoft.Spark.CSharp.Network; @@ -31,10 +32,8 @@ namespace Microsoft.Spark.CSharp public class Worker { private static readonly DateTime UnixTimeEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc); - - private static ILoggerService logger = null; - - private static SparkCLRAssemblyHandler assemblyHandler = null; + private static ILoggerService logger; + private static SparkCLRAssemblyHandler assemblyHandler; public static void Main(string[] args) { @@ -49,7 +48,7 @@ public static void Main(string[] args) if (args.Length != 2) { - Console.Error.WriteLine("Wrong number of args: {0}, will exit", args.Count()); + Console.Error.WriteLine("Wrong number of args: {0}, will exit", args.Length); Environment.Exit(-1); } @@ -126,7 +125,7 @@ private static ISocketWrapper InitializeSocket(int javaPort) public static bool ProcessStream(Stream inputStream, Stream outputStream, int splitIndex) { - logger.LogInfo(string.Format("Start of stream processing, splitIndex: {0}", splitIndex)); + logger.LogInfo("Start of stream processing, splitIndex: {0}", splitIndex); bool readComplete = true; // Whether all input data from the socket is read though completely try @@ -170,7 +169,7 @@ public static bool ProcessStream(Stream inputStream, Stream outputStream, int sp else { // This may happen when the input data is not read completely, e.g., when take() operation is performed - logger.LogWarn(string.Format("**** unexpected read: {0}, not all data is read", end)); + logger.LogWarn("**** unexpected read: {0}, not all data is read", end); // write a different value to tell JVM to not reuse this worker SerDe.Write(outputStream, (int)SpecialLengths.END_OF_DATA_SECTION); readComplete = false; @@ -179,8 +178,8 @@ public static bool ProcessStream(Stream inputStream, Stream outputStream, int sp outputStream.Flush(); // log bytes read and write - logger.LogDebug(string.Format("total read bytes: {0}", SerDe.totalReadNum)); - logger.LogDebug(string.Format("total write bytes: {0}", SerDe.totalWriteNum)); + logger.LogDebug("total read bytes: {0}", SerDe.totalReadNum); + logger.LogDebug("total write bytes: {0}", SerDe.totalWriteNum); logger.LogDebug("Stream processing completed successfully"); } @@ -202,10 +201,10 @@ public static bool ProcessStream(Stream inputStream, Stream outputStream, int sp logger.LogError("Writing exception to stream failed with exception:"); logger.LogException(ex); } - throw e; + throw; } - logger.LogInfo(string.Format("Stop of stream processing, splitIndex: {0}, readComplete: {1}", splitIndex, readComplete)); + logger.LogInfo("Stop of stream processing, splitIndex: {0}, readComplete: {1}", splitIndex, readComplete); return readComplete; } @@ -310,7 +309,6 @@ private static IFormatter ProcessCommand(Stream inputStream, Stream outputStream int stageId = -1; string deserializerMode = null; string serializerMode = null; - CSharpWorkerFunc workerFunc = null; for (int funcIndex = 0; funcIndex < chainedFuncCount; funcIndex++) { int 
lengthOfCommandByteArray = SerDe.ReadInt(inputStream); @@ -319,17 +317,11 @@ private static IFormatter ProcessCommand(Stream inputStream, Stream outputStream if (lengthOfCommandByteArray > 0) { + CSharpWorkerFunc workerFunc; ReadCommand(inputStream, formatter, out stageId, out deserializerMode, out serializerMode, out workerFunc); - if (func == null) - { - func = workerFunc; - } - else - { - func = CSharpWorkerFunc.Chain(func, workerFunc); - } + func = func == null ? workerFunc : CSharpWorkerFunc.Chain(func, workerFunc); } else { @@ -387,11 +379,14 @@ private static void ReadCommand(Stream networkStream, IFormatter formatter, out workerFunc = (CSharpWorkerFunc)formatter.Deserialize(stream); - logger.LogDebug( + if (!logger.IsDebugEnabled) return; + var sb = new StringBuilder(Environment.NewLine); + sb.AppendLine( "------------------------ Printing stack trace of workerFunc for ** debugging ** ------------------------------"); - logger.LogDebug(workerFunc.StackTrace); - logger.LogDebug( + sb.AppendLine(workerFunc.StackTrace); + sb.AppendLine( "--------------------------------------------------------------------------------------------------------------"); + logger.LogDebug(sb.ToString()); } private static void ExecuteCommand(Stream inputStream, Stream outputStream, int splitIndex, DateTime bootTime, @@ -442,9 +437,8 @@ private static void ExecuteCommand(Stream inputStream, Stream outputStream, int commandProcessWatch.Stop(); // log statistics - logger.LogInfo(string.Format("func process time: {0}", funcProcessWatch.ElapsedMilliseconds)); - logger.LogInfo(string.Format("stage {0}, command process time: {1}", stageId, - commandProcessWatch.ElapsedMilliseconds)); + logger.LogInfo("func process time: {0}", funcProcessWatch.ElapsedMilliseconds); + logger.LogInfo("stage {0}, command process time: {1}", stageId, commandProcessWatch.ElapsedMilliseconds); } private static void WriteOutput(Stream networkStream, string serializerMode, dynamic message, IFormatter formatter) @@ -509,7 +503,7 @@ private static int ReadDiagnosticsInfo(Stream networkStream) int rddId = SerDe.ReadInt(networkStream); int stageId = SerDe.ReadInt(networkStream); int partitionId = SerDe.ReadInt(networkStream); - logger.LogInfo(string.Format("rddInfo: rddId {0}, stageId {1}, partitionId {2}", rddId, stageId, partitionId)); + logger.LogInfo("rddInfo: rddId {0}, stageId {1}, partitionId {2}", rddId, stageId, partitionId); return stageId; } @@ -517,8 +511,8 @@ private static void WriteDiagnosticsInfo(Stream networkStream, DateTime bootTime { DateTime finishTime = DateTime.UtcNow; const string format = "MM/dd/yyyy hh:mm:ss.fff tt"; - logger.LogDebug(string.Format("bootTime: {0}, initTime: {1}, finish_time: {2}", - bootTime.ToString(format), initTime.ToString(format), finishTime.ToString(format))); + logger.LogDebug("bootTime: {0}, initTime: {1}, finish_time: {2}", + bootTime.ToString(format), initTime.ToString(format), finishTime.ToString(format)); SerDe.Write(networkStream, (int)SpecialLengths.TIMING_DATA); SerDe.Write(networkStream, ToUnixTime(bootTime)); SerDe.Write(networkStream, ToUnixTime(initTime)); @@ -538,7 +532,7 @@ private static void WriteAccumulatorValues(Stream networkStream, IFormatter form item.Value.GetType() .GetField("value", BindingFlags.NonPublic | BindingFlags.Instance) .GetValue(item.Value); - logger.LogDebug(string.Format("({0}, {1})", item.Key, value)); + logger.LogDebug("({0}, {1})", item.Key, value); formatter.Serialize(ms, new KeyValuePair(item.Key, value)); byte[] buffer = ms.ToArray(); 
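// (Illustration only, not part of the patch.) The IsDebugEnabled guard used in ReadCommand
// above is the pattern this change applies wherever log text is expensive to build: skip the
// string work entirely unless debug logging is on. The folder listing below is an illustrative
// message; logger and the format-string LogDebug overload are the same ones used in this file.
if (logger.IsDebugEnabled)
{
    var folder = Path.GetDirectoryName(Assembly.GetEntryAssembly().Location);
    var fileList = string.Join(Environment.NewLine, Directory.EnumerateFiles(folder));
    logger.LogDebug("Files available in executor{0}{1}", Environment.NewLine, fileList);
}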
SerDe.Write(networkStream, buffer.Length); @@ -548,13 +542,28 @@ private static void WriteAccumulatorValues(Stream networkStream, IFormatter form public static void PrintFiles() { - logger.LogDebug("Files available in executor"); - var driverFolder = Path.GetDirectoryName(Assembly.GetEntryAssembly().Location); - var files = Directory.EnumerateFiles(driverFolder); + if (!logger.IsDebugEnabled) return; + + var folder = Path.GetDirectoryName(Assembly.GetEntryAssembly().Location); + var files = Directory.EnumerateFiles(folder).Select(Path.GetFileName).ToArray(); + var longest = files.Max(f => f.Length); + var count = 0; + var outfiles = new StringBuilder(Environment.NewLine); foreach (var file in files) { - logger.LogDebug(file); + switch (count++ % 2) + { + case 0: + outfiles.Append(" " + file.PadRight(longest + 2)); + break; + default: + outfiles.AppendLine(file); + break; + } } + + logger.LogDebug("Files available in executor"); + logger.LogDebug("Location: {0}{1}{2}", folder, Environment.NewLine, outfiles.ToString()); } private static long ToUnixTime(DateTime dt) @@ -622,7 +631,7 @@ private static IEnumerable GetIterator(Stream inputStream, string seria case SerializedMode.Pair: { byte[] pairKey = buffer; - byte[] pairValue = null; + byte[] pairValue; watch.Start(); int valueLength = SerDe.ReadInt(inputStream); @@ -650,7 +659,6 @@ private static IEnumerable GetIterator(Stream inputStream, string seria break; } - case SerializedMode.Byte: default: { if (buffer != null) @@ -669,7 +677,7 @@ private static IEnumerable GetIterator(Stream inputStream, string seria watch.Start(); } - logger.LogInfo(string.Format("total receive time: {0}", watch.ElapsedMilliseconds)); + logger.LogInfo("total receive time: {0}", watch.ElapsedMilliseconds); } internal class SparkCLRAssemblyHandler @@ -687,7 +695,7 @@ public void LoadAssemblies(string[] files) } else { - Console.Error.WriteLine("Already loaded assebmly " + assembly.FullName); + Console.Error.WriteLine("Already loaded assembly " + assembly.FullName); } } } From da54d509ed3e27c011e994e6e399daff5b8df8f7 Mon Sep 17 00:00:00 2001 From: Kaarthik Sivashanmugam Date: Tue, 6 Sep 2016 13:11:59 -0700 Subject: [PATCH 04/15] added support for SparkSession, Catalog and Dataset --- .gitignore | 4 + .../Microsoft.Spark.CSharp/Adapter.csproj | 10 + .../Microsoft.Spark.CSharp/Core/SparkConf.cs | 25 + .../Core/SparkContext.cs | 20 + .../Proxy/ICatalogProxy.cs | 52 ++ .../Proxy/IDatasetProxy.cs | 16 + .../Proxy/ISparkConfProxy.cs | 1 + .../Proxy/ISparkContextProxy.cs | 3 +- .../Proxy/ISparkSessionProxy.cs | 27 ++ .../Proxy/ISqlContextProxy.cs | 1 - .../Proxy/Ipc/CatalogIpcProxy.cs | 154 ++++++ .../Proxy/Ipc/DatasetIpcProxy.cs | 35 ++ .../Proxy/Ipc/SparkConfIpcProxy.cs | 7 +- .../Proxy/Ipc/SparkContextIpcProxy.cs | 16 +- .../Proxy/Ipc/SparkSessionIpcProxy.cs | 101 ++++ .../Proxy/Ipc/SqlContextIpcProxy.cs | 6 - .../Microsoft.Spark.CSharp/Sql/Builder.cs | 130 +++++ .../Microsoft.Spark.CSharp/Sql/Catalog.cs | 350 ++++++++++++++ .../Microsoft.Spark.CSharp/Sql/DataFrame.cs | 14 +- .../Microsoft.Spark.CSharp/Sql/Dataset.cs | 135 ++++++ .../Microsoft.Spark.CSharp/Sql/HiveContext.cs | 11 +- .../Sql/SparkSession.cs | 140 ++++++ .../Microsoft.Spark.CSharp/Sql/SqlContext.cs | 38 +- .../Microsoft.Spark.CSharp.Adapter.Doc.XML | 453 ++++++++++++++++++ .../documentation/Mobius_API_Documentation.md | 98 +++- csharp/AdapterTest/AdapterTest.csproj | 5 + csharp/AdapterTest/BuilderTest.cs | 50 ++ csharp/AdapterTest/CatalogTest.cs | 212 ++++++++ csharp/AdapterTest/DatasetTest.cs | 150 ++++++ 
csharp/AdapterTest/HiveContextTest.cs | 34 +- .../AdapterTest/Mocks/MockSparkConfProxy.cs | 5 + .../Mocks/MockSparkContextProxy.cs | 13 +- .../Mocks/MockSparkSessionProxy.cs | 53 ++ .../AdapterTest/Mocks/MockSqlContextProxy.cs | 5 - csharp/AdapterTest/SparkSessionTest.cs | 30 ++ csharp/AdapterTest/SqlContextTest.cs | 66 ++- .../Samples/Microsoft.Spark.CSharp/App.config | 3 +- .../Microsoft.Spark.CSharp/CatalogSamples.cs | 28 ++ .../DataFrameSamples.cs | 10 +- .../Samples/Microsoft.Spark.CSharp/Program.cs | 1 + .../Microsoft.Spark.CSharp/Samples.csproj | 2 + .../SparkSessionSamples.cs | 189 ++++++++ .../spark/sql/api/csharp/JvmBridgeUtils.scala | 20 + .../spark/sql/api/csharp/SQLUtils.scala | 9 +- .../util/csharp/JvmBridgeUtilsSuite.scala | 31 ++ 45 files changed, 2671 insertions(+), 92 deletions(-) create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ICatalogProxy.cs create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDatasetProxy.cs create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkSessionProxy.cs create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/CatalogIpcProxy.cs create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DatasetIpcProxy.cs create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Sql/Builder.cs create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Sql/Catalog.cs create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs create mode 100644 csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs create mode 100644 csharp/AdapterTest/BuilderTest.cs create mode 100644 csharp/AdapterTest/CatalogTest.cs create mode 100644 csharp/AdapterTest/DatasetTest.cs create mode 100644 csharp/AdapterTest/Mocks/MockSparkSessionProxy.cs create mode 100644 csharp/AdapterTest/SparkSessionTest.cs create mode 100644 csharp/Samples/Microsoft.Spark.CSharp/CatalogSamples.cs create mode 100644 csharp/Samples/Microsoft.Spark.CSharp/SparkSessionSamples.cs create mode 100644 scala/src/test/scala/org/apache/spark/util/csharp/JvmBridgeUtilsSuite.scala diff --git a/.gitignore b/.gitignore index 5ad71338..b42159a0 100644 --- a/.gitignore +++ b/.gitignore @@ -40,6 +40,10 @@ build/dependencies/ *.log lib/ +# Local databases used for Dataset/frames # +########################################### +scala/metastore_db/ + # Generated Files # ############ SparkCLRCodeCoverage.xml diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj b/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj index 56fb696d..4daf4aa5 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Adapter.csproj @@ -102,20 +102,25 @@ + + + + + @@ -125,6 +130,7 @@ + @@ -134,17 +140,21 @@ + + + + diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkConf.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkConf.cs index e7cdc161..42600236 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkConf.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkConf.cs @@ -2,6 +2,8 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. 
using System; +using System.Collections.Generic; +using System.Text.RegularExpressions; using Microsoft.Spark.CSharp.Configuration; using Microsoft.Spark.CSharp.Interop; using Microsoft.Spark.CSharp.Proxy; @@ -122,6 +124,29 @@ public string Get(string key, string defaultValue) { return sparkConfProxy.Get(key, defaultValue); } + + /// + /// Get all parameters as a list of pairs + /// + public Dictionary GetAll() + { + var configKvp = new Dictionary(); + var kvpStringCollection = sparkConfProxy.GetSparkConfAsString(); + var kvpStringArray = Regex.Split(kvpStringCollection, ";"); + foreach (var kvpString in kvpStringArray) + { + if (!string.IsNullOrEmpty(kvpString)) + { + var kvpItems = Regex.Split(kvpString, "="); + if (kvpItems.Length == 2 && !string.IsNullOrEmpty(kvpItems[0]) && !string.IsNullOrEmpty(kvpItems[1])) + { + configKvp.Add(kvpItems[0], kvpItems[1]); + } + } + } + + return configKvp; + } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs index f16220c0..bc8faac4 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Core/SparkContext.cs @@ -129,6 +129,7 @@ internal SparkContext(ISparkContextProxy sparkContextProxy, SparkConf conf) { SparkContextProxy = sparkContextProxy; SparkConf = conf; + _activeSparkContext = this; } private SparkContext(string master, string appName, string sparkHome, SparkConf conf) @@ -145,6 +146,25 @@ private SparkContext(string master, string appName, string sparkHome, SparkConf _activeSparkContext = this; } + /// + /// This function may be used to get or instantiate a SparkContext and register it as a + /// singleton object. Because we can only have one active SparkContext per JVM, + /// this is useful when applications may wish to share a SparkContext. + /// Note: This function cannot be used to create multiple SparkContext instances + /// even if multiple contexts are allowed. + /// + /// + /// + public static SparkContext GetOrCreate(SparkConf conf) + { + if (_activeSparkContext == null) + { + _activeSparkContext = new SparkContext(conf); + } + + return _activeSparkContext; + } + internal void StartAccumulatorServer() { if (accumulatorServer == null) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ICatalogProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ICatalogProxy.cs new file mode 100644 index 00000000..95570cb1 --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ICatalogProxy.cs @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
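// A small self-contained sketch (not part of the patch) of the parsing contract behind the new
// SparkConf.GetAll() above: the JVM-side proxy returns the configuration as one ';'-separated
// string of "key=value" entries, and empty or malformed entries are skipped. The input string
// here is only a sample.
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

class GetAllParsingDemo
{
    static void Main()
    {
        const string kvpStringCollection = "spark.master=local[*];spark.app.name=MobiusSample;;not-a-pair;";
        var configKvp = new Dictionary<string, string>();

        foreach (var kvpString in Regex.Split(kvpStringCollection, ";"))
        {
            if (string.IsNullOrEmpty(kvpString)) continue;

            var kvpItems = Regex.Split(kvpString, "=");
            if (kvpItems.Length == 2 && !string.IsNullOrEmpty(kvpItems[0]) && !string.IsNullOrEmpty(kvpItems[1]))
            {
                configKvp.Add(kvpItems[0], kvpItems[1]);
            }
        }

        // Prints the two well-formed pairs; the empty and malformed entries are dropped.
        foreach (var kvp in configKvp)
        {
            Console.WriteLine("{0} = {1}", kvp.Key, kvp.Value);
        }
    }
}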
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Sql; +using Microsoft.Spark.CSharp.Sql.Catalog; +using Column = Microsoft.Spark.CSharp.Sql.Catalog.Column; + +namespace Microsoft.Spark.CSharp.Proxy +{ + interface ICatalogProxy + { + string CurrentDatabase { get; } + + void SetCurrentDatabase(string dbName); + + Dataset ListDatabases(); + + Dataset ListTables(string dbName); + + Dataset ListFunctions(string dbName); + + Dataset ListColumns(string tableName); + + Dataset ListColumns(string dbName, string tableName); + + void DropTempTable(string tableName); + + bool IsCached(string tableName); + + void CacheTable(string tableName); + + void UnCacheTable(string tableName); + + void RefreshTable(string tableName); + + void ClearCache(); + + DataFrame CreateExternalTable(string tableName, string path); + + DataFrame CreateExternalTable(string tableName, string path, string source); + + DataFrame CreateExternalTable(string tableName, string source, Dictionary options); + + DataFrame CreateExternalTable(string tableName, string source, StructType schema, + Dictionary options); + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDatasetProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDatasetProxy.cs new file mode 100644 index 00000000..4a760f85 --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/IDatasetProxy.cs @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Microsoft.Spark.CSharp.Proxy +{ + interface IDatasetProxy + { + IDataFrameProxy ToDF(); + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkConfProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkConfProxy.cs index 3c7069bf..2d7d4d11 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkConfProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkConfProxy.cs @@ -18,5 +18,6 @@ internal interface ISparkConfProxy void Set(string key, string value); int GetInt(string key, int defaultValue); string Get(string key, string defaultValue); + string GetSparkConfAsString(); } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs index 51324332..2861e068 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkContextProxy.cs @@ -15,8 +15,7 @@ namespace Microsoft.Spark.CSharp.Proxy internal interface ISparkContextProxy { ISparkConfProxy GetConf(); - ISqlContextProxy CreateSqlContext(); - ISqlContextProxy CreateHiveContext(); + ISparkSessionProxy CreateSparkSession(); IColumnProxy CreateColumnFromName(string name); IColumnProxy CreateFunction(string name, object self); IColumnProxy CreateBinaryMathFunction(string name, object self, object other); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkSessionProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkSessionProxy.cs new file mode 100644 index 00000000..56f869cd --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISparkSessionProxy.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. 
See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Sql; + +namespace Microsoft.Spark.CSharp.Proxy +{ + internal interface IUdfRegistration { } + + interface ISparkSessionProxy + { + ISqlContextProxy SqlContextProxy { get; } + IUdfRegistration Udf { get; } + ICatalogProxy GetCatalog(); + IDataFrameReaderProxy Read(); + ISparkSessionProxy NewSession(); + IDataFrameProxy CreateDataFrame(IRDDProxy rddProxy, IStructTypeProxy structTypeProxy); + IDataFrameProxy Table(string tableName); + IDataFrameProxy Sql(string query); + void Stop(); + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISqlContextProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISqlContextProxy.cs index 3dd5a76f..60531295 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISqlContextProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/ISqlContextProxy.cs @@ -14,7 +14,6 @@ namespace Microsoft.Spark.CSharp.Proxy internal interface ISqlContextProxy { IDataFrameReaderProxy Read(); - ISqlContextProxy NewSession(); string GetConf(string key, string defaultValue); void SetConf(string key, string value); IDataFrameProxy CreateDataFrame(IRDDProxy rddProxy, IStructTypeProxy structTypeProxy); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/CatalogIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/CatalogIpcProxy.cs new file mode 100644 index 00000000..b0e60568 --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/CatalogIpcProxy.cs @@ -0,0 +1,154 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Interop.Ipc; +using Microsoft.Spark.CSharp.Sql; +using Microsoft.Spark.CSharp.Sql.Catalog; + +namespace Microsoft.Spark.CSharp.Proxy.Ipc +{ + [ExcludeFromCodeCoverage] //IPC calls to JVM validated using validation-enabled samples - unit test coverage not reqiured + internal class CatalogIpcProxy : ICatalogProxy + { + private readonly JvmObjectReference jvmCatalogReference; + private readonly ISqlContextProxy sqlContextProxy; + + internal CatalogIpcProxy(JvmObjectReference jvmCatalogReference, ISqlContextProxy sqlContextProxy) + { + this.jvmCatalogReference = jvmCatalogReference; + this.sqlContextProxy = sqlContextProxy; + } + + public string CurrentDatabase + { + get + { + return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "currentDatabase").ToString(); + } + } + + public void CacheTable(string tableName) + { + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "cacheTable", new object[] { tableName }); + } + + public void ClearCache() + { + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "clearCache"); + } + + public DataFrame CreateExternalTable(string tableName, string path) + { + return new DataFrame( + new DataFrameIpcProxy( + new JvmObjectReference( + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "createExternalTable", + new object[] {tableName, path}).ToString()), sqlContextProxy), SparkContext.GetActiveSparkContext()); + } + + public DataFrame CreateExternalTable(string tableName, string source, Dictionary options) + { + throw new NotImplementedException(); //TODO - implement + } + + public DataFrame CreateExternalTable(string tableName, string path, string source) + { + return new DataFrame( + new DataFrameIpcProxy( + new JvmObjectReference( + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "createExternalTable", + new object[] { tableName, path, source }).ToString()), sqlContextProxy), SparkContext.GetActiveSparkContext()); + } + + public DataFrame CreateExternalTable(string tableName, string source, StructType schema, Dictionary options) + { + throw new NotImplementedException(); //TODO - implement + } + + public void DropTempTable(string tableName) + { + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "dropTempView", new object[] { tableName }); + } + + public bool IsCached(string tableName) + { + return + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "isCached", + new object[] {tableName}).ToString().Equals("true", StringComparison.InvariantCultureIgnoreCase); + } + + public Dataset ListColumns(string tableName) + { + return new Dataset( + new DatasetIpcProxy( + new JvmObjectReference( + (string) + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "listColumns", + new object[] { tableName })), sqlContextProxy)); + } + + public Dataset ListColumns(string dbName, string tableName) + { + return new Dataset( + new DatasetIpcProxy( + new JvmObjectReference( + (string) + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "listColumns", + new object[] { dbName, tableName })), sqlContextProxy)); + } + + public Dataset ListDatabases() + { + return new Dataset( + new DatasetIpcProxy( + new 
JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "listDatabases")), sqlContextProxy)); + } + + public Dataset ListFunctions(string dbName) + { + return new Dataset( + new DatasetIpcProxy( + new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "listFunctions", new object[] { dbName })), sqlContextProxy)); + } + + public Dataset
ListTables(string dbName = null) + { + if (dbName != null) + return new Dataset
( + new DatasetIpcProxy( + new JvmObjectReference( + (string) + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "listTables", + new object[] {dbName})), sqlContextProxy)); + else + return new Dataset
( + new DatasetIpcProxy( + new JvmObjectReference( + (string) + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "listTables")), + sqlContextProxy)); + } + + public void SetCurrentDatabase(string dbName) + { + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "setCurrentDatabase", new object[] { dbName }); + } + + public void UnCacheTable(string tableName) + { + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "uncacheTable", new object[] { tableName }); + } + + public void RefreshTable(string tableName) + { + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmCatalogReference, "refreshTable", new object[] { tableName }); + } + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DatasetIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DatasetIpcProxy.cs new file mode 100644 index 00000000..84b4c581 --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/DatasetIpcProxy.cs @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Interop.Ipc; + +namespace Microsoft.Spark.CSharp.Proxy.Ipc +{ + [ExcludeFromCodeCoverage] //IPC calls to JVM validated using validation-enabled samples - unit test coverage not reqiured + internal class DatasetIpcProxy : IDatasetProxy + { + private readonly JvmObjectReference jvmDatasetReference; + private readonly ISqlContextProxy sqlContextProxy; + + internal DatasetIpcProxy(JvmObjectReference jvmDatasetReference, ISqlContextProxy sqlContextProxy) + { + this.jvmDatasetReference = jvmDatasetReference; + this.sqlContextProxy = sqlContextProxy; + } + + public IDataFrameProxy ToDF() + { + return new DataFrameIpcProxy( + new JvmObjectReference( + (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDatasetReference, "toDF")), + sqlContextProxy + ); + } + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkConfIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkConfIpcProxy.cs index a314082a..dbc9e083 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkConfIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkConfIpcProxy.cs @@ -57,5 +57,10 @@ public string Get(string key, string defaultValue) { return SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkConfReference, "get", new object[] { key, defaultValue }).ToString(); } - } + + public string GetSparkConfAsString() + { + return SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.JvmBridgeUtils", "getSparkConfAsString").ToString(); + } +} } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs index 6521b8d9..2e0534e6 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkContextIpcProxy.cs @@ -38,17 +38,15 @@ public SparkContextIpcProxy(JvmObjectReference jvmSparkContextReference, JvmObje this.jvmSparkContextReference = jvmSparkContextReference; this.jvmJavaContextReference = jvmJavaContextReference; } - - public ISqlContextProxy CreateSqlContext() - { - return new SqlContextIpcProxy(new 
JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "createSQLContext", new object[] { jvmSparkContextReference }))); - } - public ISqlContextProxy CreateHiveContext() + public ISparkSessionProxy CreateSparkSession() { - return new SqlContextIpcProxy(new JvmObjectReference( - (string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod( - "org.apache.spark.sql.api.csharp.SQLUtils", "createHiveContext", new object[] { jvmSparkContextReference }))); + return + new SparkSessionIpcProxy( + new JvmObjectReference( + (string) + SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", + "createSparkSession", new object[] {jvmSparkContextReference}))); } public void CreateSparkContext(string master, string appName, string sparkHome, ISparkConfProxy conf) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs new file mode 100644 index 00000000..d134c086 --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SparkSessionIpcProxy.cs @@ -0,0 +1,101 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Interop.Ipc; + +namespace Microsoft.Spark.CSharp.Proxy.Ipc +{ + [ExcludeFromCodeCoverage] //IPC calls to JVM validated using validation-enabled samples - unit test coverage not reqiured + internal class SparkSessionIpcProxy : ISparkSessionProxy + { + private readonly JvmObjectReference jvmSparkSessionReference; + private readonly ISqlContextProxy sqlContextProxy; + + private readonly IUdfRegistration udfRegistration; + + public IUdfRegistration Udf + { + get + { + if (udfRegistration == null) + { + //TODO implementation needed + } + + return udfRegistration; + } + } + + public ISqlContextProxy SqlContextProxy + { + get { return sqlContextProxy; } + } + + public ICatalogProxy GetCatalog() + { + return new CatalogIpcProxy(new JvmObjectReference((string) SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkSessionReference, "catalog")), sqlContextProxy); + } + + internal SparkSessionIpcProxy(JvmObjectReference jvmSparkSessionReference) + { + this.jvmSparkSessionReference = jvmSparkSessionReference; + sqlContextProxy = new SqlContextIpcProxy(GetSqlContextReference()); + } + + private JvmObjectReference GetSqlContextReference() + { + return + new JvmObjectReference( + (string) SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "getSqlContext", new object[] { jvmSparkSessionReference })); + } + + public ISparkSessionProxy NewSession() + { + return new SparkSessionIpcProxy( + new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkSessionReference, "newSession"))); + } + + public IDataFrameReaderProxy Read() + { + var javaDataFrameReaderReference = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkSessionReference, "read"); + return new DataFrameReaderIpcProxy(new JvmObjectReference(javaDataFrameReaderReference.ToString()), sqlContextProxy); + } + + public IDataFrameProxy CreateDataFrame(IRDDProxy rddProxy, IStructTypeProxy structTypeProxy) + { + var rdd = new 
JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "byteArrayRDDToAnyArrayRDD", + new object[] { (rddProxy as RDDIpcProxy).JvmRddReference }).ToString()); + + return new DataFrameIpcProxy( + new JvmObjectReference( + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkSessionReference, "applySchemaToPythonRDD", + new object[] { rdd, (structTypeProxy as StructTypeIpcProxy).JvmStructTypeReference }).ToString()), sqlContextProxy); + } + + public IDataFrameProxy Sql(string sqlQuery) + { + var javaDataFrameReference = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkSessionReference, "sql", new object[] { sqlQuery }); + var javaObjectReferenceForDataFrame = new JvmObjectReference(javaDataFrameReference.ToString()); + return new DataFrameIpcProxy(javaObjectReferenceForDataFrame, sqlContextProxy); + } + + public IDataFrameProxy Table(string tableName) + { + return new DataFrameIpcProxy( + new JvmObjectReference( + (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkSessionReference, "table", + new object[] { tableName })), sqlContextProxy); + } + + public void Stop() + { + SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSparkSessionReference, "stop"); + } + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs index e22d6877..4bb930fe 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Proxy/Ipc/SqlContextIpcProxy.cs @@ -112,12 +112,6 @@ public void RegisterFunction(string name, byte[] command, string returnType) SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(judf, "registerPython", new object[] { name, udf }); } - public ISqlContextProxy NewSession() - { - return new SqlContextIpcProxy( - new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "newSession"))); - } - public string GetConf(string key, string defaultValue) { return (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "getConf", new object[] { key, defaultValue }); diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Builder.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Builder.cs new file mode 100644 index 00000000..24af064d --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Builder.cs @@ -0,0 +1,130 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Core; + +namespace Microsoft.Spark.CSharp.Sql +{ + /// + /// The entry point to programming Spark with the Dataset and DataFrame API. + /// + public class Builder + { + internal Dictionary options = new Dictionary(); + + internal Builder() { } + + /// + /// Sets the Spark master URL to connect to, such as "local" to run locally, "local[4]" to + /// run locally with 4 cores, or "spark://master:7077" to run on a Spark standalone cluster. + /// + /// Master URL + public Builder Master(string master) + { + Config("spark.master", master); + return this; + } + + /// + /// Sets a name for the application, which will be shown in the Spark web UI. + /// If no application name is set, a randomly generated name will be used. 
+ /// + /// Name of the app + public Builder AppName(string appName) + { + Config("spark.app.name", appName); + return this; + } + + /// + /// Sets a config option. Options set using this method are automatically propagated to + /// both SparkConf and SparkSession's own configuration. + /// + /// Key for the configuration + /// value of the configuration + public Builder Config(string key, string value) + { + options[key] = value; + return this; + } + + /// + /// Sets a config option. Options set using this method are automatically propagated to + /// both SparkConf and SparkSession's own configuration. + /// + /// Key for the configuration + /// value of the configuration + public Builder Config(string key, bool value) + { + options[key] = value.ToString(); + return this; + } + + /// + /// Sets a config option. Options set using this method are automatically propagated to + /// both SparkConf and SparkSession's own configuration. + /// + /// Key for the configuration + /// value of the configuration + public Builder Config(string key, double value) + { + options[key] = value.ToString(); + return this; + } + + /// + /// Sets a config option. Options set using this method are automatically propagated to + /// both SparkConf and SparkSession's own configuration. + /// + /// Key for the configuration + /// value of the configuration + public Builder Config(string key, long value) + { + options[key] = value.ToString(); + return this; + } + + /// + /// Sets a list of config options based on the given SparkConf + /// + public Builder Config(SparkConf conf) + { + foreach (var keyValuePair in conf.GetAll()) + { + options[keyValuePair.Key] = keyValuePair.Value; + } + + return this; + } + + /// + /// Enables Hive support, including connectivity to a persistent Hive metastore, support for + /// Hive serdes, and Hive user-defined functions. + /// + public Builder EnableHiveSupport() + { + return Config("spark.sql.catalogImplementation", "hive"); + } + + /// + /// Gets an existing [[SparkSession]] or, if there is no existing one, creates a new + /// one based on the options set in this builder. + /// + /// + public SparkSession GetOrCreate() + { + var sparkConf = new SparkConf(); + foreach (var option in options) + { + sparkConf.Set(option.Key, option.Value); + } + var sparkContext = SparkContext.GetOrCreate(sparkConf); + return SqlContext.GetOrCreate(sparkContext).SparkSession; + } + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Catalog.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Catalog.cs new file mode 100644 index 00000000..94859fcc --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Catalog.cs @@ -0,0 +1,350 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Proxy; + +namespace Microsoft.Spark.CSharp.Sql.Catalog +{ + /// + /// Catalog interface for Spark. + /// + public class Catalog + { + ICatalogProxy catalogProxy; + + internal Catalog(ICatalogProxy catalogProxy) + { + this.catalogProxy = catalogProxy; + } + + /// + /// Returns the current default database in this session. 
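// Illustration only, not part of this patch: a minimal sketch of how the fluent
// Builder above is expected to be chained from application code. The master URL,
// application name and config entry below are hypothetical sample values.
SparkSession session = SparkSession.Builder()
    .Master("local[2]")                             // run locally with 2 cores
    .AppName("MobiusBuilderSample")                 // shown in the Spark web UI
    .Config("spark.ui.showConsoleProgress", false)  // any Spark/SparkSession option
    .EnableHiveSupport()                            // spark.sql.catalogImplementation=hive
    .GetOrCreate();                                 // backed by SparkContext.GetOrCreate(conf)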
+ /// + public string CurrentDatabase + { + get { return catalogProxy.CurrentDatabase; } + } + + // TODO Enable these convenience functions if needed + /* + public List GetDatabasesList() + { + var rows = ListDatabases().Collect(); + var list = new List(); + foreach (var row in rows) + { + list.Add(new Database + { + Name = row.Get("name"), + Description = row.Get("description"), + LocationUri = row.Get("locationUri") + }); + } + + return list; + } + + public List
GetTablesList(string dbName = null) + { + var tables = ListTables(dbName).Collect(); + //iterate and construct Table + throw new NotImplementedException(); + } + + public List
GetColumnsList(string tableName, string dbName = null) + { + var tables = ListColumns(tableName, dbName).Collect(); + //iterate and construct Column + throw new NotImplementedException(); + } + + public List
GetFunctionsList(string dbName = null) + { + var tables = ListFunctions(dbName).Collect(); + //iterate and construct Table + throw new NotImplementedException(); + } + */ + + /// + /// Returns a list of databases available across all sessions. + /// + /// + public DataFrame ListDatabases() + { + return catalogProxy.ListDatabases().ToDF(); + } + + /// + /// Returns a list of tables in the current database or given database + /// This includes all temporary tables. + /// + /// Optional database name. If not provided, current database is used + public DataFrame ListTables(string dbName = null) + { + return catalogProxy.ListTables(dbName ?? CurrentDatabase).ToDF(); + } + + /// + /// Returns a list of columns for the given table in the current database or + /// the given temporary table. + /// + /// Name of the table + /// Name of the database. If database is not provided, current database is used + public DataFrame ListColumns(string tableName, string dbName = null) + { + return catalogProxy.ListColumns(tableName, dbName ?? CurrentDatabase).ToDF(); + } + + /// + /// Returns a list of functions registered in the specified database. + /// This includes all temporary functions + /// + /// Name of the database. If database is not provided, current database is used + public DataFrame ListFunctions(string dbName = null) + { + return catalogProxy.ListFunctions(dbName ?? CurrentDatabase).ToDF(); + } + + /// + /// Sets the current default database in this session. + /// + /// Name of database + public void SetCurrentDatabase(string dbName) + { + catalogProxy.SetCurrentDatabase(dbName); + } + + /// + /// Drops the temporary view with the given view name in the catalog. + /// If the view has been cached before, then it will also be uncached. + /// + /// Name of the table + public void DropTempView(string tempViewName) + { + catalogProxy.DropTempTable(tempViewName); + } + + /// + /// Returns true if the table is currently cached in-memory. + /// + /// Name of the table + public bool IsCached(string tableName) + { + return catalogProxy.IsCached(tableName); + } + + /// + /// Caches the specified table in-memory. + /// + /// Name of the table + public void CacheTable(string tableName) + { + catalogProxy.CacheTable(tableName); + } + + /// + /// Removes the specified table from the in-memory cache. + /// + /// Name of the table + public void UnCacheTable(string tableName) + { + catalogProxy.UnCacheTable(tableName); + } + + /// + /// Invalidate and refresh all the cached metadata of the given table. For performance reasons, + /// Spark SQL or the external data source library it uses might cache certain metadata about a + /// table, such as the location of blocks.When those change outside of Spark SQL, users should + /// call this function to invalidate the cache. + /// If this table is cached as an InMemoryRelation, drop the original cached version and make the + /// new version cached lazily. + /// + /// Name of the table + public void RefreshTable(string tableName) + { + catalogProxy.RefreshTable(tableName); + } + + /// + /// Removes all cached tables from the in-memory cache. + /// + public void ClearCache() + { + catalogProxy.ClearCache(); + } + + /// + /// Creates an external table from the given path and returns the corresponding DataFrame. + /// It will use the default data source configured by spark.sql.sources.default. 
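// Illustration only, not part of this patch: a rough sketch of the catalog calls above.
// Assumes a SparkSession named 'session' built elsewhere and a registered table named
// "people"; both names are hypothetical.
var catalog = session.Catalog;
string db = catalog.CurrentDatabase;          // e.g. "default"
DataFrame tables = catalog.ListTables();      // current database when dbName is omitted
tables.Show();
catalog.CacheTable("people");
bool cached = catalog.IsCached("people");     // expected to be true after CacheTable
catalog.RefreshTable("people");               // refreshed copy is re-cached lazily
catalog.UnCacheTable("people");
catalog.ClearCache();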
+ /// + /// Name of the table + /// Path to table + public DataFrame CreateExternalTable(string tableName, string path) + { + return catalogProxy.CreateExternalTable(tableName, path); + } + + /// + /// Creates an external table from the given path on a data source and returns DataFrame + /// + /// Name of the table + /// Path to table + /// Data source + public DataFrame CreateExternalTable(string tableName, string path, string source) + { + return catalogProxy.CreateExternalTable(tableName, path, source); + } + + /// + /// Creates an external table from the given path based on a data source and a set of options. + /// Then, returns the corresponding DataFrame. + /// + /// Name of the table + /// Data source + /// Options to create table + /// + public DataFrame CreateExternalTable(string tableName, string source, Dictionary options) + { + return catalogProxy.CreateExternalTable(tableName, source, options); + } + + /// + /// Create an external table from the given path based on a data source, a schema and + /// a set of options.Then, returns the corresponding DataFrame. + /// + /// Name of the table + /// Data source + /// Schema of the table + /// Options to create table + /// + public DataFrame CreateExternalTable(string tableName, string source, StructType schema, Dictionary options) + { + return catalogProxy.CreateExternalTable(tableName, source, schema, options); + } + } + + /// + /// A database in Spark + /// + public class Database + { + /// + /// Name of the database + /// + public string Name { get; internal set; } + + /// + /// Desciption for the database + /// + public string Description { get; internal set; } + + /// + /// Location of the database + /// + public string LocationUri { get; internal set; } + } + + /// + /// A table in Spark + /// + public class Table + { + /// + /// Name of the table + /// + public string Name { get; internal set; } + + /// + /// Name of the database Table belongs to + /// + public string Database { get; internal set; } + + /// + /// Description of the table + /// + public string Description { get; internal set; } + + /// + /// Type of the table (table, view) + /// + public string TableType { get; internal set; } + + /// + /// Whether the table is a temporary table + /// + public bool IsTemporary { get; internal set; } + } + + /// + /// A column in Spark + /// + public class Column + { + /// + /// Name of the column + /// + public string Name { get; internal set; } + + /// + /// Datatype of the column + /// + public string DataType { get; internal set; } + + /// + /// Description of the column + /// + public string Description { get; internal set; } + + /// + /// Whether the column value can be null + /// + public bool IsNullable { get; internal set; } + + /// + /// Whether the column is a partition column. + /// + public bool IsPartition { get; internal set; } + + /// + /// Whether the column is a bucket column. + /// + public bool IsBucket { get; internal set; } + } + + /// + /// A user-defined function in Spark + /// + public class Function + { + /// + /// Name of the column + /// + public string Name { get; internal set; } + + /// + /// Name of the database + /// + public string Database { get; internal set; } + + /// + /// Description of the function + /// + public string Description { get; internal set; } + + /// + /// Fully qualified class name of the function + /// + public string ClassName { get; internal set; } + + /// + /// Whether the function is a temporary function or not. 
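// Illustration only, not part of this patch: how the CreateExternalTable overloads above
// might be called. The table names, paths and data source are hypothetical; the
// two-argument form falls back to the source configured by spark.sql.sources.default.
DataFrame events = session.Catalog.CreateExternalTable("events", "hdfs:///data/events");
DataFrame logs = session.Catalog.CreateExternalTable("logs", "hdfs:///data/logs.json", "json");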
+ /// + public bool IsTemporary { get; internal set; } + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs index 803655a9..66601ca2 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/DataFrame.cs @@ -170,11 +170,17 @@ public IEnumerable Collect() return Rdd.Collect(port).Cast(); } + //TODO - add this method if needed to convert Row to collection of T + //public IEnumerable Collect() + //{ + // throw new NotImplementedException(); + //} + /// - /// Converts the DataFrame to RDD of Row - /// - /// resulting RDD - public RDD ToRDD() //RDD created using byte representation of Row objects + /// Converts the DataFrame to RDD of Row + /// + /// resulting RDD + public RDD ToRDD() //RDD created using byte representation of Row objects { return Rdd; } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs new file mode 100644 index 00000000..b3a81cf0 --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/Dataset.cs @@ -0,0 +1,135 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Proxy; + +namespace Microsoft.Spark.CSharp.Sql +{ + /// + /// Dataset is a strongly typed collection of domain-specific objects that can be transformed + /// in parallel using functional or relational operations.Each Dataset also has an untyped view + /// called a DataFrame, which is a Dataset of Row. + /// + public class Dataset + { + IDatasetProxy datasetProxy; + + internal Dataset(IDatasetProxy datasetProxy) + { + this.datasetProxy = datasetProxy; + } + + /// + /// Selects column based on the column name + /// + /// Name of the column + /// + public Column this[string columnName] + { + get { return ToDF()[columnName]; } + } + + private DataFrame dataFrame; + + /// + /// Converts this strongly typed collection of data to generic Dataframe. In contrast to the + /// strongly typed objects that Dataset operations work on, a Dataframe returns generic[[Row]] + /// objects that allow fields to be accessed by ordinal or name. + /// + /// DataFrame created from Dataset + public DataFrame ToDF() + { + return dataFrame ?? (dataFrame = new DataFrame(datasetProxy.ToDF(), SparkContext.GetActiveSparkContext())); + } + + /// + /// Prints the schema to the console in a nice tree format. + /// + public void PrintSchema() + { + ToDF().ShowSchema(); + } + + /// + /// Prints the plans (logical and physical) to the console for debugging purposes. + /// + /// + public void Explain(bool extended) + { + ToDF().Explain(extended); + } + + /// + /// Prints the physical plan to the console for debugging purposes. + /// + public void Explain() + { + ToDF().Explain(); + } + + /// + /// Returns all column names and their data types as an array. + /// + public IEnumerable> DTypes() + { + return ToDF().DTypes(); + } + + /// + /// Returns all column names as an array. + /// + public IEnumerable Columns() + { + return ToDF().Columns(); + } + + /// + /// Displays the top 20 rows of Dataset in a tabular form. Strings more than 20 characters + /// will be truncated, and all cells will be aligned right. 
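// Illustration only, not part of this patch: Dataset instances are created internally by
// the adapter (for example by the catalog proxy); the untyped operations above delegate
// to the underlying DataFrame. 'ds' below is a hypothetical Dataset variable.
DataFrame df = ds.ToDF();       // created once and cached inside the Dataset
ds.PrintSchema();               // same as df.ShowSchema()
ds.Explain(true);               // logical and physical plans
ds.Show(10, truncate: false);   // delegates to df.Show(10, false)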
+ /// + /// Number of rows - default is 20 + /// Indicates if rows with more than 20 characters to be truncated + public void Show(int numberOfRows = 20, bool truncate = true) + { + ToDF().Show(numberOfRows, truncate); + } + + /// + /// Prints schema + /// + public void ShowSchema() + { + ToDF().ShowSchema(); + } + } + + /// + /// Dataset of specific types + /// + /// Type parameter + public class Dataset : Dataset + { + internal Dataset(IDatasetProxy datasetProxy): base(datasetProxy) {} + + /************************************************************ + * Would it be useful to expose methods like the following? + * It would offer static type checking at the cost of runtime optimizations + * because C# functionality need to execute in CLR + ************************************************************ + + public Dataset Filter(Func func) + { + throw new NotImplementedException(); + } + + public Dataset Map(Func mapFunc) + { + throw new NotImplementedException(); + } + + */ + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/HiveContext.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/HiveContext.cs index 434ac076..9274cf5a 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/HiveContext.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/HiveContext.cs @@ -17,13 +17,8 @@ public class HiveContext : SqlContext /// Creates a HiveContext /// /// - public HiveContext(SparkContext sparkContext) - : base(sparkContext, sparkContext.SparkContextProxy.CreateHiveContext()) - { - } - - internal HiveContext(SparkContext sparkContext, ISqlContextProxy sqlContextProxy) - : base(sparkContext, sqlContextProxy) + public HiveContext(SparkContext sparkContext) + : base(SparkSession.Builder().Config(sparkContext.SparkConf).EnableHiveSupport().GetOrCreate()) { } @@ -36,7 +31,7 @@ internal HiveContext(SparkContext sparkContext, ISqlContextProxy sqlContextProxy /// public void RefreshTable(string tableName) { - SqlContextProxy.RefreshTable(tableName); + SparkSession.Catalog.RefreshTable(tableName); } } } diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs new file mode 100644 index 00000000..3ff8a8ab --- /dev/null +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SparkSession.cs @@ -0,0 +1,140 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Runtime.Remoting.Contexts; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Proxy; +using Microsoft.Spark.CSharp.Services; +using Microsoft.Spark.CSharp.Sql.Catalog; + +namespace Microsoft.Spark.CSharp.Sql +{ + /// + /// The entry point to programming Spark with the Dataset and DataFrame API. + /// + public class SparkSession + { + private readonly ILoggerService logger = LoggerServiceFactory.GetLogger(typeof(SparkSession)); + + private ISparkSessionProxy sparkSessionProxy; + private readonly SparkContext sparkContext; + + internal ISparkSessionProxy SparkSessionProxy + { + get { return sparkSessionProxy; } + //setter is used only for testing...//TODO - refactor + set { sparkSessionProxy = value; } + } + + private Catalog.Catalog catalog; + + /// + /// Interface through which the user may create, drop, alter or query underlying + /// databases, tables, functions etc. 
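// Illustration only, not part of this patch: the HiveContext change above keeps the
// existing caller-side API while routing it through a Hive-enabled SparkSession.
// The application name and table name below are hypothetical.
var conf = new SparkConf();
conf.SetAppName("MobiusHiveSample");
var sparkContext = new SparkContext(conf);
var hiveContext = new HiveContext(sparkContext);   // now built on SparkSession with EnableHiveSupport
hiveContext.Sql("CREATE TABLE IF NOT EXISTS people (name string, age int)");
hiveContext.RefreshTable("people");                // delegates to SparkSession.Catalog.RefreshTable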
+ /// + public Catalog.Catalog Catalog + { + get { return catalog ?? (catalog = new Catalog.Catalog(SparkSessionProxy.GetCatalog())); } + } + + internal SparkContext SparkContext + { + get { return sparkContext; } + } + + /// + /// Builder for SparkSession + /// + public static Builder Builder() + { + return new Builder(); + } + + internal SparkSession(SparkContext sparkContext) + { + sparkSessionProxy = sparkContext.SparkContextProxy.CreateSparkSession(); + this.sparkContext = sparkContext; + } + + internal SparkSession(ISparkSessionProxy sparkSessionProxy) + { + this.sparkSessionProxy = sparkSessionProxy; + } + + /// + /// Start a new session with isolated SQL configurations, temporary tables, registered + /// functions are isolated, but sharing the underlying [[SparkContext]] and cached data. + /// Note: Other than the [[SparkContext]], all shared state is initialized lazily. + /// This method will force the initialization of the shared state to ensure that parent + /// and child sessions are set up with the same shared state. If the underlying catalog + /// implementation is Hive, this will initialize the metastore, which may take some time. + /// + public SparkSession NewSession() + { + return new SparkSession(sparkSessionProxy.NewSession()); + } + + /// + /// Stop underlying SparkContext + /// + public void Stop() + { + sparkSessionProxy.Stop(); + } + + /// + /// Returns a DataFrameReader that can be used to read non-streaming data in as a DataFrame + /// + /// + public DataFrameReader Read() + { + logger.LogInfo("Using DataFrameReader to read input data from external data source"); + return new DataFrameReader(sparkSessionProxy.Read(), sparkContext); + } + + /// + /// Creates a from a RDD containing array of object using the given schema. + /// + /// RDD containing array of object. The array acts as a row and items within the array act as columns which the schema is specified in . + /// The schema of DataFrame. + /// + public DataFrame CreateDataFrame(RDD rdd, StructType schema) + { + // Note: This is for pickling RDD, convert to RDD which happens in CSharpWorker. + // The below sqlContextProxy.CreateDataFrame() will call byteArrayRDDToAnyArrayRDD() of SQLUtils.scala which only accept RDD of type RDD[Array[Byte]]. + // In byteArrayRDDToAnyArrayRDD() of SQLUtils.scala, the SerDeUtil.pythonToJava() will be called which is a mapPartitions inside. + // It will be executed until the CSharpWorker finishes Pickling to RDD[Array[Byte]]. + var rddRow = rdd.Map(r => r); + rddRow.serializedMode = SerializedMode.Row; + + return new DataFrame(sparkSessionProxy.CreateDataFrame(rddRow.RddProxy, schema.StructTypeProxy), sparkContext); + } + + /// + /// Returns the specified table as a + /// + /// + /// + public DataFrame Table(string tableName) + { + return new DataFrame(sparkSessionProxy.Table(tableName), sparkContext); + } + + /// + /// Executes a SQL query using Spark, returning the result as a DataFrame. 
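// Illustration only, not part of this patch: a short end-to-end sketch of the SparkSession
// members above. Assumes a session obtained from the Builder and a table named "people";
// both are hypothetical.
DataFrame people = session.Table("people");
DataFrame adults = session.Sql("SELECT name, age FROM people WHERE age >= 18");
adults.Show();
SparkSession isolated = session.NewSession();   // shares the SparkContext, isolates SQL conf
isolated.Sql("SET spark.sql.shuffle.partitions=4");
session.Stop();                                 // stops the underlying SparkContext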
The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect' + /// + /// + /// + public DataFrame Sql(string sqlQuery) + { + logger.LogInfo("SQL query to execute on the dataframe is {0}", sqlQuery); + return new DataFrame(sparkSessionProxy.Sql(sqlQuery), sparkContext); + } + } +} diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs index 829b14e6..4f1bf7aa 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Sql/SqlContext.cs @@ -23,17 +23,38 @@ public class SqlContext private static SqlContext instance; + private SparkSession sparkSession; + private bool isRootContext; + + /// + /// Underlying SparkSession + /// + public SparkSession SparkSession + { + get { return sparkSession; } + } + + internal SqlContext(SparkSession sparkSession, bool isRootContext) + { + this.sparkSession = sparkSession; + this.isRootContext = isRootContext; + if (instance == null) instance = this; + } + + internal SqlContext(SparkSession sparkSession) : this(sparkSession, true) + { } + /// /// Creates a SqlContext /// /// - public SqlContext(SparkContext sparkContext) + public SqlContext(SparkContext sparkContext) : this(new SparkSession(sparkContext)) { + sqlContextProxy = sparkSession.SparkSessionProxy.SqlContextProxy; this.sparkContext = sparkContext; - sqlContextProxy = sparkContext.SparkContextProxy.CreateSqlContext(); - if (instance == null) instance = this; } + //TODO - remove this constructor after fixing unit tests that reference this internal SqlContext(SparkContext sparkContext, ISqlContextProxy sqlContextProxy) { this.sparkContext = sparkContext; @@ -62,8 +83,7 @@ public static SqlContext GetOrCreate(SparkContext sparkContext) /// public SqlContext NewSession() { - var newSessionProxy = sqlContextProxy.NewSession(); - return new SqlContext(this.sparkContext, newSessionProxy); + return new SqlContext(sparkSession.NewSession()); } /// @@ -75,7 +95,7 @@ public SqlContext NewSession() /// public string GetConf(string key, string defaultValue) { - return sqlContextProxy.GetConf(key, defaultValue); + return SparkSession.SparkSessionProxy.SqlContextProxy.GetConf(key, defaultValue); } /// @@ -85,7 +105,7 @@ public string GetConf(string key, string defaultValue) /// public void SetConf(string key, string value) { - sqlContextProxy.SetConf(key, value); + SparkSession.SparkSessionProxy.SqlContextProxy.SetConf(key, value); } /// @@ -155,7 +175,7 @@ public void DropTempTable(string tableName) /// public DataFrame Table(string tableName) { - return new DataFrame(sqlContextProxy.Table(tableName), sparkContext); + return SparkSession.Table(tableName); } /// @@ -230,7 +250,7 @@ public bool IsCached(string tableName) public DataFrame Sql(string sqlQuery) { logger.LogInfo("SQL query to execute on the dataframe is {0}", sqlQuery); - return new DataFrame(sqlContextProxy.Sql(sqlQuery), sparkContext); + return SparkSession.Sql(sqlQuery); } /// diff --git a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML index 5a445d12..0304bdcc 100644 --- a/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML +++ b/csharp/Adapter/documentation/Microsoft.Spark.CSharp.Adapter.Doc.XML @@ -1992,6 +1992,11 @@ Key to use Default value to use + + + Get all parameters as a list of pairs + + Main entry point for Spark functionality. 
A SparkContext represents the @@ -2072,6 +2077,17 @@ + + + This function may be used to get or instantiate a SparkContext and register it as a + singleton object. Because we can only have one active SparkContext per JVM, + this is useful when applications may wish to share a SparkContext. + Note: This function cannot be used to create multiple SparkContext instances + even if multiple contexts are allowed. + + + + Read a text file from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI, and return it as an RDD of Strings. @@ -4577,6 +4593,312 @@ + + + The entry point to programming Spark with the Dataset and DataFrame API. + + + + + Sets the Spark master URL to connect to, such as "local" to run locally, "local[4]" to + run locally with 4 cores, or "spark://master:7077" to run on a Spark standalone cluster. + + Master URL + + + + Sets a name for the application, which will be shown in the Spark web UI. + If no application name is set, a randomly generated name will be used. + + Name of the app + + + + Sets a config option. Options set using this method are automatically propagated to + both SparkConf and SparkSession's own configuration. + + Key for the configuration + value of the configuration + + + + Sets a config option. Options set using this method are automatically propagated to + both SparkConf and SparkSession's own configuration. + + Key for the configuration + value of the configuration + + + + Sets a config option. Options set using this method are automatically propagated to + both SparkConf and SparkSession's own configuration. + + Key for the configuration + value of the configuration + + + + Sets a config option. Options set using this method are automatically propagated to + both SparkConf and SparkSession's own configuration. + + Key for the configuration + value of the configuration + + + + Sets a list of config options based on the given SparkConf + + + + + Enables Hive support, including connectivity to a persistent Hive metastore, support for + Hive serdes, and Hive user-defined functions. + + + + + Gets an existing [[SparkSession]] or, if there is no existing one, creates a new + one based on the options set in this builder. + + + + + + Catalog interface for Spark. + + + + + Returns the current default database in this session. + + + + + Returns a list of databases available across all sessions. + + + + + + Returns a list of tables in the current database or given database + This includes all temporary tables. + + Optional database name. If not provided, current database is used + + + + Returns a list of columns for the given table in the current database or + the given temporary table. + + Name of the table + Name of the database. If database is not provided, current database is used + + + + Returns a list of functions registered in the specified database. + This includes all temporary functions + + Name of the database. If database is not provided, current database is used + + + + Sets the current default database in this session. + + Name of database + + + + Drops the temporary view with the given view name in the catalog. + If the view has been cached before, then it will also be uncached. + + Name of the table + + + + Returns true if the table is currently cached in-memory. + + Name of the table + + + + Caches the specified table in-memory. + + Name of the table + + + + Removes the specified table from the in-memory cache. + + Name of the table + + + + Invalidate and refresh all the cached metadata of the given table. 
For performance reasons, + Spark SQL or the external data source library it uses might cache certain metadata about a + table, such as the location of blocks.When those change outside of Spark SQL, users should + call this function to invalidate the cache. + If this table is cached as an InMemoryRelation, drop the original cached version and make the + new version cached lazily. + + Name of the table + + + + Removes all cached tables from the in-memory cache. + + + + + Creates an external table from the given path and returns the corresponding DataFrame. + It will use the default data source configured by spark.sql.sources.default. + + Name of the table + Path to table + + + + Creates an external table from the given path on a data source and returns DataFrame + + Name of the table + Path to table + Data source + + + + Creates an external table from the given path based on a data source and a set of options. + Then, returns the corresponding DataFrame. + + Name of the table + Data source + Options to create table + + + + + Create an external table from the given path based on a data source, a schema and + a set of options.Then, returns the corresponding DataFrame. + + Name of the table + Data source + Schema of the table + Options to create table + + + + + A database in Spark + + + + + Name of the database + + + + + Desciption for the database + + + + + Location of the database + + + + + A table in Spark + + + + + Name of the table + + + + + Name of the database Table belongs to + + + + + Description of the table + + + + + Type of the table (table, view) + + + + + Whether the table is a temporary table + + + + + A column in Spark + + + + + Name of the column + + + + + Datatype of the column + + + + + Description of the column + + + + + Whether the column value can be null + + + + + Whether the column is a partition column. + + + + + Whether the column is a bucket column. + + + + + A user-defined function in Spark + + + + + Name of the column + + + + + Name of the database + + + + + Description of the function + + + + + Fully qualified class name of the function + + + + + Whether the function is a temporary function or not. + + A column that will be computed based on the data in a DataFrame. @@ -5785,6 +6107,73 @@ Format("parquet").Save(path) + + + Dataset is a strongly typed collection of domain-specific objects that can be transformed + in parallel using functional or relational operations.Each Dataset also has an untyped view + called a DataFrame, which is a Dataset of Row. + + + + + Selects column based on the column name + + Name of the column + + + + + Converts this strongly typed collection of data to generic Dataframe. In contrast to the + strongly typed objects that Dataset operations work on, a Dataframe returns generic[[Row]] + objects that allow fields to be accessed by ordinal or name. + + DataFrame created from Dataset + + + + Prints the schema to the console in a nice tree format. + + + + + Prints the plans (logical and physical) to the console for debugging purposes. + + + + + + Prints the physical plan to the console for debugging purposes. + + + + + Returns all column names and their data types as an array. + + + + + Returns all column names as an array. + + + + + Displays the top 20 rows of Dataset in a tabular form. Strings more than 20 characters + will be truncated, and all cells will be aligned right. 
+ + Number of rows - default is 20 + Indicates if rows with more than 20 characters to be truncated + + + + Prints schema + + + + + Dataset of specific types + + Type parameter + A variant of Spark SQL that integrates with data stored in Hive. @@ -6594,12 +6983,76 @@ The given SaveMode The string that represents the given SaveMode + + + The entry point to programming Spark with the Dataset and DataFrame API. + + + + + Interface through which the user may create, drop, alter or query underlying + databases, tables, functions etc. + + + + + Builder for SparkSession + + + + + Start a new session with isolated SQL configurations, temporary tables, registered + functions are isolated, but sharing the underlying [[SparkContext]] and cached data. + Note: Other than the [[SparkContext]], all shared state is initialized lazily. + This method will force the initialization of the shared state to ensure that parent + and child sessions are set up with the same shared state. If the underlying catalog + implementation is Hive, this will initialize the metastore, which may take some time. + + + + + Stop underlying SparkContext + + + + + Returns a DataFrameReader that can be used to read non-streaming data in as a DataFrame + + + + + + Creates a from a RDD containing array of object using the given schema. + + RDD containing array of object. The array acts as a row and items within the array act as columns which the schema is specified in . + The schema of DataFrame. + + + + + Returns the specified table as a + + + + + + + Executes a SQL query using Spark, returning the result as a DataFrame. The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect' + + + + The entry point for working with structured data (rows and columns) in Spark. Allows the creation of [[DataFrame]] objects as well as the execution of SQL queries. + + + Underlying SparkSession + + Creates a SqlContext diff --git a/csharp/Adapter/documentation/Mobius_API_Documentation.md b/csharp/Adapter/documentation/Mobius_API_Documentation.md index 87b56ec2..6ad46d6e 100644 --- a/csharp/Adapter/documentation/Mobius_API_Documentation.md +++ b/csharp/Adapter/documentation/Mobius_API_Documentation.md @@ -311,7 +311,7 @@ ####Methods -
<table><tr><th>Name</th><th>Description</th></tr><tr><td>SetMaster</td><td>The master URL to connect to, such as "local" to run locally with one thread, "local[4]" to run locally with 4 cores, or "spark://master:7077" to run on a Spark standalone cluster.</td></tr><tr><td>SetAppName</td><td>Set a name for your application. Shown in the Spark web UI.</td></tr><tr><td>SetSparkHome</td><td>Set the location where Spark is installed on worker nodes.</td></tr><tr><td>Set</td><td>Set the value of a string config</td></tr><tr><td>GetInt</td><td>Get an int parameter value, falling back to a default if not set</td></tr><tr><td>Get</td><td>Get a string parameter value, falling back to a default if not set</td></tr></table>
+
<table><tr><th>Name</th><th>Description</th></tr><tr><td>SetMaster</td><td>The master URL to connect to, such as "local" to run locally with one thread, "local[4]" to run locally with 4 cores, or "spark://master:7077" to run on a Spark standalone cluster.</td></tr><tr><td>SetAppName</td><td>Set a name for your application. Shown in the Spark web UI.</td></tr><tr><td>SetSparkHome</td><td>Set the location where Spark is installed on worker nodes.</td></tr><tr><td>Set</td><td>Set the value of a string config</td></tr><tr><td>GetInt</td><td>Get an int parameter value, falling back to a default if not set</td></tr><tr><td>Get</td><td>Get a string parameter value, falling back to a default if not set</td></tr><tr><td>GetAll</td><td>Get all parameters as a list of pairs</td></tr></table>
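A rough illustration (not part of this patch) of the newly documented GetAll, which also backs the new Builder.Config(SparkConf) overload added in this change; the configuration values below are hypothetical:

    var conf = new SparkConf();
    conf.SetAppName("MobiusConfSample");
    conf.Set("spark.local.dir", @"C:\temp\spark");
    foreach (var entry in conf.GetAll())    // one key/value pair per configured setting
    {
        Console.WriteLine("{0}={1}", entry.Key, entry.Value);
    }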
--- @@ -327,7 +327,7 @@ ####Methods -
NameDescription
GetActiveSparkContextGet existing SparkContext
GetConfReturn a copy of this JavaSparkContext's configuration. The configuration ''cannot'' be changed at runtime.
TextFileRead a text file from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI, and return it as an RDD of Strings.
Parallelize``1Distribute a local collection to form an RDD. sc.Parallelize(new int[] {0, 2, 3, 4, 6}, 5).Glom().Collect() [[0], [2], [3], [4], [6]]
EmptyRDDCreate an RDD that has no partitions or elements.
WholeTextFilesRead a directory of text files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. Each file is read as a single record and returned in a key-value pair, where the key is the path of each file, the value is the content of each file. For example, if you have the following files: {{{ hdfs://a-hdfs-path/part-00000 hdfs://a-hdfs-path/part-00001 ... hdfs://a-hdfs-path/part-nnnnn }}} Do {{{ RDD<KeyValuePair<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path") }}} then `rdd` contains {{{ (a-hdfs-path/part-00000, its content) (a-hdfs-path/part-00001, its content) ... (a-hdfs-path/part-nnnnn, its content) }}} Small files are preferred, large file is also allowable, but may cause bad performance. minPartitions A suggestion value of the minimal splitting number for input data.
BinaryFilesRead a directory of binary files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI as a byte array. Each file is read as a single record and returned in a key-value pair, where the key is the path of each file, the value is the content of each file. For example, if you have the following files: {{{ hdfs://a-hdfs-path/part-00000 hdfs://a-hdfs-path/part-00001 ... hdfs://a-hdfs-path/part-nnnnn }}} Do RDD<KeyValuePair<string, byte[]>>"/> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path")`, then `rdd` contains {{{ (a-hdfs-path/part-00000, its content) (a-hdfs-path/part-00001, its content) ... (a-hdfs-path/part-nnnnn, its content) }}} @note Small files are preferred; very large files but may cause bad performance. @param minPartitions A suggestion value of the minimal splitting number for input data.
SequenceFileRead a Hadoop SequenceFile with arbitrary key and value Writable class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is as follows: 1. A Java RDD is created from the SequenceFile or other InputFormat, and the key and value Writable classes 2. Serialization is attempted via Pyrolite pickling 3. If this fails, the fallback is to call 'toString' on each key and value 4. PickleSerializer is used to deserialize pickled objects on the Python side
NewAPIHadoopFileRead a 'new API' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is the same as for sc.sequenceFile. A Hadoop configuration can be passed in as a Python dict. This will be converted into a Configuration in Java
NewAPIHadoopRDDRead a 'new API' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. This will be converted into a Configuration in Java. The mechanism is the same as for sc.sequenceFile.
HadoopFileRead an 'old' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is the same as for sc.sequenceFile. A Hadoop configuration can be passed in as a Python dict. This will be converted into a Configuration in Java.
HadoopRDDRead an 'old' Hadoop InputFormat with arbitrary key and value class, from an arbitrary Hadoop configuration, which is passed in as a Python dict. This will be converted into a Configuration in Java. The mechanism is the same as for sc.sequenceFile.
Union``1Build the union of a list of RDDs. This supports unions() of RDDs with different serialized formats, although this forces them to be reserialized using the default serializer: >>> path = os.path.join(tempdir, "union-text.txt") >>> with open(path, "w") as testFile: ... _ = testFile.write("Hello") >>> textFile = sc.textFile(path) >>> textFile.collect() [u'Hello'] >>> parallelized = sc.parallelize(["World!"]) >>> sorted(sc.union([textFile, parallelized]).collect()) [u'Hello', 'World!']
Broadcast``1Broadcast a read-only variable to the cluster, returning a Broadcast object for reading it in distributed functions. The variable will be sent to each cluster only once.
Accumulator``1Create an with the given initial value, using a given helper object to define how to add values of the data type if provided. Default AccumulatorParams are used for integers and floating-point numbers if you do not provide one. For other types, a custom AccumulatorParam can be used.
StopShut down the SparkContext.
AddFileAdd a file to be downloaded with this Spark job on every node. The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, use `SparkFiles.get(fileName)` to find its download location.
SetCheckpointDirSet the directory under which RDDs are going to be checkpointed. The directory must be a HDFS path if running on a cluster.
SetJobGroupAssigns a group ID to all the jobs started by this thread until the group ID is set to a different value or cleared. Often, a unit of execution in an application consists of multiple Spark actions or jobs. Application programmers can use this method to group all those jobs together and give a group description. Once set, the Spark web UI will associate such jobs with this group. The application can also use [[org.apache.spark.api.java.JavaSparkContext.cancelJobGroup]] to cancel all running jobs in this group. For example, {{{ // In the main thread: sc.setJobGroup("some_job_to_cancel", "some job description"); rdd.map(...).count(); // In a separate thread: sc.cancelJobGroup("some_job_to_cancel"); }}} If interruptOnCancel is set to true for the job group, then job cancellation will result in Thread.interrupt() being called on the job's executor threads. This is useful to help ensure that the tasks are actually stopped in a timely manner, but is off by default due to HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes as dead.
SetLocalPropertySet a local property that affects jobs submitted from this thread, such as the Spark fair scheduler pool.
GetLocalPropertyGet a local property set in this thread, or null if it is missing. See [[org.apache.spark.api.java.JavaSparkContext.setLocalProperty]].
SetLogLevelControl our logLevel. This overrides any user-defined log settings. @param logLevel The desired log level as a string. Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN
CancelJobGroupCancel active jobs for the specified group. See for more information.
CancelAllJobsCancel all jobs that have been scheduled or are running.
+
|Name|Description|
|----|-----------|
|GetActiveSparkContext|Get the existing SparkContext.|
|GetConf|Return a copy of this JavaSparkContext's configuration. The configuration ''cannot'' be changed at runtime.|
|GetOrCreate|This function may be used to get or instantiate a SparkContext and register it as a singleton object. Because we can only have one active SparkContext per JVM, this is useful when applications may wish to share a SparkContext. Note: This function cannot be used to create multiple SparkContext instances even if multiple contexts are allowed.|
|TextFile|Read a text file from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI, and return it as an RDD of Strings.|
|Parallelize``1|Distribute a local collection to form an RDD. For example, sc.Parallelize(new int[] {0, 2, 3, 4, 6}, 5).Glom().Collect() returns [[0], [2], [3], [4], [6]].|
|EmptyRDD|Create an RDD that has no partitions or elements.|
|WholeTextFiles|Read a directory of text files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. Each file is read as a single record and returned in a key-value pair, where the key is the path of each file and the value is the content of each file. For example, if you have the following files: {{{ hdfs://a-hdfs-path/part-00000 hdfs://a-hdfs-path/part-00001 ... hdfs://a-hdfs-path/part-nnnnn }}} Do {{{ RDD<KeyValuePair<string, string>> rdd = sparkContext.WholeTextFiles("hdfs://a-hdfs-path") }}} then `rdd` contains {{{ (a-hdfs-path/part-00000, its content) (a-hdfs-path/part-00001, its content) ... (a-hdfs-path/part-nnnnn, its content) }}} Small files are preferred; large files are also allowed but may cause bad performance. minPartitions is a suggested minimum number of partitions for the input data.|
|BinaryFiles|Read a directory of binary files from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI as a byte array. Each file is read as a single record and returned in a key-value pair, where the key is the path of each file and the value is the content of each file. For example, if you have the following files: {{{ hdfs://a-hdfs-path/part-00000 hdfs://a-hdfs-path/part-00001 ... hdfs://a-hdfs-path/part-nnnnn }}} Do {{{ RDD<KeyValuePair<string, byte[]>> rdd = sparkContext.dataStreamFiles("hdfs://a-hdfs-path") }}} then `rdd` contains {{{ (a-hdfs-path/part-00000, its content) (a-hdfs-path/part-00001, its content) ... (a-hdfs-path/part-nnnnn, its content) }}} Small files are preferred; very large files may cause bad performance. minPartitions is a suggested minimum number of partitions for the input data.|
|SequenceFile|Read a Hadoop SequenceFile with arbitrary key and value Writable class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is as follows: 1. A Java RDD is created from the SequenceFile or other InputFormat, and the key and value Writable classes. 2. Serialization is attempted via Pyrolite pickling. 3. If this fails, the fallback is to call 'toString' on each key and value. 4. PickleSerializer is used to deserialize pickled objects on the Python side.|
|NewAPIHadoopFile|Read a 'new API' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is the same as for sc.sequenceFile. A Hadoop configuration can be passed in as a Python dict, which will be converted into a Configuration in Java.|
|NewAPIHadoopRDD|Read a 'new API' Hadoop InputFormat with arbitrary key and value class from an arbitrary Hadoop configuration, which is passed in as a Python dict and converted into a Configuration in Java. The mechanism is the same as for sc.sequenceFile.|
|HadoopFile|Read an 'old' Hadoop InputFormat with arbitrary key and value class from HDFS, a local file system (available on all nodes), or any Hadoop-supported file system URI. The mechanism is the same as for sc.sequenceFile. A Hadoop configuration can be passed in as a Python dict, which will be converted into a Configuration in Java.|
|HadoopRDD|Read an 'old' Hadoop InputFormat with arbitrary key and value class from an arbitrary Hadoop configuration, which is passed in as a Python dict and converted into a Configuration in Java. The mechanism is the same as for sc.sequenceFile.|
|Union``1|Build the union of a list of RDDs. This supports unions() of RDDs with different serialized formats, although this forces them to be reserialized using the default serializer: >>> path = os.path.join(tempdir, "union-text.txt") >>> with open(path, "w") as testFile: ... _ = testFile.write("Hello") >>> textFile = sc.textFile(path) >>> textFile.collect() [u'Hello'] >>> parallelized = sc.parallelize(["World!"]) >>> sorted(sc.union([textFile, parallelized]).collect()) [u'Hello', 'World!']|
|Broadcast``1|Broadcast a read-only variable to the cluster, returning a Broadcast object for reading it in distributed functions. The variable will be sent to each cluster only once.|
|Accumulator``1|Create an Accumulator with the given initial value, using a given helper object to define how to add values of the data type if provided. Default AccumulatorParams are used for integers and floating-point numbers if you do not provide one. For other types, a custom AccumulatorParam can be used.|
|Stop|Shut down the SparkContext.|
|AddFile|Add a file to be downloaded with this Spark job on every node. The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, use `SparkFiles.get(fileName)` to find its download location.|
|SetCheckpointDir|Set the directory under which RDDs are going to be checkpointed. The directory must be an HDFS path if running on a cluster.|
|SetJobGroup|Assigns a group ID to all the jobs started by this thread until the group ID is set to a different value or cleared. Often, a unit of execution in an application consists of multiple Spark actions or jobs. Application programmers can use this method to group all those jobs together and give a group description. Once set, the Spark web UI will associate such jobs with this group. The application can also use [[org.apache.spark.api.java.JavaSparkContext.cancelJobGroup]] to cancel all running jobs in this group. For example: {{{ // In the main thread: sc.setJobGroup("some_job_to_cancel", "some job description"); rdd.map(...).count(); // In a separate thread: sc.cancelJobGroup("some_job_to_cancel"); }}} If interruptOnCancel is set to true for the job group, then job cancellation will result in Thread.interrupt() being called on the job's executor threads. This is useful to help ensure that the tasks are actually stopped in a timely manner, but is off by default due to HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes as dead.|
|SetLocalProperty|Set a local property that affects jobs submitted from this thread, such as the Spark fair scheduler pool.|
|GetLocalProperty|Get a local property set in this thread, or null if it is missing. See [[org.apache.spark.api.java.JavaSparkContext.setLocalProperty]].|
|SetLogLevel|Control the log level. This overrides any user-defined log settings. @param logLevel The desired log level as a string. Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN.|
|CancelJobGroup|Cancel active jobs for the specified group. See SetJobGroup for more information.|
|CancelAllJobs|Cancel all jobs that have been scheduled or are running.|
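The SparkContext surface above mirrors the Scala and PySpark contexts. As an illustrative sketch only (not part of this patch), the calls could be combined in a Mobius driver roughly as follows; the application name, input path and broadcast value are placeholders, and exact overload signatures may differ from this sketch.

```csharp
// Illustrative sketch, not from this patch. Assumes a Mobius driver project
// referencing Microsoft.SparkCLR; paths, names and values are placeholders.
using System;
using Microsoft.Spark.CSharp.Core;

class SparkContextSketch
{
    static void Main(string[] args)
    {
        var conf = new SparkConf();
        conf.SetAppName("MobiusContextSketch");                       // placeholder app name
        var sc = new SparkContext(conf);
        sc.SetLogLevel("WARN");

        // TextFile + Parallelize + Union, as described in the table above
        var fileLines = sc.TextFile(@"hdfs://some-path/input.txt");   // placeholder path
        var extraLines = sc.Parallelize(new[] { "World!" }, 1);
        var allLines = sc.Union(new[] { fileLines, extraLines });

        // Broadcast a read-only value; Accumulator with an integer initial value
        var greeting = sc.Broadcast("Hello");                         // placeholder value
        var counter = sc.Accumulator(0);

        Console.WriteLine("Union count: {0}", allLines.Count());
        sc.Stop();
    }
}
```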
---

@@ -550,6 +550,62 @@

---

+###Microsoft.Spark.CSharp.Sql.Builder
+####Summary
+
+        The entry point to programming Spark with the Dataset and DataFrame API.
+
+####Methods
+
|Name|Description|
|----|-----------|
|Master|Sets the Spark master URL to connect to, such as "local" to run locally, "local[4]" to run locally with 4 cores, or "spark://master:7077" to run on a Spark standalone cluster.|
|AppName|Sets a name for the application, which will be shown in the Spark web UI. If no application name is set, a randomly generated name will be used.|
|Config|Sets a config option. Options set using this method are automatically propagated to both SparkConf and SparkSession's own configuration.|
|Config|Sets a config option. Options set using this method are automatically propagated to both SparkConf and SparkSession's own configuration.|
|Config|Sets a config option. Options set using this method are automatically propagated to both SparkConf and SparkSession's own configuration.|
|Config|Sets a config option. Options set using this method are automatically propagated to both SparkConf and SparkSession's own configuration.|
|Config|Sets a list of config options based on the given SparkConf.|
|EnableHiveSupport|Enables Hive support, including connectivity to a persistent Hive metastore, support for Hive serdes, and Hive user-defined functions.|
|GetOrCreate|Gets an existing [[SparkSession]] or, if there is no existing one, creates a new one based on the options set in this builder.|
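For orientation, here is a minimal, illustrative C# sketch of the Builder methods listed above. It is not taken from this patch; it assumes the builder methods chain fluently (as in Spark's own builders), and the master URL, application name and config key are placeholders.

```csharp
// Illustrative sketch, not from this patch; master URL, app name and
// config key/value are placeholders.
using Microsoft.Spark.CSharp.Sql;

class BuilderSketch
{
    static void Main(string[] args)
    {
        SparkSession session = SparkSession.Builder()
            .Master("local[4]")                    // placeholder master URL
            .AppName("MobiusBuilderSketch")        // placeholder application name
            .Config("spark.some.option", "value")  // placeholder config option
            .EnableHiveSupport()                   // optional Hive metastore support
            .GetOrCreate();

        // ... use the session ...
        session.Stop();
    }
}
```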
+
+---
+
+###Microsoft.Spark.CSharp.Sql.Catalog.Catalog
+####Summary
+
+        Catalog interface for Spark.
+
+####Methods
+
|Name|Description|
|----|-----------|
|ListDatabases|Returns a list of databases available across all sessions.|
|ListTables|Returns a list of tables in the current database or the given database. This includes all temporary tables.|
|ListColumns|Returns a list of columns for the given table in the current database or for the given temporary table.|
|ListFunctions|Returns a list of functions registered in the specified database. This includes all temporary functions.|
|SetCurrentDatabase|Sets the current default database in this session.|
|DropTempView|Drops the temporary view with the given view name in the catalog. If the view has been cached before, then it will also be uncached.|
|IsCached|Returns true if the table is currently cached in memory.|
|CacheTable|Caches the specified table in memory.|
|UnCacheTable|Removes the specified table from the in-memory cache.|
|RefreshTable|Invalidates and refreshes all the cached metadata of the given table. For performance reasons, Spark SQL or the external data source library it uses might cache certain metadata about a table, such as the location of blocks. When those change outside of Spark SQL, users should call this function to invalidate the cache. If this table is cached as an InMemoryRelation, the original cached version is dropped and the new version is cached lazily.|
|ClearCache|Removes all cached tables from the in-memory cache.|
|CreateExternalTable|Creates an external table from the given path and returns the corresponding DataFrame. It will use the default data source configured by spark.sql.sources.default.|
|CreateExternalTable|Creates an external table from the given path based on a data source and returns the corresponding DataFrame.|
|CreateExternalTable|Creates an external table from the given path based on a data source and a set of options, then returns the corresponding DataFrame.|
|CreateExternalTable|Creates an external table from the given path based on a data source, a schema and a set of options, then returns the corresponding DataFrame.|
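A short, illustrative sketch of the Catalog calls documented above (not part of this patch). The table name is a placeholder, and it assumes ListDatabases/ListTables return Datasets that expose Show(), as the tests added in this patch suggest.

```csharp
// Illustrative sketch, not from this patch; "some_table" is a placeholder.
using System;
using Microsoft.Spark.CSharp.Sql;

class CatalogSketch
{
    static void Main(string[] args)
    {
        var session = SparkSession.Builder().GetOrCreate();
        var catalog = session.Catalog;

        Console.WriteLine("Current database: {0}", catalog.CurrentDatabase);

        // Listing calls return Datasets that can be displayed or collected
        catalog.ListDatabases().Show();
        catalog.ListTables().Show();

        // Cache management for a (placeholder) table
        catalog.CacheTable("some_table");
        Console.WriteLine("Cached: {0}", catalog.IsCached("some_table"));
        catalog.UnCacheTable("some_table");
        catalog.ClearCache();
    }
}
```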
+
+---
+
+###Microsoft.Spark.CSharp.Sql.Catalog.Database
+####Summary
+
+        A database in Spark
+
+###Microsoft.Spark.CSharp.Sql.Catalog.Table
+####Summary
+
+        A table in Spark
+
+###Microsoft.Spark.CSharp.Sql.Catalog.Column
+####Summary
+
+        A column in Spark
+
+###Microsoft.Spark.CSharp.Sql.Catalog.Function
+####Summary
+
+        A user-defined function in Spark
+
###Microsoft.Spark.CSharp.Sql.Column
####Summary

@@ -647,6 +703,30 @@

---

+###Microsoft.Spark.CSharp.Sql.Dataset
+####Summary
+
+        Dataset is a strongly typed collection of domain-specific objects that can be transformed
+        in parallel using functional or relational operations. Each Dataset also has an untyped view
+        called a DataFrame, which is a Dataset of Row.
+
+####Methods
+
|Name|Description|
|----|-----------|
|ToDF|Converts this strongly typed collection of data to a generic DataFrame. In contrast to the strongly typed objects that Dataset operations work on, a DataFrame returns generic [[Row]] objects that allow fields to be accessed by ordinal or name.|
|PrintSchema|Prints the schema to the console in a nice tree format.|
|Explain|Prints the plans (logical and physical) to the console for debugging purposes.|
|Explain|Prints the physical plan to the console for debugging purposes.|
|DTypes|Returns all column names and their data types as an array.|
|Columns|Returns all column names as an array.|
|Show|Displays the top 20 rows of the Dataset in tabular form. Strings longer than 20 characters will be truncated, and all cells will be aligned right.|
|ShowSchema|Prints the schema.|
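To see these inspection methods together, here is an illustrative sketch (not from this patch). It borrows the catalog's ListDatabases call, which the tests in this patch treat as returning a Dataset, purely to have a Dataset instance to inspect.

```csharp
// Illustrative sketch, not from this patch; uses a catalog listing only to
// obtain a Dataset instance to inspect.
using System;
using System.Linq;
using Microsoft.Spark.CSharp.Sql;

class DatasetSketch
{
    static void Main(string[] args)
    {
        var session = SparkSession.Builder().GetOrCreate();
        var databases = session.Catalog.ListDatabases();   // a Dataset, per the Catalog section above

        databases.PrintSchema();     // schema as a tree
        databases.Explain(true);     // logical and physical plans
        databases.Show();            // first 20 rows, right-aligned, truncated to 20 chars

        Console.WriteLine("Columns: {0}", string.Join(", ", databases.Columns()));
        foreach (var dtype in databases.DTypes())
        {
            // DTypes yields (column name, simple type string) pairs
            Console.WriteLine("{0}: {1}", dtype.Item1, dtype.Item2);
        }

        var df = databases.ToDF();   // untyped DataFrame view of the same data
    }
}
```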
+
+---
+
+###Microsoft.Spark.CSharp.Sql.Dataset`1
+####Summary
+
+        Dataset of specific types
+
+        Type parameter
+
###Microsoft.Spark.CSharp.Sql.HiveContext
####Summary

@@ -747,6 +827,20 @@

---

+###Microsoft.Spark.CSharp.Sql.SparkSession
+####Summary
+
+        The entry point to programming Spark with the Dataset and DataFrame API.
+
+####Methods
+
|Name|Description|
|----|-----------|
|Builder|Builder for SparkSession.|
|NewSession|Starts a new session in which SQL configurations, temporary tables and registered functions are isolated, but the underlying [[SparkContext]] and cached data are shared. Note: Other than the [[SparkContext]], all shared state is initialized lazily. This method will force the initialization of the shared state to ensure that parent and child sessions are set up with the same shared state. If the underlying catalog implementation is Hive, this will initialize the metastore, which may take some time.|
|Stop|Stops the underlying SparkContext.|
|Read|Returns a DataFrameReader that can be used to read non-streaming data in as a DataFrame.|
|CreateDataFrame|Creates a DataFrame from an RDD containing arrays of objects, using the given schema.|
|Table|Returns the specified table as a DataFrame.|
|Sql|Executes a SQL query using Spark, returning the result as a DataFrame. The dialect used for SQL parsing can be configured with 'spark.sql.dialect'.|
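Putting the SparkSession pieces together, here is an illustrative end-to-end sketch (not from this patch). The JSON input path and temp-table name are placeholders; the Read().Json(...) and RegisterTempTable calls follow the samples added elsewhere in this patch.

```csharp
// Illustrative sketch, not from this patch; input path and view name are placeholders.
using System;
using Microsoft.Spark.CSharp.Sql;

class SparkSessionSketch
{
    static void Main(string[] args)
    {
        var session = SparkSession.Builder().AppName("MobiusSessionSketch").GetOrCreate();

        // Read returns a DataFrameReader; Json materializes a DataFrame
        var people = session.Read().Json(@"people.json");    // placeholder input path
        people.RegisterTempTable("people");                   // placeholder temp table name

        // Sql and Table both return DataFrames
        var bills = session.Sql("SELECT name, address.state FROM people WHERE name = 'Bill'");
        var peopleTable = session.Table("people");
        Console.WriteLine("Matching rows: {0}", bills.Count());

        // NewSession isolates SQL conf and temp tables but shares the SparkContext
        var isolated = session.NewSession();

        session.Stop();
    }
}
```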
+ +--- + + ###Microsoft.Spark.CSharp.Sql.SqlContext ####Summary diff --git a/csharp/AdapterTest/AdapterTest.csproj b/csharp/AdapterTest/AdapterTest.csproj index dfe00ea8..ca95b87c 100644 --- a/csharp/AdapterTest/AdapterTest.csproj +++ b/csharp/AdapterTest/AdapterTest.csproj @@ -65,10 +65,12 @@ + + @@ -76,6 +78,7 @@ + @@ -83,6 +86,7 @@ + @@ -91,6 +95,7 @@ + diff --git a/csharp/AdapterTest/BuilderTest.cs b/csharp/AdapterTest/BuilderTest.cs new file mode 100644 index 00000000..aae3c626 --- /dev/null +++ b/csharp/AdapterTest/BuilderTest.cs @@ -0,0 +1,50 @@ +using System; +using Microsoft.Spark.CSharp.Sql; +using NUnit.Framework; + +namespace AdapterTest +{ + [TestFixture] + public class BuilderTest + { + [Test] + public void TestMaster() + { + var builder = new Builder(); + builder.Master("test"); + Assert.AreEqual("test", builder.options["spark.master"]); + } + + [Test] + public void TestAppName() + { + var builder = new Builder(); + builder.AppName("test"); + Assert.AreEqual("test", builder.options["spark.app.name"]); + } + + [Test] + public void TestBoolConfig() + { + var builder = new Builder(); + builder.Config("boolvalue", true); + Assert.True(builder.options["boolvalue"].Equals("true", StringComparison.InvariantCultureIgnoreCase)); + } + + [Test] + public void TestLongConfig() + { + var builder = new Builder(); + builder.Config("longvalue", 3L); + Assert.True(builder.options["longvalue"].Equals("3", StringComparison.InvariantCultureIgnoreCase)); + } + + [Test] + public void TestDoubleConfig() + { + var builder = new Builder(); + builder.Config("doublevalue", 3.5D); + Assert.True(builder.options["doublevalue"].Equals("3.5", StringComparison.InvariantCultureIgnoreCase)); + } + } +} diff --git a/csharp/AdapterTest/CatalogTest.cs b/csharp/AdapterTest/CatalogTest.cs new file mode 100644 index 00000000..e1fbdf05 --- /dev/null +++ b/csharp/AdapterTest/CatalogTest.cs @@ -0,0 +1,212 @@ +using System; +using System.Collections.Generic; +using Microsoft.Spark.CSharp.Proxy; +using Microsoft.Spark.CSharp.Sql; +using Microsoft.Spark.CSharp.Sql.Catalog; +using Moq; +using NUnit.Framework; +using NUnit.Framework.Internal; +using Column = Microsoft.Spark.CSharp.Sql.Catalog.Column; + +namespace AdapterTest +{ + [TestFixture] + public class CatalogTest + { + [Test] + public void TestCurrentCatalog() + { + var mockCatalogProxy = new Mock(); + mockCatalogProxy.Setup(m => m.CurrentDatabase).Returns("currentdb"); + + var catalog = new Catalog(mockCatalogProxy.Object); + Assert.AreEqual("currentdb", catalog.CurrentDatabase); + } + + [Test] + public void TestGetDatabasesList() + { + var mockCatalogProxy = new Mock(); + var mockDatasetProxy = new Mock(); + var mockDataFrameProxy = new Mock(); + mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); + mockCatalogProxy.Setup(m => m.ListDatabases()).Returns(new Dataset(mockDatasetProxy.Object)); + + var catalog = new Catalog(mockCatalogProxy.Object); + var databases = catalog.ListDatabases(); + Assert.AreSame(mockDataFrameProxy.Object, databases.DataFrameProxy); + } + + [Test] + public void TestGetTablesList() + { + var mockCatalogProxy = new Mock(); + var mockDatasetProxy = new Mock(); + var mockDataFrameProxy = new Mock(); + mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); + mockCatalogProxy.Setup(m => m.ListTables(It.IsAny())).Returns(new Dataset(mockDatasetProxy.Object)); + + var catalog = new Catalog(mockCatalogProxy.Object); + var tables = catalog.ListTables(); + Assert.AreSame(mockDataFrameProxy.Object, 
tables.DataFrameProxy); + } + + [Test] + public void TestGetColumnsList() + { + var mockCatalogProxy = new Mock(); + var mockDatasetProxy = new Mock(); + var mockDataFrameProxy = new Mock(); + mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); + mockCatalogProxy.Setup(m => m.ListColumns(It.IsAny(), It.IsAny())).Returns(new Dataset(mockDatasetProxy.Object)); + + var catalog = new Catalog(mockCatalogProxy.Object); + var columns = catalog.ListColumns("dbname"); + Assert.AreSame(mockDataFrameProxy.Object, columns.DataFrameProxy); + } + + [Test] + public void TestGetFunctionsList() + { + var mockCatalogProxy = new Mock(); + var mockDatasetProxy = new Mock(); + var mockDataFrameProxy = new Mock(); + mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); + mockCatalogProxy.Setup(m => m.ListFunctions(It.IsAny())).Returns(new Dataset(mockDatasetProxy.Object)); + + var catalog = new Catalog(mockCatalogProxy.Object); + var columns = catalog.ListFunctions("dbname"); + Assert.AreSame(mockDataFrameProxy.Object, columns.DataFrameProxy); + } + + [Test] + public void TestSetCurrentDatabase() + { + var mockCatalogProxy = new Mock(); + var catalog = new Catalog(mockCatalogProxy.Object); + catalog.SetCurrentDatabase("dbname"); + mockCatalogProxy.Verify(m => m.SetCurrentDatabase("dbname"), Times.Once); + } + + [Test] + public void TestDropTempTable() + { + var mockCatalogProxy = new Mock(); + var catalog = new Catalog(mockCatalogProxy.Object); + catalog.DropTempView("tablename"); + mockCatalogProxy.Verify(m => m.DropTempTable("tablename"), Times.Once); + } + + [Test] + public void TestIsCached() + { + var mockCatalogProxy = new Mock(); + var catalog = new Catalog(mockCatalogProxy.Object); + mockCatalogProxy.Setup(m => m.IsCached(It.IsAny())).Returns(false); + var isCached = catalog.IsCached("tablename"); + mockCatalogProxy.Verify(m => m.IsCached(It.IsAny()), Times.Once); + Assert.False(isCached); + } + + [Test] + public void TestCacheTable() + { + var mockCatalogProxy = new Mock(); + var catalog = new Catalog(mockCatalogProxy.Object); + catalog.CacheTable("tablename"); + mockCatalogProxy.Verify(m => m.CacheTable("tablename"), Times.Once); + } + + [Test] + public void TestUnCacheTable() + { + var mockCatalogProxy = new Mock(); + var catalog = new Catalog(mockCatalogProxy.Object); + catalog.UnCacheTable("tablename"); + mockCatalogProxy.Verify(m => m.UnCacheTable("tablename"), Times.Once); + } + + [Test] + public void TestRefreshTable() + { + var mockCatalogProxy = new Mock(); + var catalog = new Catalog(mockCatalogProxy.Object); + catalog.RefreshTable("tablename"); + mockCatalogProxy.Verify(m => m.RefreshTable("tablename"), Times.Once); + } + + [Test] + public void TestClearCache() + { + var mockCatalogProxy = new Mock(); + var catalog = new Catalog(mockCatalogProxy.Object); + catalog.ClearCache(); + mockCatalogProxy.Verify(m => m.ClearCache(), Times.Once); + } + + [Test] + public void TestCreateExternalTable() + { + var mockCatalogProxy = new Mock(); + DataFrame dataFrame = null; + mockCatalogProxy.Setup(m => m.CreateExternalTable(It.IsAny(), It.IsAny())).Returns(dataFrame); + var catalog = new Catalog(mockCatalogProxy.Object); + var df = catalog.CreateExternalTable("tableName", "path"); + mockCatalogProxy.Verify(m => m.CreateExternalTable("tableName", "path"), Times.Once); + } + + [Test] + public void TestCreateExternalTable2() + { + var mockCatalogProxy = new Mock(); + DataFrame dataFrame = null; + mockCatalogProxy.Setup(m => m.CreateExternalTable(It.IsAny(), 
It.IsAny())).Returns(dataFrame); + var catalog = new Catalog(mockCatalogProxy.Object); + var df = catalog.CreateExternalTable("tableName", "path", "source"); + mockCatalogProxy.Verify(m => m.CreateExternalTable("tableName", "path", "source"), Times.Once); + } + + [Test] + public void TestDatabaseProperties() + { + var database = new Database {Description = "desc", Name = "name", LocationUri = "uri"}; + Assert.AreEqual("desc", database.Description); + Assert.AreEqual("name", database.Name); + Assert.AreEqual("uri", database.LocationUri); + } + + [Test] + public void TestTableProperties() + { + var table = new Table { Description = "desc", Name = "name", Database = "db", TableType = "type", IsTemporary = false}; + Assert.AreEqual("desc", table.Description); + Assert.AreEqual("name", table.Name); + Assert.AreEqual("db", table.Database); + Assert.AreEqual("type", table.TableType); + Assert.False(table.IsTemporary); + } + + [Test] + public void TestColumnProperties() + { + var column = new Column { Description = "desc", Name = "name", DataType = "dtype", IsNullable = true, IsPartition = false, IsBucket = true}; + Assert.AreEqual("desc", column.Description); + Assert.AreEqual("name", column.Name); + Assert.AreEqual("dtype", column.DataType); + Assert.False(column.IsPartition); + Assert.True(column.IsNullable); + Assert.True(column.IsBucket); + } + + [Test] + public void TestFunctionProperties() + { + var function = new Function { Description = "desc", Name = "name", Database = "db", ClassName = "classname", IsTemporary = false }; + Assert.AreEqual("desc", function.Description); + Assert.AreEqual("name", function.Name); + Assert.AreEqual("db", function.Database); + Assert.AreEqual("classname", function.ClassName); + Assert.False(function.IsTemporary); + } + } +} diff --git a/csharp/AdapterTest/DatasetTest.cs b/csharp/AdapterTest/DatasetTest.cs new file mode 100644 index 00000000..7ee59db9 --- /dev/null +++ b/csharp/AdapterTest/DatasetTest.cs @@ -0,0 +1,150 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using AdapterTest.Mocks; +using Microsoft.Spark.CSharp.Interop; +using Microsoft.Spark.CSharp.Proxy; +using Microsoft.Spark.CSharp.Sql; +using Moq; +using NUnit.Framework; + +namespace AdapterTest +{ + [TestFixture] + public class DatasetTest + { + private static Mock mockDatasetProxy; + + [OneTimeSetUp] + public static void ClassInitialize() + { + mockDatasetProxy = new Mock(); + } + + [SetUp] + public void TestInitialize() + { + mockDatasetProxy.Reset(); + } + + [TearDown] + public void TestCleanUp() + { + // Revert to use Static mock class to prevent blocking other test methods which uses static mock class + SparkCLREnvironment.SparkCLRProxy = new MockSparkCLRProxy(); + } + + [Test] + public void TestShow() + { + Mock mockDataFrameProxy = new Mock(); + mockDataFrameProxy.Setup(m => m.GetShowString(It.IsAny(), It.IsAny())).Returns("Show"); + mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); + + var dataset = new Dataset(mockDatasetProxy.Object); + dataset.Show(); + mockDataFrameProxy.Verify(m => m.GetShowString(20, true), Times.Once); + } + + [Test] + public void TestExplain() + { + Mock mockDataFrameProxy = new Mock(); + mockDataFrameProxy.Setup(m => m.GetQueryExecution()).Returns("Execution Plan"); + mockDataFrameProxy.Setup(m => m.GetExecutedPlan()).Returns("Execution Plan"); + mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); + + var dataset = new Dataset(mockDatasetProxy.Object); + dataset.Explain(); + 
mockDataFrameProxy.Verify(m => m.GetQueryExecution(), Times.Once); + + dataset.Explain(true); + mockDataFrameProxy.Verify(m => m.GetExecutedPlan(), Times.Once); + } + + [Test] + public void TestSchema() + { + TestSchema(true); + TestSchema(false); + } + + public void TestSchema(bool usePrintSchema) + { + var requestsSchema = new StructType(new List + { + new StructField("test", new StringType(), false), + }); + var jsonValue = requestsSchema.JsonValue.ToString(); + Mock mockStructTypeProxy = new Mock(); + mockStructTypeProxy.Setup(m => m.ToJson()).Returns(jsonValue); + Mock mockDataFrameProxy = new Mock(); + mockDataFrameProxy.Setup(m => m.GetSchema()).Returns(mockStructTypeProxy.Object); + mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); + + var dataset = new Dataset(mockDatasetProxy.Object); + + if (usePrintSchema) + dataset.PrintSchema(); + else + dataset.ShowSchema(); + + mockDataFrameProxy.Verify(m => m.GetSchema(), Times.Once); + mockStructTypeProxy.Verify(m => m.ToJson(), Times.Once()); + } + + [Test] + public void TestColumns() + { + var requestsSchema = new StructType(new List + { + new StructField("test", new StringType(), false), + }); + var x = requestsSchema.JsonValue.ToString(); + Mock mockStructTypeProxy = new Mock(); + mockStructTypeProxy.Setup(m => m.ToJson()).Returns(x); + Mock mockStructFieldProxy = new Mock(); + mockStructFieldProxy.Setup(m => m.GetStructFieldName()).Returns("testcol"); + mockStructTypeProxy.Setup(m => m.GetStructTypeFields()) + .Returns(new List() {mockStructFieldProxy.Object}); + Mock mockDataFrameProxy = new Mock(); + mockDataFrameProxy.Setup(m => m.GetSchema()).Returns(mockStructTypeProxy.Object); + mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); + + var dataset = new Dataset(mockDatasetProxy.Object); + var columns = dataset.Columns(); + Assert.AreEqual(1, columns.Count()); + Assert.AreEqual("testcol", columns.First()); + } + + [Test] + public void TestDTypes() + { + var requestsSchema = new StructType(new List + { + new StructField("test", new StringType(), false), + }); + var x = requestsSchema.JsonValue.ToString(); + Mock mockStructTypeProxy = new Mock(); + mockStructTypeProxy.Setup(m => m.ToJson()).Returns(x); + Mock mockStructFieldProxy = new Mock(); + mockStructFieldProxy.Setup(m => m.GetStructFieldName()).Returns("testcol"); + Mock mockStructDataTypeProxy = new Mock(); + mockStructDataTypeProxy.Setup(m => m.GetDataTypeSimpleString()).Returns("ss"); + mockStructFieldProxy.Setup(m => m.GetStructFieldDataType()).Returns(mockStructDataTypeProxy.Object); + mockStructTypeProxy.Setup(m => m.GetStructTypeFields()) + .Returns(new List() { mockStructFieldProxy.Object }); + Mock mockDataFrameProxy = new Mock(); + mockDataFrameProxy.Setup(m => m.GetSchema()).Returns(mockStructTypeProxy.Object); + mockDatasetProxy.Setup(m => m.ToDF()).Returns(mockDataFrameProxy.Object); + + var dataset = new Dataset(mockDatasetProxy.Object); + var dTypes = dataset.DTypes(); + Assert.AreEqual(1, dTypes.Count()); + var first = dTypes.First(); + Assert.AreEqual("testcol", first.Item1); + Assert.AreEqual("ss", first.Item2); + } + + } +} diff --git a/csharp/AdapterTest/HiveContextTest.cs b/csharp/AdapterTest/HiveContextTest.cs index 8a67b2df..8e55f029 100644 --- a/csharp/AdapterTest/HiveContextTest.cs +++ b/csharp/AdapterTest/HiveContextTest.cs @@ -45,8 +45,21 @@ public void TestCleanUp() [Test] public void TestHiveContextConstructor() { - var hiveContext = new HiveContext(new SparkContext("", "")); - 
Assert.IsNotNull((hiveContext.SqlContextProxy as MockSqlContextProxy).mockSqlContextReference); + var mockSparkContextProxy = new Mock(); + + var mockSparkSessionProxy = new Mock(); + var mockCatalogProxy = new Mock(); + mockCatalogProxy.Setup(m => m.RefreshTable(It.IsAny())); + mockSparkSessionProxy.Setup(m => m.GetCatalog()).Returns(mockCatalogProxy.Object); + mockSparkContextProxy.Setup(m => m.CreateSparkSession()).Returns(mockSparkSessionProxy.Object); + + var mockSparkConfProxy = new Mock(); + mockSparkConfProxy.Setup(m => m.GetSparkConfAsString()) + .Returns("spark.master=master;spark.app.name=appname;config1=value1;config2=value2;"); + + var conf = new SparkConf(mockSparkConfProxy.Object); + var hiveContext = new HiveContext(new SparkContext(mockSparkContextProxy.Object, conf)); + Assert.IsNotNull(hiveContext.SparkSession); } [Test] @@ -54,14 +67,25 @@ public void TestHiveContextRefreshTable() { // arrange var mockSparkContextProxy = new Mock(); - mockSqlContextProxy.Setup(m => m.RefreshTable(It.IsAny())); - var hiveContext = new HiveContext(new SparkContext("", ""), mockSqlContextProxy.Object); + var mockSparkSessionProxy = new Mock(); + var mockCatalogProxy = new Mock(); + mockCatalogProxy.Setup(m => m.RefreshTable(It.IsAny())); + mockSparkSessionProxy.Setup(m => m.GetCatalog()).Returns(mockCatalogProxy.Object); + mockSparkContextProxy.Setup(m => m.CreateSparkSession()).Returns(mockSparkSessionProxy.Object); + + var mockSparkConfProxy = new Mock(); + mockSparkConfProxy.Setup(m => m.GetSparkConfAsString()) + .Returns("spark.master=master;spark.app.name=appname;config1=value1;config2=value2;"); + + var conf = new SparkConf(mockSparkConfProxy.Object); + var hiveContext = new HiveContext(new SparkContext(mockSparkContextProxy.Object, conf)); + hiveContext.SparkSession.SparkSessionProxy = mockSparkSessionProxy.Object; // act hiveContext.RefreshTable("table"); // assert - mockSqlContextProxy.Verify(m => m.RefreshTable("table")); + mockCatalogProxy.Verify(m => m.RefreshTable("table")); } } } diff --git a/csharp/AdapterTest/Mocks/MockSparkConfProxy.cs b/csharp/AdapterTest/Mocks/MockSparkConfProxy.cs index 2f8bd99b..3ce3bb5b 100644 --- a/csharp/AdapterTest/Mocks/MockSparkConfProxy.cs +++ b/csharp/AdapterTest/Mocks/MockSparkConfProxy.cs @@ -60,5 +60,10 @@ public string Get(string key, string defaultValue) } return defaultValue; } + + public string GetSparkConfAsString() + { + throw new NotImplementedException(); + } } } diff --git a/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs b/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs index b0f1fced..6a6b1d8b 100644 --- a/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs +++ b/csharp/AdapterTest/Mocks/MockSparkContextProxy.cs @@ -293,19 +293,14 @@ public ISparkConfProxy GetConf() return new MockSparkConfProxy(); } - public ISqlContextProxy CreateSqlContext() - { - return new MockSqlContextProxy(this); - } - - public ISqlContextProxy CreateHiveContext() + public IRDDProxy Parallelize(IEnumerable values, int numSlices) { - return new MockSqlContextProxy(this); + return new MockRddProxy(null); } - public IRDDProxy Parallelize(IEnumerable values, int numSlices) + public ISparkSessionProxy CreateSparkSession() { - return new MockRddProxy(null); + return new MockSparkSessionProxy(); } } } diff --git a/csharp/AdapterTest/Mocks/MockSparkSessionProxy.cs b/csharp/AdapterTest/Mocks/MockSparkSessionProxy.cs new file mode 100644 index 00000000..da695c3f --- /dev/null +++ b/csharp/AdapterTest/Mocks/MockSparkSessionProxy.cs @@ -0,0 +1,53 @@ +// Copyright 
(c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Proxy; + +namespace AdapterTest.Mocks +{ + class MockSparkSessionProxy : ISparkSessionProxy + { + public ISqlContextProxy SqlContextProxy { get { return new MockSqlContextProxy(new MockSparkContextProxy(new MockSparkConfProxy()));} } + public IUdfRegistration Udf { get; } + public ICatalogProxy GetCatalog() + { + throw new NotImplementedException(); + } + + public IDataFrameReaderProxy Read() + { + return new MockDataFrameReaderProxy(SqlContextProxy); + } + + internal ISparkSessionProxy InjectedSparkSessionProxy { get; set; } + public ISparkSessionProxy NewSession() + { + return InjectedSparkSessionProxy; + } + + public IDataFrameProxy CreateDataFrame(IRDDProxy rddProxy, IStructTypeProxy structTypeProxy) + { + throw new NotImplementedException(); + } + + public IDataFrameProxy Table(string tableName) + { + return new MockDataFrameProxy(new object[] { tableName }, null); + } + + public IDataFrameProxy Sql(string query) + { + return new MockDataFrameProxy(new object[] {query}, null); + } + + public void Stop() + { + throw new NotImplementedException(); + } + } +} diff --git a/csharp/AdapterTest/Mocks/MockSqlContextProxy.cs b/csharp/AdapterTest/Mocks/MockSqlContextProxy.cs index e89996de..4dd02787 100644 --- a/csharp/AdapterTest/Mocks/MockSqlContextProxy.cs +++ b/csharp/AdapterTest/Mocks/MockSqlContextProxy.cs @@ -69,11 +69,6 @@ public void RegisterFunction(string name, byte[] command, string returnType) throw new NotImplementedException(); } - public ISqlContextProxy NewSession() - { - throw new NotImplementedException(); - } - public string GetConf(string key, string defaultValue) { throw new NotImplementedException(); diff --git a/csharp/AdapterTest/SparkSessionTest.cs b/csharp/AdapterTest/SparkSessionTest.cs new file mode 100644 index 00000000..3dbd0af3 --- /dev/null +++ b/csharp/AdapterTest/SparkSessionTest.cs @@ -0,0 +1,30 @@ +using System; +using Microsoft.Spark.CSharp.Proxy; +using Microsoft.Spark.CSharp.Sql; +using Moq; +using NUnit.Framework; + +namespace AdapterTest +{ + [TestFixture] + public class SparkSessionTest + { + [Test] + public void TestRead() + { + var mockSparkSessionProxy = new Mock(); + var sparkSession = new SparkSession(mockSparkSessionProxy.Object); + var reader = sparkSession.Read(); + mockSparkSessionProxy.Verify(m => m.Read(), Times.Once); + } + + [Test] + public void TestStop() + { + var mockSparkSessionProxy = new Mock(); + var sparkSession = new SparkSession(mockSparkSessionProxy.Object); + sparkSession.Stop(); + mockSparkSessionProxy.Verify(m => m.Stop(), Times.Once); + } + } +} diff --git a/csharp/AdapterTest/SqlContextTest.cs b/csharp/AdapterTest/SqlContextTest.cs index faaeb8aa..a403b704 100644 --- a/csharp/AdapterTest/SqlContextTest.cs +++ b/csharp/AdapterTest/SqlContextTest.cs @@ -61,15 +61,16 @@ public void TestSqlContextGetOrCreate() public void TestSqlContextNewSession() { // arrange - var sessionProxy = new SqlContextIpcProxy(new JvmObjectReference("1")); - mockSqlContextProxy.Setup(m => m.NewSession()).Returns(sessionProxy); - var sqlContext = new SqlContext(new SparkContext("", ""), mockSqlContextProxy.Object); + var sparkSessionProxy = new Mock(); + var newSparkSessionProxy = new Mock(); // act - var actualNewSession = sqlContext.NewSession(); + 
sparkSessionProxy.Setup(m => m.NewSession()).Returns(newSparkSessionProxy.Object); + var sqlContext = new SqlContext(new SparkSession(sparkSessionProxy.Object)); + var ns = sqlContext.NewSession(); // assert - Assert.AreEqual(sessionProxy, actualNewSession.SqlContextProxy); + sparkSessionProxy.Verify(m => m.NewSession()); } [Test] @@ -79,9 +80,24 @@ public void TestSqlContextGetConf() const string key = "key"; const string value = "value"; mockSqlContextProxy.Setup(m => m.GetConf(key, "")).Returns(value); - var sqlContext = new SqlContext(new SparkContext("", ""), mockSqlContextProxy.Object); + var mockSparkContextProxy = new Mock(); - // act + var mockSparkSessionProxy = new Mock(); + var mockCatalogProxy = new Mock(); + mockCatalogProxy.Setup(m => m.RefreshTable(It.IsAny())); + mockSparkSessionProxy.Setup(m => m.GetCatalog()).Returns(mockCatalogProxy.Object); + mockSparkContextProxy.Setup(m => m.CreateSparkSession()).Returns(mockSparkSessionProxy.Object); + mockSparkSessionProxy.Setup(m => m.SqlContextProxy).Returns(mockSqlContextProxy.Object); + + var mockSparkConfProxy = new Mock(); + mockSparkConfProxy.Setup(m => m.GetSparkConfAsString()) + .Returns("spark.master=master;spark.app.name=appname;config1=value1;config2=value2;"); + + var conf = new SparkConf(mockSparkConfProxy.Object); + var sqlContext = new SqlContext(new SparkContext(mockSparkContextProxy.Object, conf)); + sqlContext.SparkSession.SparkSessionProxy = mockSparkSessionProxy.Object; + + //act var actualValue = sqlContext.GetConf(key, ""); // assert @@ -95,7 +111,22 @@ public void TestSqlContextSetConf() const string key = "key"; const string value = "value"; mockSqlContextProxy.Setup(m => m.SetConf(key, value)); - var sqlContext = new SqlContext(new SparkContext("", ""), mockSqlContextProxy.Object); + var mockSparkContextProxy = new Mock(); + + var mockSparkSessionProxy = new Mock(); + var mockCatalogProxy = new Mock(); + mockCatalogProxy.Setup(m => m.RefreshTable(It.IsAny())); + mockSparkSessionProxy.Setup(m => m.GetCatalog()).Returns(mockCatalogProxy.Object); + mockSparkContextProxy.Setup(m => m.CreateSparkSession()).Returns(mockSparkSessionProxy.Object); + mockSparkSessionProxy.Setup(m => m.SqlContextProxy).Returns(mockSqlContextProxy.Object); + + var mockSparkConfProxy = new Mock(); + mockSparkConfProxy.Setup(m => m.GetSparkConfAsString()) + .Returns("spark.master=master;spark.app.name=appname;config1=value1;config2=value2;"); + + var conf = new SparkConf(mockSparkConfProxy.Object); + var sqlContext = new SqlContext(new SparkContext(mockSparkContextProxy.Object, conf)); + sqlContext.SparkSession.SparkSessionProxy = mockSparkSessionProxy.Object; // act sqlContext.SetConf(key, value); @@ -175,16 +206,11 @@ public void TestSqlContextDropTempTable() [Test] public void TestSqlContextTable() { - // arrange - var sqlContext = new SqlContext(new SparkContext("", ""), mockSqlContextProxy.Object); - var dataFrameProxy = new DataFrameIpcProxy(new JvmObjectReference("1"), mockSqlContextProxy.Object); - mockSqlContextProxy.Setup(m => m.Table(It.IsAny())).Returns(dataFrameProxy); - - // act - var actualTableDataFrame = sqlContext.Table("table"); - - // assert - Assert.AreEqual(dataFrameProxy, actualTableDataFrame.DataFrameProxy); + var sqlContext = new SqlContext(new SparkContext("", "")); + string tableName = "TestTableName"; + var dataFrame = sqlContext.Table(tableName); + var paramValuesToTableMethod = (dataFrame.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference; + Assert.AreEqual(tableName, 
paramValuesToTableMethod[0]); } [Test] @@ -292,8 +318,8 @@ public void TestSqlContextSql() { var sqlContext = new SqlContext(new SparkContext("", "")); var dataFrame = sqlContext.Sql("Query of SQL text"); - var paramValuesToJsonFileMethod = (dataFrame.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference; - Assert.AreEqual("Query of SQL text", paramValuesToJsonFileMethod[0]); + var paramValuesToSqlMethod = (dataFrame.DataFrameProxy as MockDataFrameProxy).mockDataFrameReference; + Assert.AreEqual("Query of SQL text", paramValuesToSqlMethod[0]); } [Test] diff --git a/csharp/Samples/Microsoft.Spark.CSharp/App.config b/csharp/Samples/Microsoft.Spark.CSharp/App.config index 7ac02fd5..64779e2f 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/App.config +++ b/csharp/Samples/Microsoft.Spark.CSharp/App.config @@ -58,8 +58,9 @@ + diff --git a/csharp/Samples/Microsoft.Spark.CSharp/CatalogSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/CatalogSamples.cs new file mode 100644 index 00000000..cdca7d46 --- /dev/null +++ b/csharp/Samples/Microsoft.Spark.CSharp/CatalogSamples.cs @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using NUnit.Framework; + +namespace Microsoft.Spark.CSharp.Samples +{ + class CatalogSamples + { + [Sample] + internal static void CatalogSample() + { + var catalog = SparkSessionSamples.GetSparkSession().Catalog; + var currentDatabase = catalog.CurrentDatabase; + var databasesList = SparkSessionSamples.GetSparkSession().Catalog.ListDatabases().Collect(); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + var defaultDatabase = databasesList.First(row => row.Get("name").Equals("default")); //throws exception if First() is missing + } + } + } +} diff --git a/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs index f0a691e9..5f4e5b49 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/DataFrameSamples.cs @@ -16,11 +16,11 @@ namespace Microsoft.Spark.CSharp.Samples { class DataFrameSamples { - private const string PeopleJson = @"people.json"; - private const string OrderJson = @"order.json"; - private const string RequestsLog = @"requestslog.txt"; - private const string MetricsLog = @"metricslog.txt"; - private const string CSVTestLog = @"csvtestlog.txt"; + internal const string PeopleJson = @"people.json"; + internal const string OrderJson = @"order.json"; + internal const string RequestsLog = @"requestslog.txt"; + internal const string MetricsLog = @"metricslog.txt"; + internal const string CSVTestLog = @"csvtestlog.txt"; private static SqlContext sqlContext; diff --git a/csharp/Samples/Microsoft.Spark.CSharp/Program.cs b/csharp/Samples/Microsoft.Spark.CSharp/Program.cs index ec5dce6d..1f25fa26 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/Program.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/Program.cs @@ -78,6 +78,7 @@ private static SparkContext CreateSparkContext() { conf.Set("spark.local.dir", Configuration.SparkLocalDirectoryOverride); } + return new SparkContext(conf); } diff --git a/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj b/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj index 5dfb94da..880feb27 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj 
+++ b/csharp/Samples/Microsoft.Spark.CSharp/Samples.csproj @@ -47,6 +47,7 @@ + @@ -61,6 +62,7 @@ + diff --git a/csharp/Samples/Microsoft.Spark.CSharp/SparkSessionSamples.cs b/csharp/Samples/Microsoft.Spark.CSharp/SparkSessionSamples.cs new file mode 100644 index 00000000..f628e1c8 --- /dev/null +++ b/csharp/Samples/Microsoft.Spark.CSharp/SparkSessionSamples.cs @@ -0,0 +1,189 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.Spark.CSharp.Core; +using Microsoft.Spark.CSharp.Sql; +using NUnit.Framework; + +namespace Microsoft.Spark.CSharp.Samples +{ + class SparkSessionSamples + { + private static SparkSession sparkSession; + + internal static SparkSession GetSparkSession() + { + return sparkSession ?? (sparkSession = SparkSession.Builder().EnableHiveSupport().GetOrCreate()); + } + + [Sample] + internal static void SSNewSessionSample() + { + RunDataFrameSample(true); + } + + [Sample] + internal static void SSDataFrameSample() + { + RunDataFrameSample(false); + } + + private static void RunDataFrameSample(bool createNewSession) + { + SparkSession ss = GetSparkSession(); + + if (createNewSession) + { + ss = sparkSession.NewSession(); + } + + var peopleDataFrame = ss.Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(DataFrameSamples.PeopleJson)); + var count = peopleDataFrame.Count(); + Console.WriteLine("Count of items in DataFrame {0}", count); + + var sortedDataFrame = peopleDataFrame.Sort(new string[] { "name", "age" }, new bool[] { true, false }); + + sortedDataFrame.Show(); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + var sortedDF = sortedDataFrame.Collect().ToArray(); + Assert.AreEqual("789", sortedDF[0].GetAs("id")); + Assert.AreEqual("123", sortedDF[1].GetAs("id")); + Assert.AreEqual("531", sortedDF[2].GetAs("id")); + Assert.AreEqual("456", sortedDF[3].GetAs("id")); + } + } + + [Sample] + internal static void SSShowSchemaSample() + { + var peopleDataFrame = GetSparkSession().Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(DataFrameSamples.PeopleJson)); + peopleDataFrame.Explain(true); + peopleDataFrame.ShowSchema(); + } + + [Sample] + internal static void SSTableSample() + { + var originalPeopleDataFrame = GetSparkSession().Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(DataFrameSamples.PeopleJson)); + originalPeopleDataFrame.RegisterTempTable("people"); + + var peopleDataFrame = GetSparkSession().Table("people"); + + var projectedFilteredDataFrame = peopleDataFrame.Select("name", "address.state") + .Where("name = 'Bill' or state = 'California'"); + + projectedFilteredDataFrame.ShowSchema(); + projectedFilteredDataFrame.Show(); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + CollectionAssert.AreEqual(new[] { "name", "state" }, projectedFilteredDataFrame.Schema.Fields.Select(f => f.Name).ToArray()); + Assert.IsTrue(projectedFilteredDataFrame.Collect().All(row => row.Get("name") == "Bill" || row.Get("state") == "California")); + } + } + + [Sample] + internal static void SSSqlSample() + { + var originalPeopleDataFrame = GetSparkSession().Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(DataFrameSamples.PeopleJson)); + originalPeopleDataFrame.RegisterTempTable("people"); + + var nameFilteredDataFrame = GetSparkSession().Sql("SELECT name, address.city, address.state FROM 
people where name='Bill'"); + var countDataFrame = GetSparkSession().Sql("SELECT count(name) FROM people where name='Bill'"); + var maxAgeDataFrame = GetSparkSession().Sql("SELECT max(age) FROM people where name='Bill'"); + long maxAgeDataFrameRowsCount = maxAgeDataFrame.Count(); + long nameFilteredDataFrameRowsCount = nameFilteredDataFrame.Count(); + long countDataFrameRowsCount = countDataFrame.Count(); + Console.WriteLine("nameFilteredDataFrameRowsCount={0}, maxAgeDataFrameRowsCount={1}, countDataFrameRowsCount={2}", nameFilteredDataFrameRowsCount, maxAgeDataFrameRowsCount, countDataFrameRowsCount); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + Assert.AreEqual(1, maxAgeDataFrameRowsCount); + Assert.AreEqual(2, nameFilteredDataFrameRowsCount); + Assert.AreEqual(1, countDataFrameRowsCount); + } + } + + [Sample] + internal static void SSDropTableSample() + { + var originalPeopleDataFrame = GetSparkSession().Read().Json(SparkCLRSamples.Configuration.GetInputDataPath(DataFrameSamples.PeopleJson)); + originalPeopleDataFrame.RegisterTempTable("people"); + + var nameFilteredDataFrame = GetSparkSession().Sql("SELECT name, address.city, address.state FROM people where name='Bill'"); + long nameFilteredDataFrameRowsCount = nameFilteredDataFrame.Count(); + + GetSparkSession().Catalog.DropTempView("people"); + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + bool tableMissing = false; + try + { + //parsing would fail + var nameFilteredDataFrame2 = GetSparkSession().Sql("SELECT name, address.city, address.state FROM people where name='Bill'"); + } + catch (Exception) + { + tableMissing = true; + } + + Assert.True(tableMissing); + } + } + + [Sample] + internal static void SSCreateDataFrameSample() + { + var schemaPeople = new StructType(new List + { + new StructField("id", new StringType()), + new StructField("name", new StringType()), + new StructField("age", new IntegerType()), + new StructField("address", new StructType(new List + { + new StructField("city", new StringType()), + new StructField("state", new StringType()) + })), + new StructField("phone numbers", new ArrayType(new StringType())) + }); + + var rddPeople = SparkCLRSamples.SparkContext.Parallelize( + new List + { + new object[] { "123", "Bill", 43, new object[]{ "Columbus", "Ohio" }, new string[]{ "Tel1", "Tel2" } }, + new object[] { "456", "Steve", 34, new object[]{ "Seattle", "Washington" }, new string[]{ "Tel3", "Tel4" } } + }); + + var dataFramePeople = GetSparkSession().CreateDataFrame(rddPeople, schemaPeople); + Console.WriteLine("------ Schema of People Data Frame:\r\n"); + dataFramePeople.ShowSchema(); + Console.WriteLine(); + var collected = dataFramePeople.Collect().ToArray(); + foreach (var people in collected) + { + string id = people.Get("id"); + string name = people.Get("name"); + int age = people.Get("age"); + Row address = people.Get("address"); + string city = address.Get("city"); + string state = address.Get("state"); + object[] phoneNumbers = people.Get("phone numbers"); + Console.WriteLine("id:{0}, name:{1}, age:{2}, address:(city:{3},state:{4}), phoneNumbers:[{5},{6}]\r\n", id, name, age, city, state, phoneNumbers[0], phoneNumbers[1]); + } + + if (SparkCLRSamples.Configuration.IsValidationEnabled) + { + Assert.AreEqual(2, dataFramePeople.Rdd.Count()); + Assert.AreEqual(schemaPeople.Json, dataFramePeople.Schema.Json); + } + } + } +} diff --git a/scala/src/main/org/apache/spark/sql/api/csharp/JvmBridgeUtils.scala b/scala/src/main/org/apache/spark/sql/api/csharp/JvmBridgeUtils.scala 
index 74f3514f..128bff23 100644 --- a/scala/src/main/org/apache/spark/sql/api/csharp/JvmBridgeUtils.scala +++ b/scala/src/main/org/apache/spark/sql/api/csharp/JvmBridgeUtils.scala @@ -6,6 +6,7 @@ package org.apache.spark.sql.api.csharp import java.util +import org.apache.spark.SparkConf import scala.collection.JavaConverters._ /* @@ -18,4 +19,23 @@ object JvmBridgeUtils { def toMutableMap[K, V](map: util.HashMap[K, V]) : Map[K, V] = { map.asScala.toMap } + + def getKeyValuePairAsString(kvp: Tuple2[String, String]) : String = { + return kvp._1 + "=" + kvp._2 + } + + def getKeyValuePairArrayAsString(kvpArray : Array[Tuple2[String, String]]) : String = { + val sb = new StringBuilder + + for(kvp <- kvpArray) { + sb.append(getKeyValuePairAsString(kvp)) + sb.append(";") + } + + sb.toString + } + + def getSparkConfAsString(sparkConf: SparkConf): String = { + getKeyValuePairArrayAsString(sparkConf.getAll) + } } diff --git a/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala b/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala index e1aad772..f13d0087 100644 --- a/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala +++ b/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala @@ -25,13 +25,12 @@ import org.apache.spark.broadcast.Broadcast * implementation constructs from SparkR */ object SQLUtils { - def createSQLContext(sc: SparkContext): SQLContext = { - new SQLContext(sc) + def createSparkSession(sc: SparkContext): SparkSession = { + new SparkSession(sc) } - def createHiveContext(sc: SparkContext): SQLContext = { - // TODO fix this - new SQLContext(sc) + def getSqlContext(ss: SparkSession): SQLContext = { + ss.sqlContext } def getJavaSparkContext(sqlCtx: SQLContext): JavaSparkContext = { diff --git a/scala/src/test/scala/org/apache/spark/util/csharp/JvmBridgeUtilsSuite.scala b/scala/src/test/scala/org/apache/spark/util/csharp/JvmBridgeUtilsSuite.scala new file mode 100644 index 00000000..faef7e69 --- /dev/null +++ b/scala/src/test/scala/org/apache/spark/util/csharp/JvmBridgeUtilsSuite.scala @@ -0,0 +1,31 @@ +/* + * Copyright (c) Microsoft. All rights reserved. + * Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+ */ + +package org.apache.spark.util.csharp + +import org.apache.spark.SparkConf +import org.apache.spark.csharp.SparkCLRFunSuite +import org.apache.spark.sql.api.csharp.JvmBridgeUtils + + +class JvmBridgeUtilsSuite extends SparkCLRFunSuite{ + test("getSparkConfAsString") { + var sparkConf = new SparkConf(true) + val appName = "appName" + sparkConf.setAppName(appName) + val master = "master" + sparkConf.setMaster(master) + val kvp1 = ("spark.config1.name", "config1.value") + sparkConf.set(kvp1._1, kvp1._2) + val kvp2 = ("spark.config2.name", "config2.value") + sparkConf.set(kvp2._1, kvp2._2) + + val returnValue = JvmBridgeUtils.getSparkConfAsString(sparkConf) + assert(returnValue.contains(s"spark.master=${master}")) + assert(returnValue.contains(s"spark.app.name=${appName}")) + assert(returnValue.contains(s"${kvp1._1}=${kvp1._2}")) + assert(returnValue.contains(s"${kvp2._1}=${kvp2._2}")) + } +} \ No newline at end of file From 9a37ec84fde9882f5ac1c7792025c73877e5849e Mon Sep 17 00:00:00 2001 From: Yun Tang Date: Sat, 10 Sep 2016 05:20:34 +0800 Subject: [PATCH 05/15] Use logInfo and logError method instead of println (#556) * Use logInfo and logError method insted of println Finish TODO "logError does not work now" * remove redundant printStackTrace method --- .../spark/api/csharp/CSharpBackend.scala | 16 ++++--- .../api/csharp/CSharpBackendHandler.scala | 46 ++++++------------- .../apache/spark/api/csharp/CSharpRDD.scala | 2 +- .../spark/deploy/csharp/CSharpRunner.scala | 28 ++++++----- .../streaming/api/csharp/CSharpDStream.scala | 8 ++-- 5 files changed, 40 insertions(+), 60 deletions(-) diff --git a/scala/src/main/org/apache/spark/api/csharp/CSharpBackend.scala b/scala/src/main/org/apache/spark/api/csharp/CSharpBackend.scala index 2cdbc8d2..9fae5f7f 100644 --- a/scala/src/main/org/apache/spark/api/csharp/CSharpBackend.scala +++ b/scala/src/main/org/apache/spark/api/csharp/CSharpBackend.scala @@ -7,15 +7,16 @@ package org.apache.spark.api.csharp import java.io.{DataOutputStream, File, FileOutputStream, IOException} import java.net.{InetAddress, InetSocketAddress, ServerSocket, Socket} -import java.util.concurrent.{LinkedBlockingQueue, BlockingQueue, TimeUnit} +import java.util.concurrent.{BlockingQueue, LinkedBlockingQueue, TimeUnit} import io.netty.bootstrap.ServerBootstrap import io.netty.channel.nio.NioEventLoopGroup import io.netty.channel.socket.SocketChannel import io.netty.channel.socket.nio.NioServerSocketChannel -import io.netty.channel.{ChannelInitializer, EventLoopGroup, ChannelFuture} +import io.netty.channel.{ChannelFuture, ChannelInitializer, EventLoopGroup} import io.netty.handler.codec.LengthFieldBasedFrameDecoder import io.netty.handler.codec.bytes.{ByteArrayDecoder, ByteArrayEncoder} +import org.apache.spark.internal.Logging /** @@ -24,9 +25,10 @@ import io.netty.handler.codec.bytes.{ByteArrayDecoder, ByteArrayEncoder} * This implementation is identical to RBackend and that can be reused * in SparkCLR if the handler is made pluggable */ -// Since SparkCLR is a package to Spark and not a part of spark-core it mirrors the implementation of -// selected parts from RBackend with SparkCLR customizations -class CSharpBackend { self => // for accessing the this reference in inner class(ChannelInitializer) +// Since SparkCLR is a package to Spark and not a part of spark-core it mirrors the implementation +// of selected parts from RBackend with SparkCLR customizations +class CSharpBackend extends Logging +{ self => // for accessing the this reference in inner 
class(ChannelInitializer) private[this] var channelFuture: ChannelFuture = null private[this] var bootstrap: ServerBootstrap = null private[this] var bossGroup: EventLoopGroup = null @@ -82,7 +84,7 @@ class CSharpBackend { self => // for accessing the this reference in inner class bootstrap = null // Send close to CSharp callback server. - println("Requesting to close all call back sockets.") + logInfo("Requesting to close all call back sockets.") var socket: Socket = null do { socket = CSharpBackend.callbackSockets.poll() @@ -94,7 +96,7 @@ class CSharpBackend { self => // for accessing the this reference in inner class socket = null } catch { - case e : Exception => println("Exception when closing socket: " + e) + case e : Exception => logError("Exception when closing socket: ", e) } } } while (socket != null) diff --git a/scala/src/main/org/apache/spark/api/csharp/CSharpBackendHandler.scala b/scala/src/main/org/apache/spark/api/csharp/CSharpBackendHandler.scala index 9fd9fd92..20fd3816 100644 --- a/scala/src/main/org/apache/spark/api/csharp/CSharpBackendHandler.scala +++ b/scala/src/main/org/apache/spark/api/csharp/CSharpBackendHandler.scala @@ -6,11 +6,12 @@ package org.apache.spark.api.csharp import org.apache.spark.util.Utils -import java.io.{DataOutputStream, ByteArrayOutputStream, DataInputStream, ByteArrayInputStream} +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import java.net.Socket import io.netty.channel.ChannelHandler.Sharable import io.netty.channel.{ChannelHandlerContext, SimpleChannelInboundHandler} +import org.apache.spark.internal.Logging // TODO - work with SparkR devs to make this configurable and reuse RBackendHandler import org.apache.spark.api.csharp.SerDe._ @@ -24,7 +25,8 @@ import scala.collection.mutable.HashMap */ // Since SparkCLR is a package to Spark and not a part of spark-core, it mirrors the implementation // of selected parts from RBackend with SparkCLR customizations -class CSharpBackendHandler(server: CSharpBackend) extends SimpleChannelInboundHandler[Array[Byte]] { +class CSharpBackendHandler(server: CSharpBackend) extends SimpleChannelInboundHandler[Array[Byte]] + with Logging{ override def channelRead0(ctx: ChannelHandlerContext, msg: Array[Byte]): Unit = { val reply = handleBackendRequest(msg) @@ -71,15 +73,13 @@ class CSharpBackendHandler(server: CSharpBackend) extends SimpleChannelInboundHa val t = readObjectType(dis) assert(t == 'i') val port = readInt(dis) - // scalastyle:off println - println("[CSharpBackendHandler] Connecting to a callback server at port " + port) + logInfo(s"Connecting to a callback server at port $port") CSharpBackend.callbackPort = port writeInt(dos, 0) writeType(dos, "void") case "closeCallback" => // Send close to CSharp callback server. 
- println("[CSharpBackendHandler] Requesting to close all call back sockets.") - // scalastyle:on + logInfo("Requesting to close all call back sockets.") var socket: Socket = null do { socket = CSharpBackend.callbackSockets.poll() @@ -91,7 +91,7 @@ class CSharpBackendHandler(server: CSharpBackend) extends SimpleChannelInboundHa socket = null } catch { - case e: Exception => println("Exception when closing socket: " + e) + case e: Exception => logError("Exception when closing socket: ", e) } } } while (socket != null) @@ -111,10 +111,7 @@ class CSharpBackendHandler(server: CSharpBackend) extends SimpleChannelInboundHa override def exceptionCaught(ctx: ChannelHandlerContext, cause: Throwable): Unit = { // Close the connection when an exception is raised. - // scalastyle:off println - println("Exception caught: " + cause.getMessage) - // scalastyle:on - cause.printStackTrace() + logError("Exception caught: ", cause) ctx.close() } @@ -176,31 +173,26 @@ class CSharpBackendHandler(server: CSharpBackend) extends SimpleChannelInboundHa } } catch { case e: Exception => - // TODO - logError does not work now..fix //logError(s"$methodName on $objId failed", e) val jvmObj = JVMObjectTracker.get(objId) val jvmObjName = jvmObj match { case Some(jObj) => jObj.getClass.getName case None => "NullObject" } - // scalastyle:off println - println(s"[CSharpBackendHandler] $methodName on object of type $jvmObjName failed") - println(e.getMessage) - println(e.printStackTrace()) + logError(s"On object of type $jvmObjName failed", e) if (methods != null) { - println("methods:") - methods.foreach(println(_)) + logError("methods:") + methods.foreach(m => logError(m.toString)) } if (args != null) { - println("args:") + logError("args:") args.foreach(arg => { if (arg != null) { - println("argType: " + arg.getClass.getCanonicalName + ", argValue: " + arg) + logError(s"argType: ${arg.getClass.getCanonicalName}, argValue: $arg") } else { - println("arg: NULL") + logError("arg: NULL") } }) } - // scalastyle:on println writeInt(dos, -1) writeString(dos, Utils.exceptionString(e.getCause)) } @@ -254,16 +246,6 @@ class CSharpBackendHandler(server: CSharpBackend) extends SimpleChannelInboundHa true } - // scalastyle:off println - def logError(id: String) { - println(id) - } - - def logWarning(id: String) { - println(id) - } - - // scalastyle:on println def logError(id: String, e: Exception): Unit = { diff --git a/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala b/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala index 3ecd8969..1faf7766 100644 --- a/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala +++ b/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala @@ -181,7 +181,7 @@ class CSharpRDD( case e: OverlappingFileLockException => logInfo("Already obtained the lock.") waitUnzipOperationDone(doneFlag) - case e: Exception => e.printStackTrace() + case e: Exception => logError("Exception when unzipping cSharpWorkerWorkingDir", e) } finally { if (lock != null && lock.isValid) lock.release() diff --git a/scala/src/main/org/apache/spark/deploy/csharp/CSharpRunner.scala b/scala/src/main/org/apache/spark/deploy/csharp/CSharpRunner.scala index ddd72351..63986760 100644 --- a/scala/src/main/org/apache/spark/deploy/csharp/CSharpRunner.scala +++ b/scala/src/main/org/apache/spark/deploy/csharp/CSharpRunner.scala @@ -14,6 +14,7 @@ import org.apache.spark.SparkConf import org.apache.spark.SecurityManager import org.apache.spark.api.csharp.CSharpBackend import org.apache.spark.deploy.{PythonRunner, 
SparkHadoopUtil, SparkSubmitArguments} +import org.apache.spark.internal.Logging import org.apache.spark.util.{RedirectThread, Utils} import org.apache.spark.util.csharp.{Utils => CSharpSparkUtils} @@ -22,8 +23,7 @@ import org.apache.spark.util.csharp.{Utils => CSharpSparkUtils} * gets its port number and launches C# process passing the port number to it. * The runner implementation is mostly identical to RRunner with SparkCLR-specific customizations. */ -// scalastyle:off println -object CSharpRunner { +object CSharpRunner extends Logging{ val MOBIUS_DEBUG_PORT = 5567 def main(args: Array[String]): Unit = { @@ -51,7 +51,7 @@ object CSharpRunner { zipFileName = downloadDriverFile(zipFileName, driverDir.getAbsolutePath).getName } - println(s"[CSharpRunner.main] Unzipping driver $zipFileName in $driverDir") + logInfo(s"Unzipping driver $zipFileName in $driverDir") CSharpSparkUtils.unzip(new File(zipFileName), driverDir) // reusing windows-specific formatting in PythonRunner csharpExecutable = PythonRunner.formatPath(args(1)) @@ -74,7 +74,7 @@ object CSharpRunner { processParameters.add(formatPath(csharpExecutable)) otherArgs.foreach( arg => processParameters.add(arg) ) - println("[CSharpRunner.main] Starting CSharpBackend!") + logInfo("Starting CSharpBackend!") // Time to wait for CSharpBackend to initialize in seconds val backendTimeout = sys.env.getOrElse("CSHARPBACKEND_TIMEOUT", "120").toInt @@ -88,8 +88,7 @@ object CSharpRunner { // need to get back csharpBackendPortNumber because if the value passed to init is 0 // the port number is dynamically assigned in the backend csharpBackendPortNumber = csharpBackend.init(csharpBackendPortNumber) - println("[CSharpRunner.main] Port number used by CSharpBackend is " - + csharpBackendPortNumber) // TODO - send to logger also + logInfo(s"Port number used by CSharpBackend is $csharpBackendPortNumber") initialized.release() csharpBackend.run() } @@ -107,8 +106,7 @@ object CSharpRunner { for ((key, value) <- Utils.getSystemProperties if key.startsWith("spark.")) { env.put(key, value) - println("[CSharpRunner.main] adding key=" + key - + " and value=" + value + " to environment") + logInfo(s"Adding key=$key and value=$value to environment") } builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize val process = builder.start() @@ -123,22 +121,23 @@ object CSharpRunner { closeBackend(csharpBackend) } catch { case t: Throwable => - println("[CSharpRunner.main]" + t.getMessage + "\n" + t.getStackTrace) + logError(s"${t.getMessage} \n ${t.getStackTrace}") } - println("[CSharpRunner.main] Return CSharpBackend code " + returnCode) + logInfo(s"Return CSharpBackend code $returnCode") CSharpSparkUtils.exit(returnCode) } else { + // scalastyle:off println println("***********************************************************************") println("* [CSharpRunner.main] Backend running debug mode. 
Press enter to exit *") println("***********************************************************************") + // scalastyle:on println Console.readLine() closeBackend(csharpBackend) CSharpSparkUtils.exit(0) } } else { - println("[CSharpRunner.main] CSharpBackend did not initialize in " - + backendTimeout + " seconds") + logError(s"CSharpBackend did not initialize in $backendTimeout seconds") CSharpSparkUtils.exit(-1) } } @@ -168,7 +167,7 @@ object CSharpRunner { val localFile = new File(driverDir, jarFileName) if (!localFile.exists()) { // May already exist if running multiple workers on one node - println(s"Copying user file $filePath to $driverDir") + logInfo(s"Copying user file $filePath to $driverDir") Utils.fetchFile( hdfsFilePath, new File(driverDir), @@ -187,7 +186,7 @@ object CSharpRunner { } def closeBackend(csharpBackend: CSharpBackend): Unit = { - println("[CSharpRunner.main] closing CSharpBackend") + logInfo("Closing CSharpBackend") csharpBackend.close() } @@ -205,4 +204,3 @@ object CSharpRunner { (runInDebugMode, portNumber) } } -// scalastyle:on println diff --git a/scala/src/main/org/apache/spark/streaming/api/csharp/CSharpDStream.scala b/scala/src/main/org/apache/spark/streaming/api/csharp/CSharpDStream.scala index 93d5e58c..1d87e539 100644 --- a/scala/src/main/org/apache/spark/streaming/api/csharp/CSharpDStream.scala +++ b/scala/src/main/org/apache/spark/streaming/api/csharp/CSharpDStream.scala @@ -29,7 +29,7 @@ import org.apache.spark.streaming.api.java._ import scala.language.existentials -object CSharpDStream { +object CSharpDStream extends Logging{ // Variables for debugging var debugMode = false @@ -78,9 +78,7 @@ object CSharpDStream { case e: Exception => // log exception only when callback socket is not shutdown explicitly if (!CSharpBackend.callbackSocketShutdown) { - // TODO: change println to log - System.err.println("CSharp transform callback failed with " + e) // scalastyle:off println - e.printStackTrace() + logError(s"CSharp transform callback failed", e) } // close this socket if error happen @@ -89,7 +87,7 @@ object CSharpDStream { socket.close() } catch { - case e: Exception => println("Exception when closing socket: " + e) + case e: Exception => logError("Exception when closing socket", e) } } From 67046cef3e1d642ac6402f28f111490518230c0e Mon Sep 17 00:00:00 2001 From: dwnichols Date: Fri, 16 Sep 2016 14:13:48 -0400 Subject: [PATCH 06/15] Update run-samples.sh to use unix path separators --- build/localmode/run-samples.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build/localmode/run-samples.sh b/build/localmode/run-samples.sh index 5c6486bb..37846c51 100755 --- a/build/localmode/run-samples.sh +++ b/build/localmode/run-samples.sh @@ -73,9 +73,9 @@ fi export SPARKCLR_HOME="$FWDIR/../runtime" # spark-csv package and its depenedency are required for DataFrame operations in Mobius -export SPARKCLR_EXT_PATH="$SPARKCLR_HOME\dependencies" -export SPARKCSV_JAR1PATH="$SPARKCLR_EXT_PATH\spark-csv_2.10-1.3.0.jar" -export SPARKCSV_JAR2PATH="$SPARKCLR_EXT_PATH\commons-csv-1.1.jar" +export SPARKCLR_EXT_PATH="$SPARKCLR_HOME/dependencies" +export SPARKCSV_JAR1PATH="$SPARKCLR_EXT_PATH/spark-csv_2.10-1.3.0.jar" +export SPARKCSV_JAR2PATH="$SPARKCLR_EXT_PATH/commons-csv-1.1.jar" export SPARKCLR_EXT_JARS="$SPARKCSV_JAR1PATH,$SPARKCSV_JAR2PATH" # run-samples.sh is in local mode, should not load Hadoop or Yarn cluster config. Disable Hadoop/Yarn conf dir. 
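
A note on the `println` → `logInfo`/`logError` conversion in the backend, handler, runner and DStream classes above: each class now mixes in Spark's internal `Logging` trait, so exceptions carry their stack traces into the driver/executor logs instead of being flattened onto stdout. Below is a minimal sketch of that pattern, assuming only Spark 2.0 on the classpath; the class and method are made up for illustration, and `Logging` is package-private to `org.apache.spark`, which is why the sketch (like the Mobius classes themselves) has to live under that package.

```scala
// Illustrative only -- not part of the Mobius source tree.
package org.apache.spark.api.csharp

import java.net.Socket

import org.apache.spark.internal.Logging

class CallbackSocketCloser extends Logging {
  def closeAll(sockets: Seq[Socket]): Unit = {
    logInfo("Requesting to close all call back sockets.")   // was: println(...)
    sockets.foreach { socket =>
      try {
        socket.close()
      } catch {
        // logError(msg, throwable) preserves the stack trace that println dropped
        case e: Exception => logError("Exception when closing socket", e)
      }
    }
  }
}
```
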
From 427ce52e8df910b6775d628a7fc2cfc411d5b8c5 Mon Sep 17 00:00:00 2001 From: dwnichols Date: Fri, 16 Sep 2016 16:32:06 -0400 Subject: [PATCH 07/15] Download external dependencies from the build.sh shell script --- build/build.sh | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/build/build.sh b/build/build.sh index 5e935cd5..25ada051 100755 --- a/build/build.sh +++ b/build/build.sh @@ -22,6 +22,41 @@ fi [ ! -d "$SPARKCLR_HOME/lib" ] && mkdir "$SPARKCLR_HOME/lib" [ ! -d "$SPARKCLR_HOME/samples" ] && mkdir "$SPARKCLR_HOME/samples" [ ! -d "$SPARKCLR_HOME/scripts" ] && mkdir "$SPARKCLR_HOME/scripts" +[ ! -d "$SPARKCLR_HOME/dependencies" ] && mkdir "$SPARKCLR_HOME/dependencies" + +echo "Download Mobius external dependencies" +pushd "$SPARKCLR_HOME/dependencies" + +download_dependency() { + LINK=$1 + JAR=$2 + + if [ ! -e $JAR ]; + then + wget $LINK -O $JAR + + if [ ! -e $JAR ]; + then + echo "Cannot download external dependency $JAR from $LINK" + popd + exit 1 + fi + fi +} + +SPARK_CSV_LINK="http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.3.0/spark-csv_2.10-1.3.0.jar" +SPARK_CSV_JAR="spark-csv_2.10-1.3.0.jar" +download_dependency $SPARK_CSV_LINK $SPARK_CSV_JAR + +COMMONS_CSV_LINK="http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.1/commons-csv-1.1.jar" +COMMONS_CSV_JAR="commons-csv-1.1.jar" +download_dependency $COMMONS_CSV_LINK $COMMONS_CSV_JAR + +SPARK_STREAMING_KAFKA_LINK="http://search.maven.org/remotecontent?filepath=org/apache/spark/spark-streaming-kafka-0-8-assembly_2.11/2.0.0/spark-streaming-kafka-0-8-assembly_2.11-2.0.0.jar" +SPARK_STREAMING_KAFKA_JAR="spark-streaming-kafka-0-8-assembly_2.11-2.0.0.jar" +download_dependency $SPARK_STREAMING_KAFKA_LINK $SPARK_STREAMING_KAFKA_JAR + +popd echo "Assemble Mobius Scala components" pushd "$FWDIR/../scala" @@ -36,7 +71,7 @@ mvn clean -q # build the package mvn package -Puber-jar -q -if [ $? -ne 0 ] +if [ $? -ne 0 ]; then echo "Build Mobius Scala components failed, stop building." popd From 3da5beb91ed2d4dbb83addae6b334ce43949f5ab Mon Sep 17 00:00:00 2001 From: dwnichols Date: Fri, 16 Sep 2016 18:21:19 -0400 Subject: [PATCH 08/15] Store external dependencies in build before copying to runtime folder --- build/build.sh | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/build/build.sh b/build/build.sh index 25ada051..6b83d359 100755 --- a/build/build.sh +++ b/build/build.sh @@ -7,25 +7,10 @@ export FWDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -export SPARKCLR_HOME="$FWDIR/runtime" -echo "SPARKCLR_HOME=$SPARKCLR_HOME" - -if [ -d "$SPARKCLR_HOME" ]; -then - echo "Delete existing $SPARKCLR_HOME ..." - rm -r -f "$SPARKCLR_HOME" -fi - -[ ! -d "$SPARKCLR_HOME" ] && mkdir "$SPARKCLR_HOME" -[ ! -d "$SPARKCLR_HOME/bin" ] && mkdir "$SPARKCLR_HOME/bin" -[ ! -d "$SPARKCLR_HOME/data" ] && mkdir "$SPARKCLR_HOME/data" -[ ! -d "$SPARKCLR_HOME/lib" ] && mkdir "$SPARKCLR_HOME/lib" -[ ! -d "$SPARKCLR_HOME/samples" ] && mkdir "$SPARKCLR_HOME/samples" -[ ! -d "$SPARKCLR_HOME/scripts" ] && mkdir "$SPARKCLR_HOME/scripts" -[ ! -d "$SPARKCLR_HOME/dependencies" ] && mkdir "$SPARKCLR_HOME/dependencies" +[ ! -d "$FWDIR/dependencies" ] && mkdir "$FWDIR/dependencies" echo "Download Mobius external dependencies" -pushd "$SPARKCLR_HOME/dependencies" +pushd "$FWDIR/dependencies" download_dependency() { LINK=$1 @@ -33,7 +18,8 @@ download_dependency() { if [ ! 
-e $JAR ]; then - wget $LINK -O $JAR + echo "Downloading $JAR" + wget -q $LINK -O $JAR if [ ! -e $JAR ]; then @@ -58,6 +44,27 @@ download_dependency $SPARK_STREAMING_KAFKA_LINK $SPARK_STREAMING_KAFKA_JAR popd +export SPARKCLR_HOME="$FWDIR/runtime" +echo "SPARKCLR_HOME=$SPARKCLR_HOME" + +if [ -d "$SPARKCLR_HOME" ]; +then + echo "Delete existing $SPARKCLR_HOME ..." + rm -r -f "$SPARKCLR_HOME" +fi + +[ ! -d "$SPARKCLR_HOME" ] && mkdir "$SPARKCLR_HOME" +[ ! -d "$SPARKCLR_HOME/bin" ] && mkdir "$SPARKCLR_HOME/bin" +[ ! -d "$SPARKCLR_HOME/data" ] && mkdir "$SPARKCLR_HOME/data" +[ ! -d "$SPARKCLR_HOME/lib" ] && mkdir "$SPARKCLR_HOME/lib" +[ ! -d "$SPARKCLR_HOME/samples" ] && mkdir "$SPARKCLR_HOME/samples" +[ ! -d "$SPARKCLR_HOME/scripts" ] && mkdir "$SPARKCLR_HOME/scripts" +[ ! -d "$SPARKCLR_HOME/dependencies" ] && mkdir "$SPARKCLR_HOME/dependencies" + +echo "Assemble Mobius external dependencies" +cp $FWDIR/dependencies/* "$SPARKCLR_HOME/dependencies/" +[ $? -ne 0 ] && exit 1 + echo "Assemble Mobius Scala components" pushd "$FWDIR/../scala" From 3c767bddd44229fbee95ff8b39f7323fb701c2d3 Mon Sep 17 00:00:00 2001 From: Quanmao LIU Date: Tue, 20 Sep 2016 13:41:29 +0800 Subject: [PATCH 09/15] Add Python version performance benchmark test; Add usage + example and RIOSocket option support for C# version benchmark; Update Scala version benchmark; Update csv package version. (#565) Besides normal spark-submit , there's a convenient way using https://github.com/qualiu/testMobius 1.Cluster mode examples as following: D:\msgit\lqmMobius\testMobius\scripts\perf\submit-python-perf-test.bat d:\msgit\lqmMobius\python\perf 10 hdfs:///perf/data/deletions/* D:\msgit\lqmMobius\testMobius\scripts\perf\submit-scala-perf-test.bat d:\msgit\lqmMobius\scala\perf 10 D:\msgit\lqmMobius\testMobius\scripts\perf\submit-mobius-perf-test.bat d:\msgit\lqmMobius\csharp\perf 10 2.Local mode examples : (1) First set SparkOptions : just copy the "Local Mode set SparkOptions=" after running the bat without arguments. 
(2) Then submit local test : (you should have downloaded or copied the test data, like d:\mobius\deletions) D:\msgit\lqmMobius\testMobius\scripts\perf\submit-python-perf-test.bat d:\msgit\lqmMobius\python\perf 3 d:\mobius\deletions\deletions.csv-00000-of-00020 D:\msgit\lqmMobius\testMobius\scripts\perf\submit-scala-perf-test.bat d:\msgit\lqmMobius\scala\perf 3 d:\mobius\deletions\deletions.csv-00000-of-00020 D:\msgit\lqmMobius\testMobius\scripts\perf\submit-mobius-perf-test.bat d:\msgit\lqmMobius\csharp\perf 3 d:\mobius\deletions\deletions.csv-00000-of-00020 --- .gitignore | 1 + build/build.sh | 8 +- build/localmode/RunSamples.cmd | 4 +- build/localmode/downloadtools.ps1 | 8 +- build/localmode/run-samples.sh | 4 +- .../FreebaseDeletionsBenchmark.cs | 20 ++-- .../PerfBenchmark.csproj | 15 +++ csharp/Perf/Microsoft.Spark.CSharp/Program.cs | 32 +++++-- python/perf/FreebaseDeletionsBenchmark.py | 96 +++++++++++++++++++ python/perf/PerfBenchmark.py | 61 ++++++++++++ python/perf/spark-python-perf.py | 33 +++++++ scala/perf/pom.xml | 2 +- .../csharp/FreebaseDeletionsBenchmark.scala | 30 +++--- .../spark/csharp/PerfBenchmark.scala | 61 +++++++----- scala/pom.xml | 2 +- 15 files changed, 304 insertions(+), 73 deletions(-) create mode 100644 python/perf/FreebaseDeletionsBenchmark.py create mode 100644 python/perf/PerfBenchmark.py create mode 100644 python/perf/spark-python-perf.py diff --git a/.gitignore b/.gitignore index b42159a0..7354c165 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ *.class *.dll *.exe +*.pyc # Packages # ############ diff --git a/build/build.sh b/build/build.sh index 6b83d359..21713db5 100755 --- a/build/build.sh +++ b/build/build.sh @@ -30,12 +30,12 @@ download_dependency() { fi } -SPARK_CSV_LINK="http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.3.0/spark-csv_2.10-1.3.0.jar" -SPARK_CSV_JAR="spark-csv_2.10-1.3.0.jar" +SPARK_CSV_LINK="http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.4.0/spark-csv_2.10-1.4.0.jar" +SPARK_CSV_JAR="spark-csv_2.10-1.4.0.jar" download_dependency $SPARK_CSV_LINK $SPARK_CSV_JAR -COMMONS_CSV_LINK="http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.1/commons-csv-1.1.jar" -COMMONS_CSV_JAR="commons-csv-1.1.jar" +COMMONS_CSV_LINK="http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.4/commons-csv-1.4.jar" +COMMONS_CSV_JAR="commons-csv-1.4.jar" download_dependency $COMMONS_CSV_LINK $COMMONS_CSV_JAR SPARK_STREAMING_KAFKA_LINK="http://search.maven.org/remotecontent?filepath=org/apache/spark/spark-streaming-kafka-0-8-assembly_2.11/2.0.0/spark-streaming-kafka-0-8-assembly_2.11-2.0.0.jar" diff --git a/build/localmode/RunSamples.cmd b/build/localmode/RunSamples.cmd index 6ad9094c..3b75e857 100644 --- a/build/localmode/RunSamples.cmd +++ b/build/localmode/RunSamples.cmd @@ -68,8 +68,8 @@ set SPARKCLR_HOME=%CMDHOME%\..\runtime @rem spark-csv package and its depenedency are required for DataFrame operations in Mobius set SPARKCLR_EXT_PATH=%SPARKCLR_HOME%\dependencies -set SPARKCSV_JAR1PATH=%SPARKCLR_EXT_PATH%\spark-csv_2.10-1.3.0.jar -set SPARKCSV_JAR2PATH=%SPARKCLR_EXT_PATH%\commons-csv-1.1.jar +set SPARKCSV_JAR1PATH=%SPARKCLR_EXT_PATH%\spark-csv_2.10-1.4.0.jar +set SPARKCSV_JAR2PATH=%SPARKCLR_EXT_PATH%\commons-csv-1.4.jar set SPARKCLR_EXT_JARS=%SPARKCSV_JAR1PATH%,%SPARKCSV_JAR2PATH% @rem RunSamples.cmd is in local mode, should not load Hadoop or Yarn cluster config. Disable Hadoop/Yarn conf dir. 
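
For context on the spark-csv version bump in these scripts and the `C1` → `_c1` column renames in the FreebaseDeletionsBenchmark diff further below: with Spark 1.x plus the spark-csv package, headerless files got default column names `C0, C1, ...`, whereas Spark 2.0's built-in CSV reader names them `_c0, _c1, ...`. The following is a rough, self-contained Scala sketch of the equivalent query; everything here is illustrative (the object name, session setup, and output formatting are not part of Mobius), and the input path is the one mentioned in the commit message above.

```scala
import org.apache.spark.sql.SparkSession

// Sketch only: read the Freebase deletions CSV with Spark 2.0's built-in
// csv source and reproduce the benchmark's filter/groupBy using the
// auto-generated _cN column names.
object DeletionsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("deletions-sketch").getOrCreate()
    val path  = "hdfs:///perf/data/deletions/*"   // path taken from the commit message above

    val deletions = spark.read
      .option("header", "false")   // the dataset has no header row
      .csv(path)                   // columns come back as _c0, _c1, _c2, ...

    val filtered = deletions.filter("_c1 = _c3")       // same predicate as the C# benchmark
    val counts   = filtered.groupBy("_c1").count()
    counts.orderBy(counts("count").desc).show(1)       // user with the most deletions

    spark.stop()
  }
}
```
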
diff --git a/build/localmode/downloadtools.ps1 b/build/localmode/downloadtools.ps1 index b71f355d..78d20a91 100644 --- a/build/localmode/downloadtools.ps1 +++ b/build/localmode/downloadtools.ps1 @@ -347,14 +347,14 @@ function Download-ExternalDependencies $readMeStream.WriteLine("------------ Dependencies for CSV parsing in Mobius DataFrame API -----------------------------") # Downloading spark-csv package and its depenency. These packages are required for DataFrame operations in Mobius - $url = "http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.3.0/spark-csv_2.10-1.3.0.jar" - $output="$scriptDir\..\dependencies\spark-csv_2.10-1.3.0.jar" + $url = "http://search.maven.org/remotecontent?filepath=com/databricks/spark-csv_2.10/1.4.0/spark-csv_2.10-1.4.0.jar" + $output="$scriptDir\..\dependencies\spark-csv_2.10-1.4.0.jar" Download-File $url $output Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies" $readMeStream.WriteLine("$url") - $url = "http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.1/commons-csv-1.1.jar" - $output="$scriptDir\..\dependencies\commons-csv-1.1.jar" + $url = "http://search.maven.org/remotecontent?filepath=org/apache/commons/commons-csv/1.4/commons-csv-1.4.jar" + $output="$scriptDir\..\dependencies\commons-csv-1.4.jar" Download-File $url $output Write-Output "[downloadtools.Download-ExternalDependencies] Downloading $url to $scriptDir\..\dependencies" $readMeStream.WriteLine("$url") diff --git a/build/localmode/run-samples.sh b/build/localmode/run-samples.sh index 37846c51..4e94bca2 100755 --- a/build/localmode/run-samples.sh +++ b/build/localmode/run-samples.sh @@ -74,8 +74,8 @@ fi export SPARKCLR_HOME="$FWDIR/../runtime" # spark-csv package and its depenedency are required for DataFrame operations in Mobius export SPARKCLR_EXT_PATH="$SPARKCLR_HOME/dependencies" -export SPARKCSV_JAR1PATH="$SPARKCLR_EXT_PATH/spark-csv_2.10-1.3.0.jar" -export SPARKCSV_JAR2PATH="$SPARKCLR_EXT_PATH/commons-csv-1.1.jar" +export SPARKCSV_JAR1PATH="$SPARKCLR_EXT_PATH/spark-csv_2.10-1.4.0.jar" +export SPARKCSV_JAR2PATH="$SPARKCLR_EXT_PATH/commons-csv-1.4.jar" export SPARKCLR_EXT_JARS="$SPARKCSV_JAR1PATH,$SPARKCSV_JAR2PATH" # run-samples.sh is in local mode, should not load Hadoop or Yarn cluster config. Disable Hadoop/Yarn conf dir. diff --git a/csharp/Perf/Microsoft.Spark.CSharp/FreebaseDeletionsBenchmark.cs b/csharp/Perf/Microsoft.Spark.CSharp/FreebaseDeletionsBenchmark.cs index d4e8d6f8..f000cf98 100644 --- a/csharp/Perf/Microsoft.Spark.CSharp/FreebaseDeletionsBenchmark.cs +++ b/csharp/Perf/Microsoft.Spark.CSharp/FreebaseDeletionsBenchmark.cs @@ -12,7 +12,7 @@ namespace Microsoft.Spark.CSharp.PerfBenchmark /// /// Perf benchmark that users Freebase deletions data /// This data is licensed under CC-BY license (http://creativecommons.org/licenses/by/2.5) - /// Data is available for download at https://developers.google.com/freebase/data) + /// Data is available for downloading : "Freebase Deleted Triples" at https://developers.google.com/freebase /// Data format - CSV, size - 8 GB uncompressed /// Columns in the dataset are /// 1. 
creation_timestamp (Unix epoch time in milliseconds) @@ -55,7 +55,7 @@ internal static void RunRDDMaxDeletionsByUser(string[] args) var lines = PerfBenchmark.SparkContext.TextFile(filePath); var parsedRows = lines.Map(s => { - var columns = s.Split(new[] {','}); + var columns = s.Split(new[] { ',' }); //data has some bad records - use bool flag to indicate corrupt rows if (columns.Length > 4) @@ -75,7 +75,7 @@ internal static void RunRDDMaxDeletionsByUser(string[] args) else return kvp2; }); - + stopwatch.Stop(); PerfBenchmark.ExecutionTimeList.Add(stopwatch.Elapsed); @@ -101,22 +101,22 @@ internal static void RunDFMaxDeletionsByUser(string[] args) stopwatch.Restart(); var rows = PerfBenchmark.SqlContext.TextFile(args[2]); - var filtered = rows.Filter("C1 = C3"); - var aggregated = filtered.GroupBy("C1").Agg(new Dictionary { { "C1", "count" } }); + var filtered = rows.Filter("_c1 = _c3"); + var aggregated = filtered.GroupBy("_c1").Agg(new Dictionary { { "_c1", "count" } }); aggregated.RegisterTempTable("freebasedeletions"); - var max = PerfBenchmark.SqlContext.Sql("select max(`count(C1)`) from freebasedeletions"); + var max = PerfBenchmark.SqlContext.Sql("select max(`count(_c1)`) from freebasedeletions"); var maxArray = max.Collect(); var maxValue = maxArray.First(); - var maxDeletions = PerfBenchmark.SqlContext.Sql("select * from freebasedeletions where `count(C1)` = " + maxValue.Get(0)); + var maxDeletions = PerfBenchmark.SqlContext.Sql("select * from freebasedeletions where `count(_c1)` = " + maxValue.Get(0)); maxDeletions.Show(); //TODO - add perf suite for subquery stopwatch.Stop(); PerfBenchmark.ExecutionTimeList.Add(stopwatch.Elapsed); Console.WriteLine("User with max deletions & count of deletions is listed above. Time elapsed {0}", stopwatch.Elapsed); - + } - - + + } } diff --git a/csharp/Perf/Microsoft.Spark.CSharp/PerfBenchmark.csproj b/csharp/Perf/Microsoft.Spark.CSharp/PerfBenchmark.csproj index 2718f936..70fc5124 100644 --- a/csharp/Perf/Microsoft.Spark.CSharp/PerfBenchmark.csproj +++ b/csharp/Perf/Microsoft.Spark.CSharp/PerfBenchmark.csproj @@ -11,6 +11,7 @@ SparkCLRPerf v4.5 512 + HasCpp AnyCPU @@ -48,6 +49,16 @@ + + + PreserveNewest + Cpp\Riosock.dll + + + PreserveNewest + Cpp\Riosock.pdb + + {ce999a96-f42b-4e80-b208-709d7f49a77c} @@ -60,6 +71,10 @@ + + copy /y $(ProjectDir)..\..\..\csharp\Worker\Microsoft.Spark.CSharp\bin\$(ConfigurationName)\CSharpWorker.* $(TargetDir) + cp -uv $(ProjectDir)../../../csharp/Worker/Microsoft.Spark.CSharp/bin/$(ConfigurationName)/CSharpWorker.* $(TargetDir) + diff --git a/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala b/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala index f13d0087..c01d76a7 100644 --- a/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala +++ b/scala/src/main/org/apache/spark/sql/api/csharp/SQLUtils.scala @@ -15,8 +15,10 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.hive import org.apache.spark.sql.types.{DataType, FloatType, StructType} import org.apache.spark.sql._ -import java.util.{List => JList, Map => JMap, ArrayList => JArrayList} +import java.util.{ArrayList => JArrayList, List => JList, Map => JMap} + import org.apache.spark.broadcast.Broadcast +import org.apache.spark.sql.hive.HiveContext /** * Utility functions for DataFrame in SparkCLR @@ -29,6 +31,12 @@ object SQLUtils { new SparkSession(sc) } + // this method is for back compat with older versions of Spark (1.4, 1.5 & 1.6) + // can be removed once Mobius upgrades to Spark 2.1 + def createHiveContext(sc: 
SparkContext): SQLContext = { + new HiveContext(sc) + } + def getSqlContext(ss: SparkSession): SQLContext = { ss.sqlContext } From 7d920d12065c68759b1856949e096a2ca8e0486c Mon Sep 17 00:00:00 2001 From: skaarthik Date: Mon, 10 Oct 2016 19:11:55 -0700 Subject: [PATCH 13/15] fixing the issue in YARN clusters introduced during Spark 2.0 upgrade --- .../Configuration/ConfigurationService.cs | 4 +++- scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Configuration/ConfigurationService.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Configuration/ConfigurationService.cs index 1e8abbae..cf630391 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Configuration/ConfigurationService.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Configuration/ConfigurationService.cs @@ -65,7 +65,9 @@ internal ConfigurationService() configuration = new SparkCLRConfiguration(appConfig); runMode = RunMode.CLUSTER; } - else if (sparkMaster.Equals("yarn-client", StringComparison.OrdinalIgnoreCase) || sparkMaster.Equals("yarn-cluster", StringComparison.OrdinalIgnoreCase)) + else if (sparkMaster.Equals("yarn-cluster", StringComparison.OrdinalIgnoreCase) || + sparkMaster.Equals("yarn-client", StringComparison.OrdinalIgnoreCase) || + sparkMaster.Equals("yarn", StringComparison.OrdinalIgnoreCase)) //supported in Spark 2.0 { configuration = new SparkCLRConfiguration(appConfig); runMode = RunMode.YARN; diff --git a/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala b/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala index 1faf7766..d48e9f3b 100644 --- a/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala +++ b/scala/src/main/org/apache/spark/api/csharp/CSharpRDD.scala @@ -68,7 +68,7 @@ class CSharpRDD( val func = SQLUtils.createCSharpFunction(command, envVars, cSharpIncludes, - cSharpWorkerExecutable, + cSharpWorker.getAbsolutePath, unUsedVersionIdentifier, broadcastVars, accumulator) From e64e342b33295eaa0671e646e4c2498d1b95b85d Mon Sep 17 00:00:00 2001 From: Kaarthik Sivashanmugam Date: Tue, 11 Oct 2016 12:38:06 -0700 Subject: [PATCH 14/15] adding instructions for HDI --- README.md | 2 +- notes/linux-instructions.md | 3 +-- notes/mobius-in-hdinsight.md | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 notes/mobius-in-hdinsight.md diff --git a/README.md b/README.md index a1ce8255..55b3d28c 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ Refer to the [docs folder](docs) for design overview and other info on Mobius |Build & run unit tests |[Build in Windows](notes/windows-instructions.md#building-mobius) |[Build in Linux](notes/linux-instructions.md#building-mobius-in-linux) | |Run samples (functional tests) in local mode |[Samples in Windows](notes/windows-instructions.md#running-samples) |[Samples in Linux](notes/linux-instructions.md#running-mobius-samples-in-linux) | |Run examples in local mode |[Examples in Windows](/notes/running-mobius-app.md#running-mobius-examples-in-local-mode) |[Examples in Linux](notes/linux-instructions.md#running-mobius-examples-in-linux) | -|Run Mobius app |
  • [Standalone cluster](notes/running-mobius-app.md#standalone-cluster)
  • [YARN cluster](notes/running-mobius-app.md#yarn-cluster)
|
  • [Linux cluster](notes/linux-instructions.md#running-mobius-applications-in-linux)
  • [Azure HDInsight Spark Cluster](/notes/linux-instructions.md#mobius-in-azure-hdinsight-spark-cluster)
  • [AWS EMR Spark Cluster](/notes/linux-instructions.md#mobius-in-amazon-web-services-emr-spark-cluster)
  • | +|Run Mobius app |
    • [Standalone cluster](notes/running-mobius-app.md#standalone-cluster)
    • [YARN cluster](notes/running-mobius-app.md#yarn-cluster)
    |
    • [Linux cluster](notes/linux-instructions.md#running-mobius-applications-in-linux)
    • [Azure HDInsight Spark Cluster](/notes/mobius-in-hdinsight.md)
    • [AWS EMR Spark Cluster](/notes/linux-instructions.md#mobius-in-amazon-web-services-emr-spark-cluster)
    • | |Run Mobius Shell |
      • [Local](notes/mobius-shell.md#run-shell)
      • [YARN](notes/mobius-shell.md#run-shell)
      | Not supported yet | ### Useful Links diff --git a/notes/linux-instructions.md b/notes/linux-instructions.md index e84f8e56..4a6e1972 100644 --- a/notes/linux-instructions.md +++ b/notes/linux-instructions.md @@ -52,8 +52,7 @@ If you are using CentOS, Fedora, or similar Linux distributions or OS X, follow * Update CSharpWorkerPath setting in Mobius application config (refer to the config files used in Mobius examples like the [config for with Pi example](https://github.com/skaarthik/Mobius/blob/linux/examples/Batch/pi/App.config#L61)) to point to [CSharpWorker.sh.exe](./linux-csharpworker-prefix-script.md) (make sure to set the correct value appropriate for the Spark mode to be used) ### Mobius in Azure HDInsight Spark Cluster -* Mono version available in HDInsight cluster is 3.x. Mobius [requires](./linux-instructions.md#prerequisites) 4.2 or above. So, Mono has to be upgraded in HDInsight cluster to use Mobius. -* Follow [instructions](./linux-instructions.md#requirements) for Ubuntu +* Refer to [instructions](./mobius-in-hdinsight.md) for running Mobius application in HDI ### Mobius in Amazon Web Services EMR Spark Cluster * Follow [instructions](./linux-instructions.md#requirements) for CentOS diff --git a/notes/mobius-in-hdinsight.md b/notes/mobius-in-hdinsight.md new file mode 100644 index 00000000..9c04fb42 --- /dev/null +++ b/notes/mobius-in-hdinsight.md @@ -0,0 +1,26 @@ +#Using Mobius in HDInsight Spark Cluster +Mobius [requires](./linux-instructions.md#prerequisites) Mono version 4.2 or above. Depending on the HDI cluster version, manual upgrade of Mono in head and worker nodes may be required. Refer to the table below for Mono upgrade requirements. + +|HDI Version |Mono Version |Mono Upgrade Required | +|---|:------|:----| +3.4 |3.4 |Yes | +3.5 |4.6.1 |No | + +After ensuring that the correct version of Mono is available in the HDI cluster, [instructions](./linux-instructions.md#requirements) to run Mobius applications in HDI are similiar to that of any Ubuntu-based Spark cluster using YARN. Following steps illustrate how to run Mobius Pi example in HDI. 
+ +``` +# login to head node +# create mobius folder under /home/username +mkdir mobius +cd mobius +# replace the url below with the correct version of Mobius +wget https://github.com/Microsoft/Mobius/releases/download/v2.0.000-PREVIEW-2/spark-clr_2.11-2.0.000-PREVIEW-2.zip +unzip spark-clr_2.11-2.0.000-PREVIEW-2.zip +export SPARKCLR_HOME=/home/username/mobius/runtime +cd runtime/scripts +chmod +x sparkclr-submit.sh +# make sure Mobius app has executable permissions +chmod +x ../../examples/Batch/pi/SparkClrPi.exe +# deploy mode can be client or cluster +./sparkclr-submit.sh --master yarn --deploy-mode client --exe SparkClrPi.exe /home/username/mobius/examples/Batch/pi +``` From 9a8fbe811d3556a4454c1b4544ed32a74f9e69d4 Mon Sep 17 00:00:00 2001 From: Kaarthik Sivashanmugam Date: Tue, 11 Oct 2016 12:38:22 -0700 Subject: [PATCH 15/15] updating AssemblyInfo versions --- .../Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs | 5 +++-- csharp/AdapterTest/Properties/AssemblyInfo.cs | 4 ++-- .../Perf/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs | 4 ++-- csharp/Repl/Properties/AssemblyInfo.cs | 4 ++-- csharp/ReplTest/Properties/AssemblyInfo.cs | 4 ++-- .../Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs | 4 ++-- csharp/Tests.Common/Properties/AssemblyInfo.cs | 4 ++-- .../Utils/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs | 4 ++-- .../Worker/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs | 4 ++-- csharp/WorkerTest/Properties/AssemblyInfo.cs | 4 ++-- 10 files changed, 21 insertions(+), 20 deletions(-) diff --git a/csharp/Adapter/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs b/csharp/Adapter/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs index b2c4cfc0..5f6b677b 100644 --- a/csharp/Adapter/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs +++ b/csharp/Adapter/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs @@ -30,5 +30,6 @@ // Build Number // Revision // -[assembly: AssemblyVersion("1.6.1.0")] -[assembly: AssemblyFileVersion("1.6.1.0")] +[assembly: AssemblyVersion("2.0")] +[assembly: AssemblyFileVersion("2.0")] + diff --git a/csharp/AdapterTest/Properties/AssemblyInfo.cs b/csharp/AdapterTest/Properties/AssemblyInfo.cs index c9fd48f6..91a4e560 100644 --- a/csharp/AdapterTest/Properties/AssemblyInfo.cs +++ b/csharp/AdapterTest/Properties/AssemblyInfo.cs @@ -30,5 +30,5 @@ // Build Number // Revision // -[assembly: AssemblyVersion("1.6.1.0")] -[assembly: AssemblyFileVersion("1.6.1.0")] +[assembly: AssemblyVersion("2.0")] +[assembly: AssemblyFileVersion("2.0")] \ No newline at end of file diff --git a/csharp/Perf/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs b/csharp/Perf/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs index 7df7032c..b6e947d8 100644 --- a/csharp/Perf/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs +++ b/csharp/Perf/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs @@ -34,5 +34,5 @@ // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.6.1.0")] -[assembly: AssemblyFileVersion("1.6.1.0")] +[assembly: AssemblyVersion("2.0")] +[assembly: AssemblyFileVersion("2.0")] diff --git a/csharp/Repl/Properties/AssemblyInfo.cs b/csharp/Repl/Properties/AssemblyInfo.cs index 154b50f0..9efe4ae6 100644 --- a/csharp/Repl/Properties/AssemblyInfo.cs +++ b/csharp/Repl/Properties/AssemblyInfo.cs @@ -35,5 +35,5 @@ // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // 
[assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.6.1.0")] -[assembly: AssemblyFileVersion("1.6.1.0")] +[assembly: AssemblyVersion("2.0")] +[assembly: AssemblyFileVersion("2.0")] diff --git a/csharp/ReplTest/Properties/AssemblyInfo.cs b/csharp/ReplTest/Properties/AssemblyInfo.cs index 261bcb13..72a12cd7 100644 --- a/csharp/ReplTest/Properties/AssemblyInfo.cs +++ b/csharp/ReplTest/Properties/AssemblyInfo.cs @@ -35,5 +35,5 @@ // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.6.1.0")] -[assembly: AssemblyFileVersion("1.6.1.0")] +[assembly: AssemblyVersion("2.0")] +[assembly: AssemblyFileVersion("2.0")] diff --git a/csharp/Samples/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs b/csharp/Samples/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs index ade554ff..06379a7e 100644 --- a/csharp/Samples/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs +++ b/csharp/Samples/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs @@ -30,5 +30,5 @@ // Build Number // Revision // -[assembly: AssemblyVersion("1.6.1.0")] -[assembly: AssemblyFileVersion("1.6.1.0")] +[assembly: AssemblyVersion("2.0")] +[assembly: AssemblyFileVersion("2.0")] diff --git a/csharp/Tests.Common/Properties/AssemblyInfo.cs b/csharp/Tests.Common/Properties/AssemblyInfo.cs index 74717c60..d315980a 100644 --- a/csharp/Tests.Common/Properties/AssemblyInfo.cs +++ b/csharp/Tests.Common/Properties/AssemblyInfo.cs @@ -35,5 +35,5 @@ // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] +[assembly: AssemblyVersion("2.0")] +[assembly: AssemblyFileVersion("2.0")] diff --git a/csharp/Utils/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs b/csharp/Utils/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs index d1988d52..127269b5 100644 --- a/csharp/Utils/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs +++ b/csharp/Utils/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs @@ -32,5 +32,5 @@ // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.6.0.0")] -[assembly: AssemblyFileVersion("1.6.0.0")] +[assembly: AssemblyVersion("2.0")] +[assembly: AssemblyFileVersion("2.0")] diff --git a/csharp/Worker/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs b/csharp/Worker/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs index 2f8b77e1..2b9547ad 100644 --- a/csharp/Worker/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs +++ b/csharp/Worker/Microsoft.Spark.CSharp/Properties/AssemblyInfo.cs @@ -30,5 +30,5 @@ // Build Number // Revision // -[assembly: AssemblyVersion("1.6.1.0")] -[assembly: AssemblyFileVersion("1.6.1.0")] +[assembly: AssemblyVersion("2.0")] +[assembly: AssemblyFileVersion("2.0")] diff --git a/csharp/WorkerTest/Properties/AssemblyInfo.cs b/csharp/WorkerTest/Properties/AssemblyInfo.cs index df526bcc..eea728bd 100644 --- a/csharp/WorkerTest/Properties/AssemblyInfo.cs +++ b/csharp/WorkerTest/Properties/AssemblyInfo.cs @@ -32,5 +32,5 @@ // You can specify all the values or you can default the Build and Revision Numbers // by using the '*' as shown below: // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.6.1.0")] -[assembly: AssemblyFileVersion("1.6.1.0")] 
+[assembly: AssemblyVersion("2.0")] +[assembly: AssemblyFileVersion("2.0")]