Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into topic/csv
Browse files Browse the repository at this point in the history
Conflicts:
	framian/src/main/scala/framian/FrameUtils.scala
	framian/src/main/scala/framian/UntypedColumn.scala
	framian/src/main/scala/framian/csv/CsvRowExtractor.scala
  • Loading branch information
tixxit committed Oct 29, 2014
2 parents 901639f + 5982d83 commit 077b595
Show file tree
Hide file tree
Showing 56 changed files with 2,876 additions and 1,118 deletions.
17 changes: 14 additions & 3 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ scalaVersion in ThisBuild := "2.11.2"

crossScalaVersions in ThisBuild := Seq("2.10.4", "2.11.2")

scalacOptions in ThisBuild ++= Seq("-deprecation", "-feature", "-unchecked", "-language:higherKinds")
scalacOptions in ThisBuild ++= Seq("-deprecation", "-feature", "-unchecked", "-language:higherKinds", "-optimize")


maxErrors in ThisBuild := 5
Expand All @@ -21,14 +21,25 @@ resolvers in ThisBuild ++= Seq(

lazy val root = project.
in(file(".")).
aggregate(framian, framianJsonBase, framianJsonPlay).
aggregate(framianMacros, framian, framianJsonBase, framianJsonPlay).
settings(
publish := (),
publishLocal := ()
)

lazy val framianMacros = project.
in(file("framian-macros"))

lazy val framian = project.
in(file("framian"))
in(file("framian")).
enablePlugins(BenchmarkPlugin).
dependsOn(framianMacros).
settings(
// map framian-macros project classes and sources into framian
mappings in (Compile, packageBin) <++= mappings in (framianMacros, Compile, packageBin),
mappings in (Compile, packageSrc) <++= mappings in (framianMacros, Compile, packageSrc)
)


lazy val framianJsonBase = project.
in(file("framian-json-base")).
Expand Down
48 changes: 23 additions & 25 deletions framian-json-base/src/main/scala/framian/json/JsonColumn.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,44 @@ import scala.reflect.runtime.universe.TypeTag

import spire.syntax.monoid._

import framian.column.Mask

private[json] sealed trait JsonColumn {
import JsonColumn._

def pos: Int

final def toColumn: UntypedColumn = {
val text = Column.newBuilder[String]
val nums = Column.newBuilder[BigDecimal]
val bool = Column.newBuilder[Boolean]

@tailrec
def loop(col: JsonColumn, text: Buffer[String], nums: Buffer[BigDecimal], bools: Buffer[Boolean]): UntypedColumn = {
def loop(col: JsonColumn): UntypedColumn = {
col match {
case Start =>
text.toColumn |+| nums.toColumn |+| bools.toColumn
TypedColumn(text.result()) orElse
TypedColumn(nums.result()) orElse
TypedColumn(bool.result())
case Text(pos, value, prev) =>
text.set(pos, value)
loop(prev, text, nums, bools)
text.addValue(value)
nums.addNA()
bool.addNA()
loop(prev)
case Number(pos, value, prev) =>
nums.set(pos, value)
loop(prev, text, nums, bools)
text.addNA()
nums.addValue(value)
bool.addNA()
loop(prev)
case Bool(pos, value, prev) =>
bools.set(pos, value)
loop(prev, text, nums, bools)
text.addNA()
nums.addNA()
bool.addValue(value)
loop(prev)
}
}

loop(this, Buffer.create(), Buffer.create(), Buffer.create())
loop(this)
}
}

Expand All @@ -61,20 +75,4 @@ private[json] object JsonColumn {
case class Text(pos: Int, value: String, prev: JsonColumn) extends JsonColumn
case class Number(pos: Int, value: BigDecimal, prev: JsonColumn) extends JsonColumn
case class Bool(pos: Int, value: Boolean, prev: JsonColumn) extends JsonColumn

private final class Buffer[A: ClassTag: TypeTag](bitset: BitSet, var values: Array[A]) {
def set(pos: Int, value: A): Unit = {
values = if (null == values) new Array[A](pos + 1) else values
values(pos) = value
bitset.update(pos, true)
}

def toColumn: UntypedColumn =
if (values != null) TypedColumn(Column.fromArray(values).mask(bitset))
else UntypedColumn.empty
}

private object Buffer {
def create[A: ClassTag: TypeTag]() = new Buffer(new BitSet, null)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,7 @@ trait JsonExtractor extends JsonModule {
def extract(row: Int, cols: P): Cell[JsonValue] =
Cell.fromOption(inflate(for {
(path, col) <- cols
value <- col.foldRow(row)(Some(_), {
case NA => None
case NM => Some(JsonValue.jsonNull)
})
value <- col.foldRow(row)(None, Some(JsonValue.jsonNull), Some(_))
} yield (path -> value)))
}

Expand Down
12 changes: 12 additions & 0 deletions framian-macros/build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

// sbt settings for the framian-macros module, which holds the macro implementations.
name := "framian-macros"

// The macro implementations import Spire's macro utilities (SyntaxUtil / InlineUtil).
libraryDependencies += Dependencies.Compile.spire

// Adds a source directory specific to the Scala binary version (scala_<binaryVersion>)
// so that version-specific sources can live beside the shared ones.
unmanagedSourceDirectories in Compile += (sourceDirectory in Compile).value / s"scala_${scalaBinaryVersion.value}"

// NOTE(review): presumably the settings that enable the macro-paradise compiler plugin;
// defined in the project's build definition — confirm there.
Dependencies.macroParadise

// Publishing this module is a no-op.
publish := ()

publishLocal := ()
112 changes: 112 additions & 0 deletions framian-macros/src/main/scala/framian/column/ColumnMacros.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package framian
package column

import scala.language.experimental.macros

import spire.macros.{ SyntaxUtil, InlineUtil }

import framian.macroutil.compat.{ Context, freshTermName }

/**
 * Builds the expansion trees for the `foldRow` and `foreach` column macros.
 *
 * Both expansions match on the macro prefix (`c.prefix`): a
 * `framian.UnboxedColumn` is handled through `isValueAt` / `valueAt` /
 * `nonValueAt`, and any other column is handled by pattern matching on the
 * result of applying the column to the row. The finished tree is handed to
 * Spire's `InlineUtil.inlineAndReset`.
 *
 * @param c the macro context the trees are built against
 */
class ColumnMacros[C <: Context](val c: C) {
import c.universe._

val util = new SyntaxUtil[c.type](c)
val inliner = new InlineUtil[c.type](c)

// Returns a tree that can be spliced several times, plus the statements that
// must precede it. When `util.isClean(e)` holds, the tree is used as-is; otherwise
// the expression is bound to a fresh `val` and a reference to that `val` is returned.
private def sanitize[A](e: c.Expr[A]): (c.Tree, List[c.Tree]) =
if (util.isClean(e)) {
(e.tree, Nil)
} else {
val name = freshTermName(c, "norm$")
(q"$name", List(q"val $name = $e"))
}

/**
 * Expansion for folding a single row of the prefix column.
 *
 * The generated code yields `f(value)` when the row holds a value, `na`
 * when the row is `framian.NA`, and `nm` when the row is `framian.NM`.
 *
 * @param row the row index expression
 * @param na  expression spliced into the `NA` branch
 * @param nm  expression spliced into the `NM` branch
 * @param f   function applied to the value; sanitized via `sanitize` before splicing
 * @return the expansion, after `inliner.inlineAndReset`
 */
def foldRow[A, B](row: c.Expr[Int])(na: c.Expr[B], nm: c.Expr[B], f: c.Expr[A => B]): c.Expr[B] = {
val col = freshTermName(c, "foldRow$col$")
val value = freshTermName(c, "foldRow$value$")
val r = freshTermName(c, "foldRow$row$")
val (iter, prefix) = sanitize(f)

val tree = q"""
${c.prefix} match {
case ($col: _root_.framian.UnboxedColumn[_]) =>
val $r = $row
if ($col.isValueAt($r)) {
$iter($col.valueAt($r))
} else {
$col.nonValueAt($r) match {
case _root_.framian.NA => $na
case _root_.framian.NM => $nm
}
}

case $col =>
$col($row) match {
case _root_.framian.NA => $na
case _root_.framian.NM => $nm
case _root_.framian.Value($value) => $iter($value)
}
}
"""

// Statements produced by sanitizing `f` must run before the match.
val block = Block(prefix, tree)
inliner.inlineAndReset[B](block)
}

/**
 * Expansion for iterating over a range of rows of the prefix column.
 *
 * The generated loop runs an index from `from` while it is below `until`,
 * translates the index to a row through `rows`, and calls `f(index, value)`
 * for every row that holds a value. On a `framian.NM` row a flag is set to
 * `abortOnNM`, and the loop stops as soon as that flag is true. `NA` rows
 * are skipped. The expansion evaluates to the negation of the flag.
 *
 * NOTE(review): `until` and `abortOnNM` are spliced into the loop without
 * going through `sanitize`, so a non-trivial expression there appears to be
 * re-evaluated on each iteration / each NM row — confirm this is intended.
 *
 * @param from      first index (inclusive)
 * @param until     end index (exclusive)
 * @param rows      maps a loop index to a row index; sanitized before splicing
 * @param abortOnNM value assigned to the abort flag when an `NM` row is seen
 * @param f         callback receiving the loop index and the row's value
 * @return the expansion, after `inliner.inlineAndReset`
 */
def foreach[A, U](from: c.Expr[Int], until: c.Expr[Int], rows: c.Expr[Int => Int], abortOnNM: c.Expr[Boolean])(f: c.Expr[(Int, A) => U]): c.Expr[Boolean] = {
val col = freshTermName(c, "foreach$col$")
val value = freshTermName(c, "foreach$value$")
val row = freshTermName(c, "foreach$row$")
val nm = freshTermName(c, "foreach$nm$")
val i = freshTermName(c, "foreach$i$")
val (getRow, stmts0) = sanitize(rows)
val (iter, stmts1) = sanitize(f)

val tree = q"""{
var $nm = false
${c.prefix} match {
case ($col: _root_.framian.UnboxedColumn[_]) =>
var $i = $from
while ($i < $until && !$nm) {
val $row = $getRow($i)
if ($col.isValueAt($row)) {
$iter($i, $col.valueAt($row))
} else if ($col.nonValueAt($row) == _root_.framian.NM) {
$nm = $abortOnNM
}
$i += 1
}

case $col =>
var $i = $from
while ($i < $until && !$nm) {
val $row = $getRow($i)
$col($row) match {
case _root_.framian.Value($value) => $iter($i, $value)
case _root_.framian.NM => $nm = $abortOnNM
case _root_.framian.NA =>
}
$i += 1
}
}
!$nm
}
"""

// Bindings for `rows` come first, then those for `f`, then the loop itself.
val block = Block(stmts0 ++ stmts1, tree)
inliner.inlineAndReset[Boolean](block)
}
}

/**
 * Macro entry points. Each one instantiates [[ColumnMacros]] for the given
 * context and delegates to the matching tree builder.
 */
object ColumnMacros {

  /** Entry point for `foldRow`; see [[ColumnMacros#foldRow]]. */
  def foldRowImpl[A, B](c: Context)(row: c.Expr[Int])(na: c.Expr[B], nm: c.Expr[B], f: c.Expr[A => B]): c.Expr[B] = {
    val macros = new ColumnMacros[c.type](c)
    macros.foldRow(row)(na, nm, f)
  }

  /**
   * Entry point for `foreach` without an explicit abort flag: the literal
   * `true` is passed as `abortOnNM`.
   */
  def foreachImpl[A, U](c: Context)(from: c.Expr[Int], until: c.Expr[Int], rows: c.Expr[Int => Int])(f: c.Expr[(Int, A) => U]): c.Expr[Boolean] = {
    import c.universe._
    val alwaysAbort = c.Expr[Boolean](q"true")
    val macros = new ColumnMacros[c.type](c)
    macros.foreach(from, until, rows, alwaysAbort)(f)
  }

  /** Entry point for `foreach` with a caller-supplied `abortOnNM` expression. */
  def foreachExtraImpl[A, U](c: Context)(from: c.Expr[Int], until: c.Expr[Int], rows: c.Expr[Int => Int], abortOnNM: c.Expr[Boolean])(f: c.Expr[(Int, A) => U]): c.Expr[Boolean] = {
    val macros = new ColumnMacros[c.type](c)
    macros.foreach(from, until, rows, abortOnNM)(f)
  }
}
11 changes: 11 additions & 0 deletions framian-macros/src/main/scala_2.10/framian/macroutil/compat.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package framian
package macroutil

import scala.language.experimental.macros

/** Macro API shims for the Scala 2.10 source tree. */
object compat {
  /** The macro context type used by this Scala version. */
  type Context = scala.reflect.macros.Context

  /** Creates a term name from a fresh name generated by `c` with the given prefix. */
  def freshTermName(c: Context, prefix: String): c.universe.TermName = {
    val unique = c.fresh(prefix)
    c.universe.newTermName(unique)
  }
}
12 changes: 12 additions & 0 deletions framian-macros/src/main/scala_2.11/framian/macroutil/compat.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package framian
package macroutil

import scala.language.experimental.macros

/** Macro API shims for the Scala 2.11 source tree. */
object compat {
  // Spire's macro helpers require a whitebox context, which is why it is used
  // here; Spire itself probably does not need whitebox either.
  type Context = scala.reflect.macros.whitebox.Context

  /** Creates a term name from a fresh name generated by `c` with the given prefix. */
  def freshTermName(c: Context, prefix: String): c.universe.TermName = {
    val unique = c.freshName(prefix)
    c.universe.TermName(unique)
  }
}
4 changes: 2 additions & 2 deletions framian/build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

name := "framian"

(sourceGenerators in Compile) <+= (sourceManaged in Compile) map Boilerplate.gen

libraryDependencies ++= {
import Dependencies._
Expand All @@ -27,9 +28,8 @@ initialCommands := """
| import spire.implicits._""".stripMargin('|')


testOptions in Test += Tests.Argument(TestFrameworks.Specs2, "html", "junitxml", "console")
testOptions in Test += Tests.Argument(TestFrameworks.Specs2, "junitxml", "console")

TestCoverage.settings


Publish.settings
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package framian.benchmark

import java.util.concurrent.TimeUnit

import scala.collection.immutable.BitSet
import scala.util.Random

import org.openjdk.jmh.annotations.{ Benchmark, Scope, State }
import org.openjdk.jmh.annotations.{ BenchmarkMode, Mode, OutputTimeUnit }

import framian.Column
import framian.column.Mask

/**
 * JMH benchmarks comparing ways of squaring every element of a `Double`
 * data set: a plain array loop, array loops guarded by NA/NM masks, and
 * `Column.map` on a dense column built from the same data and masks.
 */
class DenseColumnMapBenchmark {

/** Baseline: squares every element of the raw array, ignoring the masks. */
@Benchmark
def squareArray(data: DoubleData) = {
val xs = data.data
val ys = new Array[Double](xs.length)
var i = 0
while (i < xs.length) {
val x = xs(i)
ys(i) = x * x
i += 1
}
ys
}

/**
 * Squares only the elements whose index is in neither the `na` nor the `nm`
 * `Mask`; the other slots keep the array's initial value.
 */
@Benchmark
def squareMaskedArray(data: DoubleData) = {
val xs = data.data
val ys = new Array[Double](xs.length)
var i = 0
while (i < xs.length) {
if (!(data.na(i) || data.nm(i))) {
val x = xs(i)
ys(i) = x * x
}
i += 1
}
ys
}

/** Squares the values through `map` on the column held by `data`. */
@Benchmark
def squareColumn(data: DoubleData) =
data.col.map(x => x * x)

/**
 * Same loop as `squareMaskedArray`, but the membership tests go through the
 * `BitSet` versions of the masks (`na0` / `nm0`).
 */
@Benchmark
def squareBitSetMaskedArray(data: DoubleData) = {
val xs = data.data
val ys = new Array[Double](xs.length)
var i = 0
while (i < xs.length) {
if (!(data.na0(i) || data.nm0(i))) {
val x = xs(i)
ys(i) = x * x
}
i += 1
}
ys
}
}

/**
 * JMH state (benchmark scope) holding the inputs for `DenseColumnMapBenchmark`.
 *
 * All random inputs are drawn, in declaration order, from a single `Random`
 * seeded with 42, so the order of the fields below determines their contents.
 */
@State(Scope.Benchmark)
class DoubleData {
// Number of rows in every input below.
val size = 1000
val rng = new Random(42)
// Raw values, one random double per row.
val data: Array[Double] = Array.fill(size)(rng.nextDouble)
// NA mask built with p = 0.1 and NM mask built with p = 0.01.
val na: Mask = Data.mask(rng, size, 0.1)
val nm: Mask = Data.mask(rng, size, 0.01)
// Dense column over the raw values with the two masks applied.
val col: Column[Double] = Column.dense(data, na, nm)

// The same masks converted with `Mask.toBitSet`, for the BitSet-based benchmark.
val na0: BitSet = na.toBitSet
val nm0: BitSet = nm.toBitSet
}
26 changes: 26 additions & 0 deletions framian/src/benchmark/scala/framian/benchmark/Data.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package framian
package benchmark

import scala.util.Random

import framian.column.Mask

/** Common defaults for benchmark data: a row count and a seeded random generator. */
trait Data {
// Default number of rows.
val size: Int = 1000
// Seeded with a constant so that generated data is the same on every run.
val rng: Random = new Random(42)
}

/** Helpers shared by the benchmarks. */
object Data {

  /**
   * Reads rows `0` until `size` of `col` and folds them into a single `Int`,
   * so that a benchmark has a result that depends on every row.
   *
   * Each row's value is subtracted from the running total; rows for which
   * `col(i)` holds no value contribute 0.
   *
   * @param col  the column to read
   * @param size the number of rows to read, starting at row 0
   * @return the negated sum of the values found
   */
  final def work(col: Column[Int], size: Int): Int = {
    var sum = 0
    var i = 0
    while (i < size) {
      sum -= col(i).getOrElse(0)
      i += 1
    }
    sum
  }

  /**
   * Builds a random mask over the rows `0` until `n`. One double is drawn from
   * `rng` per row, in row order, and the row is included when the draw is
   * below `p`.
   *
   * The row count used to be hard-coded to 1000 regardless of `n`; it now
   * honours `n`. Callers that pass `n = 1000` get exactly the same mask as before.
   *
   * @param rng source of randomness; advanced by `n` draws
   * @param n   number of rows the mask ranges over
   * @param p   inclusion threshold for each row's draw
   * @return a mask containing the selected row indices
   */
  def mask(rng: Random, n: Int, p: Double = 0.1): Mask =
    Mask(Seq.fill(n)(rng.nextDouble).zipWithIndex.filter(_._1 < p).map(_._2): _*)
}
Loading

0 comments on commit 077b595

Please sign in to comment.