Revamp tensor constructors. #27

Merged · 2 commits · Oct 13, 2018
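In short: every tensor constructor that takes a fill value or an init scale now receives the shape first, as an explicit `Seq[Int]`, rather than as trailing `Int*` varargs after the value; shape-only constructors keep their varargs form. A minimal before/after sketch of the calling convention (illustrative names and shapes, not code from the diff below):

```scala
val (inSize, outSize, scale) = (784, 50, 0.05f)

// Before: value/scale first, dimensions as trailing varargs.
//   Tensor.rand(scale, inSize, outSize)
//   Tensor.fill(0.0f, inSize, outSize)

// After: shape first as an explicit Seq[Int], then the value/scale.
val w = Tensor.rand(Seq(inSize, outSize), scale) // scaled uniform init
val z = Tensor.fill(Seq(inSize, outSize), 0.0f)  // constant fill
val g = Tensor.zeros_like(w)                     // shape taken from w
```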
16 changes: 8 additions & 8 deletions src/main/scala/lantern/NNModule.scala
@@ -26,7 +26,7 @@ trait NNModule extends TensorDsl {
for ((_, module) <- modules) module.forEachNamedParameter(f)
}
def enrichParameter(): Unit = {
- for ((k, (tensorR, _)) <- parameters) parameters(k) = (tensorR, Some(Tensor.zeros(tensorR.x)))
+ for ((k, (tensorR, _)) <- parameters) parameters(k) = (tensorR, Some(Tensor.zeros_like(tensorR.x)))
for ((_, module) <- modules) module.enrichParameter()
}
def forEachParameter(f: TensorR => Unit) = forEachNamedParameter{case (_, (tensorR, _)) => f(tensorR)}
@@ -51,32 +51,32 @@ trait NNModule extends TensorDsl {

case class Linear1D(val inSize: Int, val outSize: Int, val name: String = "linear1d") extends Module {
val scale: Float = 1.0f / sqrt(inSize).toFloat
- val weight = TensorR(Tensor.rand(scale, inSize, outSize))
+ val weight = TensorR(Tensor.rand(Seq(inSize, outSize), scale))
val bias = TensorR(Tensor.zeros(outSize))
def apply(in: TensorR): TensorR @diff = in.dot(weight) + bias
}

case class Linear1DTrans(val inSize: Int, val outSize: Int, val name: String = "linear1dtrans") extends Module {
val scale: Float = 1.0f / sqrt(inSize).toFloat
- val weight = TensorR(Tensor.rand(scale, outSize, inSize))
+ val weight = TensorR(Tensor.rand(Seq(outSize, inSize), scale))
val bias = TensorR(Tensor.zeros(outSize))
def apply(in: TensorR): TensorR @diff = in.dot_trans(weight) + bias
}

case class Linear1D2(val inSize1: Int, val inSize2: Int, val outSize: Int, val name: String = "Linear1d2") extends Module {
val scale1: Float = 1.0f / sqrt(inSize1).toFloat
val scale2: Float = 1.0f / sqrt(inSize2).toFloat
- val weight1 = TensorR(Tensor.rand(scale1, inSize1, outSize))
- val weight2 = TensorR(Tensor.rand(scale2, inSize2, outSize))
+ val weight1 = TensorR(Tensor.rand(Seq(inSize1, outSize), scale1))
+ val weight2 = TensorR(Tensor.rand(Seq(inSize2, outSize), scale2))
val bias = TensorR(Tensor.zeros(outSize))
def apply(in1: TensorR, in2: TensorR): TensorR @diff = in1.dot(weight1) + in2.dot(weight2) + bias
}

case class Linear1D2Trans(val inSize1: Int, val inSize2: Int, val outSize: Int, val name: String = "Linear1d2trans") extends Module {
val scale1: Float = 1.0f / sqrt(inSize1).toFloat
val scale2: Float = 1.0f / sqrt(inSize2).toFloat
- val weight1 = TensorR(Tensor.rand(scale1, outSize, inSize1))
- val weight2 = TensorR(Tensor.rand(scale2, outSize, inSize2))
+ val weight1 = TensorR(Tensor.rand(Seq(outSize, inSize1), scale1))
+ val weight2 = TensorR(Tensor.rand(Seq(outSize, inSize2), scale2))
val bias = TensorR(Tensor.zeros(outSize))
def apply(in1: TensorR, in2: TensorR): TensorR @diff = in1.dot_trans(weight1) + in2.dot_trans(weight2) + bias
}
@@ -85,7 +85,7 @@ trait NNModule extends TensorDsl {
assert(kernelSize.size == 2, "kernel_size should be Seq[Int] of size 2")
assert(stride.size == 2, "stride should be Seq[Int] of size 2")
val scale: Float = 1.0f / sqrt(inChannel * kernelSize.head * kernelSize.last).toFloat
- val kernel = TensorR(Tensor.rand(scale, outChannel, inChannel, kernelSize.head, kernelSize.last))
+ val kernel = TensorR(Tensor.rand(Seq(outChannel, inChannel, kernelSize.head, kernelSize.last), scale))
val bias = if (useBiase) Some(TensorR(Tensor.zeros(outChannel))) else None
def apply(in: TensorR): TensorR @diff = in.convBBP(kernel, bias, stride, Seq(pad, pad, pad, pad))
}
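Within the modules only the weight initializers change; shape-only calls such as `Tensor.zeros(outSize)` are untouched. A usage sketch for `Linear1D` under the new signature (hypothetical sizes, assuming a concrete `NNModule` mix-in is in scope):

```scala
val fc = Linear1D(inSize = 784, outSize = 10)
// fc.weight wraps Tensor.rand(Seq(784, 10), 1.0f / sqrt(784).toFloat)
// fc.bias   wraps Tensor.zeros(10) -- the varargs form, unchanged
```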
42 changes: 16 additions & 26 deletions src/main/scala/lantern/TensorDifferentiation.scala
@@ -60,7 +60,7 @@ trait TensorDsl extends DslOps with Diff {
}

object Dataset {
- class DataLoader(name: String, train: Boolean, mean: Float, std: Float, dims: Int*) {
+ class DataLoader(name: String, train: Boolean, mean: Float, std: Float, dims: Seq[Int]) {

val fd = open(s"../data/bin/${name}_${if (train) "train" else "test"}.bin")
val len = filelen(fd)
@@ -946,7 +946,7 @@ trait TensorDsl extends DslOps with Diff {
val resWidth = convSize(this.shape(2) + padLeft + padRight, kernel.shape(2), strideRow)
val resHeight = convSize(this.shape(3) + padUp + padDown, kernel.shape(3), strideCol)
val res = bias match {
- case Some(bias) => Tensor.fillWithBias(bias, 1, this.shape(0), kernel.shape(0), resWidth, resHeight)
+ case Some(bias) => Tensor.fillWithBias(Seq(this.shape(0), kernel.shape(0), resWidth, resHeight), bias, 1)
case None => Tensor.zeros(this.shape(0), kernel.shape(0), resWidth, resHeight)
}
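`fillWithBias` follows the same rule: the result shape moves to the front, followed by the bias vector and the dimension it is laid out along. A hedged sketch of the new call shape (hypothetical sizes; the definition later in this diff asserts `bias.scalarCount == dims.drop(dim).head`):

```scala
// A length-20 bias laid out along dim 1 of a (batch, channel, row, col) result.
val bias = Tensor.ones(20)
val res  = Tensor.fillWithBias(Seq(4, 20, 24, 24), bias, 1)
```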

@@ -1012,7 +1012,7 @@ trait TensorDsl extends DslOps with Diff {

val resWidth = convSize(this.shape(1) + padLeft + padRight, kernel.shape(2), strideRow)
val resHeight = convSize(this.shape(2) + padUp + padDown, kernel.shape(3), strideCol)
- val res = Tensor.fillWithBias(bias, 0, kernel.shape(0), resWidth, resHeight)
+ val res = Tensor.fillWithBias(Seq(kernel.shape(0), resWidth, resHeight), bias, 0)

val offOut = var_new(0) // offset for the res by channel
val offWeight1 = var_new(0) // offset for the kernel by channel (dim_0)
@@ -1166,7 +1166,7 @@ trait TensorDsl extends DslOps with Diff {

val resWidth = convSize(this.shape(2) + padUp + padDown, kernelRow, strideRow)
val resHeight = convSize(this.shape(3) + padLeft + padRight, kernelCol, strideCol)
- val res = Tensor.fill(0.0f, this.shape(0), this.shape(1), resWidth, resHeight)
+ val res = Tensor.zeros(this.shape(0), this.shape(1), resWidth, resHeight)

for (i <- DataLoop(this.shape(0))) {
val ptrInput = slice(this.data, i * this.shape.strides(0))
@@ -1212,7 +1212,7 @@ trait TensorDsl extends DslOps with Diff {

val resHeight = this.shape(1) / strideRow
val resWidth = this.shape(2) / strideCol
- val res = Tensor.fill(scala.Float.MinValue, this.shape(0), resHeight, resWidth)
+ val res = Tensor.fill(Seq(this.shape(0), resHeight, resWidth), scala.Float.MinValue)

// FIXME adhoc transform tensor to be using generic type!
val savedIdx = NewArray[Int](res.scalarCount)
@@ -1264,7 +1264,7 @@ trait TensorDsl extends DslOps with Diff {

val resWidth = convSize(this.shape(2) + padUp + padDown, kernelRow, strideRow)
val resHeight = convSize(this.shape(3) + padLeft + padRight, kernelCol, strideCol)
- val res = Tensor.fill(scala.Float.MinValue, this.shape(0), this.shape(1), resWidth, resHeight)
+ val res = Tensor.fill(Seq(this.shape(0), this.shape(1), resWidth, resHeight), scala.Float.MinValue)
val savedIdx = NewArray[Int](res.scalarCount)

for (i <- DataLoop(this.shape(0))) {
@@ -1366,7 +1366,7 @@ trait TensorDsl extends DslOps with Diff {

val resWidth = convSize(this.shape(1) + padUp + padDown, kernelRow, strideRow)
val resHeight = convSize(this.shape(2) + padLeft + padRight, kernelCol, strideCol)
- val res = Tensor.fill(scala.Float.MinValue, this.shape(0), resWidth, resHeight)
+ val res = Tensor.fill(Seq(this.shape(0), resWidth, resHeight), scala.Float.MinValue)
val savedIdx = NewArray[Int](res.scalarCount)

this.maxPool_k_inplace(kernelRow, kernelCol, strideRow, strideCol, padUp, padDown, padLeft, padRight, res, savedIdx, 0)
@@ -1553,7 +1553,7 @@ trait TensorDsl extends DslOps with Diff {
def randseed(seed: Int) = unchecked[Unit]("srand(", seed, ")")
def randseed() = unchecked[Unit]("srand(time(NULL))")
def rand(dims: Int*) = randinit(dims.toSeq, 1.0f, None)
- def rand(scale: Float, dims: Int*) = randinit(dims.toSeq, scale, None)
+ def rand(dims: Seq[Int], scale: Float) = randinit(dims.toSeq, scale, None)
def randinit(dim0: Int): Tensor = randinit(Seq(dim0), 1.0f, None)
def randinit(dim0: Int, seed: Option[Int]): Tensor = randinit(Seq(dim0), 1.0f, seed)
def randinit(dim0: Int, dim1: Int, scale: Float): Tensor = randinit(Seq(dim0, dim1), scale, None)
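After this hunk the two public `rand` entry points differ only in whether a scale accompanies the shape. A short sketch, assuming the overloads above:

```scala
val u = Tensor.rand(3, 3)            // varargs shape, default scale 1.0f
val v = Tensor.rand(Seq(3, 3), 0.1f) // explicit Seq shape plus scale
```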
@@ -1577,9 +1577,9 @@ trait TensorDsl extends DslOps with Diff {
new Tensor(res, dims)
}

- def fill(value: Rep[Float], dims: Int*): Tensor = backend.makeRepeatingTensor(dims, value)
+ def fill(dims: Seq[Int], value: Rep[Float]): Tensor = backend.makeRepeatingTensor(dims, value)

- def fill(fFill: Seq[Rep[Int]] => Rep[Float], dims: Int*) = {
+ def fill(dims: Seq[Int], fFill: Seq[Rep[Int]] => Rep[Float]) = {
val scalarCount = dims.product
val res = backend.mallocArray[Float](scalarCount)

@@ -1601,7 +1601,7 @@ trait TensorDsl extends DslOps with Diff {
new Tensor(res, dims)
}

- def fillWithBias(bias: Tensor, dim: Int, dims: Int*) = {
+ def fillWithBias(dims: Seq[Int], bias: Tensor, dim: Int) = {
assert(dim < dims.size && dim >= 0, s"target dimension ${dim} is out of range ${dims}")
assert(bias.rank == 1 && bias.scalarCount == dims.drop(dim).head, s"bias should be 1D and have the same length as given dim")
val scalarCount = dims.product
@@ -1622,27 +1622,17 @@ trait TensorDsl extends DslOps with Diff {
new Tensor(res, dims)
}

- def zeros(dims: Int*): Tensor = {
-   fill(0.0f, dims: _*)
- }
-
- def zeros(that: Tensor): Tensor = {
-   zeros(that.shape : _*)
- }
-
- def zeros_like(that: Tensor) = {
-   zeros(that.shape : _*)
- }

def scalar(value: Rep[Float]) = {
val res = backend.mallocArray[Float](1)
res(0) = value
Tensor(res, 1)
}

- def ones(dims: Int*) = fill(1.0f, dims: _*)
- def ones(that: Tensor) = fill(1.0f, that.shape: _*)
- def halves(dims: Int*) = fill(0.5f, dims: _*)
+ def zeros(dims: Int*): Tensor = fill(dims, 0.0f)
+ def zeros_like(that: Tensor) = zeros(that.shape: _*)
+ def ones(dims: Int*) = fill(dims, 1.0f)
+ def ones_like(that: Tensor) = ones(that.shape: _*)
+ def halves(dims: Int*) = fill(dims, 0.5f)

def expand(vector: Tensor, dim1: Int) = {
assert(vector.rank == 1)
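With `fill(dims: Seq[Int], value)` as the primitive, the constant constructors collapse to one-liners, the redundant `zeros(that: Tensor)` overload is dropped in favor of `zeros_like`, and `ones_like` is added for symmetry. A sketch of the resulting surface, assuming the signatures in the hunk above:

```scala
val a = Tensor.zeros(2, 3)           // shape-only varargs still work
val b = Tensor.ones_like(a)          // new helper: a's shape, filled with 1.0f
val c = Tensor.halves(4)             // length-4 tensor of 0.5f
val d = Tensor.fill(Seq(2, 3), 7.0f) // explicit Seq when a value follows
```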
18 changes: 9 additions & 9 deletions src/test/scala/lantern/MnistCNNTest.scala
@@ -50,7 +50,7 @@ class MnistCNN extends FunSuite {
val (smRow1, smCol1) = (2, 2)

// FIXME scale based on PyTorch
- val varConv1 = TensorR(Tensor.rand(1.0f / sqrt(inChan1 * kRow1 * kCol1).toFloat, outChan1, inChan1, kRow1, kCol1))
+ val varConv1 = TensorR(Tensor.rand(Seq(outChan1, inChan1, kRow1, kCol1), 1.0f / sqrt(inChan1 * kRow1 * kCol1).toFloat))
variables += varConv1

// input size
@@ -62,23 +62,23 @@
// stride maxpool
val (smRow2, smCol2) = (2, 2)

- val varConv2 = TensorR(Tensor.rand(1.0f / sqrt(inChan2 * kRow2 * kCol2).toFloat, outChan2, inChan2, kRow2, kCol2))
+ val varConv2 = TensorR(Tensor.rand(Seq(outChan2, inChan2, kRow2, kCol2), 1.0f / sqrt(inChan2 * kRow2 * kCol2).toFloat))
variables += varConv2

// Layer 3
val (oRow2, oCol2) = (convSize(iRow2, kRow2, sRow2)/smRow2, convSize(iCol2, kCol2, sCol2)/smCol2)
val (in3, out3) = (outChan2 * oRow2 * oCol2, 50) // 320

- val varA1 = TensorR(Tensor.rand(1.0f / sqrt(in3).toFloat, out3, in3))
- val varB1 = TensorR(Tensor.rand(1.0f / sqrt(in3).toFloat, out3))
+ val varA1 = TensorR(Tensor.rand(Seq(out3, in3), 1.0f / sqrt(in3).toFloat))
+ val varB1 = TensorR(Tensor.rand(Seq(out3), 1.0f / sqrt(in3).toFloat))
variables += varA1
variables += varB1

// Layer 4
val (in4, out4) = (out3, 10)

- val varA2 = TensorR(Tensor.rand(1.0f / sqrt(in4).toFloat, out4, in4))
- val varB2 = TensorR(Tensor.rand(1.0f / sqrt(in4).toFloat, out4))
+ val varA2 = TensorR(Tensor.rand(Seq(out4, in4), 1.0f / sqrt(in4).toFloat))
+ val varB2 = TensorR(Tensor.rand(Seq(out4), 1.0f / sqrt(in4).toFloat))
variables += varA2
variables += varB2

@@ -87,12 +87,12 @@
val lr = 0.0005f
val mom = 0.0f

- val momentum = if (mom > 0.0f) variables map(tR => Tensor.zeros(tR.d)) else ArrayBuffer[Tensor]()
+ val momentum = if (mom > 0.0f) variables map(tR => Tensor.zeros_like(tR.d)) else ArrayBuffer[Tensor]()

val tot1 = NewArray[Long](2)
val tot2 = NewArray[Long](2)

val train = new Dataset.DataLoader("mnist", true, mean, std, iChan1, iRow1, iCol1)
val train = new Dataset.DataLoader("mnist", true, mean, std, Seq(iChan1, iRow1, iCol1))
printf("Start normalize\\n")
train.normalize()

@@ -240,7 +240,7 @@
val tot1 = NewArray[Long](2)
val tot2 = NewArray[Long](2)

val train = new Dataset.DataLoader("mnist", true, mean = 0.1307f, std = 0.3081f, iChan1, iRow1, iCol1)
val train = new Dataset.DataLoader("mnist", true, mean = 0.1307f, std = 0.3081f, Seq(iChan1, iRow1, iCol1))
train.normalize()

val prepareTime = dataTimer.getElapsedTime / 1e6f
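The `DataLoader` change is the same pattern applied to a constructor: with `dims` as a single `Seq[Int]`, the shape no longer trails the named `mean`/`std` arguments as bare varargs. A sketch with the MNIST shape used in this test:

```scala
val train = new Dataset.DataLoader("mnist", true,
  mean = 0.1307f, std = 0.3081f, Seq(1, 28, 28)) // (channels, rows, cols)
train.normalize()
```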
4 changes: 2 additions & 2 deletions src/test/scala/lantern/TestCublas.scala
@@ -37,7 +37,7 @@ class TestCublas extends LanternFunSuite {
val result = m.dot(v).toCPU()

backend = BackendCPU()
- val expected = Tensor.fill(4, 2)
+ val expected = Tensor.fill(Seq(2), 4)
Tensor.assertEqual(result, expected)
}
}
@@ -62,7 +62,7 @@
val result = m1.dot(m2).toCPU()

backend = BackendCPU()
- val expected = Tensor.fill(4, 4, 4)
+ val expected = Tensor.fill(Seq(4, 4), 4)
Tensor.assertEqual(result, expected)
}
}
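These test updates illustrate the ambiguity the reordering removes: under the old API, `Tensor.fill(4, 2)` meant a length-2 tensor filled with 4, yet it reads just as plausibly as a 4x2 tensor. With the shape isolated in a `Seq`, the intent is explicit. A sketch of the expected tensors, assuming the new signature:

```scala
val expected1 = Tensor.fill(Seq(2), 4)    // [4.0, 4.0]
val expected2 = Tensor.fill(Seq(4, 4), 4) // 4x4, every entry 4.0
```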