Add 1-persistent-targets and 2-worker-task examples (#2404)
lihaoyi authored Apr 3, 2023
1 parent d02ba59 commit 2abecd5
Showing 12 changed files with 191 additions and 5 deletions.
81 changes: 81 additions & 0 deletions example/misc/1-persistent-targets/build.sc
@@ -0,0 +1,81 @@
import mill._, scalalib._
import java.util.Arrays, java.io.ByteArrayOutputStream, java.util.zip.GZIPOutputStream

def data = T.source(millSourcePath / "data")

def compressedData = T.persistent{
  println("Evaluating compressedData")
  os.makeDir.all(T.dest / "cache")
  os.remove.all(T.dest / "compressed")

  for(p <- os.list(data().path)){
    val compressedPath = T.dest / "compressed" / s"${p.last}.gz"
    val bytes = os.read.bytes(p)
    val hash = Arrays.hashCode(bytes)
    val cachedPath = T.dest / "cache" / hash.toHexString
    if (!os.exists(cachedPath)) {
      println("Compressing: " + p.last)
      os.write(cachedPath, compressBytes(bytes))
    } else {
      println("Reading Cached from disk: " + p.last)
    }
    os.copy(cachedPath, compressedPath, createFolders = true)
  }

  os.list(T.dest / "compressed").map(PathRef(_))
}

def compressBytes(input: Array[Byte]) = {
  val bos = new ByteArrayOutputStream(input.length)
  val gzip = new GZIPOutputStream(bos)
  gzip.write(input)
  gzip.close()
  bos.toByteArray
}

// Persistent targets defined using `T.persistent` are similar to normal
// `Target`s, except that their `T.dest` folder is not cleared before every
// evaluation. This makes them useful for caching things on disk in a more
// fine-grained manner than Mill's own target-level caching.
//
// In this example, we implement a `compressedData` target that takes the
// folder of files referenced by `data` and compresses them, while maintaining
// a cache of compressed contents for each file. That means that if the `data`
// folder is modified but some files remain unchanged, those files are not
// unnecessarily re-compressed when `compressedData` evaluates.
//
// Since persistent targets keep long-lived state on disk beyond a single
// evaluation, that state can end up in a bad shape and cause all future
// evaluations to fail. It is up to whoever implements the `T.persistent`
// target to ensure the implementation is eventually consistent. You can also
// use `mill clean` to manually purge the disk contents and start fresh.
// (A stripped-down sketch of the same pattern follows the example usage
// below.)

/* Example Usage
> ./mill show compressedData
Evaluating compressedData
Compressing: hello.txt
Compressing: world.txt
hello.txt.gz
world.txt.gz
> ./mill compressedData # when no input changes, compressedData does not evaluate at all
> sed -i 's/Hello/HELLO/g' data/hello.txt
> ./mill compressedData # when one input file changes, only that file is re-compressed
Compressing: hello.txt
Reading Cached from disk: world.txt
> ./mill clean compressedData
> ./mill compressedData
Evaluating compressedData
Compressing: hello.txt
Compressing: world.txt
*/
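
For reference, here is a stripped-down sketch of the same `T.persistent` pattern, reduced to the bare incremental-cache skeleton. The `raw` source and the `expensiveTransform` helper are illustrative placeholders, not part of this commit; the sketch assumes only the Mill and os-lib APIs already used in the build.sc above.

import mill._
import java.util.Arrays

// Hypothetical input folder, analogous to `data` above
def raw = T.source(millSourcePath / "raw")

// Because the target is persistent, T.dest is NOT wiped between evaluations,
// so entries already present under T.dest / "cache" are reused as-is.
def processed = T.persistent {
  os.makeDir.all(T.dest / "cache")
  os.remove.all(T.dest / "out")
  for (p <- os.list(raw().path)) {
    val bytes = os.read.bytes(p)
    // Key the cache by content hash, so only edited files are re-processed
    val cached = T.dest / "cache" / Arrays.hashCode(bytes).toHexString
    if (!os.exists(cached)) os.write(cached, expensiveTransform(bytes))
    os.copy(cached, T.dest / "out" / p.last, createFolders = true)
  }
  os.list(T.dest / "out").map(PathRef(_))
}

// Stand-in for whatever per-file work the real target performs
def expensiveTransform(bytes: Array[Byte]): Array[Byte] = bytes.reverse
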
1 change: 1 addition & 0 deletions example/misc/1-persistent-targets/data/hello.txt
@@ -0,0 +1 @@
Hello
1 change: 1 addition & 0 deletions example/misc/1-persistent-targets/data/world.txt
@@ -0,0 +1 @@
World!
98 changes: 98 additions & 0 deletions example/misc/2-worker-tasks/build.sc
@@ -0,0 +1,98 @@
import mill._, scalalib._
import java.util.Arrays, java.io.ByteArrayOutputStream, java.util.zip.GZIPOutputStream

def data = T.source(millSourcePath / "data")

def compressWorker = T.worker{ new CompressWorker(T.dest) }

def compressedData = T{
  println("Evaluating compressedData")
  for(p <- os.list(data().path)){
    os.write(
      T.dest / s"${p.last}.gz",
      compressWorker().compress(p.last, os.read.bytes(p))
    )
  }
  os.list(T.dest).map(PathRef(_))
}

class CompressWorker(dest: os.Path){
  val cache = collection.mutable.Map.empty[Int, Array[Byte]]
  def compress(name: String, bytes: Array[Byte]): Array[Byte] = {
    val hash = Arrays.hashCode(bytes)
    if (!cache.contains(hash)) {
      val cachedPath = dest / hash.toHexString
      if (!os.exists(cachedPath)) {
        println("Compressing: " + name)
        cache(hash) = compressBytes(bytes)
        os.write(cachedPath, cache(hash))
      } else {
        println("Cached from disk: " + name)
        cache(hash) = os.read.bytes(cachedPath)
      }
    } else {
      println("Cached from memory: " + name)
    }
    cache(hash)
  }
}

def compressBytes(input: Array[Byte]) = {
  val bos = new ByteArrayOutputStream(input.length)
  val gzip = new GZIPOutputStream(bos)
  gzip.write(input)
  gzip.close()
  bos.toByteArray
}

// Mill workers defined using `T.worker` are long-lived in-memory objects that
// can persist across multiple evaluations. They are similar to persistent
// targets in that they let you cache things, but because they cache a live
// in-memory object rather than data on disk, they allow for greater
// performance and flexibility: you are no longer limited to caching
// serializable data, and you no longer pay the cost of writing it to disk on
// every evaluation. This example uses a worker to provide simple in-memory
// caching of compressed files.
//
// Common things to put in workers include:
//
// 1. References to third-party daemon processes, e.g. Webpack or wkhtmltopdf,
//    which perform their own in-memory caching
//
// 2. Classloaders containing plugin code, to avoid classpath conflicts while
//    also avoiding the classloading cost every time the code is executed
//
// Workers live as long as the Mill process. By default, consecutive `mill`
// commands in the same folder re-use the same Mill process and workers,
// unless `--no-server` is passed, which terminates the Mill process and
// workers after every command. Commands run repeatedly via `--watch` also
// preserve the workers between runs.
//
// Workers can also use their `T.dest` folder as a cache that persists after
// the worker shuts down, as a second layer of caching. The example usage
// below demonstrates how running with `--no-server` makes the worker read
// from its disk cache, where it would normally have read from its in-memory
// cache. (A stripped-down sketch of the worker pattern follows the example
// usage below.)

/* Example Usage
> ./mill show compressedData
Evaluating compressedData
Compressing: hello.txt
Compressing: world.txt
hello.txt.gz
world.txt.gz
> ./mill compressedData # when no input changes, compressedData does not evaluate at all
> sed -i 's/Hello/HELLO/g' data/hello.txt
> ./mill compressedData # not --no-server, we read the data from memory
Compressing: hello.txt
Cached from memory: world.txt
> ./mill compressedData # --no-server, we read the data from disk
Compressing: hello.txt
Cached from disk: world.txt
*/
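
As a companion to the build.sc above, here is a stripped-down sketch of the worker pattern on its own, with the expensive setup stubbed out. `ExpensiveThing` and `upperCased` are illustrative names, not part of this commit; the sketch assumes only the `T.worker` and `T` APIs already used above.

import mill._

// Hypothetical long-lived helper; in practice this might wrap a daemon
// process or an isolated classloader, as described in the notes above.
class ExpensiveThing {
  println("expensive setup, runs once per Mill process")
  def use(input: String): String = input.toUpperCase
}

// The worker body runs at most once per Mill process; subsequent
// evaluations that depend on it reuse the same in-memory instance.
def expensiveWorker = T.worker { new ExpensiveThing }

// An ordinary target that borrows the worker instead of paying the
// setup cost on every evaluation.
def upperCased = T {
  expensiveWorker().use("hello")
}
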
1 change: 1 addition & 0 deletions example/misc/2-worker-tasks/data/hello.txt
@@ -0,0 +1 @@
Hello
1 change: 1 addition & 0 deletions example/misc/2-worker-tasks/data/world.txt
@@ -0,0 +1 @@
World!
12 changes: 7 additions & 5 deletions example/src/mill/integration/ExampleTestSuite.scala
@@ -57,12 +57,14 @@ object ExampleTestSuite extends IntegrationTestSuite{
        case string => (string, None)
      }

-      val correctPlatform =
-        comment == None ||
-        (comment == Some("windows") && Util.windowsPlatform) ||
-        (comment == Some("mac/linux") && !Util.windowsPlatform)
+      val incorrectPlatform =
+        (comment.exists(_.startsWith("windows")) && !Util.windowsPlatform) ||
+        (comment.exists(_.startsWith("mac/linux")) && Util.windowsPlatform) ||
+        (comment.exists(_.startsWith("--no-server")) && integrationTestMode != "fork") ||
+        (comment.exists(_.startsWith("not --no-server")) && integrationTestMode == "fork")

-      if (correctPlatform) {
+
+      if (!incorrectPlatform) {
        println("ExampleTestSuite: " + commandBlockLines.head)
        processCommand(workspaceRoot, expectedSnippets, commandHead)
      }
@@ -149,6 +149,7 @@ abstract class IntegrationTestSuite extends TestSuite{
    // destination instead of the folder containing the wrapper.

    os.copy(scriptSourcePath, workspacePath)
+   os.remove.all(workspacePath / "out")
    workspacePath
  }

