From ed0555b39e6844970c10c79a7d1f7cd3a94b7549 Mon Sep 17 00:00:00 2001 From: Eric Wang Date: Fri, 20 May 2022 04:25:54 +1000 Subject: [PATCH] feature: Allow user to provide their own content hash to minimise IO operation (#129) * content hash class to parse json file, di and args * use provided content hash whenever possible * update docs * use existing deserialiser Co-authored-by: Maxwell Elliott <56700854+tinder-maxwellelliott@users.noreply.github.com> --- README.md | 5 + cli/BUILD | 21 +++ .../bazel_diff/cli/GenerateHashesCommand.kt | 21 +++ .../main/kotlin/com/bazel_diff/di/Modules.kt | 4 + .../com/bazel_diff/hash/SourceFileHasher.kt | 42 ++++- .../interactor/DeserialiseHashesInteractor.kt | 5 +- .../com/bazel_diff/io/ContentHashProvider.kt | 14 ++ cli/src/test/kotlin/com/bazel_diff/Modules.kt | 7 +- .../bazel_diff/hash/SourceFileHasherTest.kt | 143 ++++++++++++++++++ .../kotlin/com/bazel_diff/hash/fixture/foo.ts | 1 + .../bazel_diff/io/ContentHashProviderTest.kt | 51 +++++++ .../com/bazel_diff/io/fixture/correct.json | 4 + .../com/bazel_diff/io/fixture/wrong.json | 5 + 13 files changed, 310 insertions(+), 13 deletions(-) create mode 100644 cli/src/main/kotlin/com/bazel_diff/io/ContentHashProvider.kt create mode 100644 cli/src/test/kotlin/com/bazel_diff/hash/SourceFileHasherTest.kt create mode 100644 cli/src/test/kotlin/com/bazel_diff/hash/fixture/foo.ts create mode 100644 cli/src/test/kotlin/com/bazel_diff/io/ContentHashProviderTest.kt create mode 100644 cli/src/test/kotlin/com/bazel_diff/io/fixture/correct.json create mode 100644 cli/src/test/kotlin/com/bazel_diff/io/fixture/wrong.json diff --git a/README.md b/README.md index 2d36df2..7f7dcb0 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,11 @@ workspace. -co, --bazelCommandOptions= Additional space separated Bazel command options used when invoking Bazel + --contentHashPath= + Path to content hash json file. It's a map which maps + relative file path from workspace path to its + content hash. Files in this map will skip content + hashing and use provided value -h, --help Show this help message and exit. -k, --[no-]keep_going This flag controls if `bazel query` will be executed with the `--keep_going` flag or not. Disabling this diff --git a/cli/BUILD b/cli/BUILD index 22077a6..b082b05 100644 --- a/cli/BUILD +++ b/cli/BUILD @@ -59,6 +59,15 @@ kt_jvm_test( runtime_deps = [":cli-test-lib"], ) +kt_jvm_test( + name = "SourceFileHasherTest", + data = [ + ":src/test/kotlin/com/bazel_diff/hash/fixture/foo.ts", + ], + test_class = "com.bazel_diff.hash.SourceFileHasherTest", + runtime_deps = [":cli-test-lib"], +) + kt_jvm_test( name = "CalculateImpactedTargetsInteractorTest", test_class = "com.bazel_diff.interactor.CalculateImpactedTargetsInteractorTest", @@ -89,6 +98,18 @@ kt_jvm_test( runtime_deps = [":cli-test-lib"], ) +kt_jvm_test( + name = "ContentHashProviderTest", + data = [ + ":src/test/kotlin/com/bazel_diff/io/fixture/correct.json", + ":src/test/kotlin/com/bazel_diff/io/fixture/wrong.json", + ], + test_class = "com.bazel_diff.io.ContentHashProviderTest", + runtime_deps = [ + ":cli-test-lib", + ], +) + kt_jvm_library( name = "cli-test-lib", testonly = True, diff --git a/cli/src/main/kotlin/com/bazel_diff/cli/GenerateHashesCommand.kt b/cli/src/main/kotlin/com/bazel_diff/cli/GenerateHashesCommand.kt index de618f2..9e2f37d 100644 --- a/cli/src/main/kotlin/com/bazel_diff/cli/GenerateHashesCommand.kt +++ b/cli/src/main/kotlin/com/bazel_diff/cli/GenerateHashesCommand.kt @@ -40,6 +40,14 @@ class GenerateHashesCommand : Callable { ) lateinit var bazelPath: Path + @CommandLine.Option( + names = ["--contentHashPath"], + description = ["Path to content hash json file. It's a map which maps relative file path from workspace path to its content hash. Files in this map will skip content hashing and use provided value"], + scope = CommandLine.ScopeType.INHERIT, + required = false + ) + var contentHashPath: File? = null + @CommandLine.Option( names = ["-so", "--bazelStartupOptions"], description = ["Additional space separated Bazel client startup options used when invoking Bazel"], @@ -81,12 +89,14 @@ class GenerateHashesCommand : Callable { override fun call(): Int { val output = validateOutput(outputPath) + validate(contentHashPath=contentHashPath) startKoin { modules( hasherModule( workspacePath, bazelPath, + contentHashPath, bazelStartupOptions, bazelCommandOptions, keepGoing, @@ -108,4 +118,15 @@ class GenerateHashesCommand : Callable { "No output path specified." ) } + + private fun validate(contentHashPath: File?) { + contentHashPath?.let { + if (!it.canRead()) { + throw CommandLine.ParameterException( + spec.commandLine(), + "Incorrect contentHashFilePath: file doesn't exist or can't be read." + ) + } + } + } } diff --git a/cli/src/main/kotlin/com/bazel_diff/di/Modules.kt b/cli/src/main/kotlin/com/bazel_diff/di/Modules.kt index 5547957..31ec400 100644 --- a/cli/src/main/kotlin/com/bazel_diff/di/Modules.kt +++ b/cli/src/main/kotlin/com/bazel_diff/di/Modules.kt @@ -6,17 +6,20 @@ import com.bazel_diff.hash.BuildGraphHasher import com.bazel_diff.hash.RuleHasher import com.bazel_diff.hash.SourceFileHasher import com.bazel_diff.hash.TargetHasher +import com.bazel_diff.io.ContentHashProvider import com.bazel_diff.log.Logger import com.bazel_diff.log.StdoutLogger import com.google.gson.GsonBuilder import org.koin.core.module.Module import org.koin.core.qualifier.named import org.koin.dsl.module +import java.io.File import java.nio.file.Path fun hasherModule( workingDirectory: Path, bazelPath: Path, + contentHashPath: File?, startupOptions: List, commandOptions: List, keepGoing: Boolean?, @@ -38,6 +41,7 @@ fun hasherModule( single { RuleHasher() } single { SourceFileHasher() } single(named("working-directory")) { workingDirectory } + single { ContentHashProvider(contentHashPath) } } fun loggingModule(verbose: Boolean) = module { diff --git a/cli/src/main/kotlin/com/bazel_diff/hash/SourceFileHasher.kt b/cli/src/main/kotlin/com/bazel_diff/hash/SourceFileHasher.kt index 30227fb..ae041cd 100644 --- a/cli/src/main/kotlin/com/bazel_diff/hash/SourceFileHasher.kt +++ b/cli/src/main/kotlin/com/bazel_diff/hash/SourceFileHasher.kt @@ -1,6 +1,7 @@ package com.bazel_diff.hash import com.bazel_diff.bazel.BazelSourceFileTarget +import com.bazel_diff.io.ContentHashProvider import com.bazel_diff.log.Logger import org.koin.core.component.KoinComponent import org.koin.core.component.inject @@ -9,21 +10,46 @@ import java.nio.file.Path import java.nio.file.Paths class SourceFileHasher : KoinComponent { - private val workingDirectory: Path by inject(qualifier = named("working-directory")) - private val logger: Logger by inject() + private val workingDirectory: Path + private val logger: Logger + private val relativeFilenameToContentHash: Map? + init { + val logger: Logger by inject() + this.logger = logger + } + + constructor() { + val workingDirectory: Path by inject(qualifier = named("working-directory")) + this.workingDirectory = workingDirectory + val contentHashProvider: ContentHashProvider by inject() + relativeFilenameToContentHash = contentHashProvider.filenameToHash + } + + constructor(workingDirectory: Path, relativeFilenameToContentHash: Map?) { + this.workingDirectory = workingDirectory + this.relativeFilenameToContentHash = relativeFilenameToContentHash + } fun digest(sourceFileTarget: BazelSourceFileTarget): ByteArray { return sha256 { val name = sourceFileTarget.name if (name.startsWith("//")) { val filenameSubstring = name.substring(2) - val filenamePath = filenameSubstring.replaceFirst(":".toRegex(), "/") - val absoluteFilePath = Paths.get(workingDirectory.toString(), filenamePath) - val file = absoluteFilePath.toFile() - if (file.exists() && file.isFile) { - putFile(file) + val filenamePath = filenameSubstring.replaceFirst( + ":".toRegex(), + if (filenameSubstring.startsWith(":")) "" else "/" + ) + if (relativeFilenameToContentHash?.contains(filenamePath) == true) { + val contentHash = relativeFilenameToContentHash.getValue(filenamePath) + safePutBytes(contentHash.toByteArray()) } else { - logger.w { "File $absoluteFilePath not found" } + val absoluteFilePath = Paths.get(workingDirectory.toString(), filenamePath) + val file = absoluteFilePath.toFile() + if (file.exists() && file.isFile) { + putFile(file) + } else { + logger.w { "File $absoluteFilePath not found" } + } } safePutBytes(sourceFileTarget.seed) safePutBytes(name.toByteArray()) diff --git a/cli/src/main/kotlin/com/bazel_diff/interactor/DeserialiseHashesInteractor.kt b/cli/src/main/kotlin/com/bazel_diff/interactor/DeserialiseHashesInteractor.kt index badde3b..d4be7db 100644 --- a/cli/src/main/kotlin/com/bazel_diff/interactor/DeserialiseHashesInteractor.kt +++ b/cli/src/main/kotlin/com/bazel_diff/interactor/DeserialiseHashesInteractor.kt @@ -1,6 +1,7 @@ package com.bazel_diff.interactor import com.google.gson.Gson +import com.google.gson.reflect.TypeToken import org.koin.core.component.KoinComponent import org.koin.core.component.inject import java.io.File @@ -13,7 +14,7 @@ class DeserialiseHashesInteractor : KoinComponent { * @param file path to file that has been pre-validated */ fun execute(file: File): Map { - val gsonHash: Map = HashMap() - return gson.fromJson(FileReader(file), gsonHash.javaClass) + val shape = object : TypeToken>() {}.type + return gson.fromJson(FileReader(file), shape) } } diff --git a/cli/src/main/kotlin/com/bazel_diff/io/ContentHashProvider.kt b/cli/src/main/kotlin/com/bazel_diff/io/ContentHashProvider.kt new file mode 100644 index 0000000..9041bff --- /dev/null +++ b/cli/src/main/kotlin/com/bazel_diff/io/ContentHashProvider.kt @@ -0,0 +1,14 @@ +package com.bazel_diff.io + +import com.bazel_diff.interactor.DeserialiseHashesInteractor +import java.io.File + +class ContentHashProvider(file: File?) { + // filename relative to workspace -> content hash of the file + val filenameToHash: Map? = if (file == null) null else readJson(file) + + private fun readJson(file: File): Map { + val deserialiser = DeserialiseHashesInteractor() + return deserialiser.execute(file) + } +} diff --git a/cli/src/test/kotlin/com/bazel_diff/Modules.kt b/cli/src/test/kotlin/com/bazel_diff/Modules.kt index b03974a..db51971 100644 --- a/cli/src/test/kotlin/com/bazel_diff/Modules.kt +++ b/cli/src/test/kotlin/com/bazel_diff/Modules.kt @@ -1,18 +1,17 @@ package com.bazel_diff import com.bazel_diff.bazel.BazelClient -import com.bazel_diff.bazel.BazelQueryService import com.bazel_diff.hash.BuildGraphHasher import com.bazel_diff.hash.RuleHasher import com.bazel_diff.hash.SourceFileHasher import com.bazel_diff.hash.TargetHasher +import com.bazel_diff.io.ContentHashProvider import com.bazel_diff.log.Logger -import com.bazel_diff.log.StdoutLogger import com.google.gson.GsonBuilder import org.koin.core.module.Module import org.koin.core.qualifier.named import org.koin.dsl.module -import java.nio.file.Path +import java.nio.file.Paths fun testModule(): Module = module { single { SilentLogger } @@ -22,6 +21,8 @@ fun testModule(): Module = module { single { RuleHasher() } single { SourceFileHasher() } single { GsonBuilder().setPrettyPrinting().create() } + single(named("working-directory")) { Paths.get("working-directory") } + single { ContentHashProvider(null) } } object SilentLogger : Logger { diff --git a/cli/src/test/kotlin/com/bazel_diff/hash/SourceFileHasherTest.kt b/cli/src/test/kotlin/com/bazel_diff/hash/SourceFileHasherTest.kt new file mode 100644 index 0000000..47c537b --- /dev/null +++ b/cli/src/test/kotlin/com/bazel_diff/hash/SourceFileHasherTest.kt @@ -0,0 +1,143 @@ +package com.bazel_diff.hash + +import assertk.assertThat +import assertk.assertions.isEqualTo +import assertk.assertions.isNull +import com.bazel_diff.bazel.BazelSourceFileTarget +import com.bazel_diff.extensions.toHexString +import com.bazel_diff.testModule +import kotlinx.coroutines.runBlocking +import org.junit.Rule +import org.junit.Test +import org.koin.test.KoinTest +import org.koin.test.KoinTestRule +import java.nio.file.Files +import java.nio.file.Paths + + +internal class SourceFileHasherTest: KoinTest { + private val repoAbsolutePath = Paths.get("").toAbsolutePath() + private val fixtureFileTarget = "//cli/src/test/kotlin/com/bazel_diff/hash/fixture:foo.ts" + private val fixtureFileContent: ByteArray + private val seed = "seed".toByteArray() + + init { + val path = Paths.get("cli/src/test/kotlin/com/bazel_diff/hash/fixture/foo.ts") + fixtureFileContent = Files.readAllBytes(path) + } + + + @get:Rule + val koinTestRule = KoinTestRule.create { + modules(testModule()) + } + + @Test + fun testHashConcreteFile() = runBlocking { + val hasher = SourceFileHasher(repoAbsolutePath, null) + val bazelSourceFileTarget = BazelSourceFileTarget(fixtureFileTarget, seed) + val actual = hasher.digest(bazelSourceFileTarget).toHexString() + val expected = sha256 { + safePutBytes(fixtureFileContent) + safePutBytes(seed) + safePutBytes(fixtureFileTarget.toByteArray()) + }.toHexString() + assertThat(actual).isEqualTo(expected) + } + + @Test + fun testSoftHashConcreteFile() = runBlocking { + val hasher = SourceFileHasher(repoAbsolutePath, null) + val bazelSourceFileTarget = BazelSourceFileTarget(fixtureFileTarget, seed) + val actual = hasher.softDigest(bazelSourceFileTarget)?.toHexString() + val expected = sha256 { + safePutBytes(fixtureFileContent) + safePutBytes(seed) + safePutBytes(fixtureFileTarget.toByteArray()) + }.toHexString() + assertThat(actual).isEqualTo(expected) + } + + @Test + fun testSoftHashNonExistedFile() = runBlocking { + val hasher = SourceFileHasher(repoAbsolutePath, null) + val bazelSourceFileTarget = BazelSourceFileTarget("//i/do/not/exist", seed) + val actual = hasher.softDigest(bazelSourceFileTarget) + assertThat(actual).isNull() + } + + @Test + fun testSoftHashExternalTarget() = runBlocking { + val target = "@bazel-diff//some:file" + val hasher = SourceFileHasher(repoAbsolutePath, null) + val bazelSourceFileTarget = BazelSourceFileTarget(target, seed) + val actual = hasher.softDigest(bazelSourceFileTarget) + assertThat(actual).isNull() + } + + @Test + fun testHashNonExistedFile() = runBlocking { + val target = "//i/do/not/exist" + val hasher = SourceFileHasher(repoAbsolutePath, null) + val bazelSourceFileTarget = BazelSourceFileTarget(target, seed) + val actual = hasher.digest(bazelSourceFileTarget).toHexString() + val expected = sha256 { + safePutBytes(seed) + safePutBytes(target.toByteArray()) + }.toHexString() + assertThat(actual).isEqualTo(expected) + } + + @Test + fun testHashExternalTarget() = runBlocking { + val target = "@bazel-diff//some:file" + val hasher = SourceFileHasher(repoAbsolutePath, null) + val bazelSourceFileTarget = BazelSourceFileTarget(target, seed) + val actual = hasher.digest(bazelSourceFileTarget).toHexString() + val expected = sha256 {}.toHexString() + assertThat(actual).isEqualTo(expected) + } + + @Test + fun testHashWithProvidedContentHash() = runBlocking { + val filenameToContentHash = hashMapOf("cli/src/test/kotlin/com/bazel_diff/hash/fixture/foo.ts" to "foo-content-hash") + val hasher = SourceFileHasher(repoAbsolutePath, filenameToContentHash) + val bazelSourceFileTarget = BazelSourceFileTarget(fixtureFileTarget, seed) + val actual = hasher.digest(bazelSourceFileTarget).toHexString() + val expected = sha256 { + safePutBytes("foo-content-hash".toByteArray()) + safePutBytes(seed) + safePutBytes(fixtureFileTarget.toByteArray()) + }.toHexString() + assertThat(actual).isEqualTo(expected) + } + + @Test + fun testHashWithProvidedContentHashButNotInKey() = runBlocking { + val filenameToContentHash = hashMapOf("cli/src/test/kotlin/com/bazel_diff/hash/fixture/bar.ts" to "foo-content-hash") + val hasher = SourceFileHasher(repoAbsolutePath, filenameToContentHash) + val bazelSourceFileTarget = BazelSourceFileTarget(fixtureFileTarget, seed) + val actual = hasher.digest(bazelSourceFileTarget).toHexString() + val expected = sha256 { + safePutBytes(fixtureFileContent) + safePutBytes(seed) + safePutBytes(fixtureFileTarget.toByteArray()) + }.toHexString() + assertThat(actual).isEqualTo(expected) + } + + @Test + fun testHashWithProvidedContentHashWithLeadingColon() = runBlocking { + val targetName = "//:cli/src/test/kotlin/com/bazel_diff/hash/fixture/bar.ts" + val filenameToContentHash = hashMapOf("cli/src/test/kotlin/com/bazel_diff/hash/fixture/bar.ts" to "foo-content-hash") + val hasher = SourceFileHasher(repoAbsolutePath, filenameToContentHash) + val bazelSourceFileTarget = BazelSourceFileTarget(targetName, seed) + val actual = hasher.digest(bazelSourceFileTarget).toHexString() + val expected = sha256 { + safePutBytes("foo-content-hash".toByteArray()) + safePutBytes(seed) + safePutBytes(targetName.toByteArray()) + }.toHexString() + assertThat(actual).isEqualTo(expected) + } +} diff --git a/cli/src/test/kotlin/com/bazel_diff/hash/fixture/foo.ts b/cli/src/test/kotlin/com/bazel_diff/hash/fixture/foo.ts new file mode 100644 index 0000000..9425850 --- /dev/null +++ b/cli/src/test/kotlin/com/bazel_diff/hash/fixture/foo.ts @@ -0,0 +1 @@ +console.log('123') diff --git a/cli/src/test/kotlin/com/bazel_diff/io/ContentHashProviderTest.kt b/cli/src/test/kotlin/com/bazel_diff/io/ContentHashProviderTest.kt new file mode 100644 index 0000000..6bfd7dd --- /dev/null +++ b/cli/src/test/kotlin/com/bazel_diff/io/ContentHashProviderTest.kt @@ -0,0 +1,51 @@ +package com.bazel_diff.io + +import assertk.assertThat +import assertk.assertions.* +import com.bazel_diff.testModule +import com.google.gson.JsonSyntaxException +import kotlinx.coroutines.runBlocking +import org.junit.Rule +import org.junit.Test +import org.koin.test.KoinTest +import org.koin.test.KoinTestRule +import java.io.File + +internal class ContentHashProviderTest: KoinTest { + @get:Rule + val koinTestRule = KoinTestRule.create { + modules(testModule()) + } + + @Test + fun testNullPath() = runBlocking { + val contentHashProvider = ContentHashProvider(null) + assertThat(contentHashProvider.filenameToHash).isNull() + } + + @Test + fun testNonExistingPath() = runBlocking { + assertThat { + ContentHashProvider(File("/not/exists")) + }.isFailure().hasClass(java.io.FileNotFoundException::class) + } + + @Test + fun testParseJsonFileWithWrongShape() = runBlocking { + val file = File("cli/src/test/kotlin/com/bazel_diff/io/fixture/wrong.json") + assertThat { + val a = ContentHashProvider(file) + println(a.filenameToHash) + }.isFailure().hasClass(JsonSyntaxException::class) + } + + @Test + fun testParseJsonFileWithCorrectShape() = runBlocking { + val file = File("cli/src/test/kotlin/com/bazel_diff/io/fixture/correct.json") + val map = ContentHashProvider(file).filenameToHash + assertThat(map).isNotNull().containsOnly( + "foo" to "content-hash-for-foo", + "bar" to "content-hash-for-bar" + ) + } +} diff --git a/cli/src/test/kotlin/com/bazel_diff/io/fixture/correct.json b/cli/src/test/kotlin/com/bazel_diff/io/fixture/correct.json new file mode 100644 index 0000000..100cc6c --- /dev/null +++ b/cli/src/test/kotlin/com/bazel_diff/io/fixture/correct.json @@ -0,0 +1,4 @@ +{ + "foo": "content-hash-for-foo", + "bar": "content-hash-for-bar" +} diff --git a/cli/src/test/kotlin/com/bazel_diff/io/fixture/wrong.json b/cli/src/test/kotlin/com/bazel_diff/io/fixture/wrong.json new file mode 100644 index 0000000..ebbc37d --- /dev/null +++ b/cli/src/test/kotlin/com/bazel_diff/io/fixture/wrong.json @@ -0,0 +1,5 @@ +{ + "foo": { + "bar": "abcd" + } +}