diff --git a/.travis.yml b/.travis.yml
index c5662bc2..deb8432f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,7 +1,7 @@
 language: scala
 dist: xenial
 scala:
-  - 2.12.12
+  - 2.13.4
 jdk:
   - openjdk8
diff --git a/bfg-benchmark/build.sbt b/bfg-benchmark/build.sbt
index 64963c35..5ab97208 100644
--- a/bfg-benchmark/build.sbt
+++ b/bfg-benchmark/build.sbt
@@ -1,8 +1,7 @@
 import Dependencies._
 
-libraryDependencies ++= Seq(
+libraryDependencies ++= guava ++ Seq(
   madgagCompress,
-  scalaIoFile,
   textmatching,
   scopt
 )
\ No newline at end of file
diff --git a/bfg-benchmark/src/main/scala/Benchmark.scala b/bfg-benchmark/src/main/scala/Benchmark.scala
index f0ea6b20..28ef1d55 100644
--- a/bfg-benchmark/src/main/scala/Benchmark.scala
+++ b/bfg-benchmark/src/main/scala/Benchmark.scala
@@ -2,12 +2,13 @@
 import lib.Timing.measureTask
 import lib._
 import model._
+import java.nio.file.Files
+import java.nio.file.Files.isDirectory
 import scala.concurrent.ExecutionContext.Implicits.global
 import scala.concurrent._
 import scala.concurrent.duration.Duration
+import scala.jdk.StreamConverters._
 import scala.sys.process._
-import scalax.file.PathMatcher.IsDirectory
-import scalax.io.Codec
 
 /*
  * Vary BFG runs by:
@@ -17,24 +18,22 @@
  */
 object Benchmark extends App {
 
-  implicit val codec = Codec.UTF8
-
   BenchmarkConfig.parser.parse(args, BenchmarkConfig()) map { config =>
-    println(s"Using resources dir : ${config.resourcesDir.path}")
+    println(s"Using resources dir : ${config.resourcesDir}")
 
-    require(config.resourcesDir.exists, s"Resources dir not found : ${config.resourcesDir.path}")
-    require(config.jarsDir.exists, s"Jars dir not found : ${config.jarsDir.path}")
-    require(config.reposDir.exists, s"Repos dir not found : ${config.reposDir.path}")
+    require(Files.exists(config.resourcesDir), s"Resources dir not found : ${config.resourcesDir}")
+    require(Files.exists(config.jarsDir), s"Jars dir not found : ${config.jarsDir}")
+    require(Files.exists(config.reposDir), s"Repos dir not found : ${config.reposDir}")
 
-    val missingJars = config.bfgJars.filterNot(_.exists).map(_.toAbsolute.path)
+    val missingJars = config.bfgJars.filterNot(Files.exists(_))
     require(missingJars.isEmpty, s"Missing BFG jars : ${missingJars.mkString(",")}")
 
     val tasksFuture = for {
      bfgInvocableEngineSet <- bfgInvocableEngineSet(config)
     } yield {
       val gfbInvocableEngineSetOpt =
-        if (config.onlyBfg) None else Some(InvocableEngineSet[GFBInvocation](GitFilterBranch, Seq(InvocableGitFilterBranch)))
+        Option.when(!config.onlyBfg)(InvocableEngineSet[GFBInvocation](GitFilterBranch, Seq(InvocableGitFilterBranch)))
 
       boogaloo(config, new RepoExtractor(config.scratchDir), Seq(bfgInvocableEngineSet) ++ gfbInvocableEngineSetOpt.toSeq)
     }
@@ -59,27 +58,24 @@ object Benchmark extends App {
   def boogaloo(config: BenchmarkConfig, repoExtractor: RepoExtractor, invocableEngineSets: Seq[InvocableEngineSet[_ <: EngineInvocation]]) = {
     for {
-      repoSpecDir <- config.repoSpecDirs.toList
-      availableCommandDirs = (repoSpecDir / "commands").children().filter(IsDirectory).toList
+      repoSpecDir <- config.repoSpecDirs
+      availableCommandDirs = Files.list(repoSpecDir.resolve("commands")).toScala(Seq).filter(isDirectory(_))
       // println(s"Available commands for $repoName : ${availableCommandDirs.map(_.name).mkString(", ")}")
-      commandDir <- availableCommandDirs.filter(p => config.commands(p.name))
+      commandDir <- availableCommandDirs.filter(p => config.commands(p.getFileName.toString))
     } yield {
-
-      val repoName = repoSpecDir.name
-
-      val commandName = commandDir.name
+      val commandName: String = commandDir.getFileName.toString
 
       commandName -> (for {
         invocableEngineSet <- invocableEngineSets
       } yield for {
         (invocable, processMaker) <- invocableEngineSet.invocationsFor(commandDir)
       } yield {
-        val cleanRepoDir = repoExtractor.extractRepoFrom(repoSpecDir / "repo.git.zip")
-        commandDir.children().foreach(p => p.copyTo(cleanRepoDir / p.name))
-        val process = processMaker(cleanRepoDir)
+        val cleanRepoDir = repoExtractor.extractRepoFrom(repoSpecDir.resolve("repo.git.zip"))
+        Files.list(commandDir).toScala(Seq).foreach(p => Files.copy(p, cleanRepoDir.resolve(p.getFileName)))
+        val process = processMaker(cleanRepoDir.toFile)
 
         val duration = measureTask(s"$commandName - $invocable") {
-          process ! ProcessLogger(_ => Unit)
+          process ! ProcessLogger(_ => ())
         }
 
         if (config.dieIfTaskTakesLongerThan.exists(_ < duration.toMillis)) {
diff --git a/bfg-benchmark/src/main/scala/BenchmarkConfig.scala b/bfg-benchmark/src/main/scala/BenchmarkConfig.scala
index 14aa4ea4..15476d08 100644
--- a/bfg-benchmark/src/main/scala/BenchmarkConfig.scala
+++ b/bfg-benchmark/src/main/scala/BenchmarkConfig.scala
@@ -1,16 +1,13 @@
 import java.io.File
-
 import com.madgag.textmatching.{Glob, TextMatcher}
 import scopt.OptionParser
-import scalax.file.ImplicitConversions._
-import scalax.file.Path
-import scalax.file.defaultfs.DefaultPath
+import java.nio.file.{Path, Paths}
 
 object BenchmarkConfig {
   val parser = new OptionParser[BenchmarkConfig]("benchmark") {
     opt[File]("resources-dir").text("benchmark resources folder - contains jars and repos").action {
-      (v, c) => c.copy(resourcesDirOption = v)
+      (v, c) => c.copy(resourcesDirOption = v.toPath)
     }
     opt[String]("java").text("Java command paths").action {
       (v, c) => c.copy(javaCmds = v.split(',').toSeq)
@@ -28,13 +25,13 @@ object BenchmarkConfig {
       (v, c) => c.copy(commands = TextMatcher(v, defaultType = Glob))
     }
     opt[File]("scratch-dir").text("Temp-dir for job runs - preferably ramdisk, eg tmpfs.").action {
-      (v, c) => c.copy(scratchDir = v)
+      (v, c) => c.copy(scratchDir = v.toPath)
     }
     opt[Unit]("only-bfg") action { (_, c) => c.copy(onlyBfg = true) } text "Don't benchmark git-filter-branch"
   }
 }
-case class BenchmarkConfig(resourcesDirOption: Path = Path.fromString(System.getProperty("user.dir")) / "bfg-benchmark" / "resources",
-                           scratchDir: DefaultPath = Path.fromString("/dev/shm/"),
+case class BenchmarkConfig(resourcesDirOption: Path = Paths.get(System.getProperty("user.dir"), "bfg-benchmark", "resources"),
+                           scratchDir: Path = Paths.get("/dev/shm/"),
                            javaCmds: Seq[String] = Seq("java"),
                            bfgVersions: Seq[String] = Seq.empty,
                            commands: TextMatcher = Glob("*"),
@@ -42,13 +39,13 @@ case class BenchmarkConfig(resourcesDirOption: Path = Path.fromString(System.get
                            dieIfTaskTakesLongerThan: Option[Int] = None,
                            repoNames: Seq[String] = Seq.empty) {
 
-  lazy val resourcesDir = Path.fromString(resourcesDirOption.path).toAbsolute
+  lazy val resourcesDir: Path = resourcesDirOption.toAbsolutePath
 
-  lazy val jarsDir = resourcesDir / "jars"
+  lazy val jarsDir: Path = resourcesDir.resolve("jars")
 
-  lazy val reposDir = resourcesDir / "repos"
+  lazy val reposDir: Path = resourcesDir.resolve("repos")
 
-  lazy val bfgJars = bfgVersions.map(version => jarsDir / s"bfg-$version.jar")
+  lazy val bfgJars: Seq[Path] = bfgVersions.map(version => jarsDir.resolve(s"bfg-$version.jar"))
 
-  lazy val repoSpecDirs = repoNames.map(reposDir / _)
+  lazy val repoSpecDirs: Seq[Path] = repoNames.map(reposDir.resolve)
 }
diff --git a/bfg-benchmark/src/main/scala/lib/Repo.scala b/bfg-benchmark/src/main/scala/lib/Repo.scala
index 1c78e162..368ea0af 100644
--- a/bfg-benchmark/src/main/scala/lib/Repo.scala
+++ b/bfg-benchmark/src/main/scala/lib/Repo.scala
@@ -1,23 +1,26 @@
 package lib
 
+import com.google.common.io.MoreFiles
+import com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE
 import com.madgag.compress.CompressUtil._
-import scalax.file.ImplicitConversions._
-import scalax.file.Path
-import scalax.file.defaultfs.DefaultPath
+import java.nio.file.{Files, Path}
+import scala.util.Using
 
-class RepoExtractor(scratchDir: DefaultPath) {
+class RepoExtractor(scratchDir: Path) {
 
-  val repoDir = scratchDir / "repo.git"
+  val repoDir = scratchDir.resolve( "repo.git")
 
   def extractRepoFrom(zipPath: Path) = {
-    repoDir.deleteRecursively(force = true)
+    if (Files.exists(repoDir)) MoreFiles.deleteRecursively(repoDir, ALLOW_INSECURE)
 
-    repoDir.createDirectory()
+    Files.createDirectories(repoDir)
 
-    println(s"Extracting repo to ${repoDir.toAbsolute.path}")
+    println(s"Extracting repo to ${repoDir.toAbsolutePath}")
 
-    zipPath.inputStream.acquireFor { stream => unzip(stream, repoDir) }
+    Using(Files.newInputStream(zipPath)) {
+      stream => unzip(stream, repoDir.toFile)
+    }
 
     repoDir
   }
diff --git a/bfg-benchmark/src/main/scala/model/BFGJar.scala b/bfg-benchmark/src/main/scala/model/BFGJar.scala
index de39035c..2a5eec98 100644
--- a/bfg-benchmark/src/main/scala/model/BFGJar.scala
+++ b/bfg-benchmark/src/main/scala/model/BFGJar.scala
@@ -1,6 +1,6 @@
 package model
 
-import scalax.file.Path
+import java.nio.file.Path
 
 object BFGJar {
   def from(path: Path) = BFGJar(path, Map.empty)
diff --git a/bfg-benchmark/src/main/scala/model/InvocableEngine.scala b/bfg-benchmark/src/main/scala/model/InvocableEngine.scala
index 585cfc20..1125e332 100644
--- a/bfg-benchmark/src/main/scala/model/InvocableEngine.scala
+++ b/bfg-benchmark/src/main/scala/model/InvocableEngine.scala
@@ -1,10 +1,13 @@
 package model
 
+import com.google.common.io.CharSource
+import com.google.common.io.Files.asCharSource
+
+import java.io.File
+import java.nio.charset.StandardCharsets.UTF_8
+import java.nio.file.{Files, Path}
+import scala.jdk.StreamConverters._
 import scala.sys.process.{Process, ProcessBuilder}
-import scalax.file.ImplicitConversions._
-import scalax.file.Path
-import scalax.file.defaultfs.DefaultPath
-import scalax.io.Input
 
 trait EngineInvocation
@@ -15,19 +18,19 @@ case class GFBInvocation(args: Seq[String]) extends EngineInvocation
 
 trait InvocableEngine[InvocationArgs <: EngineInvocation] {
 
-  def processFor(invocation: InvocationArgs)(repoPath: DefaultPath): ProcessBuilder
+  def processFor(invocation: InvocationArgs)(repoPath: File): ProcessBuilder
 
 }
 
 case class InvocableBFG(java: Java, bfgJar: BFGJar) extends InvocableEngine[BFGInvocation] {
 
-  def processFor(invocation: BFGInvocation)(repoPath: DefaultPath) =
-    Process(s"${java.javaCmd} -jar ${bfgJar.path.path} ${invocation.args}", repoPath)
+  def processFor(invocation: BFGInvocation)(repoPath: File) =
+    Process(s"${java.javaCmd} -jar ${bfgJar.path} ${invocation.args}", repoPath)
 
 }
 
 object InvocableGitFilterBranch extends InvocableEngine[GFBInvocation] {
 
-  def processFor(invocation: GFBInvocation)(repoPath: DefaultPath) =
+  def processFor(invocation: GFBInvocation)(repoPath: File) =
     Process(Seq("git", "filter-branch") ++ invocation.args, repoPath)
 
 }
@@ -42,11 +45,11 @@ We want to allow the user to vary:
 trait EngineType[InvocationType <: EngineInvocation] {
 
   val configName: String
 
-  def argsFor(config: Input): InvocationType
+  def argsFor(config: CharSource): InvocationType
 
   def argsOptsFor(commandDir: Path): Option[InvocationType] = {
-    val paramsPath = commandDir / s"$configName.txt"
-    if (paramsPath.exists) Some(argsFor(paramsPath)) else None
+    val paramsPath = commandDir.resolve(s"$configName.txt")
+    if (Files.exists(paramsPath)) Some(argsFor(asCharSource(paramsPath.toFile, UTF_8))) else None
   }
 }
@@ -54,12 +57,12 @@ case object BFG extends EngineType[BFGInvocation] {
 
   val configName = "bfg"
 
-  def argsFor(config: Input) = BFGInvocation(config.string)
+  def argsFor(config: CharSource) = BFGInvocation(config.read())
 }
 
 case object GitFilterBranch extends EngineType[GFBInvocation] {
 
   val configName = "gfb"
 
-  def argsFor(config: Input) = GFBInvocation(config.lines().toSeq)
+  def argsFor(config: CharSource) = GFBInvocation(config.lines().toScala(Seq))
 }
diff --git a/bfg-benchmark/src/main/scala/model/InvocableEngineSet.scala b/bfg-benchmark/src/main/scala/model/InvocableEngineSet.scala
index a677132d..8376c592 100644
--- a/bfg-benchmark/src/main/scala/model/InvocableEngineSet.scala
+++ b/bfg-benchmark/src/main/scala/model/InvocableEngineSet.scala
@@ -1,14 +1,14 @@
 package model
 
-import scalax.file.Path
-import scalax.file.defaultfs.DefaultPath
+import java.io.File
+import java.nio.file.Path
 
 case class InvocableEngineSet[InvocationArgs <: EngineInvocation](
   engineType: EngineType[InvocationArgs],
   invocableEngines: Seq[InvocableEngine[InvocationArgs]]
 ) {
 
-  def invocationsFor(commandDir: Path): Seq[(InvocableEngine[InvocationArgs], DefaultPath => scala.sys.process.ProcessBuilder)] = {
+  def invocationsFor(commandDir: Path): Seq[(InvocableEngine[InvocationArgs], File => scala.sys.process.ProcessBuilder)] = {
     for {
       args <- engineType.argsOptsFor(commandDir).toSeq
       invocable <- invocableEngines
diff --git a/bfg-benchmark/src/test/scala/JavaVersionSpec.scala b/bfg-benchmark/src/test/scala/JavaVersionSpec.scala
index adbb4628..03010d5d 100644
--- a/bfg-benchmark/src/test/scala/JavaVersionSpec.scala
+++ b/bfg-benchmark/src/test/scala/JavaVersionSpec.scala
@@ -1,4 +1,6 @@
-import org.scalatest.{FlatSpec, Matchers, OptionValues}
+import org.scalatest.OptionValues
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
 
 object JavaVersionSpec extends AnyFlatSpec with OptionValues with Matchers {
   "version" should "parse an example line" in {
diff --git a/bfg-library/build.sbt b/bfg-library/build.sbt
index c1e5ee00..e85e9376 100644
--- a/bfg-library/build.sbt
+++ b/bfg-library/build.sbt
@@ -1,4 +1,14 @@
 import Dependencies._
 
-libraryDependencies ++= guava :+ scalaIoFile :+ textmatching :+ scalaGit :+ jgit :+ slf4jSimple :+ scalaGitTest % "test"
+libraryDependencies ++= guava ++ Seq(
+  parCollections,
+  scalaCollectionPlus,
+  textmatching,
+  scalaGit,
+  jgit,
+  slf4jSimple,
+  lineSplitting,
+  scalaGitTest % Test,
+  "org.apache.commons" % "commons-text" % "1.9" % Test
+)
diff --git a/bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentMultiMap.scala b/bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentMultiMap.scala
index 1f231c52..d0f76a0e 100644
--- a/bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentMultiMap.scala
+++ b/bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentMultiMap.scala
@@ -20,6 +20,7 @@
 
 package com.madgag.collection.concurrent
 
+import com.madgag.scala.collection.decorators._
 
 class ConcurrentMultiMap[A, B] {
 
@@ -34,5 +35,5 @@ class ConcurrentMultiMap[A, B] {
     this
   }
 
-  def toMap: Map[A, Set[B]] = m.toMap.mapValues(_.toSet)
+  def toMap: Map[A, Set[B]] = m.toMap.mapV(_.toSet)
 }
diff --git a/bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentSet.scala b/bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentSet.scala
index 317d0071..207796f0 100644
--- a/bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentSet.scala
+++ b/bfg-library/src/main/scala/com/madgag/collection/concurrent/ConcurrentSet.scala
@@ -20,29 +20,53 @@
 
 package com.madgag.collection.concurrent
 
-import scala.collection.mutable.{Set, SetLike}
+import scala.collection.mutable.{AbstractSet, SetOps}
+import scala.collection.{IterableFactory, IterableFactoryDefaults, mutable}
 
-
-class ConcurrentSet[A] extends Set[A] with SetLike[A, ConcurrentSet[A]] {
+class ConcurrentSet[A]()
+  extends AbstractSet[A]
+    with SetOps[A, ConcurrentSet, ConcurrentSet[A]]
+    with IterableFactoryDefaults[A, ConcurrentSet]
+{
 
   val m: collection.concurrent.Map[A, Boolean] = collection.concurrent.TrieMap.empty
 
-  override def +=(elem: A): this.type = {
+  override def iterableFactory: IterableFactory[ConcurrentSet] = ConcurrentSet
+
+  override def clear(): Unit = m.clear()
+
+  override def addOne(elem: A): ConcurrentSet.this.type = {
     m.put(elem, true)
     this
   }
 
-  override def -=(elem: A): this.type = {
+  override def subtractOne(elem: A): ConcurrentSet.this.type = {
     m.remove(elem)
     this
   }
 
-  override def empty: this.type = {
-    m.empty
-    this
-  }
-
   override def contains(elem: A): Boolean = m.contains(elem)
 
   override def iterator: Iterator[A] = m.keysIterator
+
+}
+
+object ConcurrentSet extends IterableFactory[ConcurrentSet] {
+
+  @transient
+  private final val EmptySet = new ConcurrentSet()
+
+  def empty[A]: ConcurrentSet[A] = EmptySet.asInstanceOf[ConcurrentSet[A]]
+
+  def from[A](source: collection.IterableOnce[A]): ConcurrentSet[A] =
+    source match {
+      case hs: ConcurrentSet[A] => hs
+      case _ if source.knownSize == 0 => empty[A]
+      case _ => (newBuilder[A] ++= source).result()
+    }
+
+  /** Create a new Builder which can be reused after calling `result()` without an
+    * intermediate call to `clear()` in order to build multiple related results.
+    */
+  def newBuilder[A]: mutable.Builder[A, ConcurrentSet[A]] = ???
} diff --git a/bfg-library/src/main/scala/com/madgag/git/LFS.scala b/bfg-library/src/main/scala/com/madgag/git/LFS.scala index 50b53ec5..662736fa 100644 --- a/bfg-library/src/main/scala/com/madgag/git/LFS.scala +++ b/bfg-library/src/main/scala/com/madgag/git/LFS.scala @@ -20,34 +20,34 @@ package com.madgag.git -import java.nio.charset.Charset -import java.security.{DigestOutputStream, MessageDigest} - import com.google.common.base.Splitter import com.madgag.git.bfg.model.FileName import org.apache.commons.codec.binary.Hex._ import org.eclipse.jgit.lib.ObjectLoader -import scala.collection.JavaConverters._ -import scalax.file.Path -import scalax.file.defaultfs.DefaultPath +import java.nio.charset.Charset +import java.nio.charset.StandardCharsets.UTF_8 +import java.nio.file.{Files, Path} +import java.security.{DigestOutputStream, MessageDigest} +import scala.jdk.CollectionConverters._ +import scala.util.Using object LFS { - val ObjectsPath = Path("lfs" , "objects") + val ObjectsPath: Seq[String] = Seq("lfs" , "objects") - val PointerCharset = Charset.forName("UTF-8") + val PointerCharset: Charset = UTF_8 case class Pointer(shaHex: String, blobSize: Long) { - lazy val text = s"""|version https://git-lfs.github.com/spec/v1 - |oid sha256:$shaHex - |size $blobSize - |""".stripMargin + lazy val text: String = s"""|version https://git-lfs.github.com/spec/v1 + |oid sha256:$shaHex + |size $blobSize + |""".stripMargin - lazy val bytes = text.getBytes(PointerCharset) + lazy val bytes: Array[Byte] = text.getBytes(PointerCharset) - lazy val path = Path(shaHex.substring(0, 2), shaHex.substring(2, 4), shaHex) + lazy val path: Seq[String] = Seq(shaHex.substring(0, 2), shaHex.substring(2, 4), shaHex) } object Pointer { @@ -65,12 +65,12 @@ object LFS { val GitAttributesFileName = FileName(".gitattributes") - def pointerFor(loader: ObjectLoader, tmpFile: DefaultPath) = { + def pointerFor(loader: ObjectLoader, tmpFile: Path) = { val digest = MessageDigest.getInstance("SHA-256") - for { - outStream <- tmpFile.outputStream() - } loader.copyTo(new DigestOutputStream(outStream, digest)) + Using(Files.newOutputStream(tmpFile)) { outStream => + loader.copyTo(new DigestOutputStream(outStream, digest)) + } Pointer(encodeHexString(digest.digest()), loader.getSize) } diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/GitUtil.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/GitUtil.scala index 934307f1..1be5d265 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/GitUtil.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/GitUtil.scala @@ -30,7 +30,8 @@ import org.eclipse.jgit.lib._ import org.eclipse.jgit.revwalk.RevWalk import org.eclipse.jgit.storage.file.WindowCacheConfig -import scala.collection.convert.ImplicitConversionsToScala._ +import scala.jdk.CollectionConverters._ +import scala.jdk.StreamConverters._ import scala.language.implicitConversions trait CleaningMapper[V] extends Cleaner[V] { @@ -51,7 +52,7 @@ object GitUtil { val ProbablyNoNonFileObjectsOverSizeThreshold: Long = 1024 * 1024 - def tweakStaticJGitConfig(massiveNonFileObjects: Option[Long]) { + def tweakStaticJGitConfig(massiveNonFileObjects: Option[Long]): Unit = { val wcConfig: WindowCacheConfig = new WindowCacheConfig() wcConfig.setStreamFileThreshold(Ints.saturatedCast(massiveNonFileObjects.getOrElse(ProbablyNoNonFileObjectsOverSizeThreshold))) wcConfig.install() @@ -62,22 +63,22 @@ object GitUtil { implicit val revWalk = new RevWalk(repo) implicit val objectReader = revWalk.getObjectReader - 
repo.getAllRefs.values.map(_.getObjectId).filter(_.open.getType == Constants.OBJ_COMMIT) - .map(_.asRevCommit).exists(_.getFooterLines(FormerCommitFooter.Key).nonEmpty) + repo.getAllRefs.values().stream().toScala(Seq).map(_.getObjectId).filter(_.open.getType == Constants.OBJ_COMMIT) + .map(_.asRevCommit).exists(_.getFooterLines(FormerCommitFooter.Key).asScala.nonEmpty) } implicit def cleaner2CleaningMapper[V](f: Cleaner[V]): CleaningMapper[V] = new CleaningMapper[V] { def apply(v: V) = f(v) } - def biggestBlobs(implicit objectDB: ObjectDirectory, progressMonitor: ProgressMonitor = NullProgressMonitor.INSTANCE): Stream[SizedObject] = { + def biggestBlobs(implicit objectDB: ObjectDirectory, progressMonitor: ProgressMonitor = NullProgressMonitor.INSTANCE): LazyList[SizedObject] = { Timing.measureTask("Scanning packfile for large blobs", ProgressMonitor.UNKNOWN) { val reader = objectDB.newReader objectDB.packedObjects.map { objectId => progressMonitor update 1 SizedObject(objectId, reader.getObjectSize(objectId, OBJ_ANY)) - }.toSeq.sorted.reverse.toStream.filter { oid => + }.toSeq.sorted.reverse.to(LazyList).filter { oid => oid.size > ProbablyNoNonFileObjectsOverSizeThreshold || reader.open(oid.objectId).getType == OBJ_BLOB } } diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobCharsetDetector.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobCharsetDetector.scala index 74e81c5d..b005084b 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobCharsetDetector.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobCharsetDetector.scala @@ -20,36 +20,38 @@ package com.madgag.git.bfg.cleaner -import java.nio.ByteBuffer -import java.nio.charset.Charset -import java.nio.charset.CodingErrorAction._ - +import com.google.common.io.ByteStreams +import com.google.common.io.ByteStreams.toByteArray import com.madgag.git.bfg.model.TreeBlobEntry import org.eclipse.jgit.diff.RawText -import org.eclipse.jgit.lib.ObjectStream +import org.eclipse.jgit.lib.ObjectLoader -import scala.util.Try -import scalax.io.managed.InputStreamResource +import java.nio.ByteBuffer +import java.nio.charset.Charset +import java.nio.charset.CodingErrorAction._ +import scala.util.{Try, Using} trait BlobCharsetDetector { // should return None if this is a binary file that can not be converted to text - def charsetFor(entry: TreeBlobEntry, streamResource: InputStreamResource[ObjectStream]): Option[Charset] + def charsetFor(entry: TreeBlobEntry, objectLoader: ObjectLoader): Option[Charset] } object QuickBlobCharsetDetector extends BlobCharsetDetector { - val CharSets = Seq(Charset.forName("UTF-8"), Charset.defaultCharset(), Charset.forName("ISO-8859-1")).distinct + val CharSets: Seq[Charset] = + Seq(Charset.forName("UTF-8"), Charset.defaultCharset(), Charset.forName("ISO-8859-1")).distinct - def charsetFor(entry: TreeBlobEntry, streamResource: InputStreamResource[ObjectStream]): Option[Charset] = - Some(streamResource.bytes.take(8000).toArray).filterNot(RawText.isBinary).flatMap { + def charsetFor(entry: TreeBlobEntry, objectLoader: ObjectLoader): Option[Charset] = { + Using(ByteStreams.limit(objectLoader.openStream(), 8000))(toByteArray).toOption.filterNot(RawText.isBinary).flatMap { sampleBytes => val b = ByteBuffer.wrap(sampleBytes) CharSets.find(cs => Try(decode(b, cs)).isSuccess) } + } - private def decode(b: ByteBuffer, charset: Charset) { + private def decode(b: ByteBuffer, charset: Charset): Unit = { 
charset.newDecoder.onMalformedInput(REPORT).onUnmappableCharacter(REPORT).decode(b) } } diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobTextModifier.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobTextModifier.scala index 4f4c26a6..a863706c 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobTextModifier.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/BlobTextModifier.scala @@ -20,13 +20,14 @@ package com.madgag.git.bfg.cleaner -import java.io.ByteArrayOutputStream - import com.madgag.git.ThreadLocalObjectDatabaseResources import com.madgag.git.bfg.model.TreeBlobEntry +import com.madgag.linesplitting.LineBreakPreservingIterator import org.eclipse.jgit.lib.Constants.OBJ_BLOB +import org.eclipse.jgit.lib.ObjectLoader -import scalax.io.Resource +import java.io.{ByteArrayOutputStream, InputStreamReader} +import java.nio.charset.Charset object BlobTextModifier { @@ -50,17 +51,13 @@ trait BlobTextModifier extends TreeBlobModifier { def filterTextIn(e: TreeBlobEntry, lineCleaner: String => String): TreeBlobEntry = { def isDirty(line: String) = lineCleaner(line) != line + val loader = threadLocalObjectDBResources.reader().open(e.objectId) val opt = for { - loader <- Some(threadLocalObjectDBResources.reader().open(e.objectId)) - if loader.getSize < sizeThreshold - streamResource <- Some(Resource.fromInputStream(loader.openStream())) - charset <- charsetDetector.charsetFor(e, streamResource) - reader <- Some(streamResource.reader(charset)) - lines = reader.lines(includeTerminator = true) - if lines.exists(isDirty) + charset <- charsetDetector.charsetFor(e, loader) + if loader.getSize < sizeThreshold && linesFor(loader, charset).exists(isDirty) } yield { val b = new ByteArrayOutputStream(loader.getSize.toInt) - lines.view.map(lineCleaner).foreach(line => b.write(line.getBytes(charset))) + linesFor(loader, charset).map(lineCleaner).foreach(line => b.write(line.getBytes(charset))) val oid = threadLocalObjectDBResources.inserter().insert(OBJ_BLOB, b.toByteArray) e.copy(objectId = oid) } @@ -73,4 +70,8 @@ trait BlobTextModifier extends TreeBlobModifier { case None => entry.withoutName } } + + private def linesFor(loader: ObjectLoader, charset: Charset): Iterator[String] = { + new LineBreakPreservingIterator(new InputStreamReader(loader.openStream(), charset)) + } } diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/LfsBlobConverter.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/LfsBlobConverter.scala index 50818e2d..e9a8ee4f 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/LfsBlobConverter.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/LfsBlobConverter.scala @@ -20,8 +20,8 @@ package com.madgag.git.bfg.cleaner -import java.nio.charset.Charset - +import com.google.common.io.ByteSource +import com.google.common.io.Files.createParentDirs import com.madgag.git.LFS._ import com.madgag.git._ import com.madgag.git.bfg.model._ @@ -30,17 +30,17 @@ import com.madgag.textmatching.{Glob, TextMatcher} import org.eclipse.jgit.internal.storage.file.FileRepository import org.eclipse.jgit.lib.{ObjectId, ObjectReader} -import scala.util.Try -import scalax.file.ImplicitConversions._ -import scalax.file.Path.createTempFile -import scalax.io.JavaConverters._ +import java.nio.charset.Charset +import java.nio.file.{Files, Path} +import scala.jdk.StreamConverters._ +import scala.util.{Try, Using} class LfsBlobConverter( lfsGlobExpression: String, repo: FileRepository ) extends 
TreeBlobModifier { - val lfsObjectsDir = repo.getDirectory / LFS.ObjectsPath + val lfsObjectsDir: Path = repo.getDirectory.toPath.resolve(LFS.ObjectsPath) val threadLocalObjectDBResources = repo.getObjectDatabase.threadLocalResources @@ -67,11 +67,12 @@ class LfsBlobConverter( } { case (_, oldGitAttributesId) => val objectLoader = threadLocalObjectDBResources.reader().open(oldGitAttributesId) - val oldAttributes = objectLoader.getCachedBytes.asInput.lines().toSeq - - if (oldAttributes.contains(gitAttributesLine)) oldGitAttributesId else { - storeBlob((oldAttributes :+ gitAttributesLine).mkString("\n")) - } + Using(ByteSource.wrap(objectLoader.getCachedBytes).asCharSource(UTF_8).lines()) { oldAttributesStream => + val oldAttributes = oldAttributesStream.toScala(Seq) + if (oldAttributes.contains(gitAttributesLine)) oldGitAttributesId else { + storeBlob((oldAttributes :+ gitAttributesLine).mkString("\n")) + } + }.get } cleanedBlobs.copy(entryMap = cleanedBlobs.entryMap + (GitAttributesFileName -> (RegularFile, newGitAttributesId))) } @@ -94,17 +95,19 @@ class LfsBlobConverter( def tryStoringLfsFileFor(blobId: ObjectId)(implicit r: ObjectReader): Try[Pointer] = { val loader = blobId.open - val tmpFile = createTempFile(s"bfg.git-lfs.conv-${blobId.name}") + val tmpFile: Path = Files.createTempFile(s"bfg.git-lfs.conv-${blobId.name}","dat") val pointer = pointerFor(loader, tmpFile) - val lfsPath = lfsObjectsDir / pointer.path + val lfsPath = lfsObjectsDir.resolve(pointer.path) + + createParentDirs(lfsPath.toFile) - val ensureLfsFile = Try(if (!lfsPath.exists) tmpFile moveTo lfsPath).recover { - case _ if lfsPath.size.contains(loader.getSize) => + val ensureLfsFile = Try(if (!Files.exists(lfsPath)) Files.move(tmpFile, lfsPath)).recover { + case _ if Files.exists(lfsPath) && Files.size(lfsPath) == loader.getSize => } - Try(tmpFile.delete(force = true)) + Try(Files.deleteIfExists(tmpFile)) ensureLfsFile.map(_ => pointer) } diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/ObjectIdCleaner.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/ObjectIdCleaner.scala index 9a8c2d6f..51cf3313 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/ObjectIdCleaner.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/ObjectIdCleaner.scala @@ -139,7 +139,7 @@ class ObjectIdCleaner(config: ObjectIdCleaner.Config, objectDB: ObjectDatabase, } } - def recordChange(originalBlobs: TreeBlobs, fixedTreeBlobs: TreeBlobs) { + def recordChange(originalBlobs: TreeBlobs, fixedTreeBlobs: TreeBlobs): Unit = { val changedFiles: Set[TreeBlobEntry] = originalBlobs.entries.toSet -- fixedTreeBlobs.entries.toSet for (TreeBlobEntry(filename, _, oldId) <- changedFiles) { fixedTreeBlobs.objectId(filename) match { diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/RepoRewriter.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/RepoRewriter.scala index 5d78dce4..b9d234fe 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/RepoRewriter.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/RepoRewriter.scala @@ -27,7 +27,8 @@ import org.eclipse.jgit.revwalk.RevSort._ import org.eclipse.jgit.revwalk.{RevCommit, RevWalk} import org.eclipse.jgit.transport.ReceiveCommand -import scala.collection.convert.ImplicitConversions._ +import scala.jdk.CollectionConverters._ +import scala.collection.parallel.CollectionConverters._ import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future @@ -81,24 +82,24 @@ object 
RepoRewriter { revWalk.sort(TOPO) // crucial to ensure we visit parents BEFORE children, otherwise blow stack revWalk.sort(REVERSE, true) // we want to start with the earliest commits and work our way up... - val startCommits = allRefs.map(_.targetObjectId.asRevObject(revWalk)).collect { case c: RevCommit => c } + val startCommits = allRefs.asScala.map(_.targetObjectId.asRevObject(revWalk)).collect { case c: RevCommit => c } - revWalk.markStart(startCommits) + revWalk.markStart(startCommits.asJavaCollection) revWalk } implicit val revWalk = createRevWalk implicit val reader = revWalk.getObjectReader - reporter.reportRefsForScan(allRefs) + reporter.reportRefsForScan(allRefs.asScala) reporter.reportObjectProtection(objectIdCleanerConfig)(repo.getObjectDatabase, revWalk) val objectIdCleaner = new ObjectIdCleaner(objectIdCleanerConfig, repo.getObjectDatabase, revWalk) - val commits = revWalk.toList + val commits = revWalk.asScala.toSeq - def clean(commits: Seq[RevCommit]) { + def clean(commits: Seq[RevCommit]): Unit = { reporter.reportCleaningStart(commits) Timing.measureTask("Cleaning commits", commits.size) { @@ -116,7 +117,7 @@ object RepoRewriter { } } - def updateRefsWithCleanedIds() { + def updateRefsWithCleanedIds(): Unit = { val refUpdateCommands = for (ref <- repo.nonSymbolicRefs; (oldId, newId) <- objectIdCleaner.substitution(ref.getObjectId) ) yield new ReceiveCommand(oldId, newId, ref.getName) @@ -133,7 +134,7 @@ object RepoRewriter { if (tip == objectIdCleaner(base)) false else super.isMergedInto(base, tip) } - refDatabase.newBatchUpdate.setAllowNonFastForwards(true).addCommand(refUpdateCommands) + refDatabase.newBatchUpdate.setAllowNonFastForwards(true).addCommand(refUpdateCommands.asJavaCollection) .execute(quickMergeCalcRevWalk, progressMonitor) } diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/Reporter.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/Reporter.scala index 691b44d9..853ef153 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/Reporter.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/Reporter.scala @@ -1,10 +1,9 @@ package com.madgag.git.bfg.cleaner -import java.text.SimpleDateFormat -import java.util.Date - +import com.google.common.io.Files.asCharSink import com.madgag.collection.concurrent.ConcurrentMultiMap import com.madgag.git._ +import com.madgag.git.bfg.cleaner.Reporter.dump import com.madgag.git.bfg.cleaner.protection.{ProtectedObjectCensus, ProtectedObjectDirtReport} import com.madgag.git.bfg.model.FileName import com.madgag.text.Text._ @@ -16,48 +15,69 @@ import org.eclipse.jgit.lib._ import org.eclipse.jgit.revwalk.{RevCommit, RevWalk} import org.eclipse.jgit.transport.ReceiveCommand -import scala.collection.convert.ImplicitConversions._ +import java.nio.charset.StandardCharsets.UTF_8 +import java.nio.file.Files.createDirectories +import java.nio.file.Path +import java.time.ZonedDateTime +import java.time.format.DateTimeFormatter import scala.collection.immutable.SortedMap -import scalax.file.Path +import scala.jdk.CollectionConverters._ + + +object Reporter { + def dump(path: Path, iter: Iterable[String]): Unit = { + val sink = asCharSink(path.toFile, UTF_8) + + sink.writeLines(iter.asJava, "\n") + } +} trait Reporter { val progressMonitor: ProgressMonitor - def reportRefsForScan(allRefs: Traversable[Ref])(implicit objReader: ObjectReader) + def reportRefsForScan(allRefs: Iterable[Ref])(implicit objReader: ObjectReader): Unit - def reportRefUpdateStart(refUpdateCommands: 
Traversable[ReceiveCommand]) + def reportRefUpdateStart(refUpdateCommands: Iterable[ReceiveCommand]): Unit - def reportObjectProtection(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk) + def reportObjectProtection(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk): Unit - def reportCleaningStart(commits: Seq[RevCommit]) + def reportCleaningStart(commits: Seq[RevCommit]): Unit - def reportResults(commits: List[RevCommit], objectIdCleaner: ObjectIdCleaner) + def reportResults(commits: Seq[RevCommit], objectIdCleaner: ObjectIdCleaner): Unit } class CLIReporter(repo: Repository) extends Reporter { - lazy val reportsDir = { - val now = new Date() - def format(s: String) = new SimpleDateFormat(s).format(now) - val dir = Path.fromString(repo.topDirectory.getAbsolutePath + ".bfg-report") / format("yyyy-MM-dd") / format("HH-mm-ss") - dir.doCreateDirectory() + lazy val reportsDir: Path = { + val now = ZonedDateTime.now() + + val topDirPath = repo.topDirectory.toPath.toAbsolutePath + + val reportsDir = topDirPath.resolveSibling(s"${topDirPath.getFileName}.bfg-report") + + val dateFormatter = DateTimeFormatter.ofPattern("uuuu-MM-dd") + val timeFormatter = DateTimeFormatter.ofPattern("HH-mm-ss") + + val dir = reportsDir.resolve(now.format(dateFormatter)).resolve(now.format(timeFormatter)) + + createDirectories(dir) dir } lazy val progressMonitor = new TextProgressMonitor - def reportRefUpdateStart(refUpdateCommands: Traversable[ReceiveCommand]) { + def reportRefUpdateStart(refUpdateCommands: Iterable[ReceiveCommand]): Unit = { println(title("Updating " + plural(refUpdateCommands, "Ref"))) val summaryTableCells = refUpdateCommands.map(update => (update.getRefName, update.getOldId.shortName, update.getNewId.shortName)) Tables.formatTable(("Ref", "Before", "After"), summaryTableCells.toSeq).map("\t" + _).foreach(println) - println + println() } - def reportRefsForScan(allRefs: Traversable[Ref])(implicit objReader: ObjectReader) { + def reportRefsForScan(allRefs: Iterable[Ref])(implicit objReader: ObjectReader): Unit = { val refsByObjType = allRefs.groupBy { ref => objReader.open(ref.getObjectId).getType } withDefault Seq.empty @@ -71,7 +91,7 @@ class CLIReporter(repo: Repository) extends Reporter { // abort due to Dirty Tips on Private run - user needs to manually clean // warn due to Dirty Tips on Public run - it's not so serious if users publicise dirty tips. 
// if no protection - def reportObjectProtection(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk) { + def reportObjectProtection(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk): Unit = { println(title("Protected commits")) if (objectIdCleanerConfig.protectedObjectCensus.isEmpty) { @@ -89,7 +109,7 @@ class CLIReporter(repo: Repository) extends Reporter { case class DiffSideDetails(id: ObjectId, path: String, mode: FileMode, size: Option[Long]) - def reportProtectedCommitsAndTheirDirt(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk) { + def reportProtectedCommitsAndTheirDirt(objectIdCleanerConfig: ObjectIdCleaner.Config)(implicit objectDB: ObjectDatabase, revWalk: RevWalk): Unit = { implicit val reader = revWalk.getObjectReader def diffDetails(d: DiffEntry) = { @@ -107,8 +127,8 @@ class CLIReporter(repo: Repository) extends Reporter { (d.path +: extraInfo.toSeq).mkString(" ") } - val protectedDirtDir = reportsDir / "protected-dirt" - protectedDirtDir.doCreateDirectory() + val protectedDirtDir = reportsDir.resolve("protected-dirt") + createDirectories(protectedDirtDir) val reports = ProtectedObjectDirtReport.reportsFor(objectIdCleanerConfig, objectDB) @@ -128,10 +148,13 @@ class CLIReporter(repo: Repository) extends Reporter { dirtyFile => println("\t- " + dirtyFile) } - val protectorRefsFileNameSafe = protectorRevs.mkString("_").replace(protectedDirtDir.separator, "-") - val diffFile = protectedDirtDir / s"${report.revObject.shortName}-${protectorRefsFileNameSafe}.csv" + val protectorRefsFileNameSafe: String = protectorRevs.mkString("_").replace( + protectedDirtDir.getFileSystem.getSeparator, + "-" + ) + val diffFile = protectedDirtDir.resolve(s"${report.revObject.shortName}-$protectorRefsFileNameSafe.csv") - diffFile.writeStrings(diffEntries.map { + dump(diffFile, diffEntries.map { diffEntry => val de = diffDetails(diffEntry) @@ -140,7 +163,7 @@ class CLIReporter(repo: Repository) extends Reporter { val elems = Seq(de.id.name, diffEntry.getChangeType.name, de.mode.name, de.path, de.size.getOrElse(""), modifiedLines.getOrElse("")) elems.mkString(",") - }, "\n") + }) } } } @@ -154,7 +177,7 @@ class CLIReporter(repo: Repository) extends Reporter { | |Details of protected dirty content have been recorded here : | - |${protectedDirtDir.path + protectedDirtDir.separator} + |${protectedDirtDir.toAbsolutePath.toString + protectedDirtDir.getFileSystem.getSeparator} | |If you *really* want this content gone, make a manual commit that removes it, |and then run the BFG on a fresh copy of your repo. 
@@ -164,18 +187,18 @@ class CLIReporter(repo: Repository) extends Reporter { } def changedLinesFor(edits: EditList): String = { - edits.map { + edits.asScala.map { edit => Seq(edit.getBeginA + 1, edit.getEndA).distinct.mkString("-") }.mkString(";") } - def reportCleaningStart(commits: Seq[RevCommit]) { + def reportCleaningStart(commits: Seq[RevCommit]): Unit = { println(title("Cleaning")) println("Found " + commits.size + " commits") } - def reportResults(commits: List[RevCommit], objectIdCleaner: ObjectIdCleaner) { - def reportTreeDirtHistory() { + def reportResults(commits: Seq[RevCommit], objectIdCleaner: ObjectIdCleaner): Unit = { + def reportTreeDirtHistory(): Unit = { val dirtHistoryElements = math.max(20, math.min(60, commits.size)) def cut[A](xs: Seq[A], n: Int) = { @@ -209,8 +232,8 @@ class CLIReporter(repo: Repository) extends Reporter { reportTreeDirtHistory() - lazy val mapFile = reportsDir / "object-id-map.old-new.txt" - lazy val cacheStatsFile = reportsDir / "cache-stats.txt" + lazy val mapFile: Path = reportsDir.resolve("object-id-map.old-new.txt") + lazy val cacheStatsFile: Path = reportsDir.resolve("cache-stats.txt") val changedIds = objectIdCleaner.cleanedObjectMap() @@ -218,7 +241,7 @@ class CLIReporter(repo: Repository) extends Reporter { fileData: ConcurrentMultiMap[FileName, FI], actionType: String, tableTitles: Product - )(f: ((FileName,Set[FI])) => Product)(fi: FI => Seq[String]) { + )(f: ((FileName,Set[FI])) => Product)(fi: FI => Seq[String]): Unit = { implicit val fileNameOrdering = Ordering[String].on[FileName](_.string) val dataByFilename = SortedMap[FileName, Set[FI]](fileData.toMap.toSeq: _*) @@ -226,9 +249,11 @@ class CLIReporter(repo: Repository) extends Reporter { println(title(s"$actionType files")) Tables.formatTable(tableTitles, dataByFilename.map(f).toSeq).map("\t" + _).foreach(println) - (reportsDir / s"${actionType.toLowerCase}-files.txt").writeStrings(dataByFilename.flatMap { + val actionFile = reportsDir.resolve(s"${actionType.toLowerCase}-files.txt") + + dump(actionFile, dataByFilename.flatMap { case (filename, changes) => changes.map(fi.andThen(fid => (fid :+ filename).mkString(" "))) - }, "\n") + }) } } @@ -242,11 +267,11 @@ class CLIReporter(repo: Repository) extends Reporter { case (filename, oldIds) => (filename, Text.abbreviate(oldIds.map(oldId => oldId.shortName + oldId.sizeOpt.map(size => s" (${ByteSize.format(size)})").mkString), "...").mkString(", ")) } { oldId => Seq(oldId.name, oldId.sizeOpt.mkString) } - println(s"\n\nIn total, ${changedIds.size} object ids were changed. Full details are logged here:\n\n\t${reportsDir.path}") + println(s"\n\nIn total, ${changedIds.size} object ids were changed. Full details are logged here:\n\n\t$reportsDir") - mapFile.writeStrings(SortedMap[AnyObjectId, ObjectId](changedIds.toSeq: _*).view.map { case (o,n) => s"${o.name} ${n.name}"}, "\n") + dump(mapFile,SortedMap[AnyObjectId, ObjectId](changedIds.toSeq: _*).view.map { case (o,n) => s"${o.name} ${n.name}"}) - cacheStatsFile.writeStrings(objectIdCleaner.stats().seq.map(_.toString()), "\n") + dump(cacheStatsFile,objectIdCleaner.stats().map(_.toString())) println("\nBFG run is complete! 
When ready, run: git reflog expire --expire=now --all && git gc --prune=now --aggressive") diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/protection/ProtectedObjectCensus.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/protection/ProtectedObjectCensus.scala index 55c2601d..33414bee 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/protection/ProtectedObjectCensus.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/protection/ProtectedObjectCensus.scala @@ -21,6 +21,7 @@ package com.madgag.git.bfg.cleaner.protection import com.madgag.git._ +import com.madgag.scala.collection.decorators._ import org.eclipse.jgit.lib.{ObjectId, Repository} import org.eclipse.jgit.revwalk._ @@ -72,7 +73,7 @@ object ProtectedObjectCensus { // blobs come from direct blob references and tag references // trees come from direct tree references, commit & tag references - val treeAndBlobProtection = objectProtection.keys.groupBy(treeOrBlobPointedToBy).mapValues(_.toSet) // use Either? + val treeAndBlobProtection = objectProtection.keys.groupUp(treeOrBlobPointedToBy)(_.toSet) // use Either? val directBlobProtection = treeAndBlobProtection collect { case (Left(blob), p) => blob.getId -> p @@ -80,7 +81,7 @@ object ProtectedObjectCensus { val treeProtection = treeAndBlobProtection collect { case (Right(tree), p) => tree -> p } - val indirectBlobProtection = treeProtection.keys.flatMap(tree => allBlobsUnder(tree).map(_ -> tree)).groupBy(_._1).mapValues(_.map(_._2).toSet) + val indirectBlobProtection = treeProtection.keys.flatMap(tree => allBlobsUnder(tree).map(_ -> tree)).groupUp(_._1)(_.map(_._2).toSet) ProtectedObjectCensus(objectProtection, treeProtection, directBlobProtection, indirectBlobProtection) } diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/protection/ProtectedObjectDirtReport.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/protection/ProtectedObjectDirtReport.scala index 2667c4ed..6cf9336d 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/protection/ProtectedObjectDirtReport.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/cleaner/protection/ProtectedObjectDirtReport.scala @@ -30,7 +30,7 @@ import org.eclipse.jgit.revwalk.{RevObject, RevWalk} import org.eclipse.jgit.treewalk.TreeWalk import org.eclipse.jgit.treewalk.filter.TreeFilter -import scala.collection.convert.ImplicitConversionsToScala._ +import scala.jdk.CollectionConverters._ object ProtectedObjectDirtReport { def reportsFor(objectIdCleanerConfig: ObjectIdCleaner.Config, objectDB: ObjectDatabase)(implicit revWalk: RevWalk) = { @@ -68,6 +68,6 @@ case class ProtectedObjectDirtReport(revObject: RevObject, originalTreeOrBlob: R tw.addTree(originalTreeOrBlob.asRevTree) tw.addTree(newId.asRevTree) tw.setFilter(TreeFilter.ANY_DIFF) - DiffEntry.scan(tw).filterNot(_.getChangeType == ADD) + DiffEntry.scan(tw).asScala.filterNot(_.getChangeType == ADD).toSeq } } diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/memo.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/memo.scala index 6d32f4e9..8f032a18 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/memo.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/memo.scala @@ -20,11 +20,10 @@ package com.madgag.git.bfg +import scala.jdk.CollectionConverters._ import com.google.common.cache.{CacheBuilder, CacheLoader, CacheStats, LoadingCache} import com.madgag.git.bfg.cleaner._ -import scala.collection.JavaConverters._ - trait Memo[K, V] { def apply(z: K => V): 
MemoFunc[K, V] } @@ -50,7 +49,7 @@ object MemoUtil { (f: Cleaner[V]) => lazy val permanentCache = loaderCacheFor(f)(fix) - def fix(v: V) { + def fix(v: V): Unit = { // enforce that once any value is returned, it is 'good' and therefore an identity-mapped key as well permanentCache.put(v, v) } diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/model/Commit.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/model/Commit.scala index 7b50454d..9a6b411a 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/model/Commit.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/model/Commit.scala @@ -1,14 +1,13 @@ package com.madgag.git.bfg.model -import java.nio.charset.{Charset, IllegalCharsetNameException, UnsupportedCharsetException} - import com.madgag.git._ import com.madgag.git.bfg.cleaner._ import org.eclipse.jgit.lib.Constants.OBJ_COMMIT import org.eclipse.jgit.lib._ import org.eclipse.jgit.revwalk.RevCommit -import scala.collection.convert.ImplicitConversionsToJava +import java.nio.charset.{Charset, IllegalCharsetNameException, UnsupportedCharsetException} +import scala.jdk.CollectionConverters._ /* * Copyright (c) 2012, 2013 Roberto Tyley @@ -37,10 +36,8 @@ object Commit { case class Commit(node: CommitNode, arcs: CommitArcs) { def toBytes: Array[Byte] = { - import ImplicitConversionsToJava._ - val c = new CommitBuilder - c.setParentIds(arcs.parents) + c.setParentIds(arcs.parents.asJava) c.setTreeId(arcs.tree) c.setAuthor(node.author) @@ -66,7 +63,7 @@ object CommitNode { } case class CommitNode(author: PersonIdent, committer: PersonIdent, message: String, encoding: Charset = Constants.CHARSET) { - lazy val subject = message.linesIterator.toStream.headOption + lazy val subject = message.linesIterator.to(LazyList).headOption lazy val lastParagraphBreak = message.lastIndexOf("\n\n") lazy val messageWithoutFooters = if (footers.isEmpty) message else (message take lastParagraphBreak) lazy val footers: List[Footer] = message.drop(lastParagraphBreak).linesIterator.collect { diff --git a/bfg-library/src/main/scala/com/madgag/git/bfg/model/package.scala b/bfg-library/src/main/scala/com/madgag/git/bfg/model/package.scala index 1684c48a..b6f5f6de 100644 --- a/bfg-library/src/main/scala/com/madgag/git/bfg/model/package.scala +++ b/bfg-library/src/main/scala/com/madgag/git/bfg/model/package.scala @@ -22,9 +22,15 @@ package com.madgag.git.bfg import org.eclipse.jgit.revwalk.RevCommit +import java.nio.file.Path + package object model { implicit class RichRevCommit(revCommit: RevCommit) { - lazy val arcs = CommitArcs(revCommit.getParents, revCommit.getTree) + lazy val arcs: CommitArcs = CommitArcs(revCommit.getParents.toIndexedSeq, revCommit.getTree) + } + + implicit class RichPath(path: Path) { + def resolve(pathSegments: Seq[String]): Path = pathSegments.foldLeft(path)(_ resolve _) } } diff --git a/bfg-library/src/main/scala/com/madgag/text/ByteSize.scala b/bfg-library/src/main/scala/com/madgag/text/ByteSize.scala index 649407ca..9c884e1e 100644 --- a/bfg-library/src/main/scala/com/madgag/text/ByteSize.scala +++ b/bfg-library/src/main/scala/com/madgag/text/ByteSize.scala @@ -33,10 +33,8 @@ object ByteSize { } def format(bytes: Long): String = { - if (bytes < unit) { - bytes + " B" - } else { - val exp = (log(bytes) / log(unit)).toInt + if (bytes < unit) s"$bytes B " else { + val exp = (log(bytes.toDouble) / log(unit)).toInt val pre = magnitudeChars(exp) "%.1f %sB".format(bytes / pow(unit, exp), pre) } diff --git a/bfg-library/src/main/scala/com/madgag/text/text.scala 
b/bfg-library/src/main/scala/com/madgag/text/text.scala index 8f019acc..d73350eb 100644 --- a/bfg-library/src/main/scala/com/madgag/text/text.scala +++ b/bfg-library/src/main/scala/com/madgag/text/text.scala @@ -20,11 +20,9 @@ package com.madgag.text -import scala.collection.GenTraversableOnce - object Text { - def abbreviate[A](elems: Traversable[A], truncationToken: A, maxElements: Int = 3) = { + def abbreviate[A](elems: Iterable[A], truncationToken: A, maxElements: Int = 3) = { val firstElems = elems.take(maxElements + 1) if (firstElems.size > maxElements) { firstElems.take(maxElements-1).toSeq :+ truncationToken @@ -33,5 +31,5 @@ object Text { } } - def plural[A](list: GenTraversableOnce[A], noun: String) = list.size + " " + noun + (if (list.size == 1) "" else "s") + def plural[A](list: Iterable[A], noun: String) = s"${list.size} $noun${if (list.size == 1) "" else "s"}" } diff --git a/bfg-library/src/test/scala/com/madgag/git/LFSSpec.scala b/bfg-library/src/test/scala/com/madgag/git/LFSSpec.scala index 49cec09e..1efeb4fd 100644 --- a/bfg-library/src/test/scala/com/madgag/git/LFSSpec.scala +++ b/bfg-library/src/test/scala/com/madgag/git/LFSSpec.scala @@ -27,8 +27,9 @@ import org.eclipse.jgit.lib.ObjectInserter import org.scalatest.OptionValues import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers -import scalax.file.Path -import scalax.file.Path._ + +import java.nio.file.Files +import java.nio.file.Files.createTempFile class LFSSpec extends AnyFlatSpec with Matchers with OptionValues { "Our implementation of Git LFS Pointers" should "create pointers that have the same Git id as the ones produced by `git lfs pointer`" in { @@ -42,19 +43,19 @@ class LFSSpec extends AnyFlatSpec with Matchers with OptionValues { it should "have the correctly sharded path" in { val pointer = LFS.Pointer("b2893eddd9b394bfb7efadafda2ae0be02c573fdd83a70f26c781a943f3b7016", 21616) - pointer.path shouldBe Path("b2", "89", "b2893eddd9b394bfb7efadafda2ae0be02c573fdd83a70f26c781a943f3b7016") + pointer.path shouldBe Seq("b2", "89", "b2893eddd9b394bfb7efadafda2ae0be02c573fdd83a70f26c781a943f3b7016") } it should "calculate pointers correctly directly from the Git database, creating a temporary file" in { implicit val repo = unpackRepo("/sample-repos/example.git.zip") implicit val (revWalk, reader) = repo.singleThreadedReaderTuple - val tmpFile = createTempFile(s"bfg.test.git-lfs.conv") + val tmpFile = createTempFile(s"bfg.test.git-lfs",".conv") val pointer = LFS.pointerFor(abbrId("06d7").open, tmpFile) pointer shouldBe Pointer("5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef", 1024) - tmpFile.size.value shouldBe 1024 + Files.size(tmpFile) shouldBe 1024 } } \ No newline at end of file diff --git a/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/LfsBlobConverterSpec.scala b/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/LfsBlobConverterSpec.scala index 1095f134..8b10691d 100644 --- a/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/LfsBlobConverterSpec.scala +++ b/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/LfsBlobConverterSpec.scala @@ -23,15 +23,18 @@ package com.madgag.git.bfg.cleaner import com.madgag.diff.{After, Before, MapDiff} import com.madgag.git.LFS.Pointer import com.madgag.git._ -import com.madgag.git.bfg.model.{BlobFileMode, FileName, Tree, TreeBlobs} +import com.madgag.git.bfg.model.{BlobFileMode, FileName, Tree, TreeBlobs, _} import com.madgag.git.test._ +import com.madgag.scala.collection.decorators._ import 
org.eclipse.jgit.internal.storage.file.FileRepository import org.eclipse.jgit.lib.ObjectId import org.scalatest.concurrent.Eventually import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import org.scalatest.{Inspectors, OptionValues} -import scalax.file.ImplicitConversions._ + +import java.nio.file.Files.readAllBytes +import java.nio.file.{Files, Path} class LfsBlobConverterSpec extends AnyFlatSpec with Matchers with OptionValues with Inspectors with Eventually { @@ -85,7 +88,7 @@ class LfsBlobConverterSpec extends AnyFlatSpec with Matchers with OptionValues w fileBeforeAndAfter(After)._1 shouldBe fileBeforeAndAfter(Before)._1 - val fileIds = fileBeforeAndAfter.mapValues(_._2) + val fileIds = fileBeforeAndAfter.mapV(_._2) val (originalFileId, pointerObjectId) = (fileIds(Before), fileIds(After)) @@ -97,13 +100,13 @@ class LfsBlobConverterSpec extends AnyFlatSpec with Matchers with OptionValues w val pointer = Pointer.parse(pointerObjectId.open.getCachedBytes) - val lfsStoredFile = repo.getDirectory / "lfs" / "objects" / pointer.path + val lfsStoredFile: Path = repo.getDirectory.toPath.resolve(Seq("lfs", "objects") ++ pointer.path) - lfsStoredFile.exists shouldBe true + Files.exists(lfsStoredFile) shouldBe true - lfsStoredFile.size.value shouldBe pointer.blobSize + Files.size(lfsStoredFile) shouldBe pointer.blobSize - eventually { lfsStoredFile.bytes.toArray.blobId } shouldBe originalFileId + eventually { readAllBytes(lfsStoredFile).blobId } shouldBe originalFileId } def verifyPointersForChangedFiles(diff: MapDiff[FileName, (BlobFileMode, ObjectId)])(implicit repo: FileRepository) = { diff --git a/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/ObjectIdCleanerSpec.scala b/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/ObjectIdCleanerSpec.scala index 8c3177bc..dacb8186 100644 --- a/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/ObjectIdCleanerSpec.scala +++ b/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/ObjectIdCleanerSpec.scala @@ -30,7 +30,7 @@ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.Matcher import org.scalatest.matchers.should.Matchers -import scala.collection.convert.ImplicitConversionsToScala._ +import scala.jdk.CollectionConverters._ class ObjectIdCleanerSpec extends AnyFlatSpec with Matchers { @@ -52,9 +52,9 @@ class unpackedRepo(filePath: String) extends bfg.test.unpackedRepo(filePath) { class EnsureCleanerWith(config: ObjectIdCleaner.Config) { class RemoveDirtOfCommitsThat(commitM: Matcher[RevCommit]) extends Inspectors with Matchers { - def histOf(c: ObjectId) = repo.git.log.add(c).call.toSeq.reverse + def histOf(c: ObjectId) = repo.git.log.add(c).call.asScala.toSeq.reverse - def whenCleaning(oldCommit: ObjectId) { + def whenCleaning(oldCommit: ObjectId): Unit = { val cleaner = new ObjectIdCleaner(config, repo.getObjectDatabase, revWalk) forAtLeast(1, histOf(oldCommit)) { commit => commit should commitM diff --git a/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/RepoRewriteSpec.scala b/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/RepoRewriteSpec.scala index 9b2fda53..1a4ae168 100644 --- a/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/RepoRewriteSpec.scala +++ b/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/RepoRewriteSpec.scala @@ -24,7 +24,7 @@ import com.madgag.git._ import com.madgag.git.bfg.GitUtil._ import com.madgag.git.bfg.cleaner.ObjectIdSubstitutor._ import com.madgag.git.bfg.cleaner.protection.ProtectedObjectCensus -import 
com.madgag.git.bfg.model.TreeBlobEntry +import com.madgag.git.bfg.model.{FileName, RegularFile, TreeBlobEntry} import com.madgag.git.test._ import com.madgag.textmatching._ import org.apache.commons.io.FilenameUtils @@ -39,7 +39,7 @@ import java.net.URLEncoder import java.util.Properties import java.util.regex.Pattern._ import scala.PartialFunction.condOpt -import scala.collection.convert.ImplicitConversionsToScala._ +import scala.jdk.CollectionConverters._ class RepoRewriteSpec extends AnyFlatSpec with Matchers { @@ -52,7 +52,7 @@ class RepoRewriteSpec extends AnyFlatSpec with Matchers { val blobsToRemove = Set(abbrId("06d740")) RepoRewriter.rewrite(repo, ObjectIdCleaner.Config(ProtectedObjectCensus(Set("HEAD")), OldIdsPublic, Seq(FormerCommitFooter), treeBlobsCleaners = Seq(new BlobRemover(blobsToRemove)))) - val allCommits = repo.git.log.all.call.toSeq + val allCommits = repo.git.log.all.call.asScala.toSeq val unwantedBlobsByCommit = allCommits.flatMap(commit => { val unwantedBlobs = allBlobsReachableFrom(commit).intersect(blobsToRemove).map(_.shortName) @@ -118,30 +118,42 @@ class RepoRewriteSpec extends AnyFlatSpec with Matchers { originalContents should include("correcthorse") cleanedContents should not include "correcthorse" - propertiesIn(cleanedContents).toMap should have size propertiesIn(originalContents).size + propertiesIn(cleanedContents).asScala.toMap should have size propertiesIn(originalContents).size } + + def textReplacementOf(parentPath: String, fileNamePrefix: String, fileNamePostfix: String, before: String, after: String) = { implicit val repo = unpackRepo("/sample-repos/encodings.git.zip") + val beforeAndAfter = Seq(before, after).map(URLEncoder.encode(_, "UTF-8")).mkString("-") + val filename = s"$fileNamePrefix-ORIGINAL.$fileNamePostfix" + val beforeFile = s"$parentPath/$filename" + val afterFile = s"$parentPath/$fileNamePrefix-MODIFIED-$beforeAndAfter.$fileNamePostfix" + // val dirtyFile = repo.resolve(s"master:$beforeFile") val blobTextModifier = new BlobTextModifier { def lineCleanerFor(entry: TreeBlobEntry) = Some(quote(before).r --> (_ => after)) val threadLocalObjectDBResources = repo.getObjectDatabase.threadLocalResources } - RepoRewriter.rewrite(repo, ObjectIdCleaner.Config(ProtectedObjectCensus.None, treeBlobsCleaners = Seq(blobTextModifier))) - val beforeAndAfter = Seq(before, after).map(URLEncoder.encode(_, "UTF-8")).mkString("-") - - val beforeFile = s"$parentPath/$fileNamePrefix-ORIGINAL.$fileNamePostfix" - val afterFile = s"$parentPath/$fileNamePrefix-MODIFIED-$beforeAndAfter.$fileNamePostfix" + RepoRewriter.rewrite(repo, ObjectIdCleaner.Config(ProtectedObjectCensus.None, treeBlobsCleaners = Seq(blobTextModifier))) val cleanedFile = repo.resolve(s"master:$beforeFile") val expectedFile = repo.resolve(s"master:$afterFile") expectedFile should not be null + implicit val threadLocalObjectReader = repo.getObjectDatabase.threadLocalResources.reader() + // val dirty = dirtyFile.open.getBytes + val cleaned = cleanedFile.open.getBytes + val expected = expectedFile.open.getBytes + // val dirtyStr = new String(dirty) + val cleanedStr = new String(cleaned) + val expectedStr = new String(expected) + + cleanedStr shouldBe expectedStr cleanedFile shouldBe expectedFile } diff --git a/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/TreeBlobModifierSpec.scala b/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/TreeBlobModifierSpec.scala index c468c2b9..37eed9e7 100644 --- a/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/TreeBlobModifierSpec.scala +++ 
diff --git a/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/TreeBlobModifierSpec.scala b/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/TreeBlobModifierSpec.scala
index c468c2b9..37eed9e7 100644
--- a/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/TreeBlobModifierSpec.scala
+++ b/bfg-library/src/test/scala/com/madgag/git/bfg/cleaner/TreeBlobModifierSpec.scala
@@ -28,7 +28,7 @@ import com.madgag.git.test._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
-import scala.collection.convert.ImplicitConversionsToScala._
+import scala.jdk.CollectionConverters._
class TreeBlobModifierSpec extends AnyFlatSpec with Matchers {
@@ -48,7 +48,7 @@ class TreeBlobModifierSpec extends AnyFlatSpec with Matchers {
RepoRewriter.rewrite(repo, ObjectIdCleaner.Config(ProtectedObjectCensus(Set("HEAD")), OldIdsPublic, treeBlobsCleaners = Seq(countingTreeBlobModifier)))
- val endCounts = countingTreeBlobModifier.counts.asMap().toMap
+ val endCounts = countingTreeBlobModifier.counts.asMap().asScala.toMap
endCounts.size should be >= 4
all (endCounts.values) shouldBe 1
diff --git a/bfg-test/src/main/scala/com/madgag/git/bfg/test/unpackedRepo.scala b/bfg-test/src/main/scala/com/madgag/git/bfg/test/unpackedRepo.scala
index 52af827f..b77fb1ef 100644
--- a/bfg-test/src/main/scala/com/madgag/git/bfg/test/unpackedRepo.scala
+++ b/bfg-test/src/main/scala/com/madgag/git/bfg/test/unpackedRepo.scala
@@ -12,7 +12,7 @@ import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import org.scalatest.matchers.{MatchResult, Matcher}
-import scala.collection.convert.ImplicitConversionsToScala._
+import scala.jdk.CollectionConverters._
class unpackedRepo(filePath: String) extends AnyFlatSpec with Matchers {
@@ -66,9 +66,9 @@ class unpackedRepo(filePath: String) extends AnyFlatSpec with Matchers {
def commitHist(specificRefs: String*)(implicit repo: Repository): Seq[RevCommit] = {
val logCommand = repo.git.log
if (specificRefs.isEmpty) logCommand.all else specificRefs.foldLeft(logCommand)((lc, ref) => lc.add(repo.resolve(ref)))
- }.call.toSeq.reverse
+ }.call.asScala.toSeq.reverse
- def haveCommitWhereObjectIds(boom: Matcher[Traversable[ObjectId]])(implicit reader: ObjectReader): Matcher[RevCommit] = boom compose {
+ def haveCommitWhereObjectIds(boom: Matcher[Iterable[ObjectId]])(implicit reader: ObjectReader): Matcher[RevCommit] = boom compose {
(c: RevCommit) => c.getTree.walk().map(_.getObjectId(0)).toSeq
}
@@ -84,7 +84,7 @@ class unpackedRepo(filePath: String) extends AnyFlatSpec with Matchers {
r: Repository => commitHist(refs:_*)(r)
}
- def ensureRemovalOfBadEggs[S,T](expr : => Traversable[S], exprResultMatcher: Matcher[Traversable[S]])(block: => T) = {
+ def ensureRemovalOfBadEggs[S,T](expr : => Iterable[S], exprResultMatcher: Matcher[Iterable[S]])(block: => T) = {
gc()
expr should exprResultMatcher
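The Traversable-to-Iterable signature changes in the helpers above are forced by Scala 2.13, where scala.collection.Traversable is deprecated down to an alias of Iterable. A minimal sketch of the adjusted shape (the name and body are illustrative, not the real test helper):

    // Iterable is now the most general collection type to accept in a signature.
    def ensureRemovalOfBadEggs[S, T](expr: => Iterable[S])(block: => T): T = {
      require(expr.nonEmpty, "expected some bad eggs before running the cleaner")
      block
    }

    val outcome = ensureRemovalOfBadEggs(Seq("06d7", "cb2c")) { "cleaned" }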
diff --git a/bfg/src/main/scala/com/madgag/git/bfg/cli/CLIConfig.scala b/bfg/src/main/scala/com/madgag/git/bfg/cli/CLIConfig.scala
index 130e60ae..266f717b 100644
--- a/bfg/src/main/scala/com/madgag/git/bfg/cli/CLIConfig.scala
+++ b/bfg/src/main/scala/com/madgag/git/bfg/cli/CLIConfig.scala
@@ -20,8 +20,6 @@ package com.madgag.git.bfg.cli
-import java.io.File
-
import com.madgag.git.bfg.BuildInfo
import com.madgag.git.bfg.GitUtil._
import com.madgag.git.bfg.cleaner._
@@ -38,7 +36,9 @@ import org.eclipse.jgit.lib._
import org.eclipse.jgit.storage.file.FileRepositoryBuilder
import scopt.{OptionParser, Read}
-import scalax.file.ImplicitConversions._
+import java.io.File
+import java.nio.file.Files
+import scala.jdk.CollectionConverters._
object CLIConfig {
@@ -54,6 +54,8 @@ object CLIConfig {
}
}
+ def readLinesFrom(v: File): Seq[String] = Files.readAllLines(v.toPath).asScala.toSeq
+
val exactVersion = BuildInfo.version + (if (BuildInfo.version.contains("-SNAPSHOT")) s" (${BuildInfo.gitDescription})" else "")
head("bfg", exactVersion)
@@ -66,7 +68,8 @@ object CLIConfig {
(v, c) => c.copy(stripBiggestBlobs = Some(v))
}
opt[File]("strip-blobs-with-ids").abbr("bi").valueName("").text("strip blobs with the specified Git object ids").action {
- (v, c) => c.copy(stripBlobsWithIds = Some(v.lines().map(_.trim).filterNot(_.isEmpty).map(_.asObjectId).toSet))
+ (v, c) =>
+ c.copy(stripBlobsWithIds = Some(readLinesFrom(v).map(_.trim).filterNot(_.isEmpty).map(_.asObjectId).toSet))
}
fileMatcher("delete-files").abbr("D").text("delete files with the specified names (eg '*.class', '*.{txt,log}' - matches on file name, not path within repo)").action {
(v, c) => c.copy(deleteFiles = Some(v))
@@ -80,7 +83,7 @@ object CLIConfig {
opt[File]("replace-text").abbr("rt").valueName("").text("filter content of files, replacing matched text. Match expressions should be listed in the file, one expression per line - " +
"by default, each expression is treated as a literal, but 'regex:' & 'glob:' prefixes are supported, with '==>' to specify a replacement " +
"string other than the default of '***REMOVED***'.").action {
- (v, c) => c.copy(textReplacementExpressions = v.lines().filterNot(_.trim.isEmpty).toSeq)
+ (v, c) => c.copy(textReplacementExpressions = readLinesFrom(v).filterNot(_.trim.isEmpty))
}
fileMatcher("filter-content-including").abbr("fi").text("do file-content filtering on files that match the specified expression (eg '*.{txt,properties}')").action {
(v, c) => c.copy(filenameFilters = c.filenameFilters :+ Include(v))
@@ -117,7 +120,7 @@ object CLIConfig {
c.copy(fixFilenameDuplicatesPreferring = ord)
}
- arg[File]("") optional() action { (x, c) =>
+ arg[File]("").optional().action { (x, c) =>
c.copy(repoLocation = x) } text("file path for Git repository to clean")
}
}
@@ -130,7 +133,7 @@ case class CLIConfig(stripBiggestBlobs: Option[Int] = None,
fixFilenameDuplicatesPreferring: Option[Ordering[FileMode]] = None,
filenameFilters: Seq[Filter[String]] = Nil,
filterSizeThreshold: Long = BlobTextModifier.DefaultSizeThreshold,
- textReplacementExpressions: Traversable[String] = List.empty,
+ textReplacementExpressions: Iterable[String] = List.empty,
stripBlobsWithIds: Option[Set[ObjectId]] = None,
lfsConversion: Option[String] = None,
strictObjectChecking: Boolean = false,
@@ -152,7 +155,7 @@ case class CLIConfig(stripBiggestBlobs: Option[Int] = None,
lazy val folderDeletion: Option[Cleaner[TreeSubtrees]] = deleteFolders.map {
textMatcher => { subtrees: TreeSubtrees =>
- TreeSubtrees(subtrees.entryMap.filterKeys(filename => !textMatcher(filename)))
+ TreeSubtrees(subtrees.entryMap.view.filterKeys(filename => !textMatcher(filename)).toMap)
}
}
@@ -201,8 +204,8 @@ case class CLIConfig(stripBiggestBlobs: Option[Int] = None,
implicit val progressMonitor: ProgressMonitor = new TextProgressMonitor()
val sizeBasedBlobTargetSources = Seq(
- stripBlobsBiggerThan.map(threshold => (s: Stream[SizedObject]) => s.takeWhile(_.size > threshold)),
- stripBiggestBlobs.map(num => (s: Stream[SizedObject]) => s.take(num))
+ stripBlobsBiggerThan.map(threshold => (s: LazyList[SizedObject]) => s.takeWhile(_.size > threshold)),
+ stripBiggestBlobs.map(num => (s: LazyList[SizedObject]) => s.take(num))
).flatten
if (sizeBasedBlobTargetSources.isEmpty) None else {
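The new readLinesFrom helper above is the java.nio.file replacement for the old scala-io lines() extension on File (Files.readAllLines decodes as UTF-8 by default). A standalone sketch of the same pattern, with a hypothetical input path:

    import java.io.File
    import java.nio.file.Files
    import scala.jdk.CollectionConverters._

    def readLinesFrom(v: File): Seq[String] = Files.readAllLines(v.toPath).asScala.toSeq

    // e.g. turning a file of blob ids into the trimmed, non-empty values the option parser wants
    val ids = readLinesFrom(new File("blob-ids.txt")).map(_.trim).filterNot(_.isEmpty)

The Stream-to-LazyList and filterKeys changes in the same file are the same kind of mechanical 2.13 update: Stream is deprecated in favour of LazyList, and filterKeys/mapValues now return lazy views that need an explicit .toMap.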
diff --git a/bfg/src/main/scala/com/madgag/git/bfg/cli/Main.scala b/bfg/src/main/scala/com/madgag/git/bfg/cli/Main.scala
index 3fe954d0..10f94994 100644
--- a/bfg/src/main/scala/com/madgag/git/bfg/cli/Main.scala
+++ b/bfg/src/main/scala/com/madgag/git/bfg/cli/Main.scala
@@ -27,7 +27,7 @@ import com.madgag.git.bfg.cleaner._
object Main extends App {
if (args.isEmpty) {
- CLIConfig.parser.showUsage
+ CLIConfig.parser.showUsage()
} else {
CLIConfig.parser.parse(args, CLIConfig()) map {
@@ -36,7 +36,7 @@
tweakStaticJGitConfig(config.massiveNonFileObjects)
if (config.gitdir.isEmpty) {
- CLIConfig.parser.showUsage
+ CLIConfig.parser.showUsage()
Console.err.println("Aborting : " + config.repoLocation + " is not a valid Git repository.\n")
} else {
implicit val repo = config.repo
@@ -52,7 +52,7 @@
if (config.definesNoWork) {
Console.err.println("Please specify tasks for The BFG :")
- CLIConfig.parser.showUsage
+ CLIConfig.parser.showUsage()
} else {
println("Found " + config.objectProtection.fixedObjectIds.size + " objects to protect")
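The showUsage() calls above, and the def run(...): Unit = { ... } rewrite further down, both reflect Scala 2.13's stricter handling of side-effecting methods: auto-application of empty-paren methods and procedure syntax are deprecated. A tiny illustrative sketch (the object and message are placeholders, not BFG code):

    object UsagePrinter {
      // was procedure syntax: def showUsage { ... } (deprecated in 2.13)
      def showUsage(): Unit = println("usage: bfg [options] [repo]")
    }

    UsagePrinter.showUsage()   // keep the parentheses at the call site too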
"\n") + val blobIdsFile = Files.createTempFile("test-strip-blobs",".ids") + Files.write(blobIdsFile, badBlobs.map(_.name()).asJava) ensureRemovalFrom(commitHist()).ofCommitsThat(haveCommitWhereObjectIds(contain(abbrId("db59")))) { - run(s"--strip-blobs-with-ids ${blobIdsFile.path}") + run(s"--strip-blobs-with-ids $blobIdsFile") } } diff --git a/bfg/src/test/scala/com/madgag/git/bfg/cli/test/unpackedRepo.scala b/bfg/src/test/scala/com/madgag/git/bfg/cli/test/unpackedRepo.scala index 4c99cfc5..af85de17 100644 --- a/bfg/src/test/scala/com/madgag/git/bfg/cli/test/unpackedRepo.scala +++ b/bfg/src/test/scala/com/madgag/git/bfg/cli/test/unpackedRepo.scala @@ -24,7 +24,7 @@ import com.madgag.git.bfg import com.madgag.git.bfg.cli.Main class unpackedRepo(filePath: String) extends bfg.test.unpackedRepo(filePath) { - def run(options: String) { + def run(options: String): Unit = { Main.main(options.split(' ') :+ repo.getDirectory.getAbsolutePath) } } \ No newline at end of file diff --git a/build.sbt b/build.sbt index 8ab2f988..db849b8f 100644 --- a/build.sbt +++ b/build.sbt @@ -3,7 +3,7 @@ import common._ organization in ThisBuild := "com.madgag" -scalaVersion in ThisBuild := "2.12.12" +scalaVersion in ThisBuild := "2.13.4" scalacOptions in ThisBuild ++= Seq("-deprecation", "-feature", "-language:postfixOps") @@ -27,12 +27,8 @@ lazy val bfg = bfgProject("bfg") enablePlugins(BuildInfoPlugin) dependsOn(bfgLib lazy val bfgBenchmark = bfgProject("bfg-benchmark") -publishMavenStyle in ThisBuild := true - publishTo in ThisBuild := sonatypePublishToBundle.value -pomIncludeRepository in ThisBuild := { _ => false } - pomExtra in ThisBuild := ( git@github.com:rtyley/bfg-repo-cleaner.git diff --git a/project/build.properties b/project/build.properties index c06db1bb..0b2e09c5 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.4.5 +sbt.version=1.4.7 diff --git a/project/dependencies.scala b/project/dependencies.scala index b43d3728..9dffae70 100644 --- a/project/dependencies.scala +++ b/project/dependencies.scala @@ -2,7 +2,7 @@ import sbt._ object Dependencies { - val scalaGitVersion = "4.0" + val scalaGitVersion = "4.3" val jgitVersionOverride = Option(System.getProperty("jgit.version")) @@ -13,6 +13,10 @@ object Dependencies { // the 1.7.2 here matches slf4j-api in jgit's dependencies val slf4jSimple = "org.slf4j" % "slf4j-simple" % "1.7.2" + val scalaCollectionPlus = "com.madgag" %% "scala-collection-plus" % "0.5" + + val parCollections = "org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.0" + val scalaGit = "com.madgag.scala-git" %% "scala-git" % scalaGitVersion exclude("org.eclipse.jgit", "org.eclipse.jgit") val scalaGitTest = "com.madgag.scala-git" %% "scala-git-test" % scalaGitVersion @@ -21,14 +25,14 @@ object Dependencies { val madgagCompress = "com.madgag" % "util-compress" % "1.33" - val textmatching = "com.madgag" %% "scala-textmatching" % "2.3" + val textmatching = "com.madgag" %% "scala-textmatching" % "2.5" val scopt = "com.github.scopt" %% "scopt" % "3.7.1" val guava = Seq("com.google.guava" % "guava" % "30.1-jre", "com.google.code.findbugs" % "jsr305" % "2.0.3") - val scalaIoFile = "com.madgag" %% "scala-io-file" % "0.4.9" - val useNewerJava = "com.madgag" % "use-newer-java" % "0.1" + val lineSplitting = "com.madgag" %% "line-break-preserving-line-splitting" % "0.1.0" + }