diff --git a/Sources/tart/Commands/Clone.swift b/Sources/tart/Commands/Clone.swift index 8e59f250..4499700c 100644 --- a/Sources/tart/Commands/Clone.swift +++ b/Sources/tart/Commands/Clone.swift @@ -28,6 +28,9 @@ struct Clone: AsyncParsableCommand { @Option(help: "network concurrency to use when pulling a remote VM from the OCI-compatible registry") var concurrency: UInt = 4 + @Flag(help: .hidden) + var deduplicate: Bool = false + func validate() throws { if newName.contains("/") { throw ValidationError(" should be a local name") @@ -45,7 +48,7 @@ struct Clone: AsyncParsableCommand { if let remoteName = try? RemoteName(sourceName), !ociStorage.exists(remoteName) { // Pull the VM in case it's OCI-based and doesn't exist locally yet let registry = try Registry(host: remoteName.host, namespace: remoteName.namespace, insecure: insecure) - try await ociStorage.pull(remoteName, registry: registry, concurrency: concurrency) + try await ociStorage.pull(remoteName, registry: registry, concurrency: concurrency, deduplicate: deduplicate) } let sourceVM = try VMStorageHelper.open(sourceName) diff --git a/Sources/tart/Commands/Pull.swift b/Sources/tart/Commands/Pull.swift index ada456da..02c5e291 100644 --- a/Sources/tart/Commands/Pull.swift +++ b/Sources/tart/Commands/Pull.swift @@ -23,6 +23,9 @@ struct Pull: AsyncParsableCommand { @Option(help: "network concurrency to use when pulling a remote VM from the OCI-compatible registry") var concurrency: UInt = 4 + @Flag(help: .hidden) + var deduplicate: Bool = false + func validate() throws { if concurrency < 1 { throw ValidationError("network concurrency cannot be less than 1") @@ -43,6 +46,6 @@ struct Pull: AsyncParsableCommand { defaultLogger.appendNewLine("pulling \(remoteName)...") - try await VMStorageOCI().pull(remoteName, registry: registry, concurrency: concurrency) + try await VMStorageOCI().pull(remoteName, registry: registry, concurrency: concurrency, deduplicate: deduplicate) } } diff --git 
a/Sources/tart/OCI/Layerizer/Disk.swift b/Sources/tart/OCI/Layerizer/Disk.swift index 839b596e..051f5439 100644 --- a/Sources/tart/OCI/Layerizer/Disk.swift +++ b/Sources/tart/OCI/Layerizer/Disk.swift @@ -2,5 +2,5 @@ import Foundation protocol Disk { static func push(diskURL: URL, registry: Registry, chunkSizeMb: Int, concurrency: UInt, progress: Progress) async throws -> [OCIManifestLayer] - static func pull(registry: Registry, diskLayers: [OCIManifestLayer], diskURL: URL, concurrency: UInt, progress: Progress, localLayerCache: LocalLayerCache?) async throws + static func pull(registry: Registry, diskLayers: [OCIManifestLayer], diskURL: URL, concurrency: UInt, progress: Progress, localLayerCache: LocalLayerCache?, deduplicate: Bool) async throws } diff --git a/Sources/tart/OCI/Layerizer/DiskV1.swift b/Sources/tart/OCI/Layerizer/DiskV1.swift index e52419dd..ff114954 100644 --- a/Sources/tart/OCI/Layerizer/DiskV1.swift +++ b/Sources/tart/OCI/Layerizer/DiskV1.swift @@ -45,7 +45,7 @@ class DiskV1: Disk { return pushedLayers } - static func pull(registry: Registry, diskLayers: [OCIManifestLayer], diskURL: URL, concurrency: UInt, progress: Progress, localLayerCache: LocalLayerCache? = nil) async throws { + static func pull(registry: Registry, diskLayers: [OCIManifestLayer], diskURL: URL, concurrency: UInt, progress: Progress, localLayerCache: LocalLayerCache? = nil, deduplicate: Bool = false) async throws { if !FileManager.default.createFile(atPath: diskURL.path, contents: nil) { throw OCIError.FailedToCreateVmFile } diff --git a/Sources/tart/OCI/Layerizer/DiskV2.swift b/Sources/tart/OCI/Layerizer/DiskV2.swift index 7a9e346e..a00e0167 100644 --- a/Sources/tart/OCI/Layerizer/DiskV2.swift +++ b/Sources/tart/OCI/Layerizer/DiskV2.swift @@ -69,12 +69,12 @@ class DiskV2: Disk { } } - static func pull(registry: Registry, diskLayers: [OCIManifestLayer], diskURL: URL, concurrency: UInt, progress: Progress, localLayerCache: LocalLayerCache? 
= nil) async throws { + static func pull(registry: Registry, diskLayers: [OCIManifestLayer], diskURL: URL, concurrency: UInt, progress: Progress, localLayerCache: LocalLayerCache? = nil, deduplicate: Bool = false) async throws { // Support resumable pulls let pullResumed = FileManager.default.fileExists(atPath: diskURL.path) if !pullResumed { - if let localLayerCache = localLayerCache { + if deduplicate, let localLayerCache = localLayerCache { // Clone the local layer cache's disk and use it as a base, potentially // reducing the space usage since some blocks won't be written at all try FileManager.default.copyItem(at: localLayerCache.diskURL, to: diskURL) @@ -151,26 +151,31 @@ class DiskV2: Disk { // Also open the disk file for reading and verifying // its contents in case the local layer cache is used - let rdisk: FileHandle? = if localLayerCache != nil { + let rdisk: FileHandle? = if deduplicate && localLayerCache != nil { try FileHandle(forReadingFrom: diskURL) } else { nil } - // Check if we already have this layer contents in the local layer cache - if let localLayerCache = localLayerCache, let localLayerInfo = localLayerCache.findInfo(digest: diskLayer.digest, offsetHint: diskWritingOffset) { - // indicates that the locally cloned disk image has the same content at the given offset - let localHit = localLayerInfo.uncompressedContentDigest == uncompressedLayerContentDigest - && localLayerInfo.range.lowerBound == diskWritingOffset - // doesn't seem that localHit can ever be false if the localLayerCache is not nil - // but let's just add extra safety here and check it - if !localHit { + // Check if we already have this layer contents in the local layer cache, + // or perhaps even on the cloned disk (when the deduplication is enabled) + if let localLayerCache = localLayerCache, + let localLayerInfo = localLayerCache.findInfo(digest: diskLayer.digest, offsetHint: diskWritingOffset), + localLayerInfo.uncompressedContentDigest == uncompressedLayerContentDigest { + 
if deduplicate && localLayerInfo.range.lowerBound == diskWritingOffset { + // Do nothing, because the data is already on the disk that we've inherited from + } else { // Fulfil the layer contents from the local blob cache let data = localLayerCache.subdata(localLayerInfo.range) _ = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data) } + try disk.close() + if let rdisk = rdisk { + try rdisk.close() + } + // Update the progress progress.completedUnitCount += Int64(diskLayer.size) @@ -198,6 +203,10 @@ class DiskV2: Disk { try filter.finalize() try disk.close() + + if let rdisk = rdisk { + try rdisk.close() + } } globalDiskWritingOffset += uncompressedLayerSize diff --git a/Sources/tart/VMDirectory+OCI.swift b/Sources/tart/VMDirectory+OCI.swift index 4791476d..6b642d8d 100644 --- a/Sources/tart/VMDirectory+OCI.swift +++ b/Sources/tart/VMDirectory+OCI.swift @@ -11,7 +11,7 @@ enum OCIError: Error { } extension VMDirectory { - func pullFromRegistry(registry: Registry, manifest: OCIManifest, concurrency: UInt, localLayerCache: LocalLayerCache?) 
async throws { + func pullFromRegistry(registry: Registry, manifest: OCIManifest, concurrency: UInt, localLayerCache: LocalLayerCache?, deduplicate: Bool) async throws { // Pull VM's config file layer and re-serialize it into a config file let configLayers = manifest.layers.filter { $0.mediaType == configMediaType @@ -54,12 +54,13 @@ extension VMDirectory { do { try await diskImplType.pull(registry: registry, diskLayers: layers, diskURL: diskURL, concurrency: concurrency, progress: progress, - localLayerCache: localLayerCache) + localLayerCache: localLayerCache, + deduplicate: deduplicate) } catch let error where error is FilterError { throw RuntimeError.PullFailed("failed to decompress disk: \(error.localizedDescription)") } - if let llc = localLayerCache { + if deduplicate, let llc = localLayerCache { // set custom attribute to remember deduplicated bytes diskURL.setDeduplicatedBytes(llc.deduplicatedBytes) } diff --git a/Sources/tart/VMStorageOCI.swift b/Sources/tart/VMStorageOCI.swift index 25283d64..71009fa8 100644 --- a/Sources/tart/VMStorageOCI.swift +++ b/Sources/tart/VMStorageOCI.swift @@ -140,7 +140,7 @@ class VMStorageOCI: PrunableStorage { try list().filter { (_, _, isSymlink) in !isSymlink }.map { (_, vmDir, _) in vmDir } } - func pull(_ name: RemoteName, registry: Registry, concurrency: UInt) async throws { + func pull(_ name: RemoteName, registry: Registry, concurrency: UInt, deduplicate: Bool) async throws { SentrySDK.configureScope { scope in scope.setContext(value: ["imageName": name.description], key: "OCI") } @@ -203,10 +203,14 @@ class VMStorageOCI: PrunableStorage { if let llc = localLayerCache { let deduplicatedHuman = ByteCountFormatter.string(fromByteCount: Int64(llc.deduplicatedBytes), countStyle: .file) - defaultLogger.appendNewLine("found an image \(llc.name) that will allow us to deduplicate \(deduplicatedHuman), using it as a base...") + if deduplicate { + defaultLogger.appendNewLine("found an image \(llc.name) that will allow us to 
deduplicate \(deduplicatedHuman), using it as a base...") + } else { + defaultLogger.appendNewLine("found an image \(llc.name) that will allow us to avoid fetching \(deduplicatedHuman), will try to use it...") + } } - try await tmpVMDir.pullFromRegistry(registry: registry, manifest: manifest, concurrency: concurrency, localLayerCache: localLayerCache) + try await tmpVMDir.pullFromRegistry(registry: registry, manifest: manifest, concurrency: concurrency, localLayerCache: localLayerCache, deduplicate: deduplicate) } recoverFromFailure: { error in if error is Retryable { print("Error: \(error.localizedDescription)")