Skip to content
This repository has been archived by the owner on Feb 12, 2024. It is now read-only.

(WIP) feat: dag import and export to and from CAR files #2953

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/ipfs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
"@hapi/content": "^4.1.0",
"@hapi/hapi": "^18.4.0",
"@hapi/joi": "^15.1.0",
"@ipld/block": "^2.2.0",
"abort-controller": "^3.0.0",
"any-signal": "^1.1.0",
"array-shuffle": "^1.0.1",
Expand All @@ -79,6 +80,7 @@
"cids": "^0.8.0",
"class-is": "^1.1.0",
"dag-cbor-links": "^1.3.3",
"datastore-car": "^1.2.0",
"datastore-core": "^1.1.0",
"datastore-pubsub": "^0.3.2",
"debug": "^4.1.0",
Expand Down
31 changes: 31 additions & 0 deletions packages/ipfs/src/cli/commands/dag/export.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
'use strict'

const CID = require('cids')
const CAR = require('datastore-car')
const Block = require('@ipld/block')

module.exports = {
command: 'export <cid>',

describe: 'Streams the selected DAG as a CAR to stdout.',

async handler ({ ctx, cid }) {
const { ipfs, print } = ctx
const root = new CID(cid)

// getter interface for CAR.completeGraph
const get = async (cid) => {
const result = await ipfs.block.get(cid)
const block = Block.create(result.data, result.cid)
return block
}

try {
const car = await CAR.writeStream(process.stdout)
await CAR.completeGraph(root, get, car)
} catch (err) {
// TODO: should we print a special error for a failed ipfs.block.get() like `ipfs dag get`?
return print(`failed to compile export graph: ${err}`, true, true)
}
}
}
73 changes: 73 additions & 0 deletions packages/ipfs/src/cli/commands/dag/import.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
'use strict'

const fs = require('fs')
const CID = require('cids')
const CAR = require('datastore-car')
const Block = require('ipfs-block')
const { cidToString } = require('../../../utils/cid')

module.exports = {
command: 'import [carfile...]',

describe: 'Import the contents of CAR files from disk or stdin.',

async handler ({ ctx, carfile }) {
const { ipfs, print, getStdin } = ctx
let count = 0

if (carfile) { // files
for await (const file of carfile) {
print(`importing from ${file}...`)
count += await importCar(ipfs, print, fs.createReadStream(file))
}
} else { // stdin
print('importing CAR from stdin...')
count = await importCar(ipfs, print, getStdin())
}

print(`imported ${count} blocks`)
}
}

async function importCar (ipfs, print, inStream) {
const car = await CAR.readStreaming(inStream)
const roots = await car.getRoots()
const rootStatus = roots.reduce((p, cid) => {
p[cid.toString()] = false
return p
}, {})
let count = 0

// CAR.readStreaming acts like a datastore that we can streaming query()
for await (const { key, value } of car.query()) {
// key is the cid as a string and value is the binary block data
const cid = new CID(key)
const block = new Block(value, cid)
await ipfs.block.put(block)
if (rootStatus[key] !== undefined) {
// TODO: what if it's already true? double block in the CAR!
rootStatus[key] = true
}
count++
}

// A well-formed CAR will have roots that reference blocks that actually exist
// in the CAR body, but that may not be the case. Don't try to pin if the root
// didn't exist in the body.
// TODO: ^ go-car currently attempts to pin roots even if they don't exist in
// the CAR body, need to align behaviour
Comment on lines +57 to +58
Copy link

@ribasushi ribasushi Jun 30, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rvagg this is done in go-ipfs to allow a copy-less "transactional" operation:

<some source> | stream-dagger <many options> --emit-stdout=car-v0-fifos-xargs | xargs -0 ipfs dag import

What that mode does is print 2 fifo names on stdout. The first fifo contains all the data. The second contains the roots only ( because we can derive the roots only once we streamed everything over ). The full dag import context serves as a "transaction" of sorts, keeping GC at bay between the lengthy data stream and the pin at the very end.

Whether js-ipfs needs to support this at the same level as go-ipfs is an open question. /cc @mikeal

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i’d wait to support this until a later date if js-ipfs wants to prioritize it.

for (const [cid, found] of Object.entries(rootStatus)) {
if (!found) {
print(`malformed CAR, not pinning nonexistent root ${cidToString(cid)}`)
continue
}
const pinResults = await ipfs.pin.add(cid) // TODO: make it recursive or direct? { recursive: true }
pinResults.forEach((res) => {
print(`pinned root ${cidToString(res.cid)}`)
})
}

// TODO: handle zero-roots case?

return count
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Dataset description/sources

- lotus_testnet_export_256_multiroot.car
- Export of the first 256 blocks of the testnet chain, with 3 tipset roots. Exported from Lotus by @traviperson on 2019-03-18


- lotus_devnet_genesis.car
- Source: https://github.com/filecoin-project/lotus/blob/v0.2.10/build/genesis/devnet.car

- lotus_testnet_export_128.car
- Export of the first 128 blocks of the testnet chain, exported from Lotus by @traviperson on 2019-03-24


- lotus_devnet_genesis_shuffled_noroots.car
- lotus_testnet_export_128_shuffled_noroots.car
- versions of the above with an **empty** root array, and having all blocks shuffled

- lotus_devnet_genesis_shuffled_nulroot.car
- lotus_testnet_export_128_shuffled_nulroot.car
- versions identical to the above, but with a single "empty-block" root each ( in order to work around go-car not following the current "roots can be empty" spec )

- combined_naked_roots_genesis_and_128.car
- only the roots of `lotus_devnet_genesis.car` and `lotus_testnet_export_128.car`, to be used in combination with the root-less parts to validate "transactional" pinning

Binary file not shown.
204 changes: 204 additions & 0 deletions packages/ipfs/test/sharness/t0054-dag-car-import-export.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#!/usr/bin/env bash
#

# Human-readable description of this test script
# (presumably picked up by the test library sourced below - confirm).
test_description="Test car file import/export functionality"

# Load the shared test helpers.
. lib/test-lib.sh
# Export the ipfsi function so child bash processes can call it.
export -f ipfsi

# A pipeline now fails when any of its stages fails, not only the last one.
set -o pipefail

# Unpack the xz-compressed fixture archive into the data directory, dropping
# the leading path component of every archive member.
tar -C ../t0054-dag-car-import-export-data/ --strip-components=1 -Jxf ../t0054-dag-car-import-export-data/test_dataset_car_v0.tar.xz
# Literal tab character, interpolated into the expected-output heredocs below.
tab=$'\t'

test_cmp_sorted() {
  # Order-insensitive file comparison: $1 and $2 are considered equal when the
  # checksums of their sorted contents match.
  if [[ "$( sort "$1" | sha256sum )" == "$( sort "$2" | sha256sum )" ]]; then
    return 0
  fi

  # Mismatch: defer to test_cmp so the unsorted contents are dumped as a diff;
  # its exit status becomes ours.
  test_cmp "$1" "$2"
}
export -f test_cmp_sorted

# Bring one node back to a clean state and assert that it really is clean.
#   $1 - node identifier passed through to ipfsi
reset_blockstore() {
# Deliberately not `local`: the single-quoted test bodies below reference
# $node and are only expanded when test_expect_success evaluates them.
node=$1

# Unpin every recursive pin, then garbage-collect; output is discarded.
ipfsi "$node" pin ls --quiet --type=recursive | ipfsi "$node" pin rm &>/dev/null
ipfsi "$node" repo gc &>/dev/null

# Both checks pass only when the respective command prints nothing.
test_expect_success "pinlist empty" '
[[ -z "$( ipfsi $node pin ls )" ]]
'
test_expect_success "nothing left to gc" '
[[ -z "$( ipfsi $node repo gc )" ]]
'
}

# Run `dag import` on a node while garbage collection is hammering it
# concurrently, to make sure the two do not clash.
#   $1   - node identifier passed through to ipfsi
#   $2.. - arguments forwarded verbatim to `ipfsi <node> dag import`
# Returns the status of the import followed by `repo verify`.
do_import() {
  node="$1"
  shift
  (
    # spin.gc is the "keep going" flag polled by the background gc loops
    touch spin.gc

    # two background workers running gc back-to-back until the flag disappears
    for gc_worker in 1 2; do
      while [[ -e spin.gc ]]; do
        ipfsi "$node" repo gc &>/dev/null
      done &
    done

    ipfsi "$node" dag import "$@" 2>&1 && ipfsi "$node" repo verify &>/dev/null
    result=$?

    # drop the flag so the workers stop, then reap them
    rm -f spin.gc &>/dev/null
    wait || true # work around possible trigger of a bash bug on overloaded circleci
    exit $result
  )
}

run_online_imp_exp_tests() {

reset_blockstore 0
reset_blockstore 1

cat > basic_import_expected <<EOE
Pinned root${tab}bafkqaaa${tab}success
Pinned root${tab}bafy2bzaceaxm23epjsmh75yvzcecsrbavlmkcxnva66bkdebdcnyw3bjrc74u${tab}success
Pinned root${tab}bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy${tab}success
EOE

cat >naked_root_import_json_expected <<EOE
{"Root":{"Cid":{"/":"bafy2bzaceaxm23epjsmh75yvzcecsrbavlmkcxnva66bkdebdcnyw3bjrc74u"},"PinErrorMsg":""}}
{"Root":{"Cid":{"/":"bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy"},"PinErrorMsg":""}}
EOE


test_expect_success "basic import" '
do_import 0 \
../t0054-dag-car-import-export-data/combined_naked_roots_genesis_and_128.car \
../t0054-dag-car-import-export-data/lotus_testnet_export_128_shuffled_nulroot.car \
../t0054-dag-car-import-export-data/lotus_devnet_genesis_shuffled_nulroot.car \
> basic_import_actual
'

test_expect_success "basic import output as expected" '
test_cmp_sorted basic_import_expected basic_import_actual
'

test_expect_success "basic fetch+export 1" '
ipfsi 1 dag export bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy > reexported_testnet_128.car
'
test_expect_success "export of shuffled testnet export identical to canonical original" '
test_cmp reexported_testnet_128.car ../t0054-dag-car-import-export-data/lotus_testnet_export_128.car
'

test_expect_success "basic fetch+export 2" '
ipfsi 1 dag export bafy2bzaceaxm23epjsmh75yvzcecsrbavlmkcxnva66bkdebdcnyw3bjrc74u > reexported_devnet_genesis.car
'
test_expect_success "export of shuffled devnet export identical to canonical original" '
test_cmp reexported_devnet_genesis.car ../t0054-dag-car-import-export-data/lotus_devnet_genesis.car
'

test_expect_success "pinlist on node1 still empty" '
[[ -z "$( ipfsi 1 pin ls )" ]]
'

test_expect_success "import/pin naked roots only, relying on local blockstore having all the data" '
ipfsi 1 dag import --enc=json ../t0054-dag-car-import-export-data/combined_naked_roots_genesis_and_128.car \
> naked_import_result_json_actual
'

test_expect_success "naked import output as expected" '
test_cmp_sorted naked_root_import_json_expected naked_import_result_json_actual
'

reset_blockstore 0
reset_blockstore 1

mkfifo pipe_testnet
mkfifo pipe_devnet

test_expect_success "fifo import" '
(
cat ../t0054-dag-car-import-export-data/lotus_testnet_export_128_shuffled_nulroot.car > pipe_testnet &
cat ../t0054-dag-car-import-export-data/lotus_devnet_genesis_shuffled_nulroot.car > pipe_devnet &

do_import 0 \
pipe_testnet \
pipe_devnet \
../t0054-dag-car-import-export-data/combined_naked_roots_genesis_and_128.car \
> basic_fifo_import_actual
result=$?

wait || true # work around possible trigger of a bash bug on overloaded circleci
exit "$result"
)
'
Comment on lines +109 to +130

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rvagg the exercise of the GC-lock and the import of FIFOs may not be something you are too interested in testing within js-ipfs. Just raising it here as it was a very important part of making 🗡️ viable.


test_expect_success "remove fifos" '
rm pipe_testnet pipe_devnet
'

test_expect_success "fifo-import output as expected" '
test_cmp_sorted basic_import_expected basic_fifo_import_actual
'
}


# --- online tests: two-node testbed -----------------------------------------
# Create and initialise a testbed of two local ipfs nodes.
test_expect_success "set up testbed" '
iptb testbed create -type localipfs -count 2 -force -init
'
# NOTE(review): startup_cluster is defined outside this file; presumably it
# starts the 2 nodes and connects them - confirm against the test library.
startup_cluster 2

run_online_imp_exp_tests

test_expect_success "shut down nodes" '
iptb stop && iptb_wait_stop
'


# --- offline tests: single repo, no daemon -----------------------------------
# We want to just init the repo, without using a daemon for stuff below
test_init_ipfs


# Export output is discarded; only the exit status is checked.
test_expect_success "basic offline export of 'getting started' dag works" '
ipfs dag export "$HASH_WELCOME_DOCS" >/dev/null
'


# Exporting a CID that is not available must fail (note the leading `!`) and
# print exactly this message on stderr.
echo "Error: merkledag: not found (currently offline, perhaps retry after attaching to the network)" > offline_fetch_error_expected
test_expect_success "basic offline export of nonexistent cid" '
! ipfs dag export QmYwAPJXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 2> offline_fetch_error_actual >/dev/null
'
test_expect_success "correct error" '
test_cmp_sorted offline_fetch_error_expected offline_fetch_error_actual
'


# A CAR with three roots must report one JSON line per root; the comparison
# is order-insensitive (test_cmp_sorted).
cat >multiroot_import_json_expected <<EOE
{"Root":{"Cid":{"/":"bafy2bzaceb55n7uxyfaelplulk3ev2xz7gnq6crncf3ahnvu46hqqmpucizcw"},"PinErrorMsg":""}}
{"Root":{"Cid":{"/":"bafy2bzacebedrc4n2ac6cqdkhs7lmj5e4xiif3gu7nmoborihajxn3fav3vdq"},"PinErrorMsg":""}}
{"Root":{"Cid":{"/":"bafy2bzacede2hsme6hparlbr4g2x6pylj43olp4uihwjq3plqdjyrdhrv7cp4"},"PinErrorMsg":""}}
EOE
test_expect_success "multiroot import works" '
ipfs dag import --enc=json ../t0054-dag-car-import-export-data/lotus_testnet_export_256_multiroot.car > multiroot_import_json_actual
'
test_expect_success "multiroot import expected output" '
test_cmp_sorted multiroot_import_json_expected multiroot_import_json_actual
'


# With --pin-roots=false the import is expected to print nothing at all.
test_expect_success "pin-less import works" '
ipfs dag import --enc=json --pin-roots=false \
../t0054-dag-car-import-export-data/lotus_devnet_genesis.car \
../t0054-dag-car-import-export-data/lotus_testnet_export_128.car \
> no-pin_import_actual
'
test_expect_success "expected silence on --pin-roots=false" '
test_cmp /dev/null no-pin_import_actual
'


# Import a CAR that carries only roots; the expected-output file
# naked_root_import_json_expected was written by run_online_imp_exp_tests.
test_expect_success "naked root import works" '
ipfs dag import --enc=json ../t0054-dag-car-import-export-data/combined_naked_roots_genesis_and_128.car \
> naked_root_import_json_actual
'
test_expect_success "naked root import expected output" '
test_cmp_sorted naked_root_import_json_expected naked_root_import_json_actual
'

test_done