diff --git a/config/_default/goals.json b/config/_default/goals.json index a7ed907f..187644bb 100644 --- a/config/_default/goals.json +++ b/config/_default/goals.json @@ -249,7 +249,16 @@ }, "1.6": { "description": "Learn about the CAR format and how it helps data distribution", - "subgoals": [{}], + "subgoals": [ + { + "id": "1.61", + "description": "Get an idea of how the CAR format is beneficial to IPLD and how it is used today" + }, + { + "id": "1.62", + "description": "See the differences between the two CAR version formats" + } + ], "levels": ["deep"] } }, diff --git a/content/en/curriculum/ipld/the-car-format/index.md b/content/en/curriculum/ipld/the-car-format/index.md index 598b5ff1..52a9dfcc 100644 --- a/content/en/curriculum/ipld/the-car-format/index.md +++ b/content/en/curriculum/ipld/the-car-format/index.md @@ -9,6 +9,13 @@ weight: 270 category: lecture level: - deep +objectives: + show: true + goals: + - "1.6" + subgoals: + - 1.61 + - 1.62 --- ![](intro.png) @@ -48,6 +55,14 @@ CARv2 has a flexible approach to index formats. The header provides details abou The index at the end of the format provides information about what blocks are stored within the CARv1 data payload and _where_ they exist within the archive. A CARv2 reader implementation can load the index and then use its CID->offset mapping information to seek directly to the requested block and not have to hunt for it. The index _format_ is flexible, in that the first byte of the index identifies the format (which a given CARv2 implementation may or may not understand how to read) and the rest of the bytes conform to that format. There are currently two well-specified index formats, but there are a number of additional experimental index formats. Index formats may be selected depending on the suitability for a particular application or set of data - generation speed, usage performance, size, etc. 
Indexes typically only store the _Multihash_ of a block, rather than the entire CID, for efficiency reasons (but there are other interesting characteristics enabled by being able to look up a block by multihash rather than the entire CID, even if the _Multicodec_ is useful for decoding the block once it's found). +## Performance +Some considerations regarding performance: + +* Streaming: the CAR format is ideal for dumping blocks via streaming reads as the Header can be loaded first and minimal state is required for ongoing parsing. +* Individual block reads: as the CARv1 format contains no index information, reads require either a partial scan to discover the location of a required block or an external index that is maintained and referenced for a seek and partial read of that data. CARv2 addresses this with its built-in index; see the discussion of the CARv2 index above. +* DAG traversal: without an index, traversal of a DAG specified by a "root" CID is not possible without either dumping all blocks into a more convenient data store or performing partial scans to find each block as required, the latter of which will likely be too inefficient to be practical. +* Modification: CARs may be appended to after the initial write as there is no constraint in the Header regarding total length. Care must be taken when appending if a CAR is intended to contain coherent DAG data. + #### Further Reading The CARv1 and CARv2 specifications, including specifications for CARv2 index formats, can be found on the IPLD specifications site: [ipld.io/specs/transport/car](https://ipld.io/specs/transport/car/)