diff --git a/.changeset/lovely-pianos-breathe.md b/.changeset/lovely-pianos-breathe.md new file mode 100644 index 000000000000..d0d2df7923a6 --- /dev/null +++ b/.changeset/lovely-pianos-breathe.md @@ -0,0 +1,68 @@ +--- +'astro': minor +--- + +Adds a new optional `parser` property to the built-in `file()` loader for content collections to support additional file types such as `toml` and `csv`. + +The `file()` loader now accepts a second argument that defines a `parser` function. This allows you to specify a custom parser (e.g. `toml.parse` or `csv-parse`) to create a collection from a file's contents. The `file()` loader will automatically detect and parse JSON and YAML files (based on their file extension) with no need for a `parser`. + +This works with any type of custom file format, including `csv` and `toml`. The following example defines a content collection `dogs` using a `.toml` file. +```toml +[[dogs]] +id = "..." +age = "..." + +[[dogs]] +id = "..." +age = "..." +``` +After importing TOML's parser, you can load the `dogs` collection into your project by passing both a file path and `parser` to the `file()` loader. +```typescript +import { defineCollection } from "astro:content" +import { file } from "astro/loaders" +import { parse as parseToml } from "toml" + +const dogs = defineCollection({ + loader: file("src/data/dogs.toml", { parser: (text) => parseToml(text).dogs }), + schema: /* ... */ +}) + +// it also works with CSVs! +import { parse as parseCsv } from "csv-parse/sync"; + +const cats = defineCollection({ + loader: file("src/data/cats.csv", { parser: (text) => parseCsv(text, { columns: true, skipEmptyLines: true })}) +}); +``` + +The `parser` argument also allows you to load a single collection from a nested JSON document. 
For example, this JSON file contains multiple collections: +```json +{"dogs": [{}], "cats": [{}]} +``` + +You can separate these collections by passing a custom `parser` to the `file()` loader like so: +```typescript +const dogs = defineCollection({ + loader: file("src/data/pets.json", { parser: (text) => JSON.parse(text).dogs }) +}); +const cats = defineCollection({ + loader: file("src/data/pets.json", { parser: (text) => JSON.parse(text).cats }) +}); +``` + +And it continues to work with maps of `id` to `data` +```yaml +bubbles: + breed: "Goldfish" + age: 2 +finn: + breed: "Betta" + age: 1 +``` + +```typescript +const fish = defineCollection({ + loader: file("src/data/fish.yaml"), + schema: z.object({ breed: z.string(), age: z.number() }) +}); +``` diff --git a/packages/astro/src/content/loaders/file.ts b/packages/astro/src/content/loaders/file.ts index 22d498b12e39..d109f95b6994 100644 --- a/packages/astro/src/content/loaders/file.ts +++ b/packages/astro/src/content/loaders/file.ts @@ -1,25 +1,56 @@ import { promises as fs, existsSync } from 'node:fs'; import { fileURLToPath } from 'node:url'; +import yaml from 'js-yaml'; import { posixRelative } from '../utils.js'; import type { Loader, LoaderContext } from './types.js'; +export interface FileOptions { + /** + * the parsing function to use for this data + * @default JSON.parse or yaml.load, depending on the extension of the file + * */ + parser?: ( + text: string, + ) => Record> | Array>; +} + /** * Loads entries from a JSON file. The file must contain an array of objects that contain unique `id` fields, or an object with string keys. - * @todo Add support for other file types, such as YAML, CSV etc. * @param fileName The path to the JSON file to load, relative to the content directory. 
+ * @param options Additional options for the file loader */ -export function file(fileName: string): Loader { +export function file(fileName: string, options?: FileOptions): Loader { if (fileName.includes('*')) { // TODO: AstroError throw new Error('Glob patterns are not supported in `file` loader. Use `glob` loader instead.'); } + let parse: ((text: string) => any) | null = null; + + const ext = fileName.split('.').at(-1); + if (ext === 'json') { + parse = JSON.parse; + } else if (ext === 'yml' || ext === 'yaml') { + parse = (text) => + yaml.load(text, { + filename: fileName, + }); + } + if (options?.parser) parse = options.parser; + + if (parse === null) { + // TODO: AstroError + throw new Error( + `No parser found for file '${fileName}'. Try passing a parser to the \`file\` loader.`, + ); + } + async function syncData(filePath: string, { logger, parseData, store, config }: LoaderContext) { - let json: Array>; + let data: Array> | Record>; try { - const data = await fs.readFile(filePath, 'utf-8'); - json = JSON.parse(data); + const contents = await fs.readFile(filePath, 'utf-8'); + data = parse!(contents); } catch (error: any) { logger.error(`Error reading data from ${fileName}`); logger.debug(error.message); @@ -28,28 +59,28 @@ export function file(fileName: string): Loader { const normalizedFilePath = posixRelative(fileURLToPath(config.root), filePath); - if (Array.isArray(json)) { - if (json.length === 0) { + if (Array.isArray(data)) { + if (data.length === 0) { logger.warn(`No items found in ${fileName}`); } - logger.debug(`Found ${json.length} item array in ${fileName}`); + logger.debug(`Found ${data.length} item array in ${fileName}`); store.clear(); - for (const rawItem of json) { + for (const rawItem of data) { const id = (rawItem.id ?? 
rawItem.slug)?.toString(); if (!id) { logger.error(`Item in ${fileName} is missing an id or slug field.`); continue; } - const data = await parseData({ id, data: rawItem, filePath }); - store.set({ id, data, filePath: normalizedFilePath }); + const parsedData = await parseData({ id, data: rawItem, filePath }); + store.set({ id, data: parsedData, filePath: normalizedFilePath }); } - } else if (typeof json === 'object') { - const entries = Object.entries>(json); + } else if (typeof data === 'object') { + const entries = Object.entries>(data); logger.debug(`Found object with ${entries.length} entries in ${fileName}`); store.clear(); for (const [id, rawItem] of entries) { - const data = await parseData({ id, data: rawItem, filePath }); - store.set({ id, data, filePath: normalizedFilePath }); + const parsedData = await parseData({ id, data: rawItem, filePath }); + store.set({ id, data: parsedData, filePath: normalizedFilePath }); } } else { logger.error(`Invalid data in ${fileName}. Must be an array or object.`); diff --git a/packages/astro/test/content-layer.test.js b/packages/astro/test/content-layer.test.js index 5be3953867ed..abf91f36345f 100644 --- a/packages/astro/test/content-layer.test.js +++ b/packages/astro/test/content-layer.test.js @@ -53,11 +53,11 @@ describe('Content Layer', () => { assert.equal(json.customLoader.length, 5); }); - it('Returns `file()` loader collection', async () => { - assert.ok(json.hasOwnProperty('fileLoader')); - assert.ok(Array.isArray(json.fileLoader)); + it('Returns json `file()` loader collection', async () => { + assert.ok(json.hasOwnProperty('jsonLoader')); + assert.ok(Array.isArray(json.jsonLoader)); - const ids = json.fileLoader.map((item) => item.data.id); + const ids = json.jsonLoader.map((item) => item.data.id); assert.deepEqual(ids, [ 'labrador-retriever', 'german-shepherd', @@ -97,6 +97,58 @@ describe('Content Layer', () => { ); }); + it('Returns nested json `file()` loader collection', async () => { + 
assert.ok(json.hasOwnProperty('nestedJsonLoader')); + assert.ok(Array.isArray(json.nestedJsonLoader)); + + const ids = json.nestedJsonLoader.map((item) => item.data.id); + assert.deepEqual(ids, ['bluejay', 'robin', 'sparrow', 'cardinal', 'goldfinch']); + }); + + it('Returns yaml `file()` loader collection', async () => { + assert.ok(json.hasOwnProperty('yamlLoader')); + assert.ok(Array.isArray(json.yamlLoader)); + + const ids = json.yamlLoader.map((item) => item.id); + assert.deepEqual(ids, [ + 'bubbles', + 'finn', + 'shadow', + 'spark', + 'splash', + 'nemo', + 'angel-fish', + 'gold-stripe', + 'blue-tail', + 'bubble-buddy', + ]); + }); + + it('Returns toml `file()` loader collection', async () => { + assert.ok(json.hasOwnProperty('tomlLoader')); + assert.ok(Array.isArray(json.tomlLoader)); + + const ids = json.tomlLoader.map((item) => item.data.id); + assert.deepEqual(ids, [ + 'crown', + 'nikes-on-my-feet', + 'stars', + 'never-let-me-down', + 'no-church-in-the-wild', + 'family-ties', + 'somebody', + 'honest', + ]); + }); + + it('Returns nested json `file()` loader collection', async () => { + assert.ok(json.hasOwnProperty('nestedJsonLoader')); + assert.ok(Array.isArray(json.nestedJsonLoader)); + + const ids = json.nestedJsonLoader.map((item) => item.data.id); + assert.deepEqual(ids, ['bluejay', 'robin', 'sparrow', 'cardinal', 'goldfinch']); + }); + it('Returns data entry by id', async () => { assert.ok(json.hasOwnProperty('dataEntry')); assert.equal(json.dataEntry.filePath?.split(sep).join(posixSep), 'src/data/dogs.json'); @@ -276,10 +328,10 @@ describe('Content Layer', () => { }); it('Returns `file()` loader collection', async () => { - assert.ok(json.hasOwnProperty('fileLoader')); - assert.ok(Array.isArray(json.fileLoader)); + assert.ok(json.hasOwnProperty('jsonLoader')); + assert.ok(Array.isArray(json.jsonLoader)); - const ids = json.fileLoader.map((item) => item.data.id); + const ids = json.jsonLoader.map((item) => item.data.id); assert.deepEqual(ids, [ 
'labrador-retriever', 'german-shepherd', @@ -348,7 +400,7 @@ describe('Content Layer', () => { it('updates collection when data file is changed', async () => { const rawJsonResponse = await fixture.fetch('/collections.json'); const initialJson = devalue.parse(await rawJsonResponse.text()); - assert.equal(initialJson.fileLoader[0].data.temperament.includes('Bouncy'), false); + assert.equal(initialJson.jsonLoader[0].data.temperament.includes('Bouncy'), false); await fixture.editFile('/src/data/dogs.json', (prev) => { const data = JSON.parse(prev); @@ -359,7 +411,7 @@ describe('Content Layer', () => { await fixture.onNextDataStoreChange(); const updatedJsonResponse = await fixture.fetch('/collections.json'); const updated = devalue.parse(await updatedJsonResponse.text()); - assert.ok(updated.fileLoader[0].data.temperament.includes('Bouncy')); + assert.ok(updated.jsonLoader[0].data.temperament.includes('Bouncy')); await fixture.resetAllFiles(); }); }); diff --git a/packages/astro/test/fixtures/content-layer/package.json b/packages/astro/test/fixtures/content-layer/package.json index fc73ce6f7ac7..4057b1c35a64 100644 --- a/packages/astro/test/fixtures/content-layer/package.json +++ b/packages/astro/test/fixtures/content-layer/package.json @@ -4,6 +4,7 @@ "private": true, "dependencies": { "astro": "workspace:*", - "@astrojs/mdx": "workspace:*" + "@astrojs/mdx": "workspace:*", + "toml": "^3.0.0" } } diff --git a/packages/astro/test/fixtures/content-layer/src/content/config.ts b/packages/astro/test/fixtures/content-layer/src/content/config.ts index 402bad7fc5f5..776c44f6811e 100644 --- a/packages/astro/test/fixtures/content-layer/src/content/config.ts +++ b/packages/astro/test/fixtures/content-layer/src/content/config.ts @@ -1,6 +1,7 @@ import { defineCollection, z, reference } from 'astro:content'; import { file, glob } from 'astro/loaders'; import { loader } from '../loaders/post-loader.js'; +import { parse as parseToml } from 'toml'; const blog = defineCollection({ 
loader: loader({ url: 'https://jsonplaceholder.typicode.com/posts' }), @@ -118,6 +119,27 @@ const cats = defineCollection({ }), }); +const fish = defineCollection({ + loader: file('src/data/fish.yaml'), + schema: z.object({ + name: z.string(), + breed: z.string(), + age: z.number(), + }), +}); + +const birds = defineCollection({ + loader: file('src/data/birds.json', { + parser: (text) => JSON.parse(text).birds, + }), + schema: z.object({ + id: z.string(), + name: z.string(), + breed: z.string(), + age: z.number(), + }), +}); + // Absolute paths should also work const absoluteRoot = new URL('../../content/space', import.meta.url); @@ -198,14 +220,36 @@ const increment = defineCollection({ }, }); +const artists = defineCollection({ + loader: file('src/data/music.toml', { parser: (text) => parseToml(text).artists }), + schema: z.object({ + id: z.string(), + name: z.string(), + genre: z.string().array(), + }), +}); + +const songs = defineCollection({ + loader: file('src/data/music.toml', { parser: (text) => parseToml(text).songs }), + schema: z.object({ + id: z.string(), + name: z.string(), + artists: z.array(reference('artists')), + }), +}); + export const collections = { blog, dogs, cats, + fish, + birds, numbers, spacecraft, increment, images, + artists, + songs, probes, rodents, }; diff --git a/packages/astro/test/fixtures/content-layer/src/data/birds.json b/packages/astro/test/fixtures/content-layer/src/data/birds.json new file mode 100644 index 000000000000..3e7d83795a12 --- /dev/null +++ b/packages/astro/test/fixtures/content-layer/src/data/birds.json @@ -0,0 +1,34 @@ +{ + "birds": [ + { + "id": "bluejay", + "name": "Blue Jay", + "breed": "Cyanocitta cristata", + "age": 3 + }, + { + "id": "robin", + "name": "Robin", + "breed": "Turdus migratorius", + "age": 2 + }, + { + "id": "sparrow", + "name": "Sparrow", + "breed": "Passer domesticus", + "age": 1 + }, + { + "id": "cardinal", + "name": "Cardinal", + "breed": "Cardinalis cardinalis", + "age": 4 + }, + { + "id": 
"goldfinch", + "name": "Goldfinch", + "breed": "Spinus tristis", + "age": 2 + } + ] +} diff --git a/packages/astro/test/fixtures/content-layer/src/data/fish.yaml b/packages/astro/test/fixtures/content-layer/src/data/fish.yaml new file mode 100644 index 000000000000..a9ac4e4352b4 --- /dev/null +++ b/packages/astro/test/fixtures/content-layer/src/data/fish.yaml @@ -0,0 +1,42 @@ +# map of ids to data +bubbles: + name: "Bubbles" + breed: "Goldfish" + age: 2 +finn: + name: "Finn" + breed: "Betta" + age: 1 +shadow: + name: "Shadow" + breed: "Catfish" + age: 3 +spark: + name: "Spark" + breed: "Tetra" + age: 1 +splash: + name: "Splash" + breed: "Guppy" + age: 2 +nemo: + name: "Nemo" + breed: "Clownfish" + age: 3 +angel-fish: + name: "Angel Fish" + breed: "Angelfish" + age: 4 +gold-stripe: + name: "Gold Stripe" + breed: "Molly" + age: 1 +blue-tail: + name: "Blue Tail" + breed: "Swordtail" + age: 2 +bubble-buddy: + name: "Bubble Buddy" + breed: "Betta" + age: 3 + diff --git a/packages/astro/test/fixtures/content-layer/src/data/music.toml b/packages/astro/test/fixtures/content-layer/src/data/music.toml new file mode 100644 index 000000000000..89e15c9bbbf5 --- /dev/null +++ b/packages/astro/test/fixtures/content-layer/src/data/music.toml @@ -0,0 +1,89 @@ +[[artists]] +id = "kendrick-lamar" +name = "Kendrick Lamar" +genre = ["Hip-Hop", "Rap"] + +[[artists]] +id = "mac-miller" +name = "Mac Miller" +genre = ["Hip-Hop", "Rap"] + +[[artists]] +id = "jid" +name = "JID" +genre = ["Hip-Hop", "Rap"] + +[[artists]] +id = "yasiin-bey" +name = "Yasiin Bey" +genre = ["Hip-Hop", "Rap"] + +[[artists]] +id = "kanye-west" +name = "Kanye West" +genre = ["Hip-Hop", "Rap"] + +[[artists]] +id = "jay-z" +name = "JAY-Z" +genre = ["Hip-Hop", "Rap"] + +[[artists]] +id = "j-ivy" +name = "J. 
Ivy" +genre = ["Spoken Word", "Rap"] + +[[artists]] +id = "frank-ocean" +name = "Frank Ocean" +genre = ["R&B", "Hip-Hop"] + +[[artists]] +id = "the-dream" +name = "The-Dream" +genre = ["R&B", "Hip-Hop"] + +[[artists]] +id = "baby-keem" +name = "Baby Keem" +genre = ["Hip-Hop", "Rap"] + +[[songs]] +id = "crown" +name = "Crown" +artists = ["kendrick-lamar"] + +[[songs]] +id = "nikes-on-my-feet" +name = "Nikes on My Feet" +artists = ["mac-miller"] + +[[songs]] +id = "stars" +name = "Stars" +artists = ["jid", "yasiin-bey"] + +[[songs]] +id = "never-let-me-down" +name = "Never Let Me Down" +artists = ["kanye-west", "jay-z", "j-ivy"] + +[[songs]] +id = "no-church-in-the-wild" +name = "No Church In The Wild" +artists = ["jay-z", "kanye-west", "frank-ocean", "the-dream"] + +[[songs]] +id = "family-ties" +name = "family ties" +artists = ["kendrick-lamar", "baby-keem"] + +[[songs]] +id = "somebody" +name = "Somebody" +artists = ["jid"] + +[[songs]] +id = "honest" +name = "HONEST" +artists = ["baby-keem"] diff --git a/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js b/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js index 761ff7dba6fa..6bced27e45e3 100644 --- a/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js +++ b/packages/astro/test/fixtures/content-layer/src/pages/collections.json.js @@ -5,7 +5,7 @@ export async function GET() { const customLoader = await getCollection('blog', (entry) => { return entry.data.id < 6; }); - const fileLoader = await getCollection('dogs'); + const jsonLoader = await getCollection('dogs'); const dataEntry = await getEntry('dogs', 'beagle'); @@ -23,10 +23,17 @@ export async function GET() { const simpleLoaderObject = await getCollection('rodents') const probes = await getCollection('probes'); + + const yamlLoader = await getCollection('fish'); + + const tomlLoader = await getCollection('songs'); + + const nestedJsonLoader = await getCollection('birds'); + return new Response( 
devalue.stringify({ customLoader, - fileLoader, + jsonLoader, dataEntry, simpleLoader, simpleLoaderObject, @@ -35,7 +42,10 @@ export async function GET() { referencedEntry, increment, images, - probes - }) + probes, + yamlLoader, + tomlLoader, + nestedJsonLoader, + }), ); } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 39a5ebb0b5d5..fc0a04a966c9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -2722,6 +2722,9 @@ importers: astro: specifier: workspace:* version: link:../../.. + toml: + specifier: ^3.0.0 + version: 3.0.0 packages/astro/test/fixtures/content-layer-markdoc: dependencies: @@ -10411,6 +10414,9 @@ packages: resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==} engines: {node: '>=0.6'} + toml@3.0.0: + resolution: {integrity: sha512-y/mWCZinnvxjTKYhJ+pYxwD0mRLVvOtdS2Awbgxln6iEnt4rk0yBxeSBHkGJcPucRiG0e55mwWp+g/05rsrd6w==} + totalist@3.0.1: resolution: {integrity: sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==} engines: {node: '>=6'} @@ -16656,6 +16662,8 @@ snapshots: toidentifier@1.0.1: {} + toml@3.0.0: {} + totalist@3.0.1: {} tough-cookie@4.1.3: