Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: custom file formats in file content loader #12047

Merged
merged 12 commits into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions .changeset/lovely-pianos-breathe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
---
'astro': minor
---

Adds a new optional `parser` property to the built-in `file()` loader for content collections to support additional file types such as `toml` and `csv`.

The `file()` loader now accepts a second argument that defines a `parser` function. This allows you to specify a custom parser (e.g. `toml.parse` or `csv-parse`) to create a collection from a file's contents. The `file()` loader will automatically detect and parse JSON and YAML files (based on their file extension) with no need for a `parser`.

This works with any custom file format, including `csv` and `toml`. The following example defines a content collection `dogs` using a `.toml` file:
```toml
[[dogs]]
id = "..."
age = "..."

[[dogs]]
id = "..."
age = "..."
```
After importing TOML's parser, you can load the `dogs` collection into your project by passing both a file path and `parser` to the `file()` loader.
```typescript
import { defineCollection } from "astro:content"
import { file } from "astro/loaders"
import { parse as parseToml } from "toml"

const dogs = defineCollection({
loader: file("src/data/dogs.toml", { parser: (text) => parseToml(text).dogs }),
schema: /* ... */
})

// it also works with CSVs!
import { parse as parseCsv } from "csv-parse/sync";

const cats = defineCollection({
loader: file("src/data/cats.csv", { parser: (text) => parseCsv(text, { columns: true, skipEmptyLines: true })})
});
```

The `parser` argument also allows you to load a single collection from a nested JSON document. For example, this JSON file contains multiple collections:
```json
{"dogs": [{}], "cats": [{}]}
```

You can separate these collections by passing a custom `parser` to the `file()` loader like so:
```typescript
const dogs = defineCollection({
loader: file("src/data/pets.json", { parser: (text) => JSON.parse(text).dogs })
});
const cats = defineCollection({
loader: file("src/data/pets.json", { parser: (text) => JSON.parse(text).cats })
});
```

And it continues to work with maps of `id` to `data`:
```yaml
bubbles:
breed: "Goldfish"
age: 2
finn:
breed: "Betta"
age: 1
```

```typescript
const fish = defineCollection({
loader: file("src/data/fish.yaml"),
schema: z.object({ breed: z.string(), age: z.number() })
});
```
61 changes: 46 additions & 15 deletions packages/astro/src/content/loaders/file.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,56 @@
import { promises as fs, existsSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import yaml from 'js-yaml';
import { posixRelative } from '../utils.js';
import type { Loader, LoaderContext } from './types.js';

/**
 * Options accepted as the optional second argument of the `file()` loader.
 */
export interface FileOptions {
/**
* Custom function used to turn the raw text of the file into entries.
* It receives the file contents as a string and must return either an array of
* entry objects or an object that maps string keys to entry objects.
* @default JSON.parse or yaml.load, depending on the extension of the file
* */
parser?: (
text: string,
) => Record<string, Record<string, unknown>> | Array<Record<string, unknown>>;
}

/**
* Loads entries from a JSON or YAML file, or from any other file format when a custom `parser` is supplied. The parsed contents must be an array of objects that contain unique `id` (or `slug`) fields, or an object with string keys.
* @todo Add support for other file types, such as YAML, CSV etc.
* @param fileName The path to the file to load, relative to the content directory.
* @param options Additional options for the file loader
*/
export function file(fileName: string): Loader {
export function file(fileName: string, options?: FileOptions): Loader {
if (fileName.includes('*')) {
// TODO: AstroError
throw new Error('Glob patterns are not supported in `file` loader. Use `glob` loader instead.');
}

let parse: ((text: string) => any) | null = null;

const ext = fileName.split('.').at(-1);
if (ext === 'json') {
parse = JSON.parse;
} else if (ext === 'yml' || ext === 'yaml') {
parse = (text) =>
yaml.load(text, {
filename: fileName,
});
}
if (options?.parser) parse = options.parser;

if (parse === null) {
// TODO: AstroError
throw new Error(
`No parser found for file '${fileName}'. Try passing a parser to the \`file\` loader.`,
);
}

async function syncData(filePath: string, { logger, parseData, store, config }: LoaderContext) {
let json: Array<Record<string, unknown>>;
let data: Array<Record<string, unknown>> | Record<string, Record<string, unknown>>;

try {
const data = await fs.readFile(filePath, 'utf-8');
json = JSON.parse(data);
const contents = await fs.readFile(filePath, 'utf-8');
data = parse!(contents);
} catch (error: any) {
logger.error(`Error reading data from ${fileName}`);
logger.debug(error.message);
Expand All @@ -28,28 +59,28 @@ export function file(fileName: string): Loader {

const normalizedFilePath = posixRelative(fileURLToPath(config.root), filePath);

if (Array.isArray(json)) {
if (json.length === 0) {
if (Array.isArray(data)) {
if (data.length === 0) {
logger.warn(`No items found in ${fileName}`);
}
logger.debug(`Found ${json.length} item array in ${fileName}`);
logger.debug(`Found ${data.length} item array in ${fileName}`);
store.clear();
for (const rawItem of json) {
for (const rawItem of data) {
const id = (rawItem.id ?? rawItem.slug)?.toString();
if (!id) {
logger.error(`Item in ${fileName} is missing an id or slug field.`);
continue;
}
const data = await parseData({ id, data: rawItem, filePath });
store.set({ id, data, filePath: normalizedFilePath });
const parsedData = await parseData({ id, data: rawItem, filePath });
store.set({ id, data: parsedData, filePath: normalizedFilePath });
}
} else if (typeof json === 'object') {
const entries = Object.entries<Record<string, unknown>>(json);
} else if (typeof data === 'object') {
const entries = Object.entries<Record<string, unknown>>(data);
logger.debug(`Found object with ${entries.length} entries in ${fileName}`);
store.clear();
for (const [id, rawItem] of entries) {
const data = await parseData({ id, data: rawItem, filePath });
store.set({ id, data, filePath: normalizedFilePath });
const parsedData = await parseData({ id, data: rawItem, filePath });
store.set({ id, data: parsedData, filePath: normalizedFilePath });
}
} else {
logger.error(`Invalid data in ${fileName}. Must be an array or object.`);
Expand Down
70 changes: 61 additions & 9 deletions packages/astro/test/content-layer.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ describe('Content Layer', () => {
assert.equal(json.customLoader.length, 5);
});

it('Returns `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('fileLoader'));
assert.ok(Array.isArray(json.fileLoader));
it('Returns json `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('jsonLoader'));
assert.ok(Array.isArray(json.jsonLoader));

const ids = json.fileLoader.map((item) => item.data.id);
const ids = json.jsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, [
'labrador-retriever',
'german-shepherd',
Expand Down Expand Up @@ -97,6 +97,58 @@ describe('Content Layer', () => {
);
});

it('Returns nested json `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('nestedJsonLoader'));
assert.ok(Array.isArray(json.nestedJsonLoader));

const ids = json.nestedJsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, ['bluejay', 'robin', 'sparrow', 'cardinal', 'goldfinch']);
});

it('Returns yaml `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('yamlLoader'));
assert.ok(Array.isArray(json.yamlLoader));

const ids = json.yamlLoader.map((item) => item.id);
assert.deepEqual(ids, [
'bubbles',
'finn',
'shadow',
'spark',
'splash',
'nemo',
'angel-fish',
'gold-stripe',
'blue-tail',
'bubble-buddy',
]);
});

it('Returns toml `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('tomlLoader'));
assert.ok(Array.isArray(json.tomlLoader));

const ids = json.tomlLoader.map((item) => item.data.id);
assert.deepEqual(ids, [
'crown',
'nikes-on-my-feet',
'stars',
'never-let-me-down',
'no-church-in-the-wild',
'family-ties',
'somebody',
'honest',
]);
});

it('Returns nested json `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('nestedJsonLoader'));
assert.ok(Array.isArray(json.nestedJsonLoader));

const ids = json.nestedJsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, ['bluejay', 'robin', 'sparrow', 'cardinal', 'goldfinch']);
});

it('Returns data entry by id', async () => {
assert.ok(json.hasOwnProperty('dataEntry'));
assert.equal(json.dataEntry.filePath?.split(sep).join(posixSep), 'src/data/dogs.json');
Expand Down Expand Up @@ -276,10 +328,10 @@ describe('Content Layer', () => {
});

it('Returns `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('fileLoader'));
assert.ok(Array.isArray(json.fileLoader));
assert.ok(json.hasOwnProperty('jsonLoader'));
assert.ok(Array.isArray(json.jsonLoader));

const ids = json.fileLoader.map((item) => item.data.id);
const ids = json.jsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, [
'labrador-retriever',
'german-shepherd',
Expand Down Expand Up @@ -348,7 +400,7 @@ describe('Content Layer', () => {
it('updates collection when data file is changed', async () => {
const rawJsonResponse = await fixture.fetch('/collections.json');
const initialJson = devalue.parse(await rawJsonResponse.text());
assert.equal(initialJson.fileLoader[0].data.temperament.includes('Bouncy'), false);
assert.equal(initialJson.jsonLoader[0].data.temperament.includes('Bouncy'), false);

await fixture.editFile('/src/data/dogs.json', (prev) => {
const data = JSON.parse(prev);
Expand All @@ -359,7 +411,7 @@ describe('Content Layer', () => {
await fixture.onNextDataStoreChange();
const updatedJsonResponse = await fixture.fetch('/collections.json');
const updated = devalue.parse(await updatedJsonResponse.text());
assert.ok(updated.fileLoader[0].data.temperament.includes('Bouncy'));
assert.ok(updated.jsonLoader[0].data.temperament.includes('Bouncy'));
await fixture.resetAllFiles();
});
});
Expand Down
3 changes: 2 additions & 1 deletion packages/astro/test/fixtures/content-layer/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"private": true,
"dependencies": {
"astro": "workspace:*",
"@astrojs/mdx": "workspace:*"
"@astrojs/mdx": "workspace:*",
"toml": "^3.0.0"
}
}
44 changes: 44 additions & 0 deletions packages/astro/test/fixtures/content-layer/src/content/config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { defineCollection, z, reference } from 'astro:content';
import { file, glob } from 'astro/loaders';
import { loader } from '../loaders/post-loader.js';
import { parse as parseToml } from 'toml';

const blog = defineCollection({
loader: loader({ url: 'https://jsonplaceholder.typicode.com/posts' }),
Expand Down Expand Up @@ -118,6 +119,27 @@ const cats = defineCollection({
}),
});

const fish = defineCollection({
loader: file('src/data/fish.yaml'),
schema: z.object({
name: z.string(),
breed: z.string(),
age: z.number(),
}),
});

const birds = defineCollection({
loader: file('src/data/birds.json', {
parser: (text) => JSON.parse(text).birds,
}),
schema: z.object({
id: z.string(),
name: z.string(),
breed: z.string(),
age: z.number(),
}),
});

// Absolute paths should also work
const absoluteRoot = new URL('../../content/space', import.meta.url);

Expand Down Expand Up @@ -198,14 +220,36 @@ const increment = defineCollection({
},
});

const artists = defineCollection({
loader: file('src/data/music.toml', { parser: (text) => parseToml(text).artists }),
schema: z.object({
id: z.string(),
name: z.string(),
genre: z.string().array(),
}),
});

const songs = defineCollection({
loader: file('src/data/music.toml', { parser: (text) => parseToml(text).songs }),
schema: z.object({
id: z.string(),
name: z.string(),
artists: z.array(reference('artists')),
}),
});

export const collections = {
blog,
dogs,
cats,
fish,
birds,
numbers,
spacecraft,
increment,
images,
artists,
songs,
probes,
rodents,
};
34 changes: 34 additions & 0 deletions packages/astro/test/fixtures/content-layer/src/data/birds.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"birds": [
{
"id": "bluejay",
"name": "Blue Jay",
"breed": "Cyanocitta cristata",
"age": 3
},
{
"id": "robin",
"name": "Robin",
"breed": "Turdus migratorius",
"age": 2
},
{
"id": "sparrow",
"name": "Sparrow",
"breed": "Passer domesticus",
"age": 1
},
{
"id": "cardinal",
"name": "Cardinal",
"breed": "Cardinalis cardinalis",
"age": 4
},
{
"id": "goldfinch",
"name": "Goldfinch",
"breed": "Spinus tristis",
"age": 2
}
]
}
Loading
Loading