From 1dde616d2f26c6499d36d0e9e441424d946fc34f Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Fri, 1 Nov 2024 15:14:40 -0700 Subject: [PATCH] docs, shorthand, etc. --- docs/config.md | 42 ++++++---- docs/lib/duckdb.md | 91 ++++++++++++++++++++-- docs/sql.md | 46 +---------- package.json | 4 +- src/client/stdlib/duckdb.js | 10 +-- src/config.ts | 78 +++++++++++-------- src/duckdb.ts | 97 ++++++++++++++--------- src/npm.ts | 6 +- test/config-test.ts | 150 ++++++++++++++++++++++++++++++------ test/resolvers-test.ts | 8 +- 10 files changed, 361 insertions(+), 171 deletions(-) diff --git a/docs/config.md b/docs/config.md index 4070d02d3..cc2538be5 100644 --- a/docs/config.md +++ b/docs/config.md @@ -303,27 +303,43 @@ export default { ## duckdb -The **duckdb** option allows you to specify the DuckDB [extensions](./sql#extensions) that you want to self-host and make available in the `sql` and `DuckDBClient` instances. +The **duckdb** option configures [self-hosting](./lib/duckdb#self-hosting-of-extensions) and loading of [DuckDB extensions](./lib/duckdb#extensions) for use in [SQL code blocks](./sql) and the `sql` and `DuckDBClient` built-ins. For example, a geospatial data app might enable the [`spatial`](https://duckdb.org/docs/extensions/spatial/overview.html) and [`h3`](https://duckdb.org/community_extensions/extensions/h3.html) extensions like so: -Its **extensions** property is an object where keys are extension names, and values describe the **source** for the extension, and whether to **install** (self-host) it, and **load** it immediately. - -The **source** property is the reference of the repo from which to download the extension. It defaults to `core`, which points to `https://extensions.duckdb.org/`. You can use `core`, `community` (which points to `https://community-extensions.duckdb.org/`), or a custom URL, for example if you develop your own extensions. 
- -By default "json" and "parquet" are installed, but not loaded (since they are autoloaded, there is no reason to load them before we actually need them). If you don’t want to self-host an extension, set its **install** property to false. You will still be able to load it from its source by calling `INSTALL` and `LOAD`. +```js run=false +export default { + duckdb: { + extensions: ["spatial", "h3"] + } +}; +``` -As a shorthand, you can specify `name: true` to install and load the named extension from the "core" repository. (And `name: false` is shorthand for `{install: false, load: false}`.) +The **extensions** option can either be an array of extension names, or an object whose keys are extension names and whose values are configuration options for the given extension, including its **source** repository (defaulting to the keyword _core_ for core extensions, and otherwise _community_; can also be a custom repository URL), whether to **load** it immediately (defaulting to true, except for known extensions that support autoloading), and whether to **install** it (_i.e._ to self-host, defaulting to true). As additional shorthand, you can specify `[name]: true` to install and load the named extension from the default (_core_ or _community_) source repository, or `[name]: string` to install and load the named extension from the given source repository. -For example, a typical configuration for a geospatial data app might install and load “spatial” from `core` and “h3” from `community`: +The configuration above is equivalent to: ```js run=false -duckdb: { - extensions: { - spatial: true, - h3: {source: "community"} +export default { + duckdb: { + extensions: { + spatial: { + source: "https://extensions.duckdb.org/", + install: true, + load: true + }, + h3: { + source: "https://community-extensions.duckdb.org/", + install: true, + load: true + } + } } -} +}; ``` +The `json` and `parquet` extensions are configured (and therefore self-hosted) by default. 
To expressly disable self-hosting of an extension, you can set its **install** property to false, or equivalently pass null as the extension configuration object. + +For more, see [DuckDB extensions](./lib/duckdb#extensions). + ## markdownIt A hook for registering additional [markdown-it](https://github.com/markdown-it/markdown-it) plugins. For example, to use [markdown-it-footnote](https://github.com/markdown-it/markdown-it-footnote), first install the plugin with either `npm add markdown-it-footnote` or `yarn add markdown-it-footnote`, then register it like so: diff --git a/docs/lib/duckdb.md b/docs/lib/duckdb.md index e5fb4e1df..46b0ae140 100644 --- a/docs/lib/duckdb.md +++ b/docs/lib/duckdb.md @@ -65,7 +65,7 @@ const db2 = await DuckDBClient.of({base: FileAttachment("quakes.db")}); db2.queryRow(`SELECT COUNT() FROM base.events`) ``` -For externally-hosted data, you can create an empty `DuckDBClient` and load a table from a SQL query, say using [`read_parquet`](https://duckdb.org/docs/guides/import/parquet_import) or [`read_csv`](https://duckdb.org/docs/guides/import/csv_import). DuckDB offers many affordances to make this easier (in many cases it detects the file format and uses the correct loader automatically). +For externally-hosted data, you can create an empty `DuckDBClient` and load a table from a SQL query, say using [`read_parquet`](https://duckdb.org/docs/guides/import/parquet_import) or [`read_csv`](https://duckdb.org/docs/guides/import/csv_import). DuckDB offers many affordances to make this easier. (In many cases it detects the file format and uses the correct loader automatically.) ```js run=false const db = await DuckDBClient.of(); @@ -106,20 +106,95 @@ const sql = DuckDBClient.sql({quakes: `https://earthquake.usgs.gov/earthquakes/f SELECT * FROM quakes ORDER BY updated DESC; ``` -## Extensions +## Extensions -DuckDB’s [extensions](../sql#extensions) are supported. 
+[DuckDB extensions](https://duckdb.org/docs/extensions/overview.html) extend DuckDB’s functionality, adding support for additional file formats, new types, and domain-specific functions. For example, the [`json` extension](https://duckdb.org/docs/data/json/overview.html) provides a `read_json` method for reading JSON files: -By default, `DuckDBClient.of` and `DuckDBClient.sql` load the extensions referenced in the [configuration](../config#duckdb). If you want a different environment, you can pass options listing the extensions you want to load. +```sql echo +SELECT bbox FROM read_json('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_day.geojson'); +``` + +To read a local file (or data loader), use `FileAttachment` and interpolation `${…}`: + +```sql echo +SELECT bbox FROM read_json(${FileAttachment("../quakes.json").href}); +``` + +For convenience, Framework configures the `json` and `parquet` extensions by default. Some other [core extensions](https://duckdb.org/docs/extensions/core_extensions.html) also autoload, meaning that you don’t need to explicitly enable them; however, Framework will only [self-host extensions](#self-hosting-of-extensions) if you explicitly configure them, and therefore we recommend that you always use the [**duckdb** config option](../config#duckdb) to configure DuckDB extensions. Any configured extensions will be automatically [installed and loaded](https://duckdb.org/docs/extensions/overview#explicit-install-and-load), making them available in SQL code blocks as well as the `sql` and `DuckDBClient` built-ins. 
+ +For example, to configure the [`spatial` extension](https://duckdb.org/docs/extensions/spatial/overview.html): + +```js run=false +export default { + duckdb: { + extensions: ["spatial"] + } +}; +``` + +You can then use the `ST_Area` function to compute the area of a polygon: + +```sql echo run=false +SELECT ST_Area('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY) as area; +``` + +To tell which extensions have been loaded, you can run the following query: + +```sql echo +FROM duckdb_extensions() WHERE loaded; +``` + +
+ +If the `duckdb_extensions()` function runs before DuckDB autoloads a core extension (such as `json`), that extension might not be included in the returned set. -For example, pass an empty array to instantiate a DuckDBClient with no loaded extensions (even if your configuration lists several extensions): 
+ +### Self-hosting of extensions + +As with [npm imports](../imports#self-hosting-of-npm-imports), configured DuckDB extensions are self-hosted, improving performance, stability, & security, and allowing you to develop offline. Extensions are downloaded to the DuckDB cache folder, which lives in .observablehq/cache/_duckdb within the source root (typically `src`). You can clear the cache and restart the preview server to re-fetch the latest versions of any DuckDB extensions. If you use an [autoloading core extension](https://duckdb.org/docs/extensions/core_extensions.html#list-of-core-extensions) that is not configured, DuckDB-Wasm [will load it](https://duckdb.org/docs/api/wasm/extensions.html#fetching-duckdb-wasm-extensions) from the default extension repository, `extensions.duckdb.org`, at runtime. + +## Configuring + +The second argument to `DuckDBClient.of` and `DuckDBClient.sql` is a [`DuckDBConfig`](https://shell.duckdb.org/docs/interfaces/index.DuckDBConfig.html) object which configures the behavior of DuckDB-Wasm. By default, Framework sets the `castBigIntToDouble` and `castTimestampToDate` query options to true. To instead use [`BigInt`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/BigInt): + +```js run=false +const bigdb = DuckDBClient.of({}, {query: {castBigIntToDouble: false}}); +``` + +By default, `DuckDBClient.of` and `DuckDBClient.sql` automatically load all [configured extensions](#extensions). To change the loaded extensions for a particular `DuckDBClient`, use the **extensions** config option. 
For example, pass an empty array to instantiate a DuckDBClient with no loaded extensions (even if your configuration lists several): ```js echo run=false -const simpledb = DuckDBClient.of({}, {load: []}); +const simpledb = DuckDBClient.of({}, {extensions: []}); ``` -Or, create a geospatial tagged template literal: +Alternatively, you can configure extensions to be self-hosted but not loaded by default using the **duckdb** config option and the `load: false` shorthand: + +```js run=false +export default { + duckdb: { + extensions: { + spatial: false, + h3: false + } + } +}; +``` + +You can then selectively load extensions as needed like so: ```js echo run=false -const geosql = DuckDBClient.sql({}, {load: ["spatial", "h3"]}); +const geosql = DuckDBClient.sql({}, {extensions: ["spatial", "h3"]}); ``` + +In the future, we’d like to allow DuckDB to be configured globally (beyond just [extensions](#extensions)) via the [**duckdb** config option](../config#duckdb); please upvote [#1791](https://github.com/observablehq/framework/issues/1791) if you are interested in this feature. + +## Versioning + +Framework currently uses [DuckDB-Wasm 1.29.0](https://github.com/duckdb/duckdb-wasm/releases/tag/v1.29.0), which aligns with [DuckDB 1.1.1](https://github.com/duckdb/duckdb/releases/tag/v1.1.1). You can load a different version of DuckDB-Wasm by importing `npm:@duckdb/duckdb-wasm` directly, for example: + +```js run=false +import * as duckdb from "npm:@duckdb/duckdb-wasm@1.28.0"; +``` + +However, you will not be able to change the version of DuckDB-Wasm used by SQL code blocks or the `sql` or `DuckDBClient` built-ins, nor can you use Framework’s support for self-hosting extensions with a different version of DuckDB-Wasm. diff --git a/docs/sql.md b/docs/sql.md index fbdb82eb2..4748989ca 100644 --- a/docs/sql.md +++ b/docs/sql.md @@ -29,7 +29,7 @@ sql: 
For performance and reliability, we recommend using local files rather than loading data from external servers at runtime. You can use a data loader to take a snapshot of a remote data during build if needed.
-You can also register tables via code (say to have sources that are defined dynamically via user input) by defining the `sql` symbol with [DuckDBClient.sql](./lib/duckdb). +You can also register tables via code (say to have sources that are defined dynamically via user input) by defining the `sql` symbol with [DuckDBClient.sql](./lib/duckdb). To register [DuckDB extensions](./lib/duckdb#extensions), use the [**duckdb** config option](./config#duckdb). ## SQL code blocks @@ -206,47 +206,3 @@ Inputs.table(await sql([`SELECT * FROM gaia WHERE source_id IN (${[source_ids]}) When interpolating values into SQL queries, be careful to avoid [SQL injection](https://en.wikipedia.org/wiki/SQL_injection) by properly escaping or sanitizing user input. The example above is safe only because `source_ids` are known to be numeric. - -## Extensions - -DuckDB has a flexible extension mechanism that allows for dynamically loading extensions. These may extend DuckDB's functionality by providing support for additional file formats, introducing new types, and domain-specific functionality. - -Framework can download and host the extensions of your choice. By default, only "json" and "parquet" are self-hosted, but you can add more by specifying them in the [configuration](./config). The self-hosted extensions are served from the `/_duckdb/` directory with a content-hashed URL, ensuring optimal performance and allowing you to work offline and from a server you control. - -The self-hosted extensions are immediately available in all the `sql` code blocks and [DuckDBClient](./lib/duckdb) instances. 
For example, the query below works instantly since the "json" extension is configured: - -```sql echo -SELECT bbox FROM read_json('https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_day.geojson'); -``` - -Likewise, with the “spatial” extension configured, you could directly run: - -```sql echo run=false -SELECT ST_Area('POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))'::GEOMETRY) as area; -``` - -If you use an extension that is not self-hosted, DuckDB falls back to loading it directly from DuckDB’s servers. For example, this documentation does not have the “inet” extension configured for self-hosting. - -```sql echo -SELECT '127.0.0.1'::INET AS ipv4, '2001:db8:3c4d::/48'::INET AS ipv6; -``` - -During development, you can experiment freely with extensions that are not self-hosted. For example to try out the “h3” `community` extension: - -```sql echo run=false -INSTALL h3 FROM community; -LOAD h3; -SELECT format('{:x}', h3_latlng_to_cell(37.77, -122.43, 9)) AS cell_id; -``` - -(this returns the H3 cell [`892830828a3ffff`](https://h3geo.org/#hex=892830828a3ffff)) - -For performance and ergonomy, we strongly recommend adding all the extensions you actually use to the [configuration](./config#duckdb). - -
- -To tell which extensions are effectively in use on a page, inspect the network tab in your browser, or run the following query: `FROM duckdb_extensions() WHERE loaded;`. - -
- -These features are tied to DuckDB wasm’s 1.29 version, and strongly dependent on its development cycle. diff --git a/package.json b/package.json index 2ef44b4a3..16584430e 100644 --- a/package.json +++ b/package.json @@ -26,8 +26,8 @@ "test": "concurrently npm:test:mocha npm:test:tsc npm:test:lint npm:test:prettier", "test:coverage": "c8 --check-coverage --lines 80 --per-file yarn test:mocha", "test:build": "rimraf test/build && cross-env npm_package_version=1.0.0-test node build.js --sourcemap --outdir=test/build \"{src,test}/**/*.{ts,js,css}\" --ignore \"test/input/**\" --ignore \"test/output/**\" --ignore \"test/preview/dashboard/**\" --ignore \"**/*.d.ts\" && cp -r templates test/build", - "test:mocha": "yarn test:build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 -p \"test/build/test/**/*-test.js\" && yarn test:annotate", - "test:mocha:serial": "yarn test:build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/*-test.js\" && yarn test:annotate", + "test:mocha": "yarn test:build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 -p \"test/build/test/**/*-test.js\"", + "test:mocha:serial": "yarn test:build && rimraf --glob test/.observablehq/cache test/input/build/*/.observablehq/cache && cross-env OBSERVABLE_TELEMETRY_DISABLE=1 TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/*-test.js\"", "test:annotate": "yarn test:build && cross-env OBSERVABLE_ANNOTATE_FILES=true TZ=America/Los_Angeles mocha --timeout 30000 \"test/build/test/**/annotate.js\"", "test:lint": "eslint src test --max-warnings=0", "test:prettier": "prettier --check src test", diff --git 
a/src/client/stdlib/duckdb.js b/src/client/stdlib/duckdb.js index 508fccd63..52bdf9488 100644 --- a/src/client/stdlib/duckdb.js +++ b/src/client/stdlib/duckdb.js @@ -32,7 +32,6 @@ import * as duckdb from "npm:@duckdb/duckdb-wasm"; // Baked-in manifest. // eslint-disable-next-line no-undef const manifest = DUCKDB_MANIFEST; - const candidates = { ...(manifest.bundles.includes("mvp") && { mvp: { @@ -49,7 +48,6 @@ const candidates = { }; const bundle = await duckdb.selectBundle(candidates); const activePlatform = manifest.bundles.find((key) => bundle.mainModule === candidates[key].mainModule); - const logger = new duckdb.ConsoleLogger(duckdb.LogLevel.WARNING); let db; @@ -179,7 +177,7 @@ export class DuckDBClient { config = {...config, query: {...config.query, castBigIntToDouble: true}}; } await db.open(config); - await registerExtensions(db, config); + await registerExtensions(db, config.extensions); await Promise.all(Object.entries(sources).map(([name, source]) => insertSource(db, name, source))); return new DuckDBClient(db); } @@ -191,14 +189,14 @@ export class DuckDBClient { Object.defineProperty(DuckDBClient.prototype, "dialect", {value: "duckdb"}); -async function registerExtensions(db, {load}) { +async function registerExtensions(db, extensions = []) { const connection = await db.connect(); try { await Promise.all( - manifest.extensions.map(([name, {[activePlatform]: ref, load: l}]) => + manifest.extensions.map(([name, {[activePlatform]: ref, load}]) => connection .query(`INSTALL "${name}" FROM '${ref.startsWith("https://") ? ref : import.meta.resolve(`../..${ref}`)}'`) - .then(() => (load ? 
load.includes(name) : l) && connection.query(`LOAD "${name}"`)) + .then(() => load && extensions.includes(name) && connection.query(`LOAD "${name}"`)) ) ); } finally { diff --git a/src/config.ts b/src/config.ts index 376787da7..2357799ad 100644 --- a/src/config.ts +++ b/src/config.ts @@ -8,7 +8,7 @@ import {pathToFileURL} from "node:url"; import he from "he"; import type MarkdownIt from "markdown-it"; import wrapAnsi from "wrap-ansi"; -import {DUCKDBBUNDLES} from "./duckdb.js"; +import {DUCKDB_BUNDLES, DUCKDB_CORE_EXTENSIONS} from "./duckdb.js"; import {visitFiles} from "./files.js"; import {formatIsoDate, formatLocaleDate} from "./format.js"; import type {FrontMatter} from "./frontMatter.js"; @@ -79,7 +79,19 @@ export interface SearchConfigSpec { export interface DuckDBConfig { bundles: string[]; - extensions: {[name: string]: {install?: false; load: boolean; source: string}}; + extensions: {[name: string]: DuckDBExtensionConfig}; +} + +export interface DuckDBExtensionConfig { + source: string; + install: boolean; + load: boolean; +} + +interface DuckDBExtensionConfigSpec { + source: unknown; + install: unknown; + load: unknown; } export interface Config { @@ -510,34 +522,40 @@ export function stringOrNull(spec: unknown): string | null { return spec == null || spec === false ? null : String(spec); } -function duckDBExtensionSource(source?: string): string { - return source === undefined || source === "core" - ? "https://extensions.duckdb.org" - : source === "community" - ? "https://community-extensions.duckdb.org" - : (source = String(source)).startsWith("https://") - ? source - : (() => { - throw new Error(`unsupported DuckDB extension source ${source}`); - })(); -} - +// TODO convert array of names +// TODO configure bundles? function normalizeDuckDB(spec: unknown): DuckDBConfig { - const extensions: {[name: string]: any} = {}; - for (const [name, config] of Object.entries(spec?.["extensions"] ?? 
{json: {load: false}, parquet: {load: false}})) { - if (!/^\w+$/.test(name)) throw new Error(`illegal extension name ${name}`); - if (config != null) { - extensions[name] = - config === true - ? {load: true, install: true, source: duckDBExtensionSource()} - : config === false - ? {load: false, install: false, source: duckDBExtensionSource()} - : { - source: duckDBExtensionSource(config["source"]), - install: Boolean(config["install"] ?? true), - load: Boolean(config["load"] ?? true) - }; - } + const extensions: {[name: string]: DuckDBExtensionConfig} = {}; + let extspec: Record = spec?.["extensions"] ?? {}; + if (Array.isArray(extspec)) extspec = Object.fromEntries(extspec.map((name) => [name, {}])); + if (extspec.json === undefined) extspec = {...extspec, json: false}; + if (extspec.parquet === undefined) extspec = {...extspec, parquet: false}; + for (const name in extspec) { + if (!/^\w+$/.test(name)) throw new Error(`invalid extension: ${name}`); + const vspec = extspec[name]; + if (vspec == null) continue; + const { + source = DUCKDB_CORE_EXTENSIONS.some(([n]) => n === name) ? "core" : "community", + install = true, + load = !DUCKDB_CORE_EXTENSIONS.find(([n]) => n === name)?.[1] + } = typeof vspec === "boolean" + ? {load: vspec} + : typeof vspec === "string" + ? 
{source: vspec} + : (vspec as DuckDBExtensionConfigSpec); + extensions[name] = { + source: normalizeDuckDBSource(String(source)), + install: Boolean(install), + load: Boolean(load) + }; } - return {bundles: DUCKDBBUNDLES, extensions}; + return {bundles: DUCKDB_BUNDLES, extensions}; +} + +function normalizeDuckDBSource(source: string): string { + if (source === "core") return "https://extensions.duckdb.org/"; + if (source === "community") return "https://community-extensions.duckdb.org/"; + const url = new URL(source); + if (url.protocol !== "https:") throw new Error(`invalid source: ${source}`); + return String(url); } diff --git a/src/duckdb.ts b/src/duckdb.ts index 3fa4c4e94..e481ca9f0 100644 --- a/src/duckdb.ts +++ b/src/duckdb.ts @@ -6,21 +6,35 @@ import {faint} from "./tty.js"; const downloadRequests = new Map>(); -export const DUCKDBWASMVERSION = "1.29.0"; -export const DUCKDBVERSION = "1.1.1"; -export const DUCKDBBUNDLES = ["eh", "mvp"]; +export const DUCKDB_WASM_VERSION = "1.29.0"; +export const DUCKDB_VERSION = "1.1.1"; +export const DUCKDB_BUNDLES = ["eh", "mvp"]; -async function getDuckDBExtension( - root: string, - platform: string, - source: string, - name: string, - aliases?: Map -) { - let ext = await resolveDuckDBExtension(root, platform, source, name); - if (aliases?.has(ext)) ext = aliases.get(ext)!; - return dirname(dirname(dirname(ext))); -} +// https://duckdb.org/docs/extensions/core_extensions.html +export const DUCKDB_CORE_EXTENSIONS: [name: string, autoload: boolean][] = [ + ["arrow", false], + ["autocomplete", true], + ["aws", true], + ["azure", true], + ["delta", true], + ["excel", true], + ["fts", true], + ["httpfs", true], + ["iceberg", false], + ["icu", true], + ["inet", true], + ["jemalloc", false], + ["json", true], + ["mysql", false], + ["parquet", true], + ["postgres", true], + ["spatial", false], + ["sqlite", true], + ["substrait", false], + ["tpcds", true], + ["tpch", true], + ["vss", false] +]; export async function 
getDuckDBManifest( duckdb: DuckDBConfig, @@ -30,28 +44,38 @@ export async function getDuckDBManifest( bundles: duckdb.bundles, extensions: await Promise.all( Array.from(Object.entries(duckdb.extensions), ([name, {install, load, source}]) => - (async () => { - return [ - name, - { - install, - load, - ...Object.fromEntries( - await Promise.all( - duckdb.bundles.map(async (platform) => [ - platform, - install ? await getDuckDBExtension(root, platform, source, name, aliases) : source - ]) - ) + (async () => [ + name, + { + install, + load, + ...Object.fromEntries( + await Promise.all( + duckdb.bundles.map(async (platform) => [ + platform, + install ? await getDuckDBExtension(root, platform, source, name, aliases) : source + ]) ) - } - ]; - })() + ) + } + ])() ) ) }; } +async function getDuckDBExtension( + root: string, + platform: string, + source: string, + name: string, + aliases?: Map +) { + let ext = await resolveDuckDBExtension(root, platform, source, name); + if (aliases?.has(ext)) ext = aliases.get(ext)!; + return dirname(dirname(dirname(ext))); +} + /** * Given a duckdb configuration and an extension name such as "parquet", saves * the binary to _duckdb/{hash}/v1.1.1/wasm_{p}/parquet.duckdb_extension.wasm @@ -66,20 +90,19 @@ export async function resolveDuckDBExtension( repo: string, name: string ): Promise { - if (!repo.startsWith("https://")) throw new Error(`invalid repo: ${repo}`); const cache = join(root, ".observablehq", "cache"); const file = `${name}.duckdb_extension.wasm`; - const ref = `${repo}/v${DUCKDBVERSION}/wasm_${platform}/${file}`.slice("https://".length); - const path = join("_duckdb", ref); + const url = new URL(`v${DUCKDB_VERSION}/wasm_${platform}/${file}`, repo); + if (url.protocol !== "https:") throw new Error(`invalid repo: ${repo}`); + const path = join("_duckdb", String(url).slice("https://".length)); const cachePath = join(cache, path); if (existsSync(cachePath)) return `/${path}`; let promise = downloadRequests.get(cachePath); if 
(promise) return promise; // coalesce concurrent requests promise = (async () => { - const href = `https://${ref}`; - console.log(`duckdb:${href} ${faint("→")} ${cachePath}`); - const response = await fetch(href); - if (!response.ok) throw new Error(`unable to fetch: ${href}`); + console.log(`duckdb:${url} ${faint("→")} ${cachePath}`); + const response = await fetch(url); + if (!response.ok) throw new Error(`unable to fetch: ${url}`); await mkdir(dirname(cachePath), {recursive: true}); await writeFile(cachePath, Buffer.from(await response.arrayBuffer())); return `/${path}`; diff --git a/src/npm.ts b/src/npm.ts index e614b57b2..984080df6 100644 --- a/src/npm.ts +++ b/src/npm.ts @@ -4,7 +4,7 @@ import {dirname, extname, join} from "node:path/posix"; import type {CallExpression} from "acorn"; import {simple} from "acorn-walk"; import {maxSatisfying, rsort, satisfies, validRange} from "semver"; -import {DUCKDBWASMVERSION} from "./duckdb.js"; +import {DUCKDB_WASM_VERSION} from "./duckdb.js"; import {isEnoent} from "./error.js"; import annotate from "./javascript/annotate.js"; import type {ExportNode, ImportNode, ImportReference} from "./javascript/imports.js"; @@ -164,7 +164,7 @@ export async function getDependencyResolver( (name === "arquero" || name === "@uwdata/mosaic-core" || name === "@duckdb/duckdb-wasm") && depName === "apache-arrow" // prettier-ignore ? "latest" // force Arquero, Mosaic & DuckDB-Wasm to use the (same) latest version of Arrow : name === "@uwdata/mosaic-core" && depName === "@duckdb/duckdb-wasm" - ? DUCKDBWASMVERSION // force Mosaic to use the latest (stable) version of DuckDB-Wasm + ? DUCKDB_WASM_VERSION // force Mosaic to use the latest (stable) version of DuckDB-Wasm : pkg.dependencies?.[depName] ?? pkg.devDependencies?.[depName] ?? pkg.peerDependencies?.[depName] ?? 
@@ -250,7 +250,7 @@ async function resolveNpmVersion(root: string, {name, range}: NpmSpecifier): Pro export async function resolveNpmImport(root: string, specifier: string): Promise { const { name, - range = name === "@duckdb/duckdb-wasm" ? DUCKDBWASMVERSION : undefined, + range = name === "@duckdb/duckdb-wasm" ? DUCKDB_WASM_VERSION : undefined, path = name === "mermaid" ? "dist/mermaid.esm.min.mjs/+esm" : name === "echarts" diff --git a/test/config-test.ts b/test/config-test.ts index 9138d678b..7be173cdc 100644 --- a/test/config-test.ts +++ b/test/config-test.ts @@ -8,14 +8,14 @@ const DUCKDB_DEFAULTS = { bundles: ["eh", "mvp"], extensions: { json: { + source: "https://extensions.duckdb.org/", install: true, - load: false, - source: "https://extensions.duckdb.org" + load: false }, parquet: { + source: "https://extensions.duckdb.org/", install: true, - load: false, - source: "https://extensions.duckdb.org" + load: false } } }; @@ -470,13 +470,54 @@ describe("normalizeConfig(duckdb)", () => { const {duckdb} = config({}, root); assert.deepEqual(duckdb, DUCKDB_DEFAULTS); }); - it("supports install:false and load:false", () => { + it("supports install: false and load: false", () => { const {duckdb} = config({duckdb: {extensions: {json: {install: false, load: false}}}}, root); assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, json: { + source: "https://extensions.duckdb.org/", install: false, - load: false, - source: "https://extensions.duckdb.org" + load: false + } + }); + }); + it("supports null", () => { + const {duckdb} = config({duckdb: {extensions: {json: null}}}, root); + assert.deepEqual( + duckdb.extensions, + Object.fromEntries(Object.entries(DUCKDB_DEFAULTS.extensions).filter(([name]) => name !== "json")) + ); + }); + it("defaults load: false for known auto-loading extensions", () => { + const {duckdb} = config({duckdb: {extensions: {aws: {}}}}, root); + assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, + aws: { + 
source: "https://extensions.duckdb.org/", + install: true, + load: false + } + }); + }); + it("defaults source: core for known core extensions", () => { + const {duckdb} = config({duckdb: {extensions: {mysql: {}}}}, root); + assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, + mysql: { + source: "https://extensions.duckdb.org/", + install: true, + load: true + } + }); + }); + it("defaults source: community for unknown extensions", () => { + const {duckdb} = config({duckdb: {extensions: {h3: {}}}}, root); + assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, + h3: { + source: "https://community-extensions.duckdb.org/", + install: true, + load: true } }); }); @@ -484,45 +525,108 @@ describe("normalizeConfig(duckdb)", () => { const {duckdb} = config( { duckdb: { - extensions: {foo: {source: "core"}, bar: {source: "community"}, baz: {source: "https://custom-domain"}} + extensions: { + foo: {source: "core"}, + bar: {source: "community"}, + baz: {source: "https://custom-domain"} + } } }, root ); assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, foo: { + source: "https://extensions.duckdb.org/", install: true, - load: true, - source: "https://extensions.duckdb.org" + load: true }, bar: { + source: "https://community-extensions.duckdb.org/", install: true, - load: true, - source: "https://community-extensions.duckdb.org" + load: true }, baz: { + source: "https://custom-domain/", // URL normalization install: true, - load: true, - source: "https://custom-domain" + load: true } }); }); - it("supports shorthand", () => { - const {duckdb} = config({duckdb: {extensions: {foo: true, bar: false}}}, root); + it("supports source: string shorthand", () => { + const {duckdb} = config( + { + duckdb: { + extensions: { + foo: "core", + bar: "community", + baz: "https://custom-domain" + } + } + }, + root + ); assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, foo: { + source: "https://extensions.duckdb.org/", 
install: true, - load: true, - source: "https://extensions.duckdb.org" + load: true }, bar: { - install: false, - load: false, - source: "https://extensions.duckdb.org" + source: "https://community-extensions.duckdb.org/", + install: true, + load: true + }, + baz: { + source: "https://custom-domain/", // URL normalization + install: true, + load: true + } + }); + }); + it("supports load: boolean shorthand", () => { + const {duckdb} = config({duckdb: {extensions: {json: true, foo: true, bar: false}}}, root); + assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, + json: { + source: "https://extensions.duckdb.org/", + install: true, + load: true + }, + foo: { + source: "https://community-extensions.duckdb.org/", + install: true, + load: true + }, + bar: { + source: "https://community-extensions.duckdb.org/", + install: true, + load: false + } + }); + }); + it("supports sources shorthand", () => { + const {duckdb} = config({duckdb: {extensions: ["spatial", "h3"]}}, root); + assert.deepEqual(duckdb.extensions, { + ...DUCKDB_DEFAULTS.extensions, + spatial: { + source: "https://extensions.duckdb.org/", + install: true, + load: true + }, + h3: { + source: "https://community-extensions.duckdb.org/", + install: true, + load: true } }); }); - it("rejects illegal names", () => { - assert.throws(() => config({duckdb: {extensions: {"*^/": true}}}, root)); + it("rejects invalid names", () => { + assert.throws(() => config({duckdb: {extensions: {"*^/": true}}}, root), /invalid extension/i); + }); + it("rejects invalid sources", () => { + assert.throws(() => config({duckdb: {extensions: {foo: "file:///path/to/extension"}}}, root), /invalid source/i); + assert.throws(() => config({duckdb: {extensions: {foo: "notasource"}}}, root), /invalid url/i); }); }); diff --git a/test/resolvers-test.ts b/test/resolvers-test.ts index ca886e503..090d34d21 100644 --- a/test/resolvers-test.ts +++ b/test/resolvers-test.ts @@ -88,8 +88,8 @@ describe("getResolvers(page, {root, 
path})", () => { }); }); -describe("resolveLink(href) with {cleanUrls: false}", () => { - const options = getOptions({root: "test/input", path: "sub/index.html", cleanUrls: false}); +describe("resolveLink(href) with {preserveExtension: true}", () => { + const options = getOptions({root: "test/input", path: "sub/index.html", preserveExtension: true}); const page = parseMarkdown("", options); async function getResolveLink() { const resolvers = await getResolvers(page, options); @@ -163,8 +163,8 @@ describe("resolveLink(href) with {cleanUrls: false}", () => { }); }); -describe("resolveLink(href) with {cleanUrls: true}", () => { - const options = getOptions({root: "test/input", path: "sub/index.html", cleanUrls: true}); +describe("resolveLink(href) with {preserveExtension: false}", () => { + const options = getOptions({root: "test/input", path: "sub/index.html", preserveExtension: false}); const page = parseMarkdown("", options); async function getResolveLink() { const resolvers = await getResolvers(page, options);