[Browser Rendering] Improve tutorial

cloudflare · Jan 14, 2025 · 99b5bb4 · 99b5bb4
1 parent dff72d6
commit 99b5bb4
Showing 1 changed file with 46 additions and 26 deletions.
diff --git a/src/content/docs/browser-rendering/how-to/ai.mdx b/src/content/docs/browser-rendering/how-to/ai.mdx
@@ -30,10 +30,15 @@ npm i zod
 npm i zod-to-json-schema
 ```
 
-3. Add your Browser Rendering binding to your new `wrangler.toml` configuration:
+3. Enable the `nodejs_compat` compatibility flag and add your Browser Rendering binding to your new `wrangler.toml` configuration:
 
 ```toml
-browser = { binding = "BROWSER" }
+compatibility_flags = [ "nodejs_compat" ]
+```
+
+```toml
+[browser]
+binding = "MY_BROWSER"
 ```
 
 4.  In order to use [Workers AI](/workers-ai/), you need to get your [Account ID and API token](/workers-ai/get-started/rest-api/#1-get-api-token-and-account-id).
@@ -54,7 +59,6 @@ Then, with the user prompt, the desired output schema and the rendered text, pre
 Replace the contents of `src/index.ts` with the following skeleton script:
 
 ```ts
-// src/index.ts
 import { z } from "zod";
 import puppeteer from "@cloudflare/puppeteer";
 import zodToJsonSchema from "zod-to-json-schema";
@@ -67,16 +71,17 @@ export default {
     }
 
     // Your prompt and site to scrape
-    const userPrompt = "Extract the first post";
-    const targetUrl = "https://news.ycombinator.com/";
+    const userPrompt = "Extract the first post only.";
+    const targetUrl = "https://labs.apnic.net/";
 
     // Launch browser
-    const browser = await puppeteer.launch(env.BROWSER);
+    const browser = await puppeteer.launch(env.MY_BROWSER);
     const page = await browser.newPage();
     await page.goto(targetUrl);
 
     // Get website text
     const renderedText = await page.evaluate(() => {
+      // @ts-ignore js code to run in the browser context
       const body = document.querySelector("body");
       return body ? body.innerText : "";
     });
@@ -85,22 +90,29 @@ export default {
 
     // define your desired json schema
     const outputSchema = zodToJsonSchema(
-      z.object({ title: z.string(), url: z.string(), totalComments: z.number() })
+      z.object({ title: z.string(), url: z.string(), date: z.string() })
     );
 
     // Example prompt
     const prompt = `
     You are a sophisticated web scraper. You are given the user data extraction goal and the JSON schema for the output data format.
     Your task is to extract the requested information from the text and output it in the specified JSON schema format:
+
         ${JSON.stringify(outputSchema)}
+
+    DO NOT include anything else besides the JSON output, no markdown, no plaintext, just JSON.
+
     User Data Extraction Goal: ${userPrompt}
+
     Text extracted from the webpage: ${renderedText}`;
 
     // TODO call llm
-    //const result = await this.getLLMResult(env, prompt, outputSchema);
+    //const result = await getLLMResult(env, prompt, outputSchema);
     //return Response.json(result);
   }
-};
+
+} satisfies ExportedHandler<Env>;
+
 ```
 
 ## Call an LLM
@@ -164,16 +176,17 @@ export default {
     }
 
     // Your prompt and site to scrape
-    const userPrompt = "Extract the first post";
-    const targetUrl = "https://news.ycombinator.com/";
+    const userPrompt = "Extract the first post only.";
+    const targetUrl = "https://labs.apnic.net/";
 
     // Launch browser
-    const browser = await puppeteer.launch(env.BROWSER);
+    const browser = await puppeteer.launch(env.MY_BROWSER);
     const page = await browser.newPage();
     await page.goto(targetUrl);
 
     // Get website text
     const renderedText = await page.evaluate(() => {
+      // @ts-ignore js code to run in the browser context
       const body = document.querySelector("body");
       return body ? body.innerText : "";
     });
@@ -182,23 +195,31 @@ export default {
 
     // define your desired json schema
     const outputSchema = zodToJsonSchema(
-      z.object({ title: z.string(), url: z.string(), totalComments: z.number() })
+      z.object({ title: z.string(), url: z.string(), date: z.string() })
     );
 
     // Example prompt
     const prompt = `
     You are a sophisticated web scraper. You are given the user data extraction goal and the JSON schema for the output data format.
     Your task is to extract the requested information from the text and output it in the specified JSON schema format:
+
         ${JSON.stringify(outputSchema)}
+
+    DO NOT include anything else besides the JSON output, no markdown, no plaintext, just JSON.
+
     User Data Extraction Goal: ${userPrompt}
+
     Text extracted from the webpage: ${renderedText}`;
 
     // call llm
-    const result = await this.getLLMResult(env, prompt, outputSchema);
+    const result = await getLLMResult(env, prompt, outputSchema);
     return Response.json(result);
-  },
+  }
+
+} satisfies ExportedHandler<Env>;
 
-  async getLLMResult(env, prompt: string, schema?: any) {
+
+async function getLLMResult(env, prompt: string, schema?: any) {
     const model = "@hf/thebloke/deepseek-coder-6.7b-instruct-awq"
     const requestBody = {
         messages: [{
@@ -213,7 +234,7 @@ export default {
       method: "POST",
       headers: {
         "Content-Type": "application/json",
-        Authorization: `Bearer ${env.LLM_API_KEY}`,
+        Authorization: `Bearer ${env.API_TOKEN}`,
       },
       body: JSON.stringify(requestBody),
     });
@@ -223,18 +244,15 @@ export default {
     }
 
     // process response
-    const data = await response.json();
+    const data = await response.json() as { result: { response: string }};
     const text = data.result.response || '';
     const value = (text.match(/```(?:json)?\s*([\s\S]*?)\s*```/) || [null, text])[1];
     try {
       return JSON.parse(value);
     } catch(e) {
       console.error(`${e} . Response: ${value}`)
     }
-  },
-};
-
-
+}
 ```
 
 You can run this script to test it using Wrangler's `--remote` flag:
@@ -247,8 +265,10 @@ With your script now running, you can go to `http://localhost:8787/` and should
 
 ```json
 {
-    "title": "Debugging: Indispensable rules for finding even the most elusive problems",
-    "url": "dwheeler.com",
-    "totalComments": 143
+  "title": "IP Addresses in 2024",
+  "url": "http://example.com/ip-addresses-in-2024",
+  "date": "11 Jan 2025"
 }
-```
+```
+
+For more complex websites or prompts, you might need a more capable model. Check out the latest models available in [Workers AI](/workers-ai/models/).