From 5f7c7af016c45276a9c2a2ddbef967b82a5a90bf Mon Sep 17 00:00:00 2001 From: Nathan Nowack Date: Mon, 15 Jan 2024 18:44:21 -0600 Subject: [PATCH 1/2] add more cookbook examples --- docs/ai/text/generation.md | 11 +- docs/examples/being_specific_about_types.md | 119 ++++++++++++++++++++ docs/examples/python_augmented_prompts.md | 90 +++++++++++++++ docs/welcome/quickstart.md | 4 +- mkdocs.yml | 2 + 5 files changed, 220 insertions(+), 6 deletions(-) create mode 100644 docs/examples/being_specific_about_types.md create mode 100644 docs/examples/python_augmented_prompts.md diff --git a/docs/ai/text/generation.md b/docs/ai/text/generation.md index ac48af5d6..866443942 100644 --- a/docs/ai/text/generation.md +++ b/docs/ai/text/generation.md @@ -35,11 +35,11 @@ Marvin can generate synthetic data according to a schema and instructions. Gener !!! success "Result" ```python - assert names == ['John', 'Emma', 'Michael', 'Sophia'] + print(names) # ['John', 'Emma', 'Michael', 'Sophia'] - assert french_names == ['Jean', 'Claire', 'Lucas', 'Emma'] + print(french_names) # ['Jean', 'Claire', 'Lucas', 'Emma'] - assert star_wars_names == ['Luke', 'Leia', 'Han', 'Anakin'] + print(star_wars_names) # ['Luke', 'Leia', 'Han', 'Anakin'] ``` === "Locations" @@ -63,12 +63,15 @@ Marvin can generate synthetic data according to a schema and instructions. Gener !!! success "Result" ```python - assert locations == [ + print(locations) + """ + [ Location(city='Washington', state='District of Columbia'), Location(city='Jackson', state='Mississippi'), Location(city='Cleveland', state='Ohio'), Location(city='Lincoln', state='Nebraska'), ] + """ ``` diff --git a/docs/examples/being_specific_about_types.md b/docs/examples/being_specific_about_types.md new file mode 100644 index 000000000..4984a264e --- /dev/null +++ b/docs/examples/being_specific_about_types.md @@ -0,0 +1,119 @@ +# Fully leveraging `pydantic` + +## `Annotated` and `Field` + +!!! 
example "Numbers in a valid range" + + Pydantic's `Field` lets us be very specific about what we want from the LLM. + + ```python + from typing import Annotated + import marvin + from pydantic import Field + from typing_extensions import TypedDict + + ActivationField = Field( + description=( + "A score between -1 (not descriptive) and 1" + " (very descriptive) for the given emotion" + ), + ge=-1, + le=1 + ) + + SentimentActivation = Annotated[float, ActivationField] + + class DetailedSentiment(TypedDict): + happy: SentimentActivation + sad: SentimentActivation + angry: SentimentActivation + surprised: SentimentActivation + amused: SentimentActivation + scared: SentimentActivation + + @marvin.fn + def sentiment_analysis(text: str) -> DetailedSentiment: + """Analyze the sentiment of a given text""" + + sentiment_analysis( + "dude i cannot believe how hard that" + " kangaroo just punched that guy 🤣" + " - he really had it coming, but glad he's ok" + ) + ``` + + !!! success "Result" + ```python + { + 'happy': 0.8, + 'sad': -0.1, + 'angry': -0.2, + 'surprised': 0.7, + 'amused': 1.0, + 'scared': -0.1 + } + ``` + +## Complex types + +!!! example "Using `BaseModel` and `Field`" + + To parse and validate complex nested types, use `BaseModel` and `Field`: + + + ```python + import marvin + from pydantic import BaseModel, Field + + class Location(BaseModel): + city: str + state: str | None = Field(description="Two-letter state code") + country: str + latitude: float | None = Field( + description="Latitude in degrees", + ge=-90, + le=90 + ) + longitude: float | None = Field( + description="Longitude in degrees", + ge=-180, + le=180 + ) + + class Traveler(BaseModel): + name: str + age: int | None = Field(description="Age in years") + + class Trip(BaseModel): + travelers: list[Traveler] + origin: Location + destination: Location + + trip = marvin.model(Trip)( + "Marvin and Ford are heading from Chi to SF for their 30th birthdays" + ) + ``` + + !!! 
success "Result" + ```python + Trip( + travelers=[ + Traveler(name='Marvin', age=30), + Traveler(name='Ford', age=30) + ], + origin=Location( + city='Chicago', + state='IL', + country='USA', + latitude=41.8781, + longitude=-87.6298 + ), + destination=Location( + city='San Francisco', + state='CA', + country='USA', + latitude=37.7749, + longitude=-122.4194 + ) + ) + ``` \ No newline at end of file diff --git a/docs/examples/python_augmented_prompts.md b/docs/examples/python_augmented_prompts.md new file mode 100644 index 000000000..dd2c05c12 --- /dev/null +++ b/docs/examples/python_augmented_prompts.md @@ -0,0 +1,90 @@ +# Augmenting prompts with Python + +## Web scraping + +!!! example "Fetch rich prompt material with Python" + + Using an http client to fetch HTML that an LLM will filter for a `list[RelatedArticle]`: + + ```python + import bs4 + import httpx + import marvin + from typing_extensions import TypedDict + + class RelatedArticle(TypedDict): + title: str + link: str + + + @marvin.fn + def retrieve_HN_articles(topic: str | None = None) -> list[RelatedArticle]: + """Retrieve only articles from HN that are related to a given topic""" + response = httpx.get("https://news.ycombinator.com/") + soup = bs4.BeautifulSoup(response.text, 'html.parser') + return [ + (link.text, link['href']) for link in soup.select('.titleline a') + ] + + retrieve_HN_articles("rust") + ``` + + + !!! success "Result" + ```python + [ + { + 'title': 'A lowering strategy for control effects in Rust', + 'link': 'https://www.abubalay.com/blog/2024/01/14/rust-effect-lowering' + }, + { + 'title': 'Show HN: A minimal working Rust / SDL2 / WASM browser game', + 'link': 'https://github.com/awwsmm/hello-rust-sdl2-wasm' + } + ] + ``` + + !!! 
Tip "Note" + You could also use `marvin.extract` to extract the `list[RelatedArticle]` from the output of the un-decorated function `retrieve_HN_articles`: + + ```python + related_articles = marvin.extract(retrieve_HN_articles(), RelatedArticle) + ``` + + +## Vectorstore-based RAG + +!!! example "Stuff `top k` document excerpts into a prompt" + + Querying a vectorstore for the `top k` most relevant document excerpts that an LLM will use to answer a question: + + ```python + from typing_extensions import TypedDict + import marvin + from marvin.tools.chroma import query_chroma # you must have a vectorstore with embedded documents + from marvin.utilities.asyncio import run_sync + + class Answer(TypedDict): + answer: str + supporting_links: list[str] | None + + @marvin.fn + def answer_question( + question: str, + top_k: int = 2, + style: str = "concise" + ) -> Answer: + """Answer a question given supporting context in the requested style""" + return run_sync(query_chroma(question, n_results=top_k)) + + answer_question("What are prefect blocks?", style="pirate") + ``` + + + !!! success "Result" + ```python + { + 'answer': "Ahoy! Prefect blocks be a primitive within Prefect fer storin' configuration and interfacin' with th' external systems. Ye can use 'em to manage credentials and interact with services like AWS, GitHub, and Slack. Arr, they be comin' with methods for uploadin' or downloadin' data, among other actions, and ye can register new ones with Prefect Cloud or server.", + 'supporting_links': ['https://docs.prefect.io/latest/concepts/blocks/'] + } + ``` \ No newline at end of file diff --git a/docs/welcome/quickstart.md b/docs/welcome/quickstart.md index df2db3a1a..f3c0b4ae7 100644 --- a/docs/welcome/quickstart.md +++ b/docs/welcome/quickstart.md @@ -375,8 +375,8 @@ Functions look like regular functions, but have no source code. 
Instead, an AI u @marvin.fn def sentiment_list(texts: list[str]) -> list[float]: """ - Given a list of `texts`, returns a list of numbers between 1 (positive) and - -1 (negative) indicating their respective sentiment scores. + Given a list of `texts`, returns a list of numbers between 1 + (positive) and -1 (negative) indicating the respective sentiment. """ diff --git a/mkdocs.yml b/mkdocs.yml index 95af21d95..59d301004 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -69,6 +69,8 @@ nav: - Entity deduplication: examples/deduplication.md # - GitHub Activity Digest: examples/github_digest.md - Slackbot: examples/slackbot.md + - Python augmented prompts: examples/python_augmented_prompts.md + - Being specific about types: examples/being_specific_about_types.md - Community: - community/index.md - Feedback 💙: community/feedback.md From a029609c5dcb3fcee00869f518250e8ba83b1a31 Mon Sep 17 00:00:00 2001 From: Nathan Nowack Date: Mon, 15 Jan 2024 19:59:42 -0600 Subject: [PATCH 2/2] syntax highlighting --- docs/ai/text/generation.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/ai/text/generation.md b/docs/ai/text/generation.md index 866443942..d50a24482 100644 --- a/docs/ai/text/generation.md +++ b/docs/ai/text/generation.md @@ -35,11 +35,14 @@ Marvin can generate synthetic data according to a schema and instructions. Gener !!! success "Result" ```python - print(names) # ['John', 'Emma', 'Michael', 'Sophia'] + print(names) + ['John', 'Emma', 'Michael', 'Sophia'] - print(french_names) # ['Jean', 'Claire', 'Lucas', 'Emma'] + print(french_names) + ['Jean', 'Claire', 'Lucas', 'Emma'] - print(star_wars_names) # ['Luke', 'Leia', 'Han', 'Anakin'] + print(star_wars_names) + ['Luke', 'Leia', 'Han', 'Anakin'] ``` === "Locations" @@ -64,14 +67,12 @@ Marvin can generate synthetic data according to a schema and instructions. 
Gener ```python print(locations) - """ [ Location(city='Washington', state='District of Columbia'), Location(city='Jackson', state='Mississippi'), Location(city='Cleveland', state='Ohio'), Location(city='Lincoln', state='Nebraska'), ] - """ ```