Merge pull request #779 from PrefectHQ/image-docs

Improve image generation docs
PrefectHQ · Jan 17, 2024 · 7da41ef · 7da41ef
2 parents d968da6 + 51bbb1e
commit 7da41ef
Show file tree

Hide file tree

Showing 14 changed files with 132 additions and 64 deletions.
diff --git a/README.md b/README.md
@@ -32,7 +32,7 @@ To verify your installation, run `marvin version` in your terminal.
 
 ## Tools
 
-Marvin consists of a variety of useful tools, all designed to be used independently. Each one represents a common LLM use case, and is designed to package that power into a simple, self-documenting interface.
+Marvin consists of a variety of useful tools, all designed to be used independently. Each one represents a common LLM use case, and packages that power into a simple, self-documenting interface.
 
 ### General
 

diff --git a/docs/assets/images/docs/images/robot.png b/docs/assets/images/docs/images/robot.png
diff --git a/docs/assets/images/docs/images/sunset_scifi.png b/docs/assets/images/docs/images/sunset_scifi.png
diff --git a/docs/assets/images/docs/images/sunset_summer.png b/docs/assets/images/docs/images/sunset_summer.png
diff --git a/docs/assets/images/docs/images/sunset_winter.png b/docs/assets/images/docs/images/sunset_winter.png
diff --git a/docs/docs/images/generation.md b/docs/docs/images/generation.md
@@ -57,48 +57,74 @@ In addition to passing prompts directly to the DALLE-3 API via the `paint` funct
 ```python
 @marvin.image
 def sunset(style: str, season: str):
-    return f"A view of a sunset, in the style of {style}, during {season}"
+    return f"""
+    A serene and empty beach scene during sunset with two silhouetted figures in the distance flying a kite. The sky is full of colorful clouds. Nothing is on the horizon.
+
+    It is {season} and the image is in the style of {style}.
+    """
 ```
 
 <div class="grid cards" markdown>
 - **Nature photograph in summer**
 
-    ---
+    ----
 
     ```python
     sunset(
         style="nature photography",
-        season="summer"
+        season="summer",
     )
     ```
     ![](/assets/images/docs/images/sunset_summer.png)
 
 - **Winter impressionism**
 
-    ---
+    ----
 
     ```python
     sunset(
         style="impressionism",
-        season="winter"
+        season="winter",
     )
     ```
 
     ![](/assets/images/docs/images/sunset_winter.png)
 
-- **Something else**
+- **Sci-fi Christmas in Australia**
 
-    ---
+    ----
 
     ```python
     sunset(
         style="sci-fi movie poster",
-        season="Christmas in Australia"
+        season="Christmas in Australia",
     )
     ```
+
     ![](/assets/images/docs/images/sunset_scifi.png)
 
-  </div>
+</div>
+
+## Model parameters
+
+You can pass parameters to the DALL-E 3 API via the `model_kwargs` argument of `paint` or `@image`. These parameters are passed directly to the API, so you can use any supported parameter.
+
+!!! example "Example: model parameters"
+    ```python
+    import marvin
+
+    image = marvin.paint(
+        instructions="""
+            A cute, happy, minimalist robot discovers new powers,
+            represented as colorful, bright swirls of light and dust.
+            Dark background. Digital watercolor.
+            """,
+        model_kwargs=dict(size="1792x1024", quality="hd"),
+    )
+    ```
+
+    !!! success "Result"
+        ![](/assets/images/docs/images/robot.png)
 
 ## Disabling prompt revision
 
@@ -152,42 +178,46 @@ Using the original prompt "a teacup", here are the results of calling this funct
 <div class="grid cards" markdown>
 - **No revision**
 
-    ---
+    ***
+
     ```python
     generate_image(
-      "a teacup",
-      revision_amount=0
+        "a teacup",
+        revision_amount=0,
     )
     ```
-    Final prompt:
 
     ![](/assets/images/docs/images/teacup_revision_0.png)
+
+    Final prompt:
+
     > a teacup
 
 - **25% revision**
 
-    ---
+    ***
 
     ```python
     generate_image(
         "a teacup",
-        revision_amount=0.25
+        revision_amount=0.25,
     )
     ```
 
     ![](/assets/images/docs/images/teacup_revision_025.png)
+
     Final prompt:
 
     > a porcelain teacup with intricate detailing, sitting on an oak table
 
 - **75% revision**
 
-    ---
+    ***
 
     ```python
     generate_image(
         "a teacup",
-        revision_amount=0.75
+        revision_amount=0.75,
     )
     ```
 
@@ -199,12 +229,12 @@ Using the original prompt "a teacup", here are the results of calling this funct
 
 - **100% revision**
 
-    ---
+    ***
 
     ```python
     generate_image(
         "a teacup",
-        revision_amount=1
+        revision_amount=1,
     )
     ```
 
@@ -243,7 +273,7 @@ image = marvin.paint(
 
 # save the image to disk
 marvin.utilities.images.base64_to_image(
-    image.data[0].b64_json, 
+    image.data[0].b64_json,
     path='path/to/your/image.png',
 )
 ```

diff --git a/docs/docs/text/classification.md b/docs/docs/text/classification.md
@@ -10,7 +10,7 @@ Marvin has a powerful classification tool that can be used to categorize text in
 </div>
 
 
-!!! example
+!!! example "Example: categorize user feedback"
     Categorize user feedback into labels such as "bug", "feature request", or "inquiry":
 
     ```python
@@ -45,15 +45,22 @@ Marvin's classification tool is designed to accommodate a variety of label forma
 
 When quick, ad-hoc categorization is required, a simple list of strings is the most straightforward approach. For example:
 
-```python
-response = marvin.classify(
-    "Reset my password", 
-    labels=["support request", "account issue", "general inquiry"]
-)
-assert response == "account issue"
-```
+!!! example "Example: sentiment analysis"
+
+    ```python
+    import marvin
+
+    sentiment = marvin.classify(
+        "Marvin is so easy to use!", 
+        labels=["positive", "negative", "meh"]
+    )
+    ```
+
+    !!! success "Result"
+        ```python
+        assert sentiment == "positive"
+        ```
 
-Here, the function easily discerns the nature of the request, demonstrating Marvin's adeptness at handling diverse categorization tasks.
 
 ### Enums
 

diff --git a/docs/docs/vision/extraction.md b/docs/docs/vision/extraction.md
@@ -27,22 +27,24 @@ The `marvin.beta.extract` function is an enhanced version of `marvin.extract` th
 
 
 
-!!! example "Example: Dog breeds"
+!!! example "Example: identifying dogs"
 
     We will extract the breed of each dog in this image:
 
-    ![](https://images.unsplash.com/photo-1548199973-03cce0bbc87b?q=80&w=2969&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D)
+    ![](https://images.unsplash.com/photo-1548199973-03cce0bbc87b)
 
 
     ```python
     import marvin
 
-    img = marvin.beta.Image('https://images.unsplash.com/photo-1548199973-03cce0bbc87b?q=80&w=2969&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D')
+    img = marvin.beta.Image(
+        "https://images.unsplash.com/photo-1548199973-03cce0bbc87b",
+    )
 
-    result = marvin.beta.extract(img, target=str, instructions='dog breeds')
+    result = marvin.beta.extract(img, target=str, instructions="dog breeds")
     ```
 
     !!! success "Result"
         ```python
-        result == ['Pembroke Welsh Corgi', 'Yorkshire Terrier']
+        result == ["Pembroke Welsh Corgi", "Yorkshire Terrier"]
         ```    
diff --git a/docs/docs/vision/transformation.md b/docs/docs/vision/transformation.md
@@ -2,10 +2,7 @@
 
 Marvin can use OpenAI's vision API to process images and convert them into structured data, transforming unstructured information into native types that are appropriate for a variety of programmatic use cases.
 
-The `marvin.beta.cast` function is an enhanced version of `marvin.cast` that accepts images as well as text. 
-
-
-
+The `marvin.beta.cast` function is an enhanced version of `marvin.cast` that accepts images as well as text.
 
 !!! tip "Beta"
     Please note that vision support in Marvin is still in beta, as OpenAI has not finalized the vision API yet. While it works as expected, it is subject to change.
@@ -17,7 +14,6 @@ The `marvin.beta.cast` function is an enhanced version of `marvin.cast` that acc
   </p>
 </div>
 
-
 <div class="admonition info">
   <p class="admonition-title">How it works</p>
   <p>
@@ -28,15 +24,13 @@ The `marvin.beta.cast` function is an enhanced version of `marvin.cast` that acc
 </div>
 
 
-
-
-!!! example "Example: Locations"
+!!! example "Example: locations"
 
     We will cast this image to a `Location` type:
 
     ![](https://images.unsplash.com/photo-1568515387631-8b650bbcdb90)
 
-    
+
     ```python
     import marvin
     from pydantic import BaseModel, Field
@@ -47,7 +41,9 @@ The `marvin.beta.cast` function is an enhanced version of `marvin.cast` that acc
         state: str = Field(description="2-letter state abbreviation")
 
 
-    img = marvin.beta.Image('https://images.unsplash.com/photo-1568515387631-8b650bbcdb90')
+    img = marvin.beta.Image(
+        "https://images.unsplash.com/photo-1568515387631-8b650bbcdb90",
+    )
     result = marvin.beta.cast(img, target=Location)
     ```
 
@@ -56,15 +52,13 @@ The `marvin.beta.cast` function is an enhanced version of `marvin.cast` that acc
         assert result == Location(city="New York", state="NY")
         ```
 
-
-
 !!! example "Example: getting information about a book"
 
     We will cast this image to a `Book` to extract key information:
 
     ![](https://hastie.su.domains/ElemStatLearn/CoverII_small.jpg){ width="250" }
 
-    
+
     ```python
     import marvin
     from pydantic import BaseModel
@@ -75,16 +69,47 @@ The `marvin.beta.cast` function is an enhanced version of `marvin.cast` that acc
         subtitle: str
         authors: list[str]
 
-
-    img = marvin.beta.Image('https://hastie.su.domains/ElemStatLearn/CoverII_small.jpg')
+
+    img = marvin.beta.Image(
+        "https://hastie.su.domains/ElemStatLearn/CoverII_small.jpg",
+    )
     result = marvin.beta.cast(img, target=Book)
     ```
 
     !!! success "Result"
         ```python
         assert result == Book(
-            title='The Elements of Statistical Learning', 
-            subtitle='Data Mining, Inference, and Prediction', 
+            title='The Elements of Statistical Learning',
+            subtitle='Data Mining, Inference, and Prediction',
             authors=['Trevor Hastie', 'Robert Tibshirani', 'Jerome Friedman']
         )
-        ```    
+        ```
+
+## Instructions
+
+If the target type isn't self-documenting, or you want to give additional guidance, you can pass natural language `instructions` when calling `cast` to steer the output.
+
+
+!!! example "Example: checking groceries"
+
+    Let's use this image to see if we got everything on our shopping list:
+
+    ![](https://images.unsplash.com/photo-1588964895597-cfccd6e2dbf9)
+
+    ```python
+    import marvin
+
+    shopping_list = ["bagels", "cabbage", "eggs", "apples", "oranges"]
+
+    missing_items = marvin.beta.cast(
+        marvin.beta.Image("https://images.unsplash.com/photo-1588964895597-cfccd6e2dbf9"), 
+        target=list[str], 
+        instructions=f"Did I forget anything on my list: {shopping_list}?",
+    )
+
+    ```
+
+    !!! success "Result"
+        ```python
+        assert missing_items == ["eggs", "oranges"]
+        ```
diff --git a/docs/welcome/tutorial.md b/docs/welcome/tutorial.md
@@ -329,12 +329,14 @@ These functions are available under `marvin.beta` and work identically to their
 
     Let's identify the breed of each dog in this image by using the beta `extract` function.
 
-    ![Two dogs moving toward the camera](https://images.unsplash.com/photo-1548199973-03cce0bbc87b?q=80&w=2969&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D)
+    ![Two dogs moving toward the camera](https://images.unsplash.com/photo-1548199973-03cce0bbc87b)
 
     ```python
     import marvin
 
-    img = marvin.beta.Image('https://images.unsplash.com/photo-1548199973-03cce0bbc87b?q=80&w=2969&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D')
+    img = marvin.beta.Image(
+        'https://images.unsplash.com/photo-1548199973-03cce0bbc87b',
+    )
 
     result = marvin.beta.extract(img, target=str, instructions='dog breeds')
     ```

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "marvin"
 dynamic = ["version"]
-description = "A lightweight AI engineering framework for building natural language interfaces that are reliable, scalable, and easy to trust."
+description = "A lightweight AI engineering toolkit for building natural language interfaces that are reliable, scalable, and easy to trust."
 readme = "README.md"
 license = { file = "LICENSE" }
 classifiers = [

diff --git a/src/marvin/ai/images.py b/src/marvin/ai/images.py
@@ -41,7 +41,7 @@ def generate_image(
     model_kwargs = model_kwargs or {}
     prompt_kwargs = prompt_kwargs or {}
     prompt = Environment.render(prompt_template, **prompt_kwargs)
-    request = ImageRequest(prompt=prompt, **model_kwargs)
+    request = ImageRequest(prompt=prompt.strip(), **model_kwargs)
     if marvin.settings.log_verbose:
         logger.debug_kv("Request", request.model_dump_json(indent=2))
     response = MarvinClient().generate_image(**request.model_dump())