meta-llama · ashwinb · Feb 2, 2025 · Feb 2, 2025
@@ -1,31 +1,37 @@
-[flake8]
 # Suggested config from pytorch that we can adapt
-select = B,C,E,F,N,P,T4,W,B9,TOR0,TOR1,TOR2
-max-line-length = 120
+lint.select = ["B", "C", "E" , "F" , "N", "W", "B9"]
+
+line-length = 120
+
 # C408 ignored because we like the dict keyword argument syntax
 # E501 is not flexible enough, we're using B950 instead
 # N812 ignored because import torch.nn.functional as F is PyTorch convention
 # N817 ignored because importing using acronyms is convention (DistributedDataParallel as DDP)
 # E731 allow usage of assigning lambda expressions
 # E701 let black auto-format statements on one line
 # E704 let black auto-format statements on one line
-ignore =
-    E203,E305,E402,E501,E721,E741,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,N812,N817,E731,E701,E704
+lint.ignore = [
+    "E203", "E305", "E402", "E501", "E721", "E741", "F405", "F821", "F841",
+    "C408", "E302", "W291", "E303", "N812", "N817", "E731", "E701",
+    # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later.
+    "C901", "C405", "C414", "N803", "N999", "C403", "C416", "B028", "C419", "C401", "B023",
     # shebang has extra meaning in fbcode lints, so it's not worth trying
     # to line this up with the executable bit
-    EXE001,
+    "EXE001",
     # random naming hints we don't need
-    N802,
+    "N802",
     # these ignores are from flake8-bugbear; please fix!
-    B007,B008,B950
-optional-ascii-coding = True
-exclude =
-    ./.git,
-    ./docs/*,
-    ./build,
-    ./scripts,
-    ./venv,
-    *.pyi,
-    .pre-commit-config.yaml,
-    *.md,
-    .flake8
+    "B007", "B008"
+]
+
+exclude = [
+    "./.git",
+    "./docs/*",
+    "./build",
+    "./scripts",
+    "./venv",
+    "*.pyi",
+    ".pre-commit-config.yaml",
+    "*.md",
+    ".flake8"
+]
@@ -77,7 +77,7 @@ agent_config = AgentConfig(
     instructions="You are a helpful assistant",
     # Enable both RAG and tool usage
     toolgroups=[
-        {"name": "builtin::rag", "args": {"vector_db_ids": ["my_docs"]}}.
+        {"name": "builtin::rag", "args": {"vector_db_ids": ["my_docs"]}},
         "builtin::code_interpreter",
     ],
     # Configure safety
@@ -86,13 +86,9 @@ agent_config = AgentConfig(
     # Control the inference loop
     max_infer_iters=5,
     sampling_params={
-        "strategy": {
-            "type": "top_p",
-            "temperature": 0.7,
-            "top_p": 0.95
-        },
-        "max_tokens": 2048
-    }
+        "strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.95},
+        "max_tokens": 2048,
+    },
 )
 
 agent = Agent(client, agent_config)
@@ -101,11 +97,13 @@ session_id = agent.create_session("monitored_session")
 # Stream the agent's execution steps
 response = agent.create_turn(
     messages=[{"role": "user", "content": "Analyze this code and run it"}],
-    attachments=[{
-        "content": "https://raw.githubusercontent.com/example/code.py",
-        "mime_type": "text/plain"
-    }],
-    session_id=session_id
+    attachments=[
+        {
+            "content": "https://raw.githubusercontent.com/example/code.py",
+            "mime_type": "text/plain",
+        }
+    ],
+    session_id=session_id,
 )
 
 # Monitor each step of execution

@@ -15,6 +15,7 @@ This first example walks you through how to evaluate a model candidate served by
 
 ```python
 import datasets
+
 ds = datasets.load_dataset(path="llamastack/mmmu", name="Agriculture", split="dev")
 ds = ds.select_columns(["chat_completion_input", "input_query", "expected_answer"])
 eval_rows = ds.to_pandas().to_dict(orient="records")
@@ -43,7 +44,7 @@ system_message = {
 client.eval_tasks.register(
     eval_task_id="meta-reference::mmmu",
     dataset_id=f"mmmu-{subset}-{split}",
-    scoring_functions=["basic::regex_parser_multiple_choice_answer"]
+    scoring_functions=["basic::regex_parser_multiple_choice_answer"],
 )
 
 response = client.eval.evaluate_rows(
@@ -62,9 +63,9 @@ response = client.eval.evaluate_rows(
                 "max_tokens": 4096,
                 "repeat_penalty": 1.0,
             },
-            "system_message": system_message
-        }
-    }
+            "system_message": system_message,
+        },
+    },
 )
 ```
 
@@ -88,7 +89,7 @@ _ = client.datasets.register(
         "input_query": {"type": "string"},
         "expected_answer": {"type": "string"},
         "chat_completion_input": {"type": "chat_completion_input"},
-    }
+    },
 )
 
 eval_rows = client.datasetio.get_rows_paginated(
@@ -101,7 +102,7 @@ eval_rows = client.datasetio.get_rows_paginated(
 client.eval_tasks.register(
     eval_task_id="meta-reference::simpleqa",
     dataset_id=simpleqa_dataset_id,
-    scoring_functions=["llm-as-judge::405b-simpleqa"]
+    scoring_functions=["llm-as-judge::405b-simpleqa"],
 )
 
 response = client.eval.evaluate_rows(
@@ -120,8 +121,8 @@ response = client.eval.evaluate_rows(
                 "max_tokens": 4096,
                 "repeat_penalty": 1.0,
             },
-        }
-    }
+        },
+    },
 )
 ```
 
@@ -144,14 +145,14 @@ agent_config = {
         {
             "type": "brave_search",
             "engine": "tavily",
-            "api_key": userdata.get("TAVILY_SEARCH_API_KEY")
+            "api_key": userdata.get("TAVILY_SEARCH_API_KEY"),
         }
     ],
     "tool_choice": "auto",
     "tool_prompt_format": "json",
     "input_shields": [],
     "output_shields": [],
-    "enable_session_persistence": False
+    "enable_session_persistence": False,
 }
 
 response = client.eval.evaluate_rows(
@@ -163,7 +164,7 @@ response = client.eval.evaluate_rows(
         "eval_candidate": {
             "type": "agent",
             "config": agent_config,
-        }
-    }
+        },
+    },
 )
 ```
@@ -13,24 +13,18 @@ Here's how to set up basic evaluation:
 response = client.eval_tasks.register(
     eval_task_id="my_eval",
     dataset_id="my_dataset",
-    scoring_functions=["accuracy", "relevance"]
+    scoring_functions=["accuracy", "relevance"],
 )
 
 # Run evaluation
 job = client.eval.run_eval(
     task_id="my_eval",
     task_config={
         "type": "app",
-        "eval_candidate": {
-            "type": "agent",
-            "config": agent_config
-        }
-    }
+        "eval_candidate": {"type": "agent", "config": agent_config},
+    },
 )
 
 # Get results
-result = client.eval.job_result(
-    task_id="my_eval",
-    job_id=job.job_id
-)
+result = client.eval.job_result(task_id="my_eval", job_id=job.job_id)
 ```
@@ -34,15 +34,16 @@ chunks = [
     {
         "document_id": "doc1",
         "content": "Your document text here",
-        "mime_type": "text/plain"
+        "mime_type": "text/plain",
     },
-    ...
+    ...,
 ]
 client.vector_io.insert(vector_db_id, chunks)
 
 # You can then query for these chunks
-chunks_response = client.vector_io.query(vector_db_id, query="What do you know about...")
-
+chunks_response = client.vector_io.query(
+    vector_db_id, query="What do you know about..."
+)
 ```
 
 ### Using the RAG Tool
@@ -81,7 +82,6 @@ results = client.tool_runtime.rag_tool.query(
 One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:
 
 ```python
-
 # Configure agent with memory
 agent_config = AgentConfig(
     model="Llama3.2-3B-Instruct",
@@ -91,35 +91,31 @@ agent_config = AgentConfig(
             "name": "builtin::rag",
             "args": {
                 "vector_db_ids": [vector_db_id],
-            }
+            },
         }
-    ]
+    ],
 )
 
 agent = Agent(client, agent_config)
 session_id = agent.create_session("rag_session")
 
 # Initial document ingestion
 response = agent.create_turn(
-    messages=[{
-        "role": "user",
-        "content": "I am providing some documents for reference."
-    }],
+    messages=[
+        {"role": "user", "content": "I am providing some documents for reference."}
+    ],
     documents=[
         dict(
             content="https://raw.githubusercontent.com/example/doc.rst",
-            mime_type="text/plain"
+            mime_type="text/plain",
         )
     ],
-    session_id=session_id
+    session_id=session_id,
 )
 
 # Query with RAG
 response = agent.create_turn(
-    messages=[{
-        "role": "user",
-        "content": "What are the key topics in the documents?"
-    }],
-    session_id=session_id
+    messages=[{"role": "user", "content": "What are the key topics in the documents?"}],
+    session_id=session_id,
 )
 ```
@@ -5,15 +5,11 @@ Safety is a critical component of any AI application. Llama Stack provides a Shi
 ```python
 # Register a safety shield
 shield_id = "content_safety"
-client.shields.register(
-    shield_id=shield_id,
-    provider_shield_id="llama-guard-basic"
-)
+client.shields.register(shield_id=shield_id, provider_shield_id="llama-guard-basic")
 
 # Run content through shield
 response = client.safety.run_shield(
-    shield_id=shield_id,
-    messages=[{"role": "user", "content": "User message here"}]
+    shield_id=shield_id, messages=[{"role": "user", "content": "User message here"}]
 )
 
 if response.violation:

@@ -8,24 +8,16 @@ The telemetry system supports three main types of events:
 - **Unstructured Log Events**: Free-form log messages with severity levels
 ```python
 unstructured_log_event = UnstructuredLogEvent(
-    message="This is a log message",
-    severity=LogSeverity.INFO
+    message="This is a log message", severity=LogSeverity.INFO
 )
 ```
 - **Metric Events**: Numerical measurements with units
 ```python
-metric_event = MetricEvent(
-    metric="my_metric",
-    value=10,
-    unit="count"
-)
+metric_event = MetricEvent(metric="my_metric", value=10, unit="count")
 ```
 - **Structured Log Events**: System events like span start/end. Extensible to add more structured log types.
 ```python
-structured_log_event = SpanStartPayload(
-    name="my_span",
-    parent_span_id="parent_span_id"
-)
+structured_log_event = SpanStartPayload(name="my_span", parent_span_id="parent_span_id")
 ```
 
 ### Spans and Traces