docs: add verbose param (#283)

ucbepic · Jan 22, 2025 · 05c4357
1 parent 5b86a58
commit 05c4357
Show file tree

Hide file tree

Showing 6 changed files with 63 additions and 3 deletions.
diff --git a/docetl/operations/utils/api.py b/docetl/operations/utils/api.py
@@ -6,7 +6,9 @@
 
 from litellm import ModelResponse, RateLimitError, completion, embedding
 from rich import print as rprint
-from rich.console import Console
+from rich.console import Console, Group
+from rich.panel import Panel
+from rich.text import Text
 
 from docetl.utils import completion_cost
 
@@ -378,7 +380,7 @@ def call_llm(
         rate_limited_attempt = 0
         while attempt <= max_retries:
             try:
-                return timeout(timeout_seconds)(self._cached_call_llm)(
+                output = timeout(timeout_seconds)(self._cached_call_llm)(
                     key,
                     model,
                     op_type,
@@ -393,6 +395,31 @@ def call_llm(
                     initial_result=initial_result,
                     litellm_completion_kwargs=litellm_completion_kwargs,
                 )
+                # Log input and output if verbose
+                if verbose:
+                    # Truncate messages to 500 chars
+                    messages_str = str(messages)
+                    truncated_messages = (
+                        messages_str[:500] + "..."
+                        if len(messages_str) > 500
+                        else messages_str
+                    )
+
+                    # Log with nice formatting
+                    self.runner.console.print(
+                        Panel(
+                            Group(
+                                Text("Input:", style="bold cyan"),
+                                Text(truncated_messages),
+                                Text("\nOutput:", style="bold cyan"),
+                                Text(str(output)),
+                            ),
+                            title="[bold green]LLM Call Details[/bold green]",
+                            border_style="green",
+                        )
+                    )
+
+                return output
             except RateLimitError:
                 # TODO: this is a really hacky way to handle rate limits
                 # we should implement a more robust retry mechanism

diff --git a/docetl/utils.py b/docetl/utils.py
@@ -120,7 +120,11 @@ def extract_jinja_variables(template_string: str) -> List[str]:
 
 def completion_cost(response) -> float:
     try:
-        return lcc(response)
+        return (
+            response._completion_cost
+            if hasattr(response, "_completion_cost")
+            else lcc(response)
+        )
     except Exception:
         return 0.0
 

diff --git a/docs/operators/map.md b/docs/operators/map.md
@@ -149,6 +149,7 @@ This example demonstrates how the Map operation can transform long, unstructured
 | `timeout`                         | Timeout for each LLM call in seconds                                                            | 120                           |
 | `litellm_completion_kwargs` | Additional parameters to pass to LiteLLM completion calls. | {}                          |
 | `skip_on_error` | If true, skip the operation if the LLM returns an error. | False                          |
+| `bypass_cache` | If true, bypass the cache for this operation. | False                          |
 
 Note: If `drop_keys` is specified, `prompt` and `output` become optional parameters.
 

diff --git a/docs/operators/reduce.md b/docs/operators/reduce.md
@@ -65,6 +65,7 @@ This Reduce operation processes customer feedback grouped by department:
 | `timeout`                 | Timeout for each LLM call in seconds                                                                   | 120                         |
 | `max_retries_per_timeout` | Maximum number of retries per timeout                                                                  | 2                           |
 | `litellm_completion_kwargs` | Additional parameters to pass to LiteLLM completion calls. | {}                          |
+| `bypass_cache` | If true, bypass the cache for this operation. | False                          |
 
 ## Advanced Features
 

diff --git a/docs/operators/resolve.md b/docs/operators/resolve.md
@@ -128,6 +128,7 @@ After determining eligible pairs for comparison, the Resolve operation uses a Un
 | `max_retries_per_timeout` | Maximum number of retries per timeout                                             | 2                             |
 | `sample`                  | Number of samples to use for the operation                                                      |   None                        |
 | `litellm_completion_kwargs` | Additional parameters to pass to LiteLLM completion calls. | {}                          |
+| `bypass_cache` | If true, bypass the cache for this operation. | False                          |
 
 ## Best Practices
 

diff --git a/tests/basic/test_basic_map.py b/tests/basic/test_basic_map.py
@@ -358,4 +358,30 @@ def test_map_operation_with_max_tokens(simple_map_config, map_sample_data, api_w
     # Since we limited max_tokens to 10, each response should be relatively short
     # The sentiment field should contain just the sentiment value without much extra text
     assert all(len(result["sentiment"]) <= 20 for result in results)
+
+def test_map_operation_with_verbose(simple_map_config, map_sample_data, api_wrapper):
+    # Add verbose configuration
+    map_config_with_verbose = {
+        **simple_map_config,
+        "verbose": True,
+        "bypass_cache": True
+    }
+
+    operation = MapOperation(api_wrapper, map_config_with_verbose, "gpt-4o-mini", 4)
+
+    # Execute the operation
+    results, cost = operation.execute(map_sample_data)
+
+    # Assert that we have results for all input items
+    assert len(results) == len(map_sample_data)
+
+    # Check that all results have a sentiment
+    assert all("sentiment" in result for result in results)
+
+    # Verify that all sentiments are valid
+    valid_sentiments = ["positive", "negative", "neutral"]
+    assert all(
+        any(vs in result["sentiment"] for vs in valid_sentiments) for result in results
+    )
+