forked from ag2ai/ag2
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* rebase with create a docling_query_engine * FIx format * Add DocumentTriageAgent * Add parser agent * Update and rebase * Add document agent and tested basic features * Update document agent * Update utils to support auto create directories and path validations * Update docling_query_engine to support Path input type * Update docling_doc_ingest_agent * Wrap up document agent phase 1 * Document agent init commit * Delete tmp notebook * pre-commit run on all files * Move document agent files to agents/experimental/document_agent * Update file paths * Update notebook * Clean up logging * Update docling_query_engine to support Path input type * Move document agent files to agents/experimental/document_agent * Document Agent: Add docstrings * Fix incompatibility error for python 3.9 * Add DocumentAgent to experimental init, tweak prompt * Update structure output config and add checker for query engine index * Update document_agent unit tests * test fixed * polishing * wip * Error management, prompt tweaks, summary agent system message * pre-commit run on new files * Allow collection name to be used, keeping ingested document names * Notebooks updated * Logging update --------- Co-authored-by: Eric-Shang <[email protected]> Co-authored-by: Davor Runje <[email protected]> Co-authored-by: Mark Sze <[email protected]>
- Loading branch information
1 parent
11ae546
commit 74902f3
Showing
21 changed files
with
1,061 additions
and
112 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -190,3 +190,5 @@ local_cache | |
notebook/result.png | ||
|
||
notebook/coding | ||
|
||
chroma |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from typing import Any, Protocol, runtime_checkable\n", | ||
"\n", | ||
"from autogen import Agent\n", | ||
"\n", | ||
"\n", | ||
"class RunResponse:\n", | ||
" pass\n", | ||
"\n", | ||
"\n", | ||
"class Cost:\n", | ||
" def __init__(self, **kwargs: Any):\n", | ||
" self._cost: dict[str, Any] = kwargs.copy()\n", | ||
"\n", | ||
" @staticmethod\n", | ||
" def _add_elements(key: str, x: dict[str, Any], y: dict[str, Any]) -> Any:\n", | ||
" if key in x and key in y:\n", | ||
" return x[key] + y[key]\n", | ||
" elif key in x:\n", | ||
" return x[key]\n", | ||
" elif key in y:\n", | ||
" return y[key]\n", | ||
" else:\n", | ||
" raise KeyError(f\"Key {key} not found in either dictionary\")\n", | ||
"\n", | ||
" def __add__(self, other: \"Cost\") -> \"Cost\":\n", | ||
" keys = set(self._cost.keys()) | set(other._cost.keys())\n", | ||
" return Cost(**{key: self._add_elements(key, self._cost, other._cost) for key in keys})\n", | ||
"\n", | ||
"\n", | ||
"@runtime_checkable\n", | ||
"class EventProtocol(Protocol):\n", | ||
" @property\n", | ||
" def cost(self) -> Cost:\n", | ||
" return Cost()\n", | ||
"\n", | ||
"\n", | ||
"class RunResponse:\n", | ||
" @property\n", | ||
" def events(self) -> list[EventProtocol]:\n", | ||
" pass\n", | ||
"\n", | ||
"\n", | ||
"def run(\n", | ||
" *agents: Agent,\n", | ||
") -> RunResponse:\n", | ||
" return RunResponse" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"agents: list[Agent] = []\n", | ||
"\n", | ||
"response = run(*agents, message=\"What is the meaning of life?\")\n", | ||
"\n", | ||
"total_cost = Cost(0)\n", | ||
"for m in response.events:\n", | ||
" total_cost += m.cost\n", | ||
" if isinstance(m, InputRequest):\n", | ||
" s = input(m.prompt)\n", | ||
" m.respond(s)\n", | ||
" elif isinstance(m, OutputMessage):\n", | ||
" print(m.message)\n", | ||
" elif isinstance(m, ToolRequest):\n", | ||
" tool = m.tool" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": ".venv-3.10-core", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.10" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
112 changes: 112 additions & 0 deletions
112
autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors | ||
# | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
import logging | ||
from pathlib import Path | ||
from typing import Literal, Optional, Union | ||
|
||
from .... import ConversableAgent | ||
from ....agentchat.contrib.swarm_agent import SwarmResult | ||
from ....doc_utils import export_module | ||
from ..document_agent.parser_utils import docling_parse_docs | ||
from .docling_query_engine import DoclingMdQueryEngine | ||
from .document_utils import preprocess_path | ||
|
||
# Public API of this module.
__all__ = ["DoclingDocIngestAgent"]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Tool name interpolated into the default system prompt below.
DOCLING_PARSE_TOOL_NAME = "docling_parse_docs"

# Default system message handed to the agent; the tool name above is substituted in.
DEFAULT_DOCLING_PARSER_PROMPT = f"""
You are an expert in parsing and understanding text. You can use {DOCLING_PARSE_TOOL_NAME} tool to parse various documents and extract information from them. You can only use the tool once per turn.
"""
|
||
|
||
@export_module("autogen.agents.experimental")
class DoclingDocIngestAgent(ConversableAgent):
    """
    A DoclingDocIngestAgent is a swarm agent that ingests documents using the docling_parse_docs tool.

    Each queued document is parsed to markdown with ``docling_parse_docs`` and the resulting
    markdown file is added to the agent's ``DoclingMdQueryEngine``.
    """

    def __init__(
        self,
        name: Optional[str] = None,
        llm_config: Optional[Union[dict, Literal[False]]] = None,  # type: ignore[type-arg]
        parsed_docs_path: Optional[Union[Path, str]] = None,
        query_engine: Optional[DoclingMdQueryEngine] = None,
        return_agent_success: str = "TaskManagerAgent",
        return_agent_error: str = "ErrorManagerAgent",
        collection_name: Optional[str] = None,
    ):
        """
        Initialize the DoclingDocIngestAgent.

        Args:
            name (Optional[str]): The name of the agent. Defaults to "DoclingDocIngestAgent".
            llm_config (Optional[Union[dict, Literal[False]]]): The configuration for the LLM.
            parsed_docs_path (Optional[Union[Path, str]]): The path where parsed documents will be stored.
                Defaults to "./parsed_docs".
            query_engine (Optional[DoclingMdQueryEngine]): The DoclingMdQueryEngine to use for querying documents.
                When omitted, a new engine is created with ``collection_name``.
            return_agent_success (str): Name of the agent the swarm is handed to after a successful ingestion.
            return_agent_error (str): Name of the agent the swarm is handed to when ingestion raises.
            collection_name (Optional[str]): The unique name for the Chromadb collection. Set this to a value to
                reuse a collection. If a query_engine is provided, this will be ignored.
        """
        name = name or "DoclingDocIngestAgent"

        parsed_docs_path = parsed_docs_path or Path("./parsed_docs")
        # NOTE(review): presumably normalises the path and creates the directory (mk_path=True) -
        # confirm against document_utils.preprocess_path.
        parsed_docs_path = preprocess_path(str_or_path=parsed_docs_path, mk_path=True)

        self.docling_query_engine = query_engine or DoclingMdQueryEngine(collection_name=collection_name)

        # NOTE(review): this function's docstring is presumably surfaced to the LLM as the tool
        # description, so it is left unchanged - confirm before rewording it.
        def data_ingest_task(context_variables: dict) -> SwarmResult:  # type: ignore[type-arg]
            """
            A tool for Swarm agent to ingests documents using the docling_parse_docs to parse documents to markdown
            and add them to the docling_query_engine.
            Args:
            context_variables (dict): The context variables for the task.
            Returns:
            SwarmResult: The result of the task.
            """

            try:
                # Pre-bound so both result messages can reference it even if no task was processed.
                input_file_path = ""
                tasks = context_variables.get("DocumentsToIngest", [])
                # Tasks are consumed from the end of the list; the shared list is mutated in place
                # so already-processed tasks are not picked up again on a later call.
                while tasks:
                    task = tasks.pop()
                    input_file_path = task["path_or_url"]
                    output_files = docling_parse_docs(
                        input_file_path=input_file_path, output_dir_path=parsed_docs_path, output_formats=["markdown"]
                    )

                    # Limit to one output markdown file for now.
                    if output_files:
                        output_file = output_files[0]
                        if output_file.suffix == ".md":
                            self.docling_query_engine.add_docs(new_doc_paths=[output_file])

                    # Keep track of documents ingested. setdefault avoids a KeyError (and a
                    # spurious failure result) when the caller did not pre-seed the list.
                    context_variables.setdefault("DocumentsIngested", []).append(input_file_path)

                # Counted once per tool call, not once per document; tolerate a missing counter.
                context_variables["CompletedTaskCount"] = context_variables.get("CompletedTaskCount", 0) + 1
                # Lazy %-style argument: the original call had no placeholder, which makes the
                # logging module report a formatting error instead of logging the context.
                logger.info("data_ingest_task context_variables: %s", context_variables)

            except Exception as e:
                # Any failure is routed to the error agent rather than propagated to the swarm.
                return SwarmResult(
                    agent=return_agent_error,
                    values=f"Data Ingestion Task Failed, Error {e}: '{input_file_path}'",
                    context_variables=context_variables,
                )

            return SwarmResult(
                agent=return_agent_success,
                values=f"Data Ingestion Task Completed for {input_file_path}",
                context_variables=context_variables,
            )

        super().__init__(
            name=name,
            llm_config=llm_config,
            functions=[data_ingest_task],
            system_message=DEFAULT_DOCLING_PARSER_PROMPT,
        )
Oops, something went wrong.