Document agent phase 1 (ag2ai#978)
* Rebase and create a docling_query_engine

* Fix format

* Add DocumentTriageAgent

* Add parser agent

* Update and rebase

* Add document agent and tested basic features

* Update document agent

* Update utils to support auto create directories and path validations

* Update docling_query_engine to support Path input type

* Update docling_doc_ingest_agent

* Wrap up document agent phase 1

* Document agent init commit

* Delete tmp notebook

* pre-commit run on all files

* Move document agent files to agents/experimental/document_agent

* Update file paths

* Update notebook

* Clean up logging

* Update docling_query_engine to support Path input type

* Move document agent files to agents/experimental/document_agent

* Document Agent: Add docstrings

* Fix incompatibility error for python 3.9

* Add DocumentAgent to experimental init, tweak prompt

* Update structure output config and add checker for query engine index

* Update document_agent unit tests

* test fixed

* polishing

* wip

* Error management, prompt tweaks, summary agent system message

* pre-commit run on new files

* Allow collection name to be used, keeping ingested document names

* Notebooks updated

* Logging update

---------

Co-authored-by: Eric-Shang <[email protected]>
Co-authored-by: Davor Runje <[email protected]>
Co-authored-by: Mark Sze <[email protected]>
4 people authored Feb 19, 2025
1 parent 11ae546 commit 74902f3
Showing 21 changed files with 1,061 additions and 112 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -190,3 +190,5 @@ local_cache
notebook/result.png

notebook/coding

chroma
4 changes: 3 additions & 1 deletion .pre-commit-config.yaml
@@ -67,7 +67,9 @@ repos:
notebook/agentchat_graph_rag_neo4j.ipynb |
notebook/agentchat_swarm_graphrag_telemetry_trip_planner.ipynb |
website/node_modules/.* |
website/notebooks/.*
website/notebooks/.* |
test/agents/experimental/document_agent/pdf_parsed/Toast_financial_report.md |
test/agents/experimental/document_agent/pdf_parsed/nvidia_10k_2024.md
)$
# See https://jaredkhan.com/blog/mypy-pre-commit
- repo: local
129 changes: 129 additions & 0 deletions RFCs/000/RFC-000-global-run-function.ipynb.txt
@@ -0,0 +1,129 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from typing import Any, Protocol, runtime_checkable\n",
"\n",
"from autogen import Agent\n",
"\n",
"\n",
"class RunResponse:\n",
" pass\n",
"\n",
"\n",
"class Cost:\n",
" def __init__(self, **kwargs: Any):\n",
" self._cost: dict[str, Any] = kwargs.copy()\n",
"\n",
" @staticmethod\n",
" def _add_elements(key: str, x: dict[str, Any], y: dict[str, Any]) -> Any:\n",
" if key in x and key in y:\n",
" return x[key] + y[key]\n",
" elif key in x:\n",
" return x[key]\n",
" elif key in y:\n",
" return y[key]\n",
" else:\n",
" raise KeyError(f\"Key {key} not found in either dictionary\")\n",
"\n",
" def __add__(self, other: \"Cost\") -> \"Cost\":\n",
" keys = set(self._cost.keys()) | set(other._cost.keys())\n",
" return Cost(**{key: self._add_elements(key, self._cost, other._cost) for key in keys})\n",
"\n",
"\n",
"@runtime_checkable\n",
"class EventProtocol(Protocol):\n",
" @property\n",
" def cost(self) -> Cost:\n",
" return Cost()\n",
"\n",
"\n",
"class RunResponse:\n",
" @property\n",
" def events(self) -> list[EventProtocol]:\n",
" pass\n",
"\n",
"\n",
"def run(\n",
" *agents: Agent,\n",
") -> RunResponse:\n",
" return RunResponse"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"agents: list[Agent] = []\n",
"\n",
"response = run(*agents, message=\"What is the meaning of life?\")\n",
"\n",
"total_cost = Cost(0)\n",
"for m in response.events:\n",
" total_cost += m.cost\n",
" if isinstance(m, InputRequest):\n",
" s = input(m.prompt)\n",
" m.respond(s)\n",
" elif isinstance(m, OutputMessage):\n",
" print(m.message)\n",
" elif isinstance(m, ToolRequest):\n",
" tool = m.tool"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv-3.10-core",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
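
The RFC sketch above proposes a global run() function whose response exposes events, each carrying a Cost that can be summed. The following is a hedged, self-contained illustration of that cost-accumulation behaviour: it condenses the Cost class from the notebook, and the key names used (prompt_tokens, completion_tokens, cost_usd) are illustrative assumptions, not part of the RFC.

# Condensed copy of the Cost class from the RFC notebook above, for a runnable illustration.
# The key names below are assumptions; the RFC does not fix a cost schema.
from typing import Any


class Cost:
    def __init__(self, **kwargs: Any):
        self._cost: dict[str, Any] = kwargs.copy()

    @staticmethod
    def _add_elements(key: str, x: dict[str, Any], y: dict[str, Any]) -> Any:
        # Sum values present on both sides; otherwise take whichever side has the key.
        if key in x and key in y:
            return x[key] + y[key]
        return x[key] if key in x else y[key]

    def __add__(self, other: "Cost") -> "Cost":
        keys = set(self._cost.keys()) | set(other._cost.keys())
        return Cost(**{key: self._add_elements(key, self._cost, other._cost) for key in keys})


# Per-event costs accumulate field by field.
a = Cost(prompt_tokens=100, cost_usd=0.002)
b = Cost(prompt_tokens=50, completion_tokens=20)
print((a + b)._cost)  # e.g. {'prompt_tokens': 150, 'cost_usd': 0.002, 'completion_tokens': 20} (key order may vary)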
2 changes: 1 addition & 1 deletion autogen/agentchat/conversable_agent.py
@@ -1743,7 +1743,7 @@ def get_chat_results(self, chat_index: Optional[int] = None) -> Union[list[ChatR
else:
return self._finished_chats

def reset(self):
def reset(self) -> None:
"""Reset the agent."""
self.clear_history()
self.reset_consecutive_auto_reply_counter()
11 changes: 10 additions & 1 deletion autogen/agents/experimental/__init__.py
@@ -4,8 +4,17 @@

from .deep_research import DeepResearchAgent
from .discord import DiscordAgent
from .document_agent import DoclingDocIngestAgent, DocumentAgent
from .slack import SlackAgent
from .telegram import TelegramAgent
from .websurfer import WebSurferAgent

__all__ = ["DeepResearchAgent", "DiscordAgent", "SlackAgent", "TelegramAgent", "WebSurferAgent"]
__all__ = [
"DeepResearchAgent",
"DiscordAgent",
"DoclingDocIngestAgent",
"DocumentAgent",
"SlackAgent",
"TelegramAgent",
"WebSurferAgent",
]
autogen/agents/experimental/document_agent/__init__.py
Expand Up @@ -2,7 +2,9 @@
#
# SPDX-License-Identifier: Apache-2.0

from .docling_doc_ingest_agent import DoclingDocIngestAgent
from .document_agent import DocumentAgent
from .document_utils import handle_input
from .parser_utils import docling_parse_docs

__all__ = ["docling_parse_docs", "handle_input"]
__all__ = ["DoclingDocIngestAgent", "DocumentAgent", "docling_parse_docs", "handle_input"]
112 changes: 112 additions & 0 deletions autogen/agents/experimental/document_agent/docling_doc_ingest_agent.py
@@ -0,0 +1,112 @@
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
#
# SPDX-License-Identifier: Apache-2.0

import logging
from pathlib import Path
from typing import Literal, Optional, Union

from .... import ConversableAgent
from ....agentchat.contrib.swarm_agent import SwarmResult
from ....doc_utils import export_module
from ..document_agent.parser_utils import docling_parse_docs
from .docling_query_engine import DoclingMdQueryEngine
from .document_utils import preprocess_path

__all__ = ["DoclingDocIngestAgent"]

logger = logging.getLogger(__name__)

DOCLING_PARSE_TOOL_NAME = "docling_parse_docs"

DEFAULT_DOCLING_PARSER_PROMPT = f"""
You are an expert in parsing and understanding text. You can use the {DOCLING_PARSE_TOOL_NAME} tool to parse various documents and extract information from them. You can only use the tool once per turn.
"""


@export_module("autogen.agents.experimental")
class DoclingDocIngestAgent(ConversableAgent):
"""
A DoclingDocIngestAgent is a swarm agent that ingests documents using the docling_parse_docs tool.
"""

def __init__(
self,
name: Optional[str] = None,
llm_config: Optional[Union[dict, Literal[False]]] = None, # type: ignore[type-arg]
parsed_docs_path: Optional[Union[Path, str]] = None,
query_engine: Optional[DoclingMdQueryEngine] = None,
return_agent_success: str = "TaskManagerAgent",
return_agent_error: str = "ErrorManagerAgent",
collection_name: Optional[str] = None,
):
"""
Initialize the DoclingDocIngestAgent.
Args:
name (Optional[str]): The name of the DoclingDocIngestAgent; defaults to "DoclingDocIngestAgent".
llm_config (Optional[Union[dict, Literal[False]]]): The configuration for the LLM.
parsed_docs_path (Optional[Union[Path, str]]): The path where parsed documents will be stored; defaults to "./parsed_docs".
query_engine (Optional[DoclingMdQueryEngine]): The DoclingMdQueryEngine to use for querying documents.
return_agent_success (str): The name of the agent to hand off to when ingestion succeeds.
return_agent_error (str): The name of the agent to hand off to when ingestion fails.
collection_name (Optional[str]): The unique name for the Chromadb collection. Set this to a value to reuse a collection. If a query_engine is provided, this will be ignored.
"""
name = name or "DoclingDocIngestAgent"

parsed_docs_path = parsed_docs_path or Path("./parsed_docs")
parsed_docs_path = preprocess_path(str_or_path=parsed_docs_path, mk_path=True)

self.docling_query_engine = query_engine or DoclingMdQueryEngine(collection_name=collection_name)

def data_ingest_task(context_variables: dict) -> SwarmResult: # type: ignore[type-arg]
"""
A tool for a Swarm agent to ingest documents: uses docling_parse_docs to parse documents to markdown
and adds the results to the docling_query_engine.
Args:
context_variables (dict): The context variables for the task.
Returns:
SwarmResult: The result of the task.
"""

try:
input_file_path = ""
tasks = context_variables.get("DocumentsToIngest", [])
while tasks:
task = tasks.pop()
input_file_path = task["path_or_url"]
output_files = docling_parse_docs(
input_file_path=input_file_path, output_dir_path=parsed_docs_path, output_formats=["markdown"]
)

# Limit to one output markdown file for now.
if output_files:
output_file = output_files[0]
if output_file.suffix == ".md":
self.docling_query_engine.add_docs(new_doc_paths=[output_file])

# Keep track of documents ingested
context_variables["DocumentsIngested"].append(input_file_path)

context_variables["CompletedTaskCount"] += 1
logger.info("data_ingest_task context_variables: %s", context_variables)

except Exception as e:
return SwarmResult(
agent=return_agent_error,
values=f"Data Ingestion Task Failed, Error {e}: '{input_file_path}'",
context_variables=context_variables,
)

return SwarmResult(
agent=return_agent_success,
values=f"Data Ingestion Task Completed for {input_file_path}",
context_variables=context_variables,
)

super().__init__(
name=name,
llm_config=llm_config,
functions=[data_ingest_task],
system_message=DEFAULT_DOCLING_PARSER_PROMPT,
)
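
A minimal instantiation sketch based on the constructor above, offered as a hedged example: the llm_config values and the collection name are placeholders, and wiring the agent into a swarm alongside the TaskManagerAgent and ErrorManagerAgent it hands off to is not shown.

# Hedged sketch: instantiate the ingestion agent as declared above.
# The llm_config and collection name are placeholders, not recommended settings.
from autogen.agents.experimental import DoclingDocIngestAgent
from autogen.agents.experimental.document_agent.docling_query_engine import DoclingMdQueryEngine

llm_config = {"config_list": [{"model": "gpt-4o", "api_key": "YOUR_API_KEY"}]}  # placeholder

# Option 1: let the agent create its own query engine, optionally reusing a named Chromadb collection.
ingest_agent = DoclingDocIngestAgent(
    llm_config=llm_config,
    parsed_docs_path="./parsed_docs",
    collection_name="my_docs",  # hypothetical collection name
)

# Option 2: share an existing engine across agents; collection_name is then ignored.
query_engine = DoclingMdQueryEngine(collection_name="my_docs")
ingest_agent_shared = DoclingDocIngestAgent(llm_config=llm_config, query_engine=query_engine)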