diff --git a/.secrets.baseline b/.secrets.baseline index 55fe13dbac..b3023005ee 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -143,7 +143,7 @@ "filename": "autogen/oai/openai_utils.py", "hashed_secret": "aa5bc2e0df7182f74186f26d6e9063b9d57603ec", "is_verified": false, - "line_number": 352, + "line_number": 353, "is_secret": false }, { @@ -151,7 +151,7 @@ "filename": "autogen/oai/openai_utils.py", "hashed_secret": "cbb43d092552e9af4b21efc76bc8c49c071c1d81", "is_verified": false, - "line_number": 353, + "line_number": 354, "is_secret": false }, { @@ -159,7 +159,7 @@ "filename": "autogen/oai/openai_utils.py", "hashed_secret": "79d8b9da0f827f788759bdbe5b9254a02c74d877", "is_verified": false, - "line_number": 573, + "line_number": 577, "is_secret": false } ], @@ -773,7 +773,7 @@ "filename": "test/agentchat/contrib/graph_rag/test_native_neo4j_graph_rag.py", "hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8", "is_verified": false, - "line_number": 78, + "line_number": 75, "is_secret": false } ], @@ -783,7 +783,7 @@ "filename": "test/agentchat/contrib/graph_rag/test_neo4j_graph_rag.py", "hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8", "is_verified": false, - "line_number": 38, + "line_number": 35, "is_secret": false } ], @@ -1035,7 +1035,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "f72c85879027f6160ce36e1c5074ef8207bfe105", "is_verified": false, - "line_number": 26, + "line_number": 30, "is_secret": false }, { @@ -1043,7 +1043,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "4c88039c5079180dacb0e29d715055d95b2b7589", "is_verified": false, - "line_number": 35, + "line_number": 39, "is_secret": false }, { @@ -1051,7 +1051,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "7460e665be1988cc62f1caf9d47716b07d55858c", "is_verified": false, - "line_number": 65, + "line_number": 69, "is_secret": false }, { @@ -1059,7 +1059,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "b5c2827eb65bf13b87130e7e3c424ba9ff07cd67", "is_verified": false, - "line_number": 72, + "line_number": 76, "is_secret": false }, { @@ -1067,7 +1067,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "178c7a21b087dfafc826a21b61aff284c71fd258", "is_verified": false, - "line_number": 198, + "line_number": 202, "is_secret": false }, { @@ -1075,7 +1075,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "aa5c90e1b80bb987f562ac30eaa1a71c832892f5", "is_verified": false, - "line_number": 199, + "line_number": 203, "is_secret": false }, { @@ -1083,7 +1083,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "4489f55309f29853a4075cbbdf1f18b584809726", "is_verified": false, - "line_number": 201, + "line_number": 205, "is_secret": false }, { @@ -1091,7 +1091,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "95cfb33d5e102631e226e7ff9da4b17d6ba5f3e4", "is_verified": false, - "line_number": 213, + "line_number": 217, "is_secret": false }, { @@ -1099,7 +1099,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "7943297a6a2188abe697bd1e0189fdd1274818be", "is_verified": false, - "line_number": 215, + "line_number": 219, "is_secret": false }, { @@ -1107,7 +1107,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "8cc86c45479a8e0bbb1ddea57d3e195b611241f2", "is_verified": false, - "line_number": 235, + "line_number": 239, "is_secret": false }, { @@ -1115,7 +1115,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "eda6571eea7bd0ac4553ac9d745631f1f2bec7a4", "is_verified": false, - "line_number": 237, + "line_number": 241, 
"is_secret": false }, { @@ -1123,7 +1123,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "0ad02c88ffd9754bfbfc24ade0bf8bc48d76b232", "is_verified": false, - "line_number": 246, + "line_number": 250, "is_secret": false }, { @@ -1131,7 +1131,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "11841233da3f9f37c5fa14e8b482dde913db6edf", "is_verified": false, - "line_number": 254, + "line_number": 258, "is_secret": false }, { @@ -1139,7 +1139,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "11cac88cbfa53881646b024097f531c4f234151b", "is_verified": false, - "line_number": 432, + "line_number": 436, "is_secret": false }, { @@ -1147,7 +1147,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "8e8324e8ea2ec13efb774680c6e3850625e575e6", "is_verified": false, - "line_number": 432, + "line_number": 436, "is_secret": false }, { @@ -1155,7 +1155,7 @@ "filename": "test/oai/test_utils.py", "hashed_secret": "8e2fa04ab430ff4817e87e3294f33727fc78ed6c", "is_verified": false, - "line_number": 435, + "line_number": 439, "is_secret": false } ], @@ -1187,16 +1187,6 @@ "is_secret": false } ], - "test/tools/experimental/browser_use/test_browser_use.py": [ - { - "type": "Secret Keyword", - "filename": "test/tools/experimental/browser_use/test_browser_use.py", - "hashed_secret": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3", - "is_verified": false, - "line_number": 47, - "is_secret": false - } - ], "test/tools/experimental/crawl4ai/test_crawl4ai.py": [ { "type": "Secret Keyword", @@ -1616,5 +1606,5 @@ } ] }, - "generated_at": "2025-02-20T06:43:14Z" + "generated_at": "2025-02-20T10:09:56Z" } diff --git a/autogen/agentchat/contrib/graph_rag/__init__.py b/autogen/agentchat/contrib/graph_rag/__init__.py index a80fb86b56..37e0dfc3b8 100644 --- a/autogen/agentchat/contrib/graph_rag/__init__.py +++ b/autogen/agentchat/contrib/graph_rag/__init__.py @@ -2,4 +2,8 @@ # # SPDX-License-Identifier: Apache-2.0 -__all__: list[str] = [] +from .document import Document, DocumentType +from .graph_query_engine import GraphQueryEngine, GraphStoreQueryResult +from .graph_rag_capability import GraphRagCapability + +__all__ = ["Document", "DocumentType", "GraphQueryEngine", "GraphRagCapability", "GraphStoreQueryResult"] diff --git a/autogen/agentchat/contrib/graph_rag/document.py b/autogen/agentchat/contrib/graph_rag/document.py index cc8f0f45ae..7f145aac15 100644 --- a/autogen/agentchat/contrib/graph_rag/document.py +++ b/autogen/agentchat/contrib/graph_rag/document.py @@ -4,9 +4,11 @@ # # Portions derived from https://github.com/microsoft/autogen are under the MIT License. 
# SPDX-License-Identifier: MIT -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum, auto -from typing import Optional +from typing import Any, Optional + +__all__ = ["Document", "DocumentType"] class DocumentType(Enum): @@ -23,5 +25,5 @@ class Document: """A wrapper of graph store query results.""" doctype: DocumentType - data: Optional[object] = None - path_or_url: Optional[str] = "" + data: Optional[Any] = None + path_or_url: Optional[str] = field(default_factory=lambda: "") diff --git a/autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py b/autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py index a44d733e90..228b640c7d 100644 --- a/autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +++ b/autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py @@ -4,7 +4,7 @@ import os import warnings -from typing import Optional +from typing import Any, Optional from ....import_utils import optional_import_block, require_optional_import from .document import Document @@ -23,7 +23,7 @@ class FalkorGraphQueryEngine: """This is a wrapper for FalkorDB KnowledgeGraph.""" - def __init__( + def __init__( # type: ignore[no-any-unimported] self, name: str, host: str = "127.0.0.1", @@ -57,10 +57,10 @@ def __init__( self.model = model or OpenAiGenerativeModel("gpt-4o") self.model_config = KnowledgeGraphModelConfig.with_model(model) self.ontology = ontology - self.knowledge_graph = None + self.knowledge_graph: Optional["KnowledgeGraph"] = None # type: ignore[no-any-unimported] self.falkordb = FalkorDB(host=self.host, port=self.port, username=self.username, password=self.password) - def connect_db(self): + def connect_db(self) -> None: """Connect to an existing knowledge graph.""" if self.name in self.falkordb.list_graphs(): try: @@ -86,11 +86,11 @@ def connect_db(self): else: raise ValueError(f"Knowledge graph '{self.name}' does not exist") - def init_db(self, input_doc: list[Document]): + def init_db(self, input_doc: list[Document]) -> None: """Build the knowledge graph with input documents.""" sources = [] for doc in input_doc: - if os.path.exists(doc.path_or_url): + if doc.path_or_url and os.path.exists(doc.path_or_url): sources.append(Source(doc.path_or_url)) if sources: @@ -123,7 +123,7 @@ def init_db(self, input_doc: list[Document]): def add_records(self, new_records: list[Document]) -> bool: raise NotImplementedError("This method is not supported by FalkorDB SDK yet.") - def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult: + def query(self, question: str, n_results: int = 1, **kwargs: Any) -> GraphStoreQueryResult: """Query the knowledge graph with a question and optional message history. 
Args: @@ -153,17 +153,17 @@ def delete(self) -> bool: self.falkordb.select_graph(self.ontology_table_name).delete() return True - def __get_ontology_storage_graph(self) -> "Graph": + def __get_ontology_storage_graph(self) -> "Graph": # type: ignore[no-any-unimported] return self.falkordb.select_graph(self.ontology_table_name) - def _save_ontology_to_db(self, ontology: "Ontology"): + def _save_ontology_to_db(self, ontology: "Ontology") -> None: # type: ignore[no-any-unimported] """Save graph ontology to a separate table with {graph_name}_ontology""" if self.ontology_table_name in self.falkordb.list_graphs(): raise ValueError(f"Knowledge graph {self.name} is already created.") graph = self.__get_ontology_storage_graph() ontology.save_to_graph(graph) - def _load_ontology_from_db(self) -> "Ontology": + def _load_ontology_from_db(self) -> "Ontology": # type: ignore[no-any-unimported] if self.ontology_table_name not in self.falkordb.list_graphs(): raise ValueError(f"Knowledge graph {self.name} has not been created.") graph = self.__get_ontology_storage_graph() diff --git a/autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py b/autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py index 3d65ba6d84..7e2cdbcd83 100644 --- a/autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +++ b/autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py @@ -21,10 +21,18 @@ def __init__(self, query_engine: FalkorGraphQueryEngine): """Initialize GraphRAG capability with a graph query engine""" self.query_engine = query_engine - def add_to_agent(self, agent: UserProxyAgent): + def add_to_agent(self, agent: ConversableAgent) -> None: """Add FalkorDB GraphRAG capability to a UserProxyAgent. + + Args: + agent: The UserProxyAgent instance to add the capability to. + The restriction to a UserProxyAgent to make sure the returned message does not contain information retrieved from the graph DB instead of any LLMs. + """ + if not isinstance(agent, UserProxyAgent): + raise Exception("FalkorDB GraphRAG capability can only be added to a UserProxyAgent.") + self.graph_rag_agent = agent # Validate the agent config @@ -62,7 +70,8 @@ def _reply_using_falkordb_query( Returns: A tuple containing a boolean indicating success and the assistant's reply. """ - question = self._messages_summary(messages, recipient.system_message) + # todo: fix typing, this is not correct + question = self._messages_summary(messages, recipient.system_message) # type: ignore[arg-type] result: GraphStoreQueryResult = self.query_engine.query(question) return True, result.answer if result.answer else "I'm sorry, I don't have an answer for that." @@ -77,10 +86,7 @@ def _messages_summary(self, messages: Union[dict[str, Any], str], system_message """ if isinstance(messages, str): - if system_message: - summary = f"IMPORTANT: {system_message}\nContext:\n\n{messages}" - else: - return messages + return (f"IMPORTANT: {system_message}\n" if system_message else "") + f"Context:\n\n{messages}" elif isinstance(messages, list): summary = "" diff --git a/autogen/agentchat/contrib/graph_rag/graph_query_engine.py b/autogen/agentchat/contrib/graph_rag/graph_query_engine.py index 0432459b42..5dd088080b 100644 --- a/autogen/agentchat/contrib/graph_rag/graph_query_engine.py +++ b/autogen/agentchat/contrib/graph_rag/graph_query_engine.py @@ -5,10 +5,12 @@ # Portions derived from https://github.com/microsoft/autogen are under the MIT License. 
# SPDX-License-Identifier: MIT from dataclasses import dataclass, field -from typing import Optional, Protocol +from typing import Any, Optional, Protocol, runtime_checkable from .document import Document +__all__ = ["GraphQueryEngine", "GraphStoreQueryResult"] + @dataclass class GraphStoreQueryResult: @@ -19,16 +21,17 @@ class GraphStoreQueryResult: """ answer: Optional[str] = None - results: list = field(default_factory=list) + results: list[Any] = field(default_factory=list) +@runtime_checkable class GraphQueryEngine(Protocol): """An abstract base class that represents a graph query engine on top of a underlying graph database. This interface defines the basic methods for graph-based RAG. """ - def init_db(self, input_doc: Optional[list[Document]] = None): + def init_db(self, input_doc: Optional[list[Document]] = None) -> None: """This method initializes graph database with the input documents or records. Usually, it takes the following steps, 1. connecting to a graph database. @@ -41,10 +44,10 @@ def init_db(self, input_doc: Optional[list[Document]] = None): """ pass - def add_records(self, new_records: list) -> bool: + def add_records(self, new_records: list[Any]) -> bool: """Add new records to the underlying database and add to the graph if required.""" pass - def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult: + def query(self, question: str, n_results: int = 1, **kwarg: Any) -> GraphStoreQueryResult: """This method transform a string format question into database query and return the result.""" pass diff --git a/autogen/agentchat/contrib/graph_rag/graph_rag_capability.py b/autogen/agentchat/contrib/graph_rag/graph_rag_capability.py index 114a189f80..4af33e0fae 100644 --- a/autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +++ b/autogen/agentchat/contrib/graph_rag/graph_rag_capability.py @@ -8,6 +8,8 @@ from ..capabilities.agent_capability import AgentCapability from .graph_query_engine import GraphQueryEngine +__all__ = ["GraphRagCapability"] + class GraphRagCapability(AgentCapability): """A graph-based RAG capability uses a graph query engine to give a conversable agent the graph-based RAG ability. @@ -52,10 +54,10 @@ class GraphRagCapability(AgentCapability): ``` """ - def __init__(self, query_engine: GraphQueryEngine): + def __init__(self, query_engine: GraphQueryEngine) -> None: """Initialize graph-based RAG capability with a graph query engine""" ... - def add_to_agent(self, agent: ConversableAgent): + def add_to_agent(self, agent: ConversableAgent) -> None: """Add the capability to an agent""" ... 
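The move to a `@runtime_checkable` `Protocol` above is what lets the Neo4j engines later in this diff drop their explicit `GraphQueryEngine` base class: conformance is checked structurally rather than by inheritance. A minimal sketch of that behavior, assuming `ag2` is installed and using only the names exported by the new `__init__.py`; `InMemoryGraphQueryEngine` is a toy class invented here for illustration:

```python
from typing import Any, Optional

from autogen.agentchat.contrib.graph_rag import (
    Document,
    GraphQueryEngine,
    GraphStoreQueryResult,
)


class InMemoryGraphQueryEngine:
    """Toy engine for illustration only; it deliberately does not inherit from GraphQueryEngine."""

    def __init__(self) -> None:
        self._docs: list[Document] = []

    def init_db(self, input_doc: Optional[list[Document]] = None) -> None:
        self._docs = list(input_doc or [])

    def add_records(self, new_records: list[Any]) -> bool:
        self._docs.extend(new_records)
        return True

    def query(self, question: str, n_results: int = 1, **kwargs: Any) -> GraphStoreQueryResult:
        # A real engine would translate the question into a graph query here.
        return GraphStoreQueryResult(answer=f"stub answer to: {question}", results=[])


# Structural typing: isinstance() works because the protocol is @runtime_checkable.
assert isinstance(InMemoryGraphQueryEngine(), GraphQueryEngine)
```

This structural check is also why `Neo4jGraphQueryEngine` and `Neo4jNativeGraphQueryEngine` can stop inheriting from the protocol further down without breaking `isinstance`-based call sites.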
diff --git a/autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py b/autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py index d01cb7c286..e03050f88a 100644 --- a/autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py +++ b/autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import os import sys -from typing import Optional, Union +from typing import Any, Optional, Union if sys.version_info >= (3, 10): from typing import TypeAlias @@ -12,11 +12,12 @@ from ....import_utils import optional_import_block, require_optional_import from .document import Document, DocumentType -from .graph_query_engine import GraphQueryEngine, GraphStoreQueryResult +from .graph_query_engine import GraphStoreQueryResult with optional_import_block(): from llama_index.core import PropertyGraphIndex, SimpleDirectoryReader from llama_index.core.base.embeddings.base import BaseEmbedding + from llama_index.core.chat_engine.types import ChatMode from llama_index.core.indices.property_graph import ( DynamicLLMPathExtractor, SchemaLLMPathExtractor, @@ -25,13 +26,14 @@ from llama_index.core.llms import LLM from llama_index.core.readers.json import JSONReader from llama_index.core.schema import Document as LlamaDocument + from llama_index.core.schema import TransformComponent from llama_index.embeddings.openai import OpenAIEmbedding from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore from llama_index.llms.openai import OpenAI @require_optional_import("llama_index", "neo4j") -class Neo4jGraphQueryEngine(GraphQueryEngine): +class Neo4jGraphQueryEngine: """This class serves as a wrapper for a property graph query engine backed by LlamaIndex and Neo4j, facilitating the creating, connecting, updating, and querying of LlamaIndex property graphs. @@ -51,7 +53,7 @@ class Neo4jGraphQueryEngine(GraphQueryEngine): For usage, please refer to example notebook/agentchat_graph_rag_neo4j.ipynb """ - def __init__( + def __init__( # type: ignore[no-any-unimported] self, host: str = "bolt://localhost", port: int = 7687, @@ -94,7 +96,7 @@ def __init__( self.schema = schema self.strict = strict - def init_db(self, input_doc: Optional[list[Document]] = None): + def init_db(self, input_doc: Optional[list[Document]] = None) -> None: """Build the knowledge graph with input documents.""" self.documents = self._load_doc(input_doc if input_doc is not None else []) @@ -120,7 +122,7 @@ def init_db(self, input_doc: Optional[list[Document]] = None): show_progress=True, ) - def connect_db(self): + def connect_db(self) -> None: """Connect to an existing knowledge graph database.""" self.graph_store = Neo4jPropertyGraphStore( username=self.username, @@ -139,7 +141,7 @@ def connect_db(self): show_progress=True, ) - def add_records(self, new_records: list) -> bool: + def add_records(self, new_records: list[Document]) -> bool: """Add new records to the knowledge graph. Must be local files. Args: @@ -166,7 +168,7 @@ def add_records(self, new_records: list) -> bool: print(f"Error adding records: {e}") return False - def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult: + def query(self, question: str, n_results: int = 1, **kwargs: Any) -> GraphStoreQueryResult: """Query the property graph with a question using LlamaIndex chat engine. 
We use the condense_plus_context chat mode which condenses the conversation history and the user query into a standalone question, @@ -185,7 +187,7 @@ def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryR # Initialize chat engine if not already initialized if not hasattr(self, "chat_engine"): - self.chat_engine = self.index.as_chat_engine(chat_mode="condense_plus_context", llm=self.llm) + self.chat_engine = self.index.as_chat_engine(chat_mode=ChatMode.CONDENSE_PLUS_CONTEXT, llm=self.llm) response = self.chat_engine.chat(question) return GraphStoreQueryResult(answer=str(response)) @@ -197,7 +199,7 @@ def _clear(self) -> None: with self.graph_store._driver.session() as session: session.run("MATCH (n) DETACH DELETE n;") - def _load_doc(self, input_doc: list[Document]) -> list["LlamaDocument"]: + def _load_doc(self, input_doc: list[Document]) -> list["LlamaDocument"]: # type: ignore[no-any-unimported] """Load documents from the input files. Currently support the following file types: .csv - comma-separated values .docx - Microsoft Word @@ -214,7 +216,7 @@ def _load_doc(self, input_doc: list[Document]) -> list["LlamaDocument"]: .json JSON files """ for doc in input_doc: - if not os.path.exists(doc.path_or_url): + if not os.path.exists(doc.path_or_url): # type: ignore[arg-type] raise ValueError(f"Document file not found: {doc.path_or_url}") common_type_input_files = [] @@ -228,11 +230,11 @@ def _load_doc(self, input_doc: list[Document]) -> list["LlamaDocument"]: if common_type_input_files: loaded_documents.extend(SimpleDirectoryReader(input_files=common_type_input_files).load_data()) for json_file in json_type_input_files: - loaded_documents.extend(JSONReader().load_data(input_file=json_file)) + loaded_documents.extend(JSONReader().load_data(input_file=json_file)) # type: ignore[arg-type] return loaded_documents - def _create_kg_extractors(self): + def _create_kg_extractors(self) -> list["TransformComponent"]: # type: ignore[no-any-unimported] """If strict is True, extract paths following a strict schema of allowed relationships for each entity. @@ -242,13 +244,13 @@ def _create_kg_extractors(self): # To add more extractors, please refer to https://docs.llamaindex.ai/en/latest/module_guides/indexing/lpg_index_guide/#construction """ # - kg_extractors = [ + kg_extractors: list["TransformComponent"] = [ # type: ignore[no-any-unimported] SchemaLLMPathExtractor( llm=self.llm, possible_entities=self.entities, possible_relations=self.relations, kg_validation_schema=self.schema, - strict=self.strict, + strict=self.strict if self.strict else False, ), ] diff --git a/autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py b/autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py index c952ac7558..507667b71a 100644 --- a/autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py +++ b/autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py @@ -22,10 +22,13 @@ def __init__(self, query_engine: Neo4jGraphQueryEngine): """Initialize GraphRAG capability with a graph query engine""" self.query_engine = query_engine - def add_to_agent(self, agent: UserProxyAgent): + def add_to_agent(self, agent: ConversableAgent) -> None: """Add Neo4j GraphRAG capability to a UserProxyAgent. The restriction to a UserProxyAgent to make sure the returned message only contains information retrieved from the graph DB instead of any LLMs. 
""" + if not isinstance(agent, UserProxyAgent): + raise Exception("Neo4j GraphRAG capability can only be added to a UserProxyAgent.") + self.graph_rag_agent = agent # Validate the agent config @@ -60,16 +63,21 @@ def _reply_using_neo4j_query( Returns: A tuple containing a boolean indicating success and the assistant's reply. """ + if not messages: + return False, None + question = self._get_last_question(messages[-1]) + if not question: + return False, None - result: GraphStoreQueryResult = self.query_engine.query(question) + result: GraphStoreQueryResult = self.query_engine.query(question) # type: ignore[arg-type] return True, result.answer - def _get_last_question(self, message: Union[dict[str, Any], str]) -> None: + def _get_last_question(self, message: Union[dict[str, Any], str]) -> Optional[Union[str, dict[str, Any]]]: """Retrieves the last message from the conversation history.""" if isinstance(message, str): return message if isinstance(message, dict) and "content" in message: - return message["content"] + return message["content"] # type: ignore[no-any-return] return None diff --git a/autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py b/autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py index 909da92c7b..a747ba10a5 100644 --- a/autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py +++ b/autogen/agentchat/contrib/graph_rag/neo4j_native_graph_query_engine.py @@ -4,11 +4,11 @@ import asyncio import logging -from typing import List, Optional, Union +from typing import Any, List, Optional, Union from ....import_utils import optional_import_block, require_optional_import from .document import Document, DocumentType -from .graph_query_engine import GraphQueryEngine, GraphStoreQueryResult +from .graph_query_engine import GraphStoreQueryResult with optional_import_block(): from neo4j import GraphDatabase @@ -26,13 +26,13 @@ @require_optional_import(["neo4j", "neo4j_graphrag"], "neo4j") -class Neo4jNativeGraphQueryEngine(GraphQueryEngine): +class Neo4jNativeGraphQueryEngine: """A graph query engine implemented using the Neo4j GraphRAG SDK. Provides functionality to initialize a knowledge graph, create a vector index, and query the graph using Neo4j and LLM. """ - def __init__( + def __init__( # type: ignore[no-any-unimported] self, host: str = "neo4j://localhost", port: int = 7687, @@ -75,7 +75,7 @@ def __init__( self.relations = relations self.potential_schema = potential_schema - def init_db(self, input_doc: Union[list[Document], None] = None): + def init_db(self, input_doc: Optional[Union[list[Document]]] = None) -> None: """Initialize the Neo4j graph database using the provided input doc. Currently this method only supports single document input (only reads the first doc). @@ -123,7 +123,7 @@ def add_records(self, new_records: list[Document]) -> bool: return True - def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult: + def query(self, question: str, n_results: int = 1, **kwargs: Any) -> GraphStoreQueryResult: """Query the Neo4j database using a natural language question. Args: @@ -142,7 +142,7 @@ def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryR return GraphStoreQueryResult(answer=result.answer) - def _create_index(self, name: str): + def _create_index(self, name: str) -> None: """Create a vector index for the Neo4j knowledge graph. 
Args: @@ -159,13 +159,13 @@ def _create_index(self, name: str): ) logger.info(f"Vector index '{name}' created successfully.") - def _clear_db(self): + def _clear_db(self) -> None: """Clear all nodes and relationships from the Neo4j database.""" logger.info("Clearing all nodes and relationships in the database...") self.driver.execute_query("MATCH (n) DETACH DELETE n;") logger.info("Database cleared successfully.") - def _initialize_kg_builders(self): + def _initialize_kg_builders(self) -> None: """Initialize the knowledge graph builders""" logger.info("Initializing the knowledge graph builders...") self.text_kg_builder = SimpleKGPipeline( @@ -199,7 +199,8 @@ def _build_graph(self, input_doc: List[Document]) -> None: logger.info("Building the knowledge graph...") for doc in input_doc: if doc.doctype == DocumentType.TEXT: - with open(doc.path_or_url) as file: + # todo: we assume this is a path, and not URL + with open(doc.path_or_url) as file: # type: ignore[arg-type] text = file.read() asyncio.run(self.text_kg_builder.run_async(text=text)) elif doc.doctype == DocumentType.PDF: diff --git a/autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py b/autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py index 7077d7cc96..c01af34150 100644 --- a/autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py +++ b/autogen/agentchat/contrib/graph_rag/neo4j_native_graph_rag_capability.py @@ -20,7 +20,7 @@ def __init__(self, query_engine: Neo4jNativeGraphQueryEngine): """Initialize GraphRAG capability with a neo4j native graph query engine""" self.query_engine = query_engine - def add_to_agent(self, agent: ConversableAgent): + def add_to_agent(self, agent: ConversableAgent) -> None: """Add native Neo4j GraphRAG capability to a ConversableAgent. llm_config of the agent must be None/False (default) to make sure the returned message only contains information retrieved from the graph DB instead of any LLMs. """ @@ -60,7 +60,8 @@ def _reply_using_native_neo4j_query( Returns: A tuple containing a boolean indicating success and the assistant's reply. """ - question = self._messages_summary(messages, recipient.system_message) + # todo: fix typing, this is not correct + question = self._messages_summary(messages, recipient.system_message) # type: ignore[arg-type] result: GraphStoreQueryResult = self.query_engine.query(question) return True, result.answer if result.answer else "I'm sorry, I don't have an answer for that." 
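Taken together, the engine and capability hunks above describe one intended workflow: build a knowledge graph from `Document` objects, then answer questions against it. A hedged usage sketch, not taken from the PR itself: the file path and credentials are hypothetical, a running Neo4j instance plus the optional `neo4j`/`llama-index` extras are assumed, and the engine's default LLM/embedding settings are left untouched.

```python
from autogen.agentchat.contrib.graph_rag import Document, DocumentType
from autogen.agentchat.contrib.graph_rag.neo4j_graph_query_engine import Neo4jGraphQueryEngine

# Hypothetical input file; any of the types listed in _load_doc (.txt, .pdf, .csv, ...) would work.
input_documents = [Document(doctype=DocumentType.TEXT, path_or_url="./data/company_report.txt")]

query_engine = Neo4jGraphQueryEngine(
    host="bolt://localhost",  # defaults shown in __init__ above
    port=7687,
    username="neo4j",  # hypothetical credentials
    password="password",
)

query_engine.init_db(input_doc=input_documents)  # builds the property graph via the configured kg extractors
result = query_engine.query("What does the report say about revenue?")
print(result.answer)
```

A GraphRAG capability from the hunks above would then be attached to a `UserProxyAgent` via `add_to_agent`, which is where the new `isinstance` guard comes into play.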
@@ -75,10 +76,7 @@ def _messages_summary(self, messages: Union[dict[str, Any], str], system_message
 
         """
         if isinstance(messages, str):
-            if system_message:
-                summary = f"IMPORTANT: {system_message}\nContext:\n\n{messages}"
-            else:
-                return messages
+            return (f"IMPORTANT: {system_message}\n" if system_message else "") + f"Context:\n\n{messages}"
 
         elif isinstance(messages, list):
             summary = ""
diff --git a/autogen/agentchat/contrib/rag/__init__.py b/autogen/agentchat/contrib/rag/__init__.py
new file mode 100644
index 0000000000..d54cd822f2
--- /dev/null
+++ b/autogen/agentchat/contrib/rag/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from .query_engine import VectorDbQueryEngine
+
+__all__ = ["VectorDbQueryEngine"]
diff --git a/autogen/agentchat/contrib/rag/query_engine.py b/autogen/agentchat/contrib/rag/query_engine.py
new file mode 100644
index 0000000000..17ecacc55f
--- /dev/null
+++ b/autogen/agentchat/contrib/rag/query_engine.py
@@ -0,0 +1,79 @@
+# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from pathlib import Path
+from typing import Any, Optional, Protocol, Union, runtime_checkable
+
+from ....doc_utils import export_module
+
+__all__ = ["VectorDbQueryEngine"]
+
+
+@export_module("autogen.agentchat.contrib.rag")
+@runtime_checkable
+class VectorDbQueryEngine(Protocol):
+    """An abstract base class that represents a query engine on top of an underlying vector database.
+
+    This interface defines the basic methods for RAG.
+    """
+
+    def init_db(
+        self,
+        new_doc_dir: Optional[Union[Path, str]] = None,
+        new_doc_paths: Optional[list[Union[Path, str]]] = None,
+        *args: Any,
+        **kwargs: Any,
+    ) -> bool:
+        """Initialize the database with the input documents or records.
+
+        This method initializes database with the input documents or records.
+        Usually, it takes the following steps,
+        1. connecting to a database.
+        2. insert records
+        3. build indexes etc.
+
+        Args:
+            new_doc_dir: a dir of input documents that are used to create the records in database.
+            new_doc_paths:
+                a list of input documents that are used to create the records in database.
+                a document can be a path to a file or a url.
+            *args: Any additional arguments
+            **kwargs: Any additional keyword arguments
+
+        Returns:
+            bool: True if initialization is successful, False otherwise
+        """
+        ...
+
+    def add_records(
+        self,
+        new_doc_dir: Optional[Union[Path, str]] = None,
+        new_doc_paths_or_urls: Optional[list[Union[Path, str]]] = None,
+        *args: Any,
+        **kwargs: Any,
+    ) -> bool:
+        """Add new documents to the underlying database and add to the index."""
+        ...
+
+    def connect_db(self, *args: Any, **kwargs: Any) -> bool:
+        """Connect to the database.
+
+        Args:
+            *args: Any additional arguments
+            **kwargs: Any additional keyword arguments
+
+        Returns:
+            bool: True if connection is successful, False otherwise
+        """
+        ...
+
+    def query(self, question: str, *args: Any, **kwargs: Any) -> str:
+        """Transform a string format question into database query and return the result.
+
+        Args:
+            question: a string format question
+            *args: Any additional arguments
+            **kwargs: Any additional keyword arguments
+        """
+        ...
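As with the graph protocol earlier in the diff, `VectorDbQueryEngine` is a `runtime_checkable` `Protocol`, so any class with matching methods satisfies it without inheriting from it. A minimal sketch assuming `ag2` is installed; `KeywordQueryEngine` is a toy stand-in invented here (a real implementation would wrap an actual vector database):

```python
from pathlib import Path
from typing import Any, Optional, Union

from autogen.agentchat.contrib.rag import VectorDbQueryEngine


class KeywordQueryEngine:
    """Toy engine that fakes retrieval with keyword matching; illustration only."""

    def __init__(self) -> None:
        self._lines: list[str] = []

    def init_db(
        self,
        new_doc_dir: Optional[Union[Path, str]] = None,
        new_doc_paths: Optional[list[Union[Path, str]]] = None,
        *args: Any,
        **kwargs: Any,
    ) -> bool:
        self._lines = []
        # Reuse add_records; new_doc_paths maps onto its new_doc_paths_or_urls parameter.
        return self.add_records(new_doc_dir, new_doc_paths)

    def add_records(
        self,
        new_doc_dir: Optional[Union[Path, str]] = None,
        new_doc_paths_or_urls: Optional[list[Union[Path, str]]] = None,
        *args: Any,
        **kwargs: Any,
    ) -> bool:
        for path in new_doc_paths_or_urls or []:
            self._lines.extend(Path(path).read_text().splitlines())
        return True

    def connect_db(self, *args: Any, **kwargs: Any) -> bool:
        return True

    def query(self, question: str, *args: Any, **kwargs: Any) -> str:
        words = question.lower().split()
        hits = [line for line in self._lines if any(word in line.lower() for word in words)]
        return "\n".join(hits[:3]) or "no match"


# Structural conformance can be verified because the protocol is runtime_checkable.
assert isinstance(KeywordQueryEngine(), VectorDbQueryEngine)
```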
diff --git a/autogen/import_utils.py b/autogen/import_utils.py index d859a0696f..15fd5f9074 100644 --- a/autogen/import_utils.py +++ b/autogen/import_utils.py @@ -83,7 +83,7 @@ def __init__(self, o: T, missing_modules: Iterable[str], dep_target: str): def accept(cls, o: Any) -> bool: ... @abstractmethod - def patch(self) -> T: ... + def patch(self, except_for: Iterable[str]) -> T: ... def get_object_with_metadata(self) -> Any: return self.o @@ -122,7 +122,13 @@ def decorator(subclass: type["PatchObject[Any]"]) -> type["PatchObject[Any]"]: return decorator @classmethod - def create(cls, o: T, *, missing_modules: Iterable[str], dep_target: str) -> Optional["PatchObject[T]"]: + def create( + cls, + o: T, + *, + missing_modules: Iterable[str], + dep_target: str, + ) -> Optional["PatchObject[T]"]: for subclass in cls._registry: if subclass.accept(o): return subclass(o, missing_modules, dep_target) @@ -135,7 +141,10 @@ class PatchCallable(PatchObject[F]): def accept(cls, o: Any) -> bool: return inspect.isfunction(o) or inspect.ismethod(o) - def patch(self) -> F: + def patch(self, except_for: Iterable[str]) -> F: + if self.o.__name__ in except_for: + return self.o + f: Callable[..., Any] = self.o @wraps(f.__call__) # type: ignore[operator] @@ -154,7 +163,16 @@ def accept(cls, o: Any) -> bool: # return inspect.ismethoddescriptor(o) return isinstance(o, staticmethod) - def patch(self) -> F: + def patch(self, except_for: Iterable[str]) -> F: + if hasattr(self.o, "__name__"): + name = self.o.__name__ + elif hasattr(self.o, "__func__"): + name = self.o.__func__.__name__ + else: + raise ValueError(f"Cannot determine name for object {self.o}") + if name in except_for: + return self.o + f: Callable[..., Any] = self.o.__func__ # type: ignore[attr-defined] @wraps(f) @@ -175,7 +193,10 @@ class PatchInit(PatchObject[F]): def accept(cls, o: Any) -> bool: return inspect.ismethoddescriptor(o) and o.__name__ == "__init__" - def patch(self) -> F: + def patch(self, except_for: Iterable[str]) -> F: + if self.o.__name__ in except_for: + return self.o + f: Callable[..., Any] = self.o @wraps(f) @@ -196,11 +217,14 @@ class PatchProperty(PatchObject[Any]): def accept(cls, o: Any) -> bool: return inspect.isdatadescriptor(o) and hasattr(o, "fget") - def patch(self) -> property: + def patch(self, except_for: Iterable[str]) -> property: if not hasattr(self.o, "fget"): raise ValueError(f"Cannot patch property without getter: {self.o}") f: Callable[..., Any] = self.o.fget + if f.__name__ in except_for: + return self.o # type: ignore[no-any-return] + @wraps(f) def _call(*args: Any, **kwargs: Any) -> Any: raise ImportError(self.msg) @@ -219,14 +243,20 @@ class PatchClass(PatchObject[type[Any]]): def accept(cls, o: Any) -> bool: return inspect.isclass(o) - def patch(self) -> type[Any]: - # Patch __init__ method if possible + def patch(self, except_for: Iterable[str]) -> type[Any]: + if self.o.__name__ in except_for: + return self.o for name, member in inspect.getmembers(self.o): + # Patch __init__ method if possible, but not other internal methods if name.startswith("__") and name != "__init__": continue patched = patch_object( - member, missing_modules=self.missing_modules, dep_target=self.dep_target, fail_if_not_patchable=False + member, + missing_modules=self.missing_modules, + dep_target=self.dep_target, + fail_if_not_patchable=False, + except_for=except_for, ) with suppress(AttributeError): setattr(self.o, name, patched) @@ -234,15 +264,30 @@ def patch(self) -> type[Any]: return self.o -def patch_object(o: T, *, 
missing_modules: Iterable[str], dep_target: str, fail_if_not_patchable: bool = True) -> T: +def patch_object( + o: T, + *, + missing_modules: Iterable[str], + dep_target: str, + fail_if_not_patchable: bool = True, + except_for: Optional[Union[str, Iterable[str]]] = None, +) -> T: patcher = PatchObject.create(o, missing_modules=missing_modules, dep_target=dep_target) if fail_if_not_patchable and patcher is None: raise ValueError(f"Cannot patch object of type {type(o)}") - return patcher.patch() if patcher else o + except_for = except_for if except_for is not None else [] + except_for = [except_for] if isinstance(except_for, str) else except_for + + return patcher.patch(except_for=except_for) if patcher else o -def require_optional_import(modules: Union[str, Iterable[str]], dep_target: str) -> Callable[[T], T]: +def require_optional_import( + modules: Union[str, Iterable[str]], + dep_target: str, + *, + except_for: Optional[Union[str, Iterable[str]]] = None, +) -> Callable[[T], T]: """Decorator to handle optional module dependencies Args: @@ -259,7 +304,7 @@ def decorator(o: T) -> T: else: def decorator(o: T) -> T: - return patch_object(o, missing_modules=missing_modules, dep_target=dep_target) + return patch_object(o, missing_modules=missing_modules, dep_target=dep_target, except_for=except_for) return decorator diff --git a/autogen/oai/anthropic.py b/autogen/oai/anthropic.py index 9cbe02bc93..a1d1e95003 100644 --- a/autogen/oai/anthropic.py +++ b/autogen/oai/anthropic.py @@ -384,7 +384,10 @@ def _add_response_format_to_system(self, params: dict[str, Any]): return # Get the schema of the Pydantic model - schema = self._response_format.model_json_schema() + if isinstance(self._response_format, dict): + schema = self._response_format + else: + schema = self._response_format.model_json_schema() # Add instructions for JSON formatting format_content = f"""Please provide your response as a JSON object that matches the following schema: @@ -425,16 +428,25 @@ def _extract_json_response(self, response: Message) -> Any: json_str = content[json_start : json_end + 1] try: - # Parse JSON and validate against the Pydantic model + # Parse JSON and validate against the Pydantic model if Pydantic model was provided json_data = json.loads(json_str) - return self._response_format.model_validate(json_data) + if isinstance(self._response_format, dict): + return json_str + else: + return self._response_format.model_validate(json_data) + except Exception as e: raise ValueError(f"Failed to parse response as valid JSON matching the schema for Structured Output: {e!s}") def _format_json_response(response: Any) -> str: """Formats the JSON response for structured outputs using the format method if it exists.""" - return response.format() if isinstance(response, FormatterProtocol) else response + if isinstance(response, str): + return response + elif isinstance(response, FormatterProtocol): + return response.format() + else: + return response.model_dump_json() @require_optional_import("anthropic", "anthropic") diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 2e64e9ff46..d31a09ea36 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -53,6 +53,7 @@ if openai.__version__ >= "1.1.0": TOOL_ENABLED = True ERROR = None + from openai.lib._pydantic import _ensure_strict_json_schema else: ERROR: Optional[ImportError] = ImportError("Please install openai>=1 and diskcache to use autogen.OpenAIWrapper.") OpenAI = object @@ -258,7 +259,9 @@ def __init__(self, config): class OpenAIClient: """Follows the 
Client protocol and wraps the OpenAI client.""" - def __init__(self, client: Union[OpenAI, AzureOpenAI], response_format: Optional[BaseModel] = None): + def __init__( + self, client: Union[OpenAI, AzureOpenAI], response_format: Union[BaseModel, dict[str, Any], None] = None + ): self._oai_client = client self.response_format = response_format if ( @@ -395,9 +398,23 @@ def create(self, params: dict[str, Any]) -> ChatCompletion: def _create_or_parse(*args, **kwargs): if "stream" in kwargs: kwargs.pop("stream") - kwargs["response_format"] = type_to_response_format_param( - self.response_format or params["response_format"] - ) + + if isinstance(kwargs["response_format"], dict): + kwargs["response_format"] = { + "type": "json_schema", + "json_schema": { + "schema": _ensure_strict_json_schema( + kwargs["response_format"], path=(), root=kwargs["response_format"] + ), + "name": "response_format", + "strict": True, + }, + } + else: + kwargs["response_format"] = type_to_response_format_param( + self.response_format or params["response_format"] + ) + return self._oai_client.chat.completions.create(*args, **kwargs) create_or_parse = _create_or_parse @@ -987,7 +1004,7 @@ def yes_or_no_filter(context, response): **params, **{"response_format": json.dumps(TypeAdapter(params["response_format"]).json_schema())}, } - if "response_format" in params + if "response_format" in params and not isinstance(params["response_format"], dict) else params ) request_ts = get_current_ts() diff --git a/autogen/oai/gemini.py b/autogen/oai/gemini.py index a61c202bcb..208b147475 100644 --- a/autogen/oai/gemini.py +++ b/autogen/oai/gemini.py @@ -252,7 +252,12 @@ def create(self, params: dict) -> ChatCompletion: self._response_format = params.get("response_format") generation_config["response_mime_type"] = "application/json" - response_schema = dict(jsonref.replace_refs(params.get("response_format").model_json_schema())) + response_format_schema_raw = params.get("response_format") + + if isinstance(response_format_schema_raw, dict): + response_schema = dict(jsonref.replace_refs(response_format_schema_raw)) + else: + response_schema = dict(jsonref.replace_refs(params.get("response_format").model_json_schema())) if "$defs" in response_schema: response_schema.pop("$defs") generation_config["response_schema"] = response_schema @@ -571,9 +576,12 @@ def _convert_json_response(self, response: str) -> Any: return response try: - # Parse JSON and validate against the Pydantic model + # Parse JSON and validate against the Pydantic model if Pydantic model was provided json_data = json.loads(response) - return self._response_format.model_validate(json_data) + if isinstance(self._response_format, dict): + return json_data + else: + return self._response_format.model_validate(json_data) except Exception as e: raise ValueError(f"Failed to parse response as valid JSON matching the schema for Structured Output: {e!s}") diff --git a/autogen/oai/ollama.py b/autogen/oai/ollama.py index 137e1867e7..ecabcdf0e5 100644 --- a/autogen/oai/ollama.py +++ b/autogen/oai/ollama.py @@ -237,7 +237,11 @@ def create(self, params: dict) -> ChatCompletion: # https://ollama.com/blog/structured-outputs if params.get("response_format"): self._response_format = params["response_format"] - ollama_params["format"] = params.get("response_format").model_json_schema() + ollama_params["format"] = ( + params.get("response_format").model_json_schema() + if isinstance(self._response_format, BaseModel) + else params.get("response_format") + ) # Token counts will be returned 
prompt_tokens = 0 @@ -491,8 +495,11 @@ def _convert_json_response(self, response: str) -> Any: return response try: - # Parse JSON and validate against the Pydantic model - return self._response_format.model_validate_json(response) + # Parse JSON and validate against the Pydantic model if Pydantic model was provided + if isinstance(self._response_format, dict): + return response + else: + return self._response_format.model_validate_json(response) except Exception as e: raise ValueError(f"Failed to parse response as valid JSON matching the schema for Structured Output: {e!s}") diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py index bf476c421d..b391a5d081 100644 --- a/autogen/oai/openai_utils.py +++ b/autogen/oai/openai_utils.py @@ -4,6 +4,7 @@ # # Portions derived from https://github.com/microsoft/autogen are under the MIT License. # SPDX-License-Identifier: MIT +import importlib import importlib.metadata import json import logging @@ -554,6 +555,9 @@ def config_list_from_json( with open(config_list_path) as json_file: config_list = json.load(json_file) + + config_list = filter_config(config_list, filter_dict) + return filter_config(config_list, filter_dict) diff --git a/autogen/tools/experimental/browser_use/browser_use.py b/autogen/tools/experimental/browser_use/browser_use.py index afeab371d2..17cae57f6b 100644 --- a/autogen/tools/experimental/browser_use/browser_use.py +++ b/autogen/tools/experimental/browser_use/browser_use.py @@ -14,10 +14,8 @@ with optional_import_block(): from browser_use import Agent, Controller from browser_use.browser.browser import Browser, BrowserConfig - from langchain_anthropic import ChatAnthropic - from langchain_google_genai import ChatGoogleGenerativeAI - from langchain_ollama import ChatOllama - from langchain_openai import AzureChatOpenAI, ChatOpenAI + + from .langchain_factory import LangchainFactory __all__ = ["BrowserUseResult", "BrowserUseTool"] @@ -37,7 +35,14 @@ class BrowserUseResult(BaseModel): @require_optional_import( - ["langchain_anthropic", "langchain_google_genai", "langchain_ollama", "langchain_openai", "browser_use"], + [ + "langchain_anthropic", + "langchain_google_genai", + "langchain_ollama", + "langchain_openai", + "langchain_core", + "browser_use", + ], "browser-use", ) @export_module("autogen.tools.experimental") @@ -88,7 +93,7 @@ async def browser_use( # type: ignore[no-any-unimported] browser: Annotated[Browser, Depends(on(browser))], agent_kwargs: Annotated[dict[str, Any], Depends(on(agent_kwargs))], ) -> BrowserUseResult: - llm = BrowserUseTool._get_llm(llm_config) + llm = LangchainFactory.create_base_chat_model(llm_config) max_steps = agent_kwargs.pop("max_steps", 100) @@ -121,51 +126,3 @@ def _get_controller(llm_config: dict[str, Any]) -> Any: else llm_config.get("response_format") ) return Controller(output_model=response_format) - - @staticmethod - def _get_llm( - llm_config: dict[str, Any], - ) -> Any: - if "config_list" not in llm_config: - if "model" in llm_config: - return ChatOpenAI(model=llm_config["model"]) - raise ValueError("llm_config must be a valid config dictionary.") - - try: - model = llm_config["config_list"][0]["model"] - api_type = llm_config["config_list"][0].get("api_type", "openai") - - # Ollama does not require an api_key - api_key = None if api_type == "ollama" else llm_config["config_list"][0]["api_key"] - - if api_type == "deepseek" or api_type == "azure" or api_type == "azure": - base_url = llm_config["config_list"][0].get("base_url") - if not base_url: - raise 
ValueError(f"base_url is required for {api_type} api type.") - if api_type == "azure": - api_version = llm_config["config_list"][0].get("api_version") - if not api_version: - raise ValueError(f"api_version is required for {api_type} api type.") - - except (KeyError, TypeError) as e: - raise ValueError(f"llm_config must be a valid config dictionary: {e}") - - if api_type == "openai": - return ChatOpenAI(model=model, api_key=api_key) - elif api_type == "azure": - return AzureChatOpenAI( - model=model, - api_key=api_key, - azure_endpoint=base_url, - api_version=api_version, - ) - elif api_type == "deepseek": - return ChatOpenAI(model=model, api_key=api_key, base_url=base_url) - elif api_type == "anthropic": - return ChatAnthropic(model=model, api_key=api_key) - elif api_type == "google": - return ChatGoogleGenerativeAI(model=model, api_key=api_key) - elif api_type == "ollama": - return ChatOllama(model=model, num_ctx=32000) - else: - raise ValueError(f"Currently unsupported language model api type for browser use: {api_type}") diff --git a/autogen/tools/experimental/browser_use/langchain_factory.py b/autogen/tools/experimental/browser_use/langchain_factory.py new file mode 100644 index 0000000000..fe1fdeb1f5 --- /dev/null +++ b/autogen/tools/experimental/browser_use/langchain_factory.py @@ -0,0 +1,162 @@ +# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors +# +# SPDX-License-Identifier: Apache-2.0 + +from abc import ABC, abstractmethod +from copy import deepcopy +from typing import Any, Callable + +from ....import_utils import optional_import_block, require_optional_import + +with optional_import_block(): + from langchain_anthropic import ChatAnthropic + from langchain_core.language_models import BaseChatModel + from langchain_google_genai import ChatGoogleGenerativeAI + from langchain_ollama import ChatOllama + from langchain_openai import AzureChatOpenAI, ChatOpenAI + + +__all__ = ["LangchainFactory"] + + +@require_optional_import( + ["langchain_anthropic", "langchain_google_genai", "langchain_ollama", "langchain_openai", "langchain_core"], + "browser-use", + except_for=["__init__", "register_factory"], +) +class LangchainFactory(ABC): + _factories: set["LangchainFactory"] = set() + + @classmethod + def create_base_chat_model(cls, llm_config: dict[str, Any]) -> "BaseChatModel": # type: ignore [no-any-unimported] + first_llm_config = cls.get_first_llm_config(llm_config) + for factory in LangchainFactory._factories: + if factory.accepts(first_llm_config): + return factory.create(first_llm_config) + + raise ValueError("Could not find a factory for the given config.") + + @classmethod + def register_factory(cls) -> Callable[[type["LangchainFactory"]], type["LangchainFactory"]]: + def decorator(factory: type["LangchainFactory"]) -> type["LangchainFactory"]: + cls._factories.add(factory()) + return factory + + return decorator + + @classmethod + def get_first_llm_config(cls, llm_config: dict[str, Any]) -> dict[str, Any]: + llm_config = deepcopy(llm_config) + if "config_list" not in llm_config: + if "model" in llm_config: + return llm_config + raise ValueError("llm_config must be a valid config dictionary.") + + if len(llm_config["config_list"]) == 0: + raise ValueError("Config list must contain at least one config.") + return llm_config["config_list"][0] # type: ignore [no-any-return] + + @classmethod + def prepare_config(cls, first_llm_config: dict[str, Any]) -> dict[str, Any]: + for pop_keys in ["api_type", "response_format"]: + 
first_llm_config.pop(pop_keys, None) + return first_llm_config + + @classmethod + @abstractmethod + def create(cls, first_llm_config: dict[str, Any]) -> "BaseChatModel": # type: ignore [no-any-unimported] + ... + + @classmethod + @abstractmethod + def get_api_type(cls) -> str: ... + + @classmethod + def accepts(cls, first_llm_config: dict[str, Any]) -> bool: + return first_llm_config.get("api_type", "openai") == cls.get_api_type() # type: ignore [no-any-return] + + +@LangchainFactory.register_factory() +class ChatOpenAIFactory(LangchainFactory): + @classmethod + def create(cls, first_llm_config: dict[str, Any]) -> "ChatOpenAI": # type: ignore [no-any-unimported] + first_llm_config = cls.prepare_config(first_llm_config) + + return ChatOpenAI(**first_llm_config) + + @classmethod + def get_api_type(cls) -> str: + return "openai" + + +@LangchainFactory.register_factory() +class DeepSeekFactory(ChatOpenAIFactory): + @classmethod + def create(cls, first_llm_config: dict[str, Any]) -> "ChatOpenAI": # type: ignore [no-any-unimported] + if "base_url" not in first_llm_config: + raise ValueError("base_url is required for deepseek api type.") + return super().create(first_llm_config) + + @classmethod + def get_api_type(cls) -> str: + return "deepseek" + + +@LangchainFactory.register_factory() +class ChatAnthropicFactory(LangchainFactory): + @classmethod + def create(cls, first_llm_config: dict[str, Any]) -> "ChatAnthropic": # type: ignore [no-any-unimported] + first_llm_config = cls.prepare_config(first_llm_config) + + return ChatAnthropic(**first_llm_config) + + @classmethod + def get_api_type(cls) -> str: + return "anthropic" + + +@LangchainFactory.register_factory() +class ChatGoogleGenerativeAIFactory(LangchainFactory): + @classmethod + def create(cls, first_llm_config: dict[str, Any]) -> "ChatGoogleGenerativeAI": # type: ignore [no-any-unimported] + first_llm_config = cls.prepare_config(first_llm_config) + + return ChatGoogleGenerativeAI(**first_llm_config) + + @classmethod + def get_api_type(cls) -> str: + return "google" + + +@LangchainFactory.register_factory() +class AzureChatOpenAIFactory(LangchainFactory): + @classmethod + def create(cls, first_llm_config: dict[str, Any]) -> "AzureChatOpenAI": # type: ignore [no-any-unimported] + first_llm_config = cls.prepare_config(first_llm_config) + for param in ["base_url", "api_version"]: + if param not in first_llm_config: + raise ValueError(f"{param} is required for azure api type.") + first_llm_config["azure_endpoint"] = first_llm_config.pop("base_url") + + return AzureChatOpenAI(**first_llm_config) + + @classmethod + def get_api_type(cls) -> str: + return "azure" + + +@LangchainFactory.register_factory() +class ChatOllamaFactory(LangchainFactory): + @classmethod + def create(cls, first_llm_config: dict[str, Any]) -> "ChatOllama": # type: ignore [no-any-unimported] + first_llm_config = cls.prepare_config(first_llm_config) + first_llm_config["base_url"] = first_llm_config.pop("client_host", None) + if "num_ctx" not in first_llm_config: + # In all Browser Use examples, num_ctx is set to 32000 + first_llm_config["num_ctx"] = 32000 + + return ChatOllama(**first_llm_config) + + @classmethod + def get_api_type(cls) -> str: + return "ollama" diff --git a/autogen/tools/experimental/crawl4ai/crawl4ai.py b/autogen/tools/experimental/crawl4ai/crawl4ai.py index 97dbf3f57a..e300449a59 100644 --- a/autogen/tools/experimental/crawl4ai/crawl4ai.py +++ b/autogen/tools/experimental/crawl4ai/crawl4ai.py @@ -125,7 +125,8 @@ def _get_lite_llm_config(llm_config: 
dict[str, Any]) -> dict[str, Any]: model = llm_config["model"] api_type = "openai" lite_llm_config = {"api_token": os.getenv("OPENAI_API_KEY")} - raise ValueError("llm_config must be a valid config dictionary.") + else: + raise ValueError("llm_config must be a valid config dictionary.") else: try: lite_llm_config = llm_config["config_list"][0].copy() @@ -136,6 +137,8 @@ def _get_lite_llm_config(llm_config: dict[str, Any]) -> dict[str, Any]: api_type = lite_llm_config.pop("api_type", "openai") # type: ignore[assignment] # litellm uses "gemini" instead of "google" for the api_type api_type = api_type if api_type != "google" else "gemini" + if api_type == "ollama" and "client_host" in lite_llm_config: + lite_llm_config["api_base"] = lite_llm_config.pop("client_host") except (KeyError, TypeError): raise ValueError("llm_config must be a valid config dictionary.") diff --git a/codecov.yml b/codecov.yml index 1f9ff2d104..49283f0463 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,7 +1,8 @@ codecov: require_ci_to_pass: yes notify: - after_n_builds: 15 + # manual_trigger: true + after_n_builds: 30 wait_for_ci: yes coverage: @@ -14,16 +15,27 @@ coverage: threshold: 1% base: auto if_ci_failed: success + branches: + - main patch: default: # Settings for new code in PRs informational: true target: auto threshold: 1% + if_ci_failed: success base: auto # Configure codecov bot behavior -comment: false +comment: +# Show only changed files in PR comment + layout: "condensed_header, condensed_files, condensed_footer" + behavior: new + # Hide PR comment if there are no changes in coverage + require_changes: true + # Only post comment after all builds finish + after_n_builds: 30 + hide_project_coverage: true # Ignore certain paths/files ignore: diff --git a/notebook/agentchat_structured_outputs_from_config.ipynb b/notebook/agentchat_structured_outputs_from_config.ipynb new file mode 100644 index 0000000000..9288db8de9 --- /dev/null +++ b/notebook/agentchat_structured_outputs_from_config.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Structured output from json configuration\n", + "\n", + "Various LLM providers offer functionality for defining a structure of the messages generated by LLMs and AG2 enables this by propagating `response_format`, in the LLM configuration for your agents, to the underlying client.\n", + " \n", + "You can define the JSON structure of the output in the `response_format` field in the LLM configuration.\n", + "\n", + "To assist in determining the JSON structure, you can generate a valid schema using `.model_json_schema()` on a predefined pydantic model, for more info, see [here](https://docs.pydantic.dev/latest/concepts/json_schema/). 
Your schema should be [OpenAPI specification](https://github.com/OAI/OpenAPI-Specification) compliant and have a **title** field defined for the root model which will be loaded as a `response_format` for the Agent.\n", + "\n", + "For more info on structured outputs, see [our documentation](https://docs.ag2.ai/docs/user-guide/basic-concepts/structured-outputs).\n", + "\n", + "\n", + "````{=mdx}\n", + ":::info Requirements\n", + "Install `ag2`:\n", + "```bash\n", + "pip install ag2\n", + "```\n", + "\n", + "> **Note:** If you have been using `autogen` or `pyautogen`, all you need to do is upgrade it using: \n", + "> ```bash\n", + "> pip install -U autogen\n", + "> ```\n", + "> or \n", + "> ```bash\n", + "> pip install -U pyautogen\n", + "> ```\n", + "> as `pyautogen`, `autogen`, and `ag2` are aliases for the same PyPI package. \n", + "\n", + "\n", + "For more information, please refer to the [installation guide](/docs/user-guide/basic-concepts/installing-ag2).\n", + ":::\n", + "````" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Supported clients\n", + "AG2 has structured output support for following client providers:\n", + "- OpenAI (`openai`)\n", + "- Anthropic (`anthropic`)\n", + "- Google Gemini (`google`)\n", + "- Ollama (`ollama`)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set your API Endpoint\n", + "\n", + "The [`config_list_from_json`](https://docs.ag2.ai/docs/api-reference/autogen/config_list_from_json#config-list-from-json) function loads a list of configurations from an environment variable or a JSON file.\n", + "\n", + "Here is an example of a configuration using the `gpt-4o-mini` model that will use a `MathReasoning` response format. To use it, paste it into your `OAI_CONFIG_LIST` file and set the `api_key` to your OpenAI API key.\n", + "\n", + "```json\n", + "[\n", + " {\n", + " \"model\": \"gpt-4o-mini\",\n", + " \"api_key\": \"\",\n", + " \"response_format\": {\n", + " \"$defs\":{\n", + " \"Step\":{\n", + " \"properties\":{\n", + " \"explanation\":{\n", + " \"title\":\"Explanation\",\n", + " \"type\":\"string\"\n", + " },\n", + " \"output\":{\n", + " \"title\":\"Output\",\n", + " \"type\":\"string\"\n", + " }\n", + " },\n", + " \"required\":[\n", + " \"explanation\",\n", + " \"output\"\n", + " ],\n", + " \"title\":\"Step\",\n", + " \"type\":\"object\"\n", + " }\n", + " },\n", + " \"properties\":{\n", + " \"steps\":{\n", + " \"items\":{\n", + " \"$ref\":\"#/$defs/Step\"\n", + " },\n", + " \"title\":\"Steps\",\n", + " \"type\":\"array\"\n", + " },\n", + " \"final_answer\":{\n", + " \"title\":\"Final Answer\",\n", + " \"type\":\"string\"\n", + " }\n", + " },\n", + " \"required\":[\n", + " \"steps\",\n", + " \"final_answer\"\n", + " ],\n", + " \"title\":\"MathReasoning\",\n", + " \"type\":\"object\"\n", + " }, \n", + " \"tags\": [\"gpt-4o-mini-response-format\"]\n", + " }\n", + "]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import autogen\n", + "\n", + "# Load the configuration including the response format\n", + "config_list = autogen.config_list_from_json(\n", + " \"OAI_CONFIG_LIST\",\n", + " filter_dict={\n", + " \"tags\": [\"gpt-4o-response-format\"],\n", + " },\n", + ")\n", + "\n", + "# Output the configuration, showing that it matches the configuration file.\n", + "config_list" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "````{=mdx}\n", + ":::tip\n", + 
"Learn more about configuring LLMs for agents [here](/docs/topics/llm_configuration).\n", + ":::\n", + "````" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example: math reasoning\n", + "\n", + "Using structured output, we can enforce chain-of-thought reasoning in the model to output an answer in a structured, step-by-step way." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define chat actors\n", + "\n", + "Now we can define the agents that will solve the posed math problem. \n", + "We will keep this example simple; we will use a `UserProxyAgent` to input the math problem and an `AssistantAgent` to solve it.\n", + "\n", + "The `AssistantAgent` will be constrained to solving the math problem step-by-step by using the `MathReasoning` response format we defined above.\n", + "\n", + "The `response_format` is added to the LLM configuration and then this configuration is applied to the agent." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_config = {\n", + " \"config_list\": config_list,\n", + " \"cache_seed\": 42,\n", + "}\n", + "\n", + "user_proxy = autogen.UserProxyAgent(\n", + " name=\"User_proxy\",\n", + " system_message=\"A human admin.\",\n", + " human_input_mode=\"NEVER\",\n", + ")\n", + "\n", + "assistant = autogen.AssistantAgent(\n", + " name=\"Math_solver\",\n", + " llm_config=llm_config, # Response Format is in the configuration\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Start the chat\n", + "\n", + "Let's now start the chat and prompt the assistant to solve a simple equation. The assistant agent should return a response solving the equation using a step-by-step `MathReasoning` model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "summary = user_proxy.initiate_chat(\n", + " assistant, message=\"how can I solve 8x + 7 = -23\", max_turns=1, summary_method=\"last_msg\"\n", + ").summary\n", + "\n", + "summary" + ] + } + ], + "metadata": { + "front_matter": { + "description": "OpenAI offers functionality for defining the structure of messages generated by LLMs. AutoGen enables this functionality by propagating the response_format passed to your agents to the underlying client.", + "tags": [ + "structured output" + ] + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pyproject.toml b/pyproject.toml index 79df30ee4d..539aee52f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -389,6 +389,8 @@ files = [ "autogen/messages", "autogen/import_utils.py", "autogen/agents/experimental/document_agent", + "autogen/agentchat/contrib/rag", + "autogen/agentchat/contrib/graph_rag", "website/*.py", "test/io", "test/tools", @@ -399,6 +401,8 @@ files = [ "test/test_import_utils.py", "test/test_import.py", "test/agents/experimental/document_agent", +# "test/agentchat/contrib/rag", + "test/agentchat/contrib/graph_rag", "test/website", ] diff --git a/test/agentchat/contrib/graph_rag/test_falkor_graph_rag.py b/test/agentchat/contrib/graph_rag/test_falkor_graph_rag.py index cd576d253b..79d60aa72d 100644 --- a/test/agentchat/contrib/graph_rag/test_falkor_graph_rag.py +++ b/test/agentchat/contrib/graph_rag/test_falkor_graph_rag.py @@ -8,10 +8,9 @@ import pytest -from autogen.agentchat.contrib.graph_rag.document import Document, DocumentType +from autogen.agentchat.contrib.graph_rag import Document, DocumentType, GraphStoreQueryResult from autogen.agentchat.contrib.graph_rag.falkor_graph_query_engine import ( FalkorGraphQueryEngine, - GraphStoreQueryResult, ) from autogen.import_utils import optional_import_block, skip_on_missing_imports @@ -28,7 +27,7 @@ reason=reason, ) @skip_on_missing_imports(["falkordb", "graphrag_sdk"], "neo4j") -def test_falkor_db_query_engine(): +def test_falkor_db_query_engine() -> None: """Test FalkorDB Query Engine. 1. create a test FalkorDB Query Engine with a schema. 2. Initialize it with an input txt file.
@@ -62,4 +61,4 @@ def test_falkor_db_query_engine(): query_result: GraphStoreQueryResult = query_engine.query(question=question) # Assert - assert query_result.answer.find("Keanu Reeves") >= 0 + assert query_result.answer.find("Keanu Reeves") >= 0 # type: ignore[union-attr] diff --git a/test/agentchat/contrib/graph_rag/test_graph_rag_basic.py b/test/agentchat/contrib/graph_rag/test_graph_rag_basic.py index 5687f22613..1020d508fd 100644 --- a/test/agentchat/contrib/graph_rag/test_graph_rag_basic.py +++ b/test/agentchat/contrib/graph_rag/test_graph_rag_basic.py @@ -11,7 +11,7 @@ from autogen.agentchat.conversable_agent import ConversableAgent -def test_dry_run(): +def test_dry_run() -> None: """Dry run for basic graph rag objects.""" mock_graph_query_engine = Mock(spec=GraphQueryEngine) diff --git a/test/agentchat/contrib/graph_rag/test_native_neo4j_graph_rag.py b/test/agentchat/contrib/graph_rag/test_native_neo4j_graph_rag.py index d4cb9abb26..311e17db86 100644 --- a/test/agentchat/contrib/graph_rag/test_native_neo4j_graph_rag.py +++ b/test/agentchat/contrib/graph_rag/test_native_neo4j_graph_rag.py @@ -7,11 +7,8 @@ import pytest -from autogen.agentchat.contrib.graph_rag.document import Document, DocumentType -from autogen.agentchat.contrib.graph_rag.neo4j_native_graph_query_engine import ( - GraphStoreQueryResult, - Neo4jNativeGraphQueryEngine, -) +from autogen.agentchat.contrib.graph_rag import Document, DocumentType, GraphStoreQueryResult +from autogen.agentchat.contrib.graph_rag.neo4j_native_graph_query_engine import Neo4jNativeGraphQueryEngine from autogen.import_utils import skip_on_missing_imports from ....conftest import reason @@ -25,7 +22,7 @@ # Test fixture for creating and initializing a query engine @pytest.fixture(scope="module") -def neo4j_native_query_engine(): +def neo4j_native_query_engine() -> Neo4jNativeGraphQueryEngine: input_path = "./test/agentchat/contrib/graph_rag/BUZZ_Employee_Handbook.txt" input_document = [Document(doctype=DocumentType.TEXT, path_or_url=input_path)] @@ -88,7 +85,7 @@ def neo4j_native_query_engine(): # Test fixture for auto generated knowledge graph @pytest.fixture(scope="module") -def neo4j_native_query_engine_auto(): +def neo4j_native_query_engine_auto() -> Neo4jNativeGraphQueryEngine: input_path = "./test/agentchat/contrib/graph_rag/BUZZ_Employee_Handbook.txt" input_document = [Document(doctype=DocumentType.TEXT, path_or_url=input_path)] @@ -112,13 +109,13 @@ def neo4j_native_query_engine_auto(): reason=reason, ) @skip_on_missing_imports(["neo4j", "neo4j_graphrag"], "neo4j") -def test_neo4j_native_query_engine(neo4j_native_query_engine): +def test_neo4j_native_query_engine(neo4j_native_query_engine: Neo4jNativeGraphQueryEngine) -> None: """Test querying with initialized knowledge graph""" question = "Which company is the employer?" query_result: GraphStoreQueryResult = neo4j_native_query_engine.query(question=question) logger.info(query_result.answer) - assert query_result.answer.find("BUZZ") >= 0 + assert query_result.answer.find("BUZZ") >= 0 # type: ignore[union-attr] @pytest.mark.openai @@ -128,18 +125,18 @@ def test_neo4j_native_query_engine(neo4j_native_query_engine): reason=reason, ) @skip_on_missing_imports(["neo4j", "neo4j_graphrag"], "neo4j") -def test_neo4j_native_query_auto(neo4j_native_query_engine_auto): +def test_neo4j_native_query_auto(neo4j_native_query_engine_auto: Neo4jNativeGraphQueryEngine) -> None: """Test querying with auto-generated property graph""" question = "Which company is the employer?" 
query_result: GraphStoreQueryResult = neo4j_native_query_engine_auto.query(question=question) logger.info(query_result.answer) - assert query_result.answer.find("BUZZ") >= 0 + assert query_result.answer.find("BUZZ") >= 0 # type: ignore[union-attr] @pytest.mark.neo4j @skip_on_missing_imports("neo4j_graphrag", "unknown") -def test_neo4j_add_records(neo4j_native_query_engine): +def test_neo4j_add_records(neo4j_native_query_engine: Neo4jNativeGraphQueryEngine) -> None: """Test the add_records functionality of the Neo4j Query Engine.""" input_path = "./test/agentchat/contrib/graph_rag/the_matrix.txt" input_documents = [Document(doctype=DocumentType.TEXT, path_or_url=input_path)] @@ -153,4 +150,4 @@ def test_neo4j_add_records(neo4j_native_query_engine): logger.info(query_result.answer) - assert query_result.answer.find("Keanu Reeves") >= 0 + assert query_result.answer.find("Keanu Reeves") >= 0 # type: ignore[union-attr] diff --git a/test/agentchat/contrib/graph_rag/test_neo4j_graph_rag.py b/test/agentchat/contrib/graph_rag/test_neo4j_graph_rag.py index 7120a2ea18..f9f3703b28 100644 --- a/test/agentchat/contrib/graph_rag/test_neo4j_graph_rag.py +++ b/test/agentchat/contrib/graph_rag/test_neo4j_graph_rag.py @@ -10,11 +10,8 @@ import pytest -from autogen.agentchat.contrib.graph_rag.document import Document, DocumentType -from autogen.agentchat.contrib.graph_rag.neo4j_graph_query_engine import ( - GraphStoreQueryResult, - Neo4jGraphQueryEngine, -) +from autogen.agentchat.contrib.graph_rag import Document, DocumentType, GraphStoreQueryResult +from autogen.agentchat.contrib.graph_rag.neo4j_graph_query_engine import Neo4jGraphQueryEngine from autogen.import_utils import skip_on_missing_imports from ....conftest import reason @@ -29,7 +26,7 @@ # Test fixture for creating and initializing a query engine with a JSON input file @pytest.fixture(scope="module") @skip_on_missing_imports(["llama_index"], "neo4j") -def neo4j_query_engine_with_json(): +def neo4j_query_engine_with_json() -> Neo4jGraphQueryEngine: input_path = "./test/agentchat/contrib/graph_rag/layout_parser_paper_parsed_elements.json" input_documents = [Document(doctype=DocumentType.JSON, path_or_url=input_path)] # Create Neo4jGraphQueryEngine @@ -48,7 +45,7 @@ def neo4j_query_engine_with_json(): # Test fixture for creating and initializing a query engine @pytest.fixture(scope="module") -def neo4j_query_engine(): +def neo4j_query_engine() -> Neo4jGraphQueryEngine: input_path = "./test/agentchat/contrib/graph_rag/BUZZ_Employee_Handbook.docx" input_documents = [Document(doctype=DocumentType.TEXT, path_or_url=input_path)] @@ -90,7 +87,7 @@ def neo4j_query_engine(): database="neo4j", # Change if you want to store the graphh in your custom database entities=entities, # possible entities relations=relations, # possible relations - schema=schema, + schema=schema, # type: ignore[arg-type] strict=True, # enofrce the extracted triplets to be in the schema ) @@ -101,7 +98,7 @@ def neo4j_query_engine(): # Test fixture to test auto-generation without given schema @pytest.fixture(scope="module") -def neo4j_query_engine_auto(): +def neo4j_query_engine_auto() -> Neo4jGraphQueryEngine: """Test the engine with auto-generated property graph""" input_path = "./test/agentchat/contrib/graph_rag/BUZZ_Employee_Handbook.txt" @@ -125,7 +122,7 @@ def neo4j_query_engine_auto(): reason=reason, ) @skip_on_missing_imports(["llama_index"], "neo4j") -def test_neo4j_query_engine(neo4j_query_engine): +def test_neo4j_query_engine(neo4j_query_engine: Neo4jGraphQueryEngine) -> None: 
"""Test querying functionality of the Neo4j Query Engine.""" question = "Which company is the employer?" @@ -134,7 +131,7 @@ def test_neo4j_query_engine(neo4j_query_engine): logger.info(query_result.answer) - assert query_result.answer.find("BUZZ") >= 0 + assert query_result.answer.find("BUZZ") >= 0 # type: ignore[union-attr] @pytest.mark.openai @@ -144,7 +141,7 @@ def test_neo4j_query_engine(neo4j_query_engine): reason=reason, ) @skip_on_missing_imports(["llama_index"], "neo4j") -def test_neo4j_add_records(neo4j_query_engine): +def test_neo4j_add_records(neo4j_query_engine: Neo4jGraphQueryEngine) -> None: """Test the add_records functionality of the Neo4j Query Engine.""" input_path = "./test/agentchat/contrib/graph_rag/the_matrix.txt" input_documents = [Document(doctype=DocumentType.TEXT, path_or_url=input_path)] @@ -158,7 +155,7 @@ def test_neo4j_add_records(neo4j_query_engine): logger.info(query_result.answer) - assert query_result.answer.find("Keanu Reeves") >= 0 + assert query_result.answer.find("Keanu Reeves") >= 0 # type: ignore[union-attr] @pytest.mark.openai @@ -168,13 +165,13 @@ def test_neo4j_add_records(neo4j_query_engine): reason=reason, ) @skip_on_missing_imports(["llama_index"], "neo4j") -def test_neo4j_auto(neo4j_query_engine_auto): +def test_neo4j_auto(neo4j_query_engine_auto: Neo4jGraphQueryEngine) -> None: """Test querying with auto-generated property graph""" question = "Which company is the employer?" query_result: GraphStoreQueryResult = neo4j_query_engine_auto.query(question=question) logger.info(query_result.answer) - assert query_result.answer.find("BUZZ") >= 0 + assert query_result.answer.find("BUZZ") >= 0 # type: ignore[union-attr] @pytest.mark.openai @@ -184,10 +181,10 @@ def test_neo4j_auto(neo4j_query_engine_auto): reason=reason, ) @skip_on_missing_imports(["llama_index"], "neo4j") -def test_neo4j_json_auto(neo4j_query_engine_with_json): +def test_neo4j_json_auto(neo4j_query_engine_with_json: Neo4jGraphQueryEngine) -> None: """Test querying with auto-generated property graph from a JSON file.""" question = "What are current layout detection models in the LayoutParser model zoo?" 
query_result: GraphStoreQueryResult = neo4j_query_engine_with_json.query(question=question) logger.info(query_result.answer) - assert query_result.answer.find("PRImA") >= 0 + assert query_result.answer.find("PRImA") >= 0 # type: ignore[union-attr] diff --git a/test/agentchat/test_structured_output.py b/test/agentchat/test_structured_output.py index ad3e21baa0..9e5a6f5d71 100644 --- a/test/agentchat/test_structured_output.py +++ b/test/agentchat/test_structured_output.py @@ -13,21 +13,50 @@ import autogen -from ..conftest import Credentials - - -@pytest.mark.openai -def test_structured_output(credentials_gpt_4o: Credentials): - class ResponseModel(BaseModel): - question: str - short_answer: str - reasoning: str - difficulty: float - - config_list = credentials_gpt_4o.config_list +from ..conftest import ( + Credentials, + credentials_gemini_flash, + credentials_gpt_4o_mini, + suppress_gemini_resource_exhausted, +) + +credentials_structured_output = [ + pytest.param( + credentials_gpt_4o_mini.__name__, + marks=pytest.mark.openai, + ), + pytest.param( + credentials_gemini_flash.__name__, + marks=pytest.mark.gemini, + ), +] + + +class ResponseModel(BaseModel): + question: str + short_answer: str + reasoning: str + difficulty: float + + +@pytest.mark.parametrize( + "credentials_from_test_param", + credentials_structured_output, + indirect=True, +) +@pytest.mark.parametrize( + "response_format", + [ + ResponseModel, + ResponseModel.model_json_schema(), + ], +) +@suppress_gemini_resource_exhausted +def test_structured_output(credentials_from_test_param, response_format): + config_list = credentials_from_test_param.config_list for config in config_list: - config["response_format"] = ResponseModel + config["response_format"] = response_format llm_config = {"config_list": config_list, "cache_seed": 43} @@ -55,20 +84,20 @@ class ResponseModel(BaseModel): raise AssertionError(f"Agent did not return a structured report. 
Exception: {e}") -@pytest.mark.openai -def test_global_structured_output(credentials_gpt_4o: Credentials): - class ResponseModel(BaseModel): - question: str - short_answer: str - reasoning: str - difficulty: float - - config_list = credentials_gpt_4o.config_list +@pytest.mark.parametrize( + "credentials_from_test_param", + credentials_structured_output, + indirect=True, +) +@pytest.mark.parametrize("response_format", [ResponseModel, ResponseModel.model_json_schema()]) +@suppress_gemini_resource_exhausted +def test_structured_output_global(credentials_from_test_param, response_format): + config_list = credentials_from_test_param.config_list llm_config = { "config_list": config_list, "cache_seed": 43, - "response_format": ResponseModel, + "response_format": response_format, } user_proxy = autogen.UserProxyAgent( diff --git a/test/conftest.py b/test/conftest.py index 05cf881257..9897475424 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -398,7 +398,6 @@ def user_proxy() -> UserProxyAgent: ), ] - T = TypeVar("T", bound=Callable[..., Any]) diff --git a/test/oai/test_utils.py b/test/oai/test_utils.py index fe0376a5ed..7aa8871d58 100755 --- a/test/oai/test_utils.py +++ b/test/oai/test_utils.py @@ -17,7 +17,11 @@ import pytest import autogen -from autogen.oai.openai_utils import DEFAULT_AZURE_API_VERSION, filter_config, is_valid_api_key +from autogen.oai.openai_utils import ( + DEFAULT_AZURE_API_VERSION, + filter_config, + is_valid_api_key, +) from ..conftest import MOCK_OPEN_AI_API_KEY diff --git a/test/test_import_utils.py b/test/test_import_utils.py index e56df08977..afa893eefa 100644 --- a/test/test_import_utils.py +++ b/test/test_import_utils.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Type +from typing import Any, Optional, Type, Union import pytest @@ -32,39 +32,48 @@ def test_optional_import_block(self) -> None: class TestRequiresOptionalImportCallables: - def test_function_attributes(self) -> None: + @pytest.mark.parametrize("except_for", [None, "dummy_function", ["dummy_function"]]) + def test_function_attributes(self, except_for: Optional[Union[str, list[str]]]) -> None: def dummy_function() -> None: """Dummy function to test requires_optional_import""" pass dummy_function.__module__ = "some_random_module.dummy_stuff" - actual = require_optional_import("some_optional_module", "optional_dep")(dummy_function) + actual = require_optional_import("some_optional_module", "optional_dep", except_for=except_for)(dummy_function) assert actual is not None assert actual.__module__ == "some_random_module.dummy_stuff" assert actual.__name__ == "dummy_function" assert actual.__doc__ == "Dummy function to test requires_optional_import" - with pytest.raises( - ImportError, - match=r"Module 'some_optional_module' needed for some_random_module.dummy_stuff.dummy_function is missing, please install it using 'pip install ag2\[optional_dep\]'", - ): + if not except_for: + with pytest.raises( + ImportError, + match=r"Module 'some_optional_module' needed for some_random_module.dummy_stuff.dummy_function is missing, please install it using 'pip install ag2\[optional_dep\]'", + ): + actual() + else: actual() - def test_function_call(self) -> None: - @require_optional_import("some_optional_module", "optional_dep") + @pytest.mark.parametrize("except_for", [None, "dummy_function", ["dummy_function"]]) + def test_function_call(self, except_for: Optional[Union[str, list[str]]]) -> None: + @require_optional_import("some_optional_module", "optional_dep", 
except_for=except_for) def dummy_function() -> None: """Dummy function to test requires_optional_import""" pass - with pytest.raises( - ImportError, - match=r"Module 'some_optional_module' needed for test.test_import_utils.dummy_function is missing, please install it using 'pip install ag2\[optional_dep\]'", - ): + if not except_for: + with pytest.raises( + ImportError, + match=r"Module 'some_optional_module' needed for test.test_import_utils.dummy_function is missing, please install it using 'pip install ag2\[optional_dep\]'", + ): + dummy_function() + else: dummy_function() - def test_method_attributes(self) -> None: + @pytest.mark.parametrize("except_for", [None, "dummy_method", ["dummy_method"]]) + def test_method_attributes(self, except_for: Optional[Union[str, list[str]]]) -> None: class DummyClass: def dummy_method(self) -> None: """Dummy method to test requires_optional_import""" @@ -76,9 +85,9 @@ def dummy_method(self) -> None: DummyClass.__module__ = "some_random_module.dummy_stuff" DummyClass.dummy_method.__module__ = "some_random_module.dummy_stuff" - DummyClass.dummy_method = require_optional_import("some_optional_module", "optional_dep")( # type: ignore[method-assign] - DummyClass.dummy_method - ) + DummyClass.dummy_method = require_optional_import( # type: ignore[method-assign] + "some_optional_module", "optional_dep", except_for=except_for + )(DummyClass.dummy_method) assert DummyClass.dummy_method is not None assert DummyClass.dummy_method.__module__ == "some_random_module.dummy_stuff" @@ -86,54 +95,74 @@ def dummy_method(self) -> None: assert DummyClass.dummy_method.__doc__ == "Dummy method to test requires_optional_import" dummy = DummyClass() - with pytest.raises( - ImportError, - match=r"Module 'some_optional_module' needed for some_random_module.dummy_stuff.dummy_method is missing, please install it using 'pip install ag2\[optional_dep\]", - ): + + if not except_for: + with pytest.raises( + ImportError, + match=r"Module 'some_optional_module' needed for some_random_module.dummy_stuff.dummy_method is missing, please install it using 'pip install ag2\[optional_dep\]", + ): + dummy.dummy_method() + else: dummy.dummy_method() - def test_method_call(self) -> None: + @pytest.mark.parametrize("except_for", [None, "dummy_method", ["dummy_method"]]) + def test_method_call(self, except_for: Optional[Union[str, list[str]]]) -> None: class DummyClass: - @require_optional_import("some_optional_module", "optional_dep") + @require_optional_import("some_optional_module", "optional_dep", except_for=except_for) def dummy_method(self) -> None: """Dummy method to test requires_optional_import""" pass dummy = DummyClass() - with pytest.raises( - ImportError, - match=r"Module 'some_optional_module' needed for test.test_import_utils.dummy_method is missing, please install it using 'pip install ag2\[optional_dep\]'", - ): + + if not except_for: + with pytest.raises( + ImportError, + match=r"Module 'some_optional_module' needed for test.test_import_utils.dummy_method is missing, please install it using 'pip install ag2\[optional_dep\]'", + ): + dummy.dummy_method() + else: dummy.dummy_method() - def test_static_call(self) -> None: + @pytest.mark.parametrize("except_for", [None, "dummy_static_function", ["dummy_static_function"]]) + def test_static_call(self, except_for: Optional[Union[str, list[str]]]) -> None: class DummyClass: - @require_optional_import("some_optional_module", "optional_dep") + @require_optional_import("some_optional_module", "optional_dep", except_for=except_for) 
@staticmethod def dummy_static_function() -> None: """Dummy static function to test requires_optional_import""" pass dummy = DummyClass() - with pytest.raises( - ImportError, - match=r"Module 'some_optional_module' needed for test.test_import_utils.dummy_static_function is missing, please install it using 'pip install ag2\[optional_dep\]'", - ): + + if not except_for: + with pytest.raises( + ImportError, + match=r"Module 'some_optional_module' needed for test.test_import_utils.dummy_static_function is missing, please install it using 'pip install ag2\[optional_dep\]'", + ): + dummy.dummy_static_function() + else: dummy.dummy_static_function() - def test_property_call(self) -> None: + @pytest.mark.parametrize("except_for", [None, "dummy_property", ["dummy_property"]]) + def test_property_call(self, except_for: Optional[Union[str, list[str]]]) -> None: class DummyClass: @property - @require_optional_import("some_optional_module", "optional_dep") + @require_optional_import("some_optional_module", "optional_dep", except_for=except_for) def dummy_property(self) -> int: """Dummy property to test requires_optional_import""" return 4 dummy = DummyClass() - with pytest.raises( - ImportError, - match=r"Module 'some_optional_module' needed for test.test_import_utils.dummy_property is missing, please install it using 'pip install ag2\[optional_dep\]'", - ): + + if not except_for: + with pytest.raises( + ImportError, + match=r"Module 'some_optional_module' needed for test.test_import_utils.dummy_property is missing, please install it using 'pip install ag2\[optional_dep\]'", + ): + dummy.dummy_property + + else: dummy.dummy_property diff --git a/test/tools/experimental/browser_use/test_browser_use.py b/test/tools/experimental/browser_use/test_browser_use.py index b75098c3eb..8cdcedf5f0 100644 --- a/test/tools/experimental/browser_use/test_browser_use.py +++ b/test/tools/experimental/browser_use/test_browser_use.py @@ -18,7 +18,14 @@ @skip_on_missing_imports( - ["langchain_anthropic", "langchain_google_genai", "langchain_ollama", "langchain_openai", "browser_use"], + [ + "langchain_anthropic", + "langchain_google_genai", + "langchain_ollama", + "langchain_openai", + "langchain_core", + "browser_use", + ], "browser-use", ) class TestBrowserUseToolOpenai: @@ -39,80 +46,6 @@ def test_browser_use_tool_init(self, mock_credentials: Credentials) -> None: } assert browser_use_tool.function_schema == expected_schema - @pytest.mark.parametrize( - ("config_list", "llm_class_name"), - [ - ( - [ - {"api_type": "openai", "model": "gpt-4o-mini", "api_key": "test"}, - ], - "ChatOpenAI", - ), - ( - [ - {"api_type": "deepseek", "model": "deepseek-model", "api_key": "test", "base_url": "test"}, - ], - "ChatOpenAI", - ), - ( - [ - { - "api_type": "azure", - "model": "gpt-4o-mini", - "api_key": "test", - "base_url": "test", - "api_version": "test", - }, - ], - "AzureChatOpenAI", - ), - ( - [ - {"api_type": "google", "model": "gemini", "api_key": "test"}, - ], - "ChatGoogleGenerativeAI", - ), - ( - [ - {"api_type": "anthropic", "model": "sonnet", "api_key": "test"}, - ], - "ChatAnthropic", - ), - ( - [{"api_type": "ollama", "model": "mistral:7b-instruct-v0.3-q6_K"}], - "ChatOllama", - ), - ], - ) - def test_get_llm( # type: ignore[no-any-unimported] - self, - config_list: list[dict[str, str]], - llm_class_name: str, - ) -> None: - llm = BrowserUseTool._get_llm(llm_config={"config_list": config_list}) - assert llm.__class__.__name__ == llm_class_name - - @pytest.mark.parametrize( - ("config_list", "error_msg"), - [ - ( - [ - 
{"api_type": "deepseek", "model": "gpt-4o-mini", "api_key": "test"}, - ], - "base_url is required for deepseek api type.", - ), - ( - [ - {"api_type": "azure", "model": "gpt-4o-mini", "api_key": "test", "base_url": "test"}, - ], - "api_version is required for azure api type.", - ), - ], - ) - def test_get_llm_raises_if_mandatory_key_missing(self, config_list: list[dict[str, str]], error_msg: str) -> None: - with pytest.raises(ValueError, match=error_msg): - BrowserUseTool._get_llm(llm_config={"config_list": config_list}) - @pytest.mark.parametrize( "credentials_from_test_param", credentials_browser_use, diff --git a/test/tools/experimental/browser_use/test_langchain_factory.py b/test/tools/experimental/browser_use/test_langchain_factory.py new file mode 100644 index 0000000000..b6972613ae --- /dev/null +++ b/test/tools/experimental/browser_use/test_langchain_factory.py @@ -0,0 +1,184 @@ +# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Optional + +import pytest + +from autogen.import_utils import optional_import_block, skip_on_missing_imports + +with optional_import_block(): + from langchain_openai import AzureChatOpenAI, ChatOpenAI + + from autogen.tools.experimental.browser_use.langchain_factory import ChatOpenAIFactory, LangchainFactory + + +@skip_on_missing_imports( + ["langchain_anthropic", "langchain_google_genai", "langchain_ollama", "langchain_openai", "langchain_core"], + "browser-use", +) +class TestLangchainFactory: + test_api_key = "test" # pragma: allowlist secret + + def test_number_of_factories(self) -> None: + assert len(LangchainFactory._factories) == 6 + + @pytest.mark.parametrize( + ("llm_config", "expected"), + [ + ( + {"model": "gpt-4o-mini", "api_key": test_api_key}, + {"model": "gpt-4o-mini", "api_key": test_api_key}, + ), + ( + {"config_list": [{"model": "gpt-4o-mini", "api_key": test_api_key}]}, + {"model": "gpt-4o-mini", "api_key": test_api_key}, + ), + ( + { + "config_list": [ + {"model": "gpt-4o-mini", "api_key": test_api_key}, + {"model": "gpt-4o", "api_key": test_api_key}, + ] + }, + {"model": "gpt-4o-mini", "api_key": test_api_key}, + ), + ], + ) + def test_get_first_llm_config(self, llm_config: dict[str, Any], expected: dict[str, Any]) -> None: + assert LangchainFactory.get_first_llm_config(llm_config) == expected + + @pytest.mark.parametrize( + ("llm_config", "error_message"), + [ + ({}, "llm_config must be a valid config dictionary."), + ({"config_list": []}, "Config list must contain at least one config."), + ], + ) + def test_get_first_llm_config_incorrect_config(self, llm_config: dict[str, Any], error_message: str) -> None: + with pytest.raises(ValueError, match=error_message): + LangchainFactory.get_first_llm_config(llm_config) + + @pytest.mark.parametrize( + ("config_list", "llm_class_name", "base_url"), + [ + ( + [ + {"api_type": "openai", "model": "gpt-4o-mini", "api_key": test_api_key}, + ], + "ChatOpenAI", + None, + ), + ( + [ + { + "api_type": "deepseek", + "model": "deepseek-model", + "api_key": test_api_key, + "base_url": "test-url", + }, + ], + "ChatOpenAI", + "test-url", + ), + ( + [ + { + "api_type": "azure", + "model": "gpt-4o-mini", + "api_key": test_api_key, + "base_url": "test-url", + "api_version": "test", + }, + ], + "AzureChatOpenAI", + "test-url", + ), + ( + [ + {"api_type": "google", "model": "gemini", "api_key": test_api_key}, + ], + "ChatGoogleGenerativeAI", + None, + ), + ( + [ + {"api_type": "anthropic", 
"model": "sonnet", "api_key": test_api_key}, + ], + "ChatAnthropic", + None, + ), + ( + [{"api_type": "ollama", "model": "mistral:7b-instruct-v0.3-q6_K"}], + "ChatOllama", + None, + ), + ( + [{"api_type": "ollama", "model": "mistral:7b-instruct-v0.3-q6_K", "client_host": "test-url"}], + "ChatOllama", + "test-url", + ), + ], + ) + def test_create_base_chat_model( # type: ignore[no-any-unimported] + self, + config_list: list[dict[str, str]], + llm_class_name: str, + base_url: Optional[str], + ) -> None: + llm = LangchainFactory.create_base_chat_model(llm_config={"config_list": config_list}) + assert llm.__class__.__name__ == llm_class_name + if llm_class_name == "AzureChatOpenAI": + assert isinstance(llm, AzureChatOpenAI) + assert llm.azure_endpoint == base_url + elif llm_class_name == "ChatOpenAI" and base_url: + assert isinstance(llm, ChatOpenAI) + assert llm.openai_api_base == base_url + elif base_url: + assert hasattr(llm, "base_url") + assert llm.base_url == base_url + + @pytest.mark.parametrize( + ("config_list", "error_msg"), + [ + ( + [ + {"api_type": "deepseek", "model": "gpt-4o-mini", "api_key": test_api_key}, + ], + "base_url is required for deepseek api type.", + ), + ( + [ + {"api_type": "azure", "model": "gpt-4o-mini", "api_key": test_api_key, "base_url": "test"}, + ], + "api_version is required for azure api type.", + ), + ], + ) + def test_create_base_chat_model_raises_if_mandatory_key_missing( + self, config_list: list[dict[str, str]], error_msg: str + ) -> None: + with pytest.raises(ValueError, match=error_msg): + LangchainFactory.create_base_chat_model(llm_config={"config_list": config_list}) + + +@skip_on_missing_imports( + ["langchain_anthropic", "langchain_google_genai", "langchain_ollama", "langchain_openai", "langchain_core"], + "browser-use", +) +class TestChatOpenAIFactory: + test_api_key = "test" # pragma: allowlist secret + + @pytest.mark.parametrize( + "llm_config", + [ + {"model": "gpt-4o-mini", "api_key": test_api_key}, + {"config_list": [{"model": "gpt-4o-mini", "api_key": test_api_key}]}, + ], + ) + def test_create(self, llm_config: dict[str, Any], monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + + actual = ChatOpenAIFactory.create_base_chat_model(llm_config) + assert isinstance(actual, ChatOpenAI) diff --git a/test/tools/experimental/crawl4ai/test_crawl4ai.py b/test/tools/experimental/crawl4ai/test_crawl4ai.py index 8581ec0072..e7890833ad 100644 --- a/test/tools/experimental/crawl4ai/test_crawl4ai.py +++ b/test/tools/experimental/crawl4ai/test_crawl4ai.py @@ -69,6 +69,7 @@ async def test_without_llm(self) -> None: {"api_type": "anthropic", "model": "sonnet", "api_key": "test"}, ], [{"api_type": "ollama", "model": "mistral:7b"}], + [{"api_type": "ollama", "model": "mistral:7b", "client_host": "http://127.0.0.1:11434"}], ], ) def test_get_provider_and_api_key(self, config_list: list[dict[str, Any]]) -> None: @@ -80,7 +81,10 @@ def test_get_provider_and_api_key(self, config_list: list[dict[str, Any]]) -> No provider = f"{api_type}/{model}" if api_type == "ollama": - assert lite_llm_config == {"provider": provider} + if "client_host" in config_list[0]: + assert lite_llm_config == {"provider": provider, "api_base": config_list[0]["client_host"]} + else: + assert lite_llm_config == {"provider": provider} else: assert all(key in lite_llm_config for key in ["provider", "api_token"]) assert lite_llm_config["provider"] == provider