# From test/agentchat/contrib/test_reasoning_agent.py
def test_reasoning_agent_code_execution(mock_credentials: Credentials):
    """Test that ReasoningAgent properly executes code in responses.

    ``ConversableAgent.generate_oai_reply`` is patched so that each agent
    receives a canned reply chosen by its ``name``; any other agent gets
    the reply ``"5"``. The test then runs ``_beam_reply`` and inspects the
    first child of the reasoning tree root as well as the final response.

    Args:
        mock_credentials: Fixture supplying the ``llm_config`` for the agent.
    """
    # Reply of the thinker: one option embedding runnable Python, one TERMINATE.
    thinker_reply = """Reflection
Let's solve this with Python.

Possible Options:
Option 1: Calculate factorial with Python
```python
def factorial(n):
    if n == 0:
        return 1
    return n * factorial(n-1)

print(f"Factorial of 5 is {factorial(5)}")
```

Option 2: TERMINATE"""

    canned_replies = {
        "tot_thinker": thinker_reply,
        # Mock the code execution result
        "reasoner_user_proxy": "Factorial of 5 is 120",
        "test_agent": "The factorial of 5 is 120",
    }

    def fake_oai_reply(*args, **kwargs):
        # The first positional argument is the agent the reply is generated for.
        speaker = args[0]
        return True, {"content": canned_replies.get(speaker.name, "5")}

    # Create agent with code execution enabled
    with patch("autogen.agentchat.conversable_agent.ConversableAgent.generate_oai_reply") as mock_oai_reply:
        agent = ReasoningAgent(
            "test_agent",
            llm_config=mock_credentials.llm_config,
            code_execution_config={"use_docker": False, "work_dir": "mypy_cache"},
        )
        mock_oai_reply.side_effect = fake_oai_reply

        # Test code execution
        response = agent._beam_reply("Calculate factorial of 5")

        # Verify code was executed
        first_child_content = agent._root.children[0].content
        assert "Factorial of 5 is 120" in first_child_content
        assert "Code Execution Result:" in first_child_content
        assert response == "The factorial of 5 is 120"


if __name__ == "__main__":
    pytest.main([__file__])