From 818eaddb6d502d071f8b65b890fbaf4d2e86c3a0 Mon Sep 17 00:00:00 2001 From: ArslanSaleem Date: Thu, 6 Feb 2025 15:59:36 +0100 Subject: [PATCH 1/3] feat(sandbox): add sandbox support in pai.chat and df.chat --- README.md | 54 ++++++++++++++++++-- pandasai/__init__.py | 6 ++- pandasai/dataframe/base.py | 6 ++- tests/unit_tests/dataframe/test_dataframe.py | 13 ++++- tests/unit_tests/test_pandasai_init.py | 12 +++-- 5 files changed, 80 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index e480ea15f..387955a9d 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ You can either decide to use PandaAI in your Jupyter notebooks, Streamlit apps, ## ☁️ Using the platform -The library can be used alongside our powerful data platform, making end-to-end conversational data analytics possible with as little as a few lines of code. +The library can be used alongside our powerful data platform, making end-to-end conversational data analytics possible with as little as a few lines of code. Load your data, save them as a dataframe, and push them to the platform @@ -36,9 +36,10 @@ dataset = pai.create(path="your-organization/dataset-name", dataset.push() ``` + Your team can now access and query this data using natural language through the platform. -![PandaAI](assets/demo.gif) +![PandaAI](assets/demo.gif) ## 📚 Using the library @@ -144,6 +145,54 @@ pai.chat("Who gets paid the most?", employees_df, salaries_df) Olivia gets paid the most. ``` +#### Docker Sandbox + +You can run PandaAI in a Docker sandbox, providing a secure, isolated environment to execute code safely and mitigate the risk of malicious attacks. 
+ +##### Python Requirements + +```bash +pip install "pandasai-docker" +``` + +##### Usage + +```python +import pandasai as pai +from pandasai_docker import DockerSandbox + +# Initialize the sandbox +sandbox = DockerSandbox() +sandbox.start() + +employees_data = { + 'EmployeeID': [1, 2, 3, 4, 5], + 'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'], + 'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance'] +} + +salaries_data = { + 'EmployeeID': [1, 2, 3, 4, 5], + 'Salary': [5000, 6000, 4500, 7000, 5500] +} + +employees_df = pai.DataFrame(employees_data) +salaries_df = pai.DataFrame(salaries_data) + +# By default, unless you choose a different LLM, it will use BambooLLM. +# You can get your free API key by signing up at https://app.pandabi.ai (you can also configure it in your .env file) +pai.api_key.set("your-pai-api-key") + +pai.chat("Who gets paid the most?", employees_df, salaries_df, sandbox=sandbox) + +# Don't forget to stop the sandbox when done +sandbox.stop() +``` + +``` +Olivia gets paid the most. +``` + You can find more examples in the [examples](examples) directory. ## 📜 License @@ -161,7 +210,6 @@ If you are interested in managed PandaAI Cloud or self-hosted Enterprise Offerin - [Examples](examples) for example notebooks - [Discord](https://discord.gg/KYKj9F2FRH) for discussion with the community and PandaAI team - ## 🤝 Contributing Contributions are welcome! Please check the outstanding issues and feel free to open a pull request. 
diff --git a/pandasai/__init__.py b/pandasai/__init__.py index 840fc30c1..cff74f2df 100644 --- a/pandasai/__init__.py +++ b/pandasai/__init__.py @@ -22,6 +22,7 @@ from pandasai.helpers.path import find_project_root, get_validated_dataset_path from pandasai.helpers.session import get_pandaai_session from pandasai.query_builders import SqlQueryBuilder +from pandasai.sandbox.sandbox import Sandbox from .agent import Agent from .constants import LOCAL_SOURCE_TYPES, SQL_SOURCE_TYPES @@ -158,13 +159,14 @@ def clear_cache(filename: str = None): cache.clear() -def chat(query: str, *dataframes: DataFrame): +def chat(query: str, *dataframes: DataFrame, sandbox: Optional[Sandbox] = None): """ Start a new chat interaction with the assistant on Dataframe(s). Args: query (str): The query to run against the dataframes. *dataframes: Variable number of dataframes to query. + sandbox (Sandbox, optional): The sandbox to execute code securily. Returns: The result of the query. @@ -173,7 +175,7 @@ def chat(query: str, *dataframes: DataFrame): if not dataframes: raise ValueError("At least one dataframe must be provided.") - _current_agent = Agent(list(dataframes)) + _current_agent = Agent(list(dataframes), sandbox=sandbox) return _current_agent.chat(query) diff --git a/pandasai/dataframe/base.py b/pandasai/dataframe/base.py index baac0be71..12612894e 100644 --- a/pandasai/dataframe/base.py +++ b/pandasai/dataframe/base.py @@ -23,6 +23,7 @@ if TYPE_CHECKING: from pandasai.agent.base import Agent + from pandasai.sandbox.sandbox import Sandbox class DataFrame(pd.DataFrame): @@ -94,12 +95,13 @@ def column_hash(self): def type(self) -> str: return "pd.DataFrame" - def chat(self, prompt: str) -> BaseResponse: + def chat(self, prompt: str, sandbox: Optional[Sandbox] = None) -> BaseResponse: """ Interact with the DataFrame using natural language. Args: prompt (str): The natural language query or instruction. + sandbox (Sandbox, optional): The sandbox to execute code securily. 
Returns: str: The response to the prompt. @@ -109,7 +111,7 @@ def chat(self, prompt: str) -> BaseResponse: Agent, ) - self._agent = Agent([self]) + self._agent = Agent([self], sandbox=sandbox) return self._agent.chat(prompt) diff --git a/tests/unit_tests/dataframe/test_dataframe.py b/tests/unit_tests/dataframe/test_dataframe.py index 8d0c874b5..4fe193593 100644 --- a/tests/unit_tests/dataframe/test_dataframe.py +++ b/tests/unit_tests/dataframe/test_dataframe.py @@ -32,7 +32,18 @@ def test_chat_creates_agent(self, mock_env, mock_agent, sample_dict_data): sample_df = DataFrame(sample_dict_data) mock_env.return_value = {"PANDABI_API_URL": "localhost:8000"} sample_df.chat("Test query") - mock_agent.assert_called_once_with([sample_df]) + mock_agent.assert_called_once_with([sample_df], sandbox=None) + + @patch("pandasai.agent.Agent") + @patch("os.environ") + def test_chat_creates_agent_with_sandbox( + self, mock_env, mock_agent, sample_dict_data + ): + sandbox = MagicMock() + sample_df = DataFrame(sample_dict_data) + mock_env.return_value = {"PANDABI_API_URL": "localhost:8000"} + sample_df.chat("Test query", sandbox=sandbox) + mock_agent.assert_called_once_with([sample_df], sandbox=sandbox) @patch("pandasai.Agent") def test_chat_reuses_existing_agent(self, sample_df): diff --git a/tests/unit_tests/test_pandasai_init.py b/tests/unit_tests/test_pandasai_init.py index b8e783cbc..b536a850c 100644 --- a/tests/unit_tests/test_pandasai_init.py +++ b/tests/unit_tests/test_pandasai_init.py @@ -56,7 +56,13 @@ def sqlite_connection_json(self): def test_chat_creates_agent(self, sample_df): with patch("pandasai.Agent") as MockAgent: pandasai.chat("Test query", sample_df) - MockAgent.assert_called_once_with([sample_df]) + MockAgent.assert_called_once_with([sample_df], sandbox=None) + + def test_chat_sanbox_passed_to_agent(self, sample_df): + with patch("pandasai.Agent") as MockAgent: + sandbox = MagicMock() + pandasai.chat("Test query", sample_df, sandbox=sandbox) + 
MockAgent.assert_called_once_with([sample_df], sandbox=sandbox) def test_chat_without_dataframes_raises_error(self): with pytest.raises(ValueError, match="At least one dataframe must be provided"): @@ -82,7 +88,7 @@ def test_chat_with_multiple_dataframes(self, sample_dataframes): result = pandasai.chat("What is the sum of column A?", *sample_dataframes) - MockAgent.assert_called_once_with(sample_dataframes) + MockAgent.assert_called_once_with(sample_dataframes, sandbox=None) mock_agent_instance.chat.assert_called_once_with( "What is the sum of column A?" ) @@ -98,7 +104,7 @@ def test_chat_with_single_dataframe(self, sample_dataframes): "What is the average of column X?", sample_dataframes[1] ) - MockAgent.assert_called_once_with([sample_dataframes[1]]) + MockAgent.assert_called_once_with([sample_dataframes[1]], sandbox=None) mock_agent_instance.chat.assert_called_once_with( "What is the average of column X?" ) From 71ac6304206449670665465fc1cd7da8e0a3b990 Mon Sep 17 00:00:00 2001 From: ArslanSaleem Date: Thu, 6 Feb 2025 17:41:02 +0100 Subject: [PATCH 2/3] fix: typos in function name and comments --- pandasai/dataframe/base.py | 4 ++-- tests/unit_tests/test_pandasai_init.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandasai/dataframe/base.py b/pandasai/dataframe/base.py index 12612894e..076241689 100644 --- a/pandasai/dataframe/base.py +++ b/pandasai/dataframe/base.py @@ -20,10 +20,10 @@ from pandasai.exceptions import DatasetNotFound, PandaAIApiKeyError from pandasai.helpers.dataframe_serializer import DataframeSerializer from pandasai.helpers.session import get_pandaai_session +from pandasai.sandbox.sandbox import Sandbox if TYPE_CHECKING: from pandasai.agent.base import Agent - from pandasai.sandbox.sandbox import Sandbox class DataFrame(pd.DataFrame): @@ -101,7 +101,7 @@ def chat(self, prompt: str, sandbox: Optional[Sandbox] = None) -> BaseResponse: Args: prompt (str): The natural language query or instruction. 
- sandbox (Sandbox, optional): The sandbox to execute code securily. + sandbox (Sandbox, optional): The sandbox to execute code securely. Returns: str: The response to the prompt. diff --git a/tests/unit_tests/test_pandasai_init.py b/tests/unit_tests/test_pandasai_init.py index b536a850c..98136d3e8 100644 --- a/tests/unit_tests/test_pandasai_init.py +++ b/tests/unit_tests/test_pandasai_init.py @@ -58,7 +58,7 @@ def test_chat_creates_agent(self, sample_df): pandasai.chat("Test query", sample_df) MockAgent.assert_called_once_with([sample_df], sandbox=None) - def test_chat_sanbox_passed_to_agent(self, sample_df): + def test_chat_sandbox_passed_to_agent(self, sample_df): with patch("pandasai.Agent") as MockAgent: sandbox = MagicMock() pandasai.chat("Test query", sample_df, sandbox=sandbox) From c350b0e11391abc8bb0f80b06f96d41858dc4f52 Mon Sep 17 00:00:00 2001 From: Gabriele Venturi Date: Thu, 6 Feb 2025 18:10:02 +0100 Subject: [PATCH 3/3] chore: fix typo --- pandasai/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandasai/__init__.py b/pandasai/__init__.py index cff74f2df..d3265fa5d 100644 --- a/pandasai/__init__.py +++ b/pandasai/__init__.py @@ -166,7 +166,7 @@ def chat(query: str, *dataframes: DataFrame, sandbox: Optional[Sandbox] = None): Args: query (str): The query to run against the dataframes. *dataframes: Variable number of dataframes to query. - sandbox (Sandbox, optional): The sandbox to execute code securily. + sandbox (Sandbox, optional): The sandbox to execute code securely. Returns: The result of the query.