From ab26ba7fb8cc86200723df5c463d69316c05a408 Mon Sep 17 00:00:00 2001 From: Zaid Khan Date: Wed, 18 Dec 2024 12:47:45 -0500 Subject: [PATCH] docs: Added tool use examples. --- README.md | 5 +++ examples/tool_use/open_ended.py | 67 ++++++++++++++++++++++++++++++++ examples/tool_use/skill_list.py | 68 +++++++++++++++++++++++++++++++++ 3 files changed, 140 insertions(+) create mode 100644 examples/tool_use/open_ended.py create mode 100644 examples/tool_use/skill_list.py diff --git a/README.md b/README.md index 9dfe939..01a3a92 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,9 @@ See `docs/components.md` for more details on the API. - Open-Ended Environment: `examples/livecodebench/open_ended_environment.py` - Skill-List Environment: `examples/livecodebench/skill_list_environment.py` - Skill-Tree Environment: `examples/livecodebench/skill_tree_environment.py` +- MnMs (Tool Use) + - Open-Ended Environment: `examples/tool_use/open_ended.py` + - Skill-List Environment: `examples/tool_use/skill_list.py` ## Running Examples Run the examples from the repository root. Set `CUDA_VISIBLE_DEVICES` to the GPUs you want to use and make sure to set `num_gpus` in `ray.init()` to the number of GPUs you have available. @@ -140,6 +143,8 @@ This repository uses code from the following repositories: - [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory) - [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) - [LiveCodeBench](https://github.com/LiveCodeBench/LiveCodeBench) +- [MnMs](https://github.com/RAIVNLab/mnms) +- [NaturalBench](https://github.com/Baiqi-Li/NaturalBench) We thank the authors of the above repositories for making their code freely available. 
diff --git a/examples/tool_use/open_ended.py b/examples/tool_use/open_ended.py
new file mode 100644
index 0000000..6a60150
--- /dev/null
+++ b/examples/tool_use/open_ended.py
@@ -0,0 +1,67 @@
+import warnings
+from pathlib import Path
+
+from dataenvgym.gym.accumulate_train_loop import (
+    IoProvider,
+    run_generic_accumulation_train_loop,
+)
+from dataenvgym.gym.data_generation_agents.tool_use.baselines.open_ended import (
+    DataGenerationAgent,
+)
+from dataenvgym.gym.tasks.tool_use.mnms.task import MnmsSplit, MnmsTask
+from dataenvgym.gym.trainable_predictors.tool_use.vllm_predictor import (
+    LLAMA3_8B_INSTRUCT_INFERENCE_CONFIG,
+    LLAMA3_8B_INSTRUCT_TRAINER_CONFIG,
+    CodeGenerationTrainablePredictor,
+    ParallelVllmCodeGenerationPredictor,
+    SftCodeGenerationTrainer,
+)
+from sklearn.exceptions import UndefinedMetricWarning
+
+warnings.filterwarnings("ignore", category=UndefinedMetricWarning)  # runs at import time: ignore this warning category
+
+
+def main():  # Build predictor, trainer, MnMs val/test tasks and the open-ended agent, then run the train loop.
+    num_gpus = 8  # hard-coded; passed to both set_with_gpu_count() calls below
+    experiment_dir = Path("workspace/tool_use_open_ended_example")  # relative path; resolved against the current working directory
+    experiment_dir.mkdir(parents=True, exist_ok=True)  # create it (and parents) if missing; no error if it already exists
+
+    predictor_config = LLAMA3_8B_INSTRUCT_INFERENCE_CONFIG.set_with_gpu_count(num_gpus)
+    predictor = ParallelVllmCodeGenerationPredictor(config=predictor_config)
+    trainer_config = LLAMA3_8B_INSTRUCT_TRAINER_CONFIG.set_with_gpu_count(
+        num_gpus
+    ).set_working_directory(experiment_dir)  # trainer works inside the experiment directory
+    trainer_config.overrides = ["cutoff_len=1600"]  # NOTE(review): presumably caps the training sequence length - confirm against the trainer config
+    trainer = SftCodeGenerationTrainer(
+        config=trainer_config,
+    )
+
+    trainable_predictor = CodeGenerationTrainablePredictor(  # pairs the predictor with the trainer
+        predictor=predictor,
+        trainer=trainer,
+    )
+
+    val_task = MnmsTask(split=MnmsSplit.VAL)  # passed as validation_tasks below
+    test_task = MnmsTask(split=MnmsSplit.TEST)  # passed as test_tasks below
+
+    data_strategy = DataGenerationAgent(  # the open_ended baseline agent imported above
+        datum_to_generate_per_error=2,
+        logging_folder=experiment_dir / "data_strategy_outputs",  # subfolder of experiment_dir
+        data_generation_per_invocation_limit=60,
+    )
+
+    io_provider = IoProvider(experiment_dir=experiment_dir)
+
+    run_generic_accumulation_train_loop(  # entry point of the experiment; everything above is wiring
+        validation_tasks=[val_task],
+        test_tasks=[test_task],
+        trainable_predictor=trainable_predictor,
+        training_data_production_strategy=data_strategy,
+        io_provider=io_provider,
+        num_cycles=10,
+        accumulation_iterations_per_cycle=1,
+    )
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
diff --git a/examples/tool_use/skill_list.py b/examples/tool_use/skill_list.py
new file mode 100644
index 0000000..7b827ff
--- /dev/null
+++ b/examples/tool_use/skill_list.py
@@ -0,0 +1,68 @@
+import warnings
+from pathlib import Path
+
+from dataenvgym.gym.accumulate_train_loop import (
+    IoProvider,
+    run_generic_accumulation_train_loop,
+)
+from dataenvgym.gym.data_generation_agents.tool_use.baselines.skill_list import (
+    DataGenerationAgent,
+)
+from dataenvgym.gym.tasks.tool_use.mnms.task import MnmsSplit, MnmsTask
+from dataenvgym.gym.trainable_predictors.tool_use.vllm_predictor import (
+    LLAMA3_8B_INSTRUCT_INFERENCE_CONFIG,
+    LLAMA3_8B_INSTRUCT_TRAINER_CONFIG,
+    CodeGenerationTrainablePredictor,
+    ParallelVllmCodeGenerationPredictor,
+    SftCodeGenerationTrainer,
+)
+from sklearn.exceptions import UndefinedMetricWarning
+
+warnings.filterwarnings("ignore", category=UndefinedMetricWarning)  # runs at import time: ignore this warning category
+
+
+def main():  # Build predictor, trainer, MnMs val/test tasks and the skill-list agent, then run the train loop.
+    num_gpus = 8  # hard-coded; passed to both set_with_gpu_count() calls below
+    experiment_dir = Path("workspace/tool_use_skill_list_example")  # relative path; resolved against the current working directory
+    experiment_dir.mkdir(parents=True, exist_ok=True)  # create it (and parents) if missing; no error if it already exists
+
+    predictor_config = LLAMA3_8B_INSTRUCT_INFERENCE_CONFIG.set_with_gpu_count(num_gpus)
+    predictor = ParallelVllmCodeGenerationPredictor(config=predictor_config)
+    trainer_config = LLAMA3_8B_INSTRUCT_TRAINER_CONFIG.set_with_gpu_count(
+        num_gpus
+    ).set_working_directory(experiment_dir)  # trainer works inside the experiment directory
+    trainer_config.overrides = ["cutoff_len=1600"]  # NOTE(review): presumably caps the training sequence length - confirm against the trainer config
+    trainer = SftCodeGenerationTrainer(
+        config=trainer_config,
+    )
+
+    trainable_predictor = CodeGenerationTrainablePredictor(  # pairs the predictor with the trainer
+        predictor=predictor,
+        trainer=trainer,
+    )
+
+    val_task = MnmsTask(split=MnmsSplit.VAL)  # passed as validation_tasks below
+    test_task = MnmsTask(split=MnmsSplit.TEST)  # passed as test_tasks below
+
+    data_strategy = DataGenerationAgent(  # the skill_list baseline agent imported above
+        datum_to_generate_per_skill=2,
+        max_skills_to_use_during_generation=30,
+        logging_folder=experiment_dir / "data_strategy_outputs",  # subfolder of experiment_dir
+        sampling_method="randomly_sample_imperfect_skills",  # NOTE(review): string-selected strategy; valid values are defined by the agent - confirm there
+    )
+
+    io_provider = IoProvider(experiment_dir=experiment_dir)
+
+    run_generic_accumulation_train_loop(  # entry point of the experiment; everything above is wiring
+        validation_tasks=[val_task],
+        test_tasks=[test_task],
+        trainable_predictor=trainable_predictor,
+        training_data_production_strategy=data_strategy,
+        io_provider=io_provider,
+        num_cycles=10,
+        accumulation_iterations_per_cycle=1,
+    )
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()