Skip to content

Commit

Permalink
docs: Added tool use examples.
Browse files Browse the repository at this point in the history
  • Loading branch information
codezakh committed Dec 18, 2024
1 parent 2caaea7 commit ab26ba7
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 0 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ See `docs/components.md` for more details on the API.
- Open-Ended Environment: `examples/livecodebench/open_ended_environment.py`
- Skill-List Environment: `examples/livecodebench/skill_list_environment.py`
- Skill-Tree Environment: `examples/livecodebench/skill_tree_environment.py`
- MnMs (Tool Use)
- Open-Ended Environment: `examples/tool_use/open_ended.py`
- Skill-List Environment: `examples/tool_use/skill_list.py`

## Running Examples
Run the examples from the repository root. Set `CUDA_VISIBLE_DEVICES` to the GPUs you want to use and make sure to set `num_gpus` in `ray.init()` to the number of GPUs you have available.
Expand All @@ -140,6 +143,8 @@ This repository uses code from the following repositories:
- [LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory)
- [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness)
- [LiveCodeBench](https://github.com/LiveCodeBench/LiveCodeBench)
- [MnMs](https://github.com/RAIVNLab/mnms)
- [NaturalBench](https://github.com/Baiqi-Li/NaturalBench)

We thank the authors of the above repositories for making their code freely available.

Expand Down
67 changes: 67 additions & 0 deletions examples/tool_use/open_ended.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Standard library.
import warnings
from pathlib import Path

# Training loop driver and its per-experiment I/O helper.
from dataenvgym.gym.accumulate_train_loop import (
IoProvider,
run_generic_accumulation_train_loop,
)
# Open-ended baseline: generates new training data from observed errors.
from dataenvgym.gym.data_generation_agents.tool_use.baselines.open_ended import (
DataGenerationAgent,
)
# MnMs tool-use benchmark task and its split enum.
from dataenvgym.gym.tasks.tool_use.mnms.task import MnmsSplit, MnmsTask
# vLLM-backed Llama-3-8B-Instruct predictor/trainer stack for code generation.
from dataenvgym.gym.trainable_predictors.tool_use.vllm_predictor import (
LLAMA3_8B_INSTRUCT_INFERENCE_CONFIG,
LLAMA3_8B_INSTRUCT_TRAINER_CONFIG,
CodeGenerationTrainablePredictor,
ParallelVllmCodeGenerationPredictor,
SftCodeGenerationTrainer,
)
from sklearn.exceptions import UndefinedMetricWarning

# Metric computation during evaluation can divide by zero on empty classes;
# silence sklearn's warning about it to keep the training logs readable.
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)


def main():
    """Run the open-ended MnMs tool-use example end to end.

    Builds a vLLM-backed Llama-3-8B-Instruct predictor plus SFT trainer,
    then runs the accumulation train loop: an open-ended data generation
    agent proposes new training examples from the predictor's errors on
    the MnMs validation split, and the predictor is retrained each cycle.
    """
    gpu_count = 8  # adjust together with CUDA_VISIBLE_DEVICES / ray.init(num_gpus=...)
    workdir = Path("workspace/tool_use_open_ended_example")
    workdir.mkdir(parents=True, exist_ok=True)

    # Inference side: parallel vLLM predictor sized to the available GPUs.
    predictor = ParallelVllmCodeGenerationPredictor(
        config=LLAMA3_8B_INSTRUCT_INFERENCE_CONFIG.set_with_gpu_count(gpu_count)
    )

    # Training side: SFT trainer writing checkpoints under the experiment dir.
    sft_config = LLAMA3_8B_INSTRUCT_TRAINER_CONFIG.set_with_gpu_count(
        gpu_count
    ).set_working_directory(workdir)
    sft_config.overrides = ["cutoff_len=1600"]  # cap sequence length for SFT
    trainer = SftCodeGenerationTrainer(config=sft_config)

    trainable_predictor = CodeGenerationTrainablePredictor(
        predictor=predictor, trainer=trainer
    )

    # Open-ended agent: a few new examples per observed error, bounded per call.
    data_strategy = DataGenerationAgent(
        datum_to_generate_per_error=2,
        logging_folder=workdir / "data_strategy_outputs",
        data_generation_per_invocation_limit=60,
    )

    run_generic_accumulation_train_loop(
        validation_tasks=[MnmsTask(split=MnmsSplit.VAL)],
        test_tasks=[MnmsTask(split=MnmsSplit.TEST)],
        trainable_predictor=trainable_predictor,
        training_data_production_strategy=data_strategy,
        io_provider=IoProvider(experiment_dir=workdir),
        num_cycles=10,
        accumulation_iterations_per_cycle=1,
    )


if __name__ == "__main__":
    main()
68 changes: 68 additions & 0 deletions examples/tool_use/skill_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Standard library.
import warnings
from pathlib import Path

# Training loop driver and its per-experiment I/O helper.
from dataenvgym.gym.accumulate_train_loop import (
IoProvider,
run_generic_accumulation_train_loop,
)
# Skill-list baseline: generates training data targeted at weak skills.
from dataenvgym.gym.data_generation_agents.tool_use.baselines.skill_list import (
DataGenerationAgent,
)
# MnMs tool-use benchmark task and its split enum.
from dataenvgym.gym.tasks.tool_use.mnms.task import MnmsSplit, MnmsTask
# vLLM-backed Llama-3-8B-Instruct predictor/trainer stack for code generation.
from dataenvgym.gym.trainable_predictors.tool_use.vllm_predictor import (
LLAMA3_8B_INSTRUCT_INFERENCE_CONFIG,
LLAMA3_8B_INSTRUCT_TRAINER_CONFIG,
CodeGenerationTrainablePredictor,
ParallelVllmCodeGenerationPredictor,
SftCodeGenerationTrainer,
)
from sklearn.exceptions import UndefinedMetricWarning

# Metric computation during evaluation can divide by zero on empty classes;
# silence sklearn's warning about it to keep the training logs readable.
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)


def main():
    """Run the skill-list MnMs tool-use example end to end.

    Builds a vLLM-backed Llama-3-8B-Instruct predictor plus SFT trainer,
    then runs the accumulation train loop: a skill-list data generation
    agent samples imperfect skills from MnMs validation performance and
    synthesizes targeted training data; the predictor is retrained each cycle.
    """
    gpu_count = 8  # adjust together with CUDA_VISIBLE_DEVICES / ray.init(num_gpus=...)
    workdir = Path("workspace/tool_use_skill_list_example")
    workdir.mkdir(parents=True, exist_ok=True)

    # Inference side: parallel vLLM predictor sized to the available GPUs.
    predictor = ParallelVllmCodeGenerationPredictor(
        config=LLAMA3_8B_INSTRUCT_INFERENCE_CONFIG.set_with_gpu_count(gpu_count)
    )

    # Training side: SFT trainer writing checkpoints under the experiment dir.
    sft_config = LLAMA3_8B_INSTRUCT_TRAINER_CONFIG.set_with_gpu_count(
        gpu_count
    ).set_working_directory(workdir)
    sft_config.overrides = ["cutoff_len=1600"]  # cap sequence length for SFT
    trainer = SftCodeGenerationTrainer(config=sft_config)

    trainable_predictor = CodeGenerationTrainablePredictor(
        predictor=predictor, trainer=trainer
    )

    # Skill-list agent: a few examples per skill, drawn from imperfect skills.
    data_strategy = DataGenerationAgent(
        datum_to_generate_per_skill=2,
        max_skills_to_use_during_generation=30,
        logging_folder=workdir / "data_strategy_outputs",
        sampling_method="randomly_sample_imperfect_skills",
    )

    run_generic_accumulation_train_loop(
        validation_tasks=[MnmsTask(split=MnmsSplit.VAL)],
        test_tasks=[MnmsTask(split=MnmsSplit.TEST)],
        trainable_predictor=trainable_predictor,
        training_data_production_strategy=data_strategy,
        io_provider=IoProvider(experiment_dir=workdir),
        num_cycles=10,
        accumulation_iterations_per_cycle=1,
    )


if __name__ == "__main__":
    main()

0 comments on commit ab26ba7

Please sign in to comment.