Prepping for 7b training
jakep-allenai committed Sep 25, 2024
1 parent 5f9b234 commit 24b30b2
Showing 3 changed files with 87 additions and 2 deletions.
4 changes: 2 additions & 2 deletions pdelfin/eval/runeval.py
@@ -257,5 +257,5 @@ def do_eval(gold_data_path: str, eval_data_path: str, ) -> tuple[float, list[dic


 if __name__ == "__main__":
-    result = do_eval(gold_data_path="s3://ai2-oe-data/jakep/openai_batch_done_v2/",
-                     eval_data_path="s3://ai2-oe-data/birr-dev/qwen2-vl/outputs/for-jake/2b/2024-09-24/")
+    result = do_eval(gold_data_path="s3://ai2-oe-data/jakep/openai_batch_done_eval_mini",
+                     eval_data_path="s3://ai2-oe-data/jakep/qwen2vl/Qwen_Qwen2-VL-2B-Instruct-4c8e4c-01J8N1D42YV9F20AHFE6D3WK21/")
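For reference, a minimal sketch of driving this evaluation entry point from other code (the S3 paths are the new ones above; unpacking the return value into an aggregate score and per-document records is an assumption based on the tuple[float, list[...]] signature):

    from pdelfin.eval.runeval import do_eval

    # Compare model outputs against the gold OpenAI batch annotations for the
    # mini eval set; treating the returned tuple as (score, per-document
    # records) is an assumption based on the signature shown above.
    score, comparisons = do_eval(
        gold_data_path="s3://ai2-oe-data/jakep/openai_batch_done_eval_mini",
        eval_data_path="s3://ai2-oe-data/jakep/qwen2vl/Qwen_Qwen2-VL-2B-Instruct-4c8e4c-01J8N1D42YV9F20AHFE6D3WK21/",
    )
    print(f"Aggregate score: {score:.4f} across {len(comparisons)} documents")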
84 changes: 84 additions & 0 deletions pdelfin/train/config/qwen2vl-7b.yaml
@@ -0,0 +1,84 @@
model:
  name_or_path: Qwen/Qwen2-VL-7B-Instruct
  arch: causal
  use_flash_attn: true

wandb:
  project: pdelfin
  entity: ai2-llm

# TODO This is not used
format:
  instruction_template: "Original:"
  response_template: "Rewritten:"
  # Template from here: https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py#L30
  chat_template: |
    {% for message in messages %}
    {{'<|im_start|>' + message['role'] + '\n' + message['content']}}
    {% if loop.last %}
    {{ '<|im_end|>'}}
    {% else %}
    {{ '<|im_end|>\n' }}
    {% endif %}
    {% endfor %}

generate:
  max_length: 4096

train_data:
  seed: 1337
  sources:
    - name: openai_batch_data_v2
      query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_v2/*.jsonl
      response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_v2/*.json
      backend:
        - openai
      size: 100_000

valid_data:
  sources:
    - name: openai_batch_data_eval_mini
      query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_eval_mini/*.jsonl
      response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_eval_mini/*.json
      backend:
        - openai
      size: 100_000

# Mostly pulled from https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.sh
hparams:
  batch_size: 1
  eval_batch_size: 1
  gradient_accumulation_steps: 4
  gradient_checkpointing: false
  clip_grad_norm: 1.0
  learning_rate: 3e-4
  max_steps: 5000
  pad_multiple_of: 16
  log_every_steps: 50
  eval_every_steps: 500
  optim: adamw_torch
  lr_scheduler: cosine
  weight_decay: 0.01
  warmup_ratio: 0.03

# From https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py
# Disable LORA for now, because we want the visual network to get trained too
# lora:
#   rank: 32
#   alpha: 32
#   dropout: 0.05
#   task_type: causal_lm
#   target_modules:
#     - q_proj
#     - k_proj
#     - v_proj
#     - o_proj
#     - gate_proj
#     - up_proj
#     - down_proj

save:
  path: s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/
  save_every_steps: 1000

max_workers: 10
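As a quick sanity check on the new config (a sketch under assumptions, not the trainer's actual loader; how pdelfin/train/train.py builds its TrainConfig is not shown in this diff), the YAML can be read with PyYAML, the Qwen2-style chat template rendered with Jinja2, and the effective batch size derived from the hparams block:

    import yaml
    from jinja2 import Template

    with open("pdelfin/train/config/qwen2vl-7b.yaml") as f:
        cfg = yaml.safe_load(f)

    # Render the chat template for a toy conversation to confirm the
    # <|im_start|>/<|im_end|> framing (the message contents are made up).
    template = Template(cfg["format"]["chat_template"])
    print(template.render(messages=[
        {"role": "user", "content": "Original: <page text>"},
        {"role": "assistant", "content": "Rewritten: <cleaned page text>"},
    ]))

    # Effective batch size per optimizer step is
    # batch_size * gradient_accumulation_steps * data-parallel world size.
    hp = cfg["hparams"]
    world_size = 8  # assumption: a single 8-GPU node
    print(hp["batch_size"] * hp["gradient_accumulation_steps"] * world_size)  # -> 32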
1 change: 1 addition & 0 deletions pdelfin/train/train.py
@@ -212,6 +212,7 @@ def run_train(config: TrainConfig):
         logger.info("LoRA adapters merged successfully.")

     model.save_pretrained(best_dir)
+
     logger.info("Saved best model to %s", best_dir)
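For context, a hedged sketch of the surrounding merge-and-save step when LoRA is enabled (not the repository's exact code; peft's PeftModel.merge_and_unload is the standard call for folding adapters back into the base weights before saving):

    from peft import PeftModel

    # Fold LoRA adapters back into the base weights so the saved checkpoint
    # is a plain, standalone model that loads without peft.
    if isinstance(model, PeftModel):
        model = model.merge_and_unload()
        logger.info("LoRA adapters merged successfully.")

    model.save_pretrained(best_dir)

    logger.info("Saved best model to %s", best_dir)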

