Commit

Lora config
jakep-allenai committed Sep 25, 2024
1 parent 24b30b2 commit 385c1bf
Showing 1 changed file with 89 additions and 0 deletions.
89 changes: 89 additions & 0 deletions pdelfin/train/config/qwen2vl-7b-lora.yaml
@@ -0,0 +1,89 @@
model:
  name_or_path: Qwen/Qwen2-VL-7B-Instruct
  arch: causal
  use_flash_attn: true

wandb:
  project: pdelfin
  entity: ai2-llm

# TODO This is not used
format:
  instruction_template: "Original:"
  response_template: "Rewritten:"
  # Template from here: https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py#L30
  chat_template: |
    {% for message in messages %}
    {{'<|im_start|>' + message['role'] + '\n' + message['content']}}
    {% if loop.last %}
    {{ '<|im_end|>'}}
    {% else %}
    {{ '<|im_end|>\n' }}
    {% endif %}
    {% endfor %}

generate:
  max_length: 4096

train_data:
  seed: 1337
  sources:
    - name: openai_batch_data_v2
      query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_v2/*.jsonl
      response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_v2/*.json
      backend:
        - openai
      size: 100_000

valid_data:
  sources:
    - name: openai_batch_data_eval_mini
      query_glob_path: s3://ai2-oe-data/jakep/openai_batch_data_eval_mini/*.jsonl
      response_glob_path: s3://ai2-oe-data/jakep/openai_batch_done_eval_mini/*.json
      backend:
        - openai
      size: 100_000

# Mostly pulled from https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.sh
hparams:
  batch_size: 1
  eval_batch_size: 1
  gradient_accumulation_steps: 4
  gradient_checkpointing: false
  clip_grad_norm: 1.0
  learning_rate: 3e-4
  max_steps: 5000
  pad_multiple_of: 16
  log_every_steps: 50
  eval_every_steps: 500
  optim: adamw_torch
  lr_scheduler: cosine
  weight_decay: 0.01
  warmup_ratio: 0.03

# From https://github.com/QwenLM/Qwen2/blob/main/examples/sft/finetune.py
lora:
  rank: 32
  alpha: 32
  dropout: 0.05
  task_type: causal_lm
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
    - visual.blocks.[0-9]+.attn.qkv
    - visual.blocks.[0-9]+.attn.proj
    - visual.blocks.[0-9]+.mlp.fc1
    - visual.blocks.[0-9]+.mlp.fc2
    - visual.merger.mlp.0
    - visual.merger.mlp.2

save:
  path: s3://ai2-oe-data/jakep/experiments/qwen2vl-pdf/v1/models/
  save_every_steps: 1000

max_workers: 10
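
For context, here is a minimal sketch (not part of this commit) of how the lora section above could be mapped onto a Hugging Face PEFT LoraConfig. The file path, the use of the peft library, and the regex-joining step are assumptions; the pdelfin trainer that actually consumes this YAML is not shown in this diff.

import yaml
from peft import LoraConfig, TaskType

# Load the config file added in this commit.
with open("pdelfin/train/config/qwen2vl-7b-lora.yaml") as f:
    cfg = yaml.safe_load(f)

lora = cfg["lora"]

# PEFT treats a string target_modules as a regex over full module names, while a
# list is matched by exact name or suffix. The visual-tower entries in this config
# are written as regexes, so this sketch joins everything into one pattern; the
# real trainer may resolve them differently.
target_pattern = ".*(" + "|".join(lora["target_modules"]) + ")"

peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,  # lora.task_type: causal_lm
    r=lora["rank"],                # rank: 32
    lora_alpha=lora["alpha"],      # alpha: 32
    lora_dropout=lora["dropout"],  # dropout: 0.05
    target_modules=target_pattern,
)
# The adapter would then be attached with peft.get_peft_model(model, peft_config).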

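The chat_template in the format section is plain Jinja2, so it can be rendered on its own to inspect the prompt layout it produces. A small sketch, reusing the cfg loaded above; the example messages are made up:

from jinja2 import Template

messages = [
    {"role": "user", "content": "Original: some text"},
    {"role": "assistant", "content": "Rewritten: some text"},
]

# Each message renders as an <|im_start|>{role}\n{content}<|im_end|> block; the
# exact surrounding whitespace depends on Jinja2 block-trimming settings.
print(Template(cfg["format"]["chat_template"]).render(messages=messages))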