Installation
- When using poetry:
  poetry install --no-root
- When using pip:
  pip install -e .
- When using poetry, flash attention must be installed separately because it does not comply with PEP 517. The upstream maintainers explain why flash-attention does not use PEP 517 (Dao-AILab/flash-attention#493):
  "We tried this earlier (v1.0.4 to 1.0.8 i think) and it was a lot of headache because the torch requirement conflicts with different ways that people install pytorch in their system."

  pip install flash-attn --no-build-isolation
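
To confirm that the separately installed flash-attn actually built against the local torch, a quick import check helps (a minimal sketch; assumes a CUDA-capable environment):

```bash
# Sanity check: both imports should succeed and print versions.
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"
python -c "import flash_attn; print(flash_attn.__version__)"
```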
Training example:

export RUN_NAME="llama-functioncall"
export WANDB_PROJECT="function_call"
# Makes flash-attention kernels deterministic (pairs with --full_determinism below).
export FLASH_ATTENTION_DETERMINISTIC="1"
accelerate launch --debug --config_file "./examples/configs/accelerate_config.yaml" ./main.py \
--model_name_or_path /data/nlp-public_338/models/decoder/internal/kanana-nano-3.1b-instruct-v1.0.1 \
--train_data_path train.jsonl \
--eval_data_path eval.jsonl \
--bf16 true \
--attn_implementation "flash_attention_2" \
--torch_dtype bfloat16 \
--num_train_epochs 2 \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 8 \
--per_device_eval_batch_size 4 \
--eval_accumulation_steps 1 \
--evaluation_strategy "epoch" \
--save_strategy "epoch" \
--learning_rate 2e-5 \
--weight_decay 0.01 \
--warmup_ratio 0.03 \
--max_grad_norm 1.0 \
--optim "adamw_8bit" \
--lr_scheduler_type "cosine" \
--logging_steps 1 \
--seed 42 \
--data_seed 42 \
--max_seq_length 4096 \
--output_dir "checkpoints/${RUN_NAME}" \
--full_determinism true \
--run_name ${RUN_NAME} \
| tee ./logs/${RUN_NAME}.log
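
After training, a quick generation smoke test on the saved checkpoint can catch loading problems before serving. This is a minimal sketch, not part of the pipeline: the checkpoint path (which may be a checkpoint-N subdirectory under output_dir, depending on when the final model was saved) and the prompt are placeholders.

```bash
python - <<'EOF'
# Hypothetical smoke test: load the fine-tuned checkpoint and generate once.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "checkpoints/llama-functioncall"  # placeholder: adjust to the actual save dir
tok = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(
    ckpt, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [{"role": "user", "content": "Hello! What can you do?"}]
input_ids = tok.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
out = model.generate(input_ids, max_new_tokens=64)
# Print only the newly generated tokens.
print(tok.decode(out[0][input_ids.shape[-1]:], skip_special_tokens=True))
EOF
```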
Serving the trained model with vLLM for function calling:

export CUDA_VISIBLE_DEVICES=7
export MODEL=/path/to/model
export SERVED_MODEL_NAME=model_nickname
python -m kanana_trainer.inference.vllm_fc \
--model ${MODEL} \
--served-model-name ${SERVED_MODEL_NAME} \
--tool-call-parser functionary_llama_v3 \
--port 8000 \
--enable-auto-tool-choice
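
Once the server is up, function calling can be exercised through vLLM's OpenAI-compatible chat completions endpoint. Below is a sketch with a made-up get_weather tool; the port and served model name follow the command above. With --enable-auto-tool-choice, the response should contain a tool_calls entry when the model decides to call the tool.

```bash
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "model_nickname",
    "messages": [{"role": "user", "content": "What is the weather like in Seoul right now?"}],
    "tools": [{
      "type": "function",
      "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city",
        "parameters": {
          "type": "object",
          "properties": {"city": {"type": "string", "description": "City name"}},
          "required": ["city"]
        }
      }
    }]
  }'
```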