Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: Add Target Modules Env Variable and ETCD Cleanup #489

Merged
merged 14 commits into from
Jul 2, 2024
15 changes: 13 additions & 2 deletions .github/actions/build-image-action/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,19 @@ runs:
if ! kind get clusters | grep -q kind; then
echo "Creating directory for etcd storage"
sudo mkdir -p /mnt/storage/etcd
echo "Creating Kind cluster using kind-1es.yaml"
if ! kind create cluster --config .github/workflows/kind-cluster/kind-1es.yaml; then
echo "Creating Kind cluster"

if [[ "${{ inputs.runs_on }}" == "hostname:model-server" ]]; then
# Ensure clean etcd directory for mounting
sudo rm -rf /datadrive/etcd/*
config_file=".github/workflows/kind-cluster/kind.yaml"
else
# Ensure clean etcd directory for mounting
sudo rm -rf /mnt/storage/etcd/*
config_file=".github/workflows/kind-cluster/kind-1es.yaml"
fi

if ! kind create cluster --config $config_file; then
echo "Failed to create the Kind cluster."
exit 1
fi
Expand Down
4 changes: 3 additions & 1 deletion pkg/resources/manifests.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ func GenerateStatefulSetManifest(ctx context.Context, workspaceObj *kaitov1alpha
func GenerateTuningJobManifest(ctx context.Context, wObj *kaitov1alpha1.Workspace, imageName string,
imagePullSecretRefs []corev1.LocalObjectReference, replicas int, commands []string, containerPorts []corev1.ContainerPort,
livenessProbe, readinessProbe *corev1.Probe, resourceRequirements corev1.ResourceRequirements, tolerations []corev1.Toleration,
initContainers []corev1.Container, sidecarContainers []corev1.Container, volumes []corev1.Volume, volumeMounts []corev1.VolumeMount) *batchv1.Job {
initContainers []corev1.Container, sidecarContainers []corev1.Container, volumes []corev1.Volume, volumeMounts []corev1.VolumeMount,
envVars []corev1.EnvVar) *batchv1.Job {
labels := map[string]string{
kaitov1alpha1.LabelWorkspaceName: wObj.Name,
}
Expand All @@ -210,6 +211,7 @@ func GenerateTuningJobManifest(ctx context.Context, wObj *kaitov1alpha1.Workspac
ReadinessProbe: readinessProbe,
Ports: containerPorts,
VolumeMounts: volumeMounts,
Env: envVars,
},
}, sidecarContainers...)

Expand Down
11 changes: 10 additions & 1 deletion pkg/tuning/preset-tuning.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,17 @@ func CreatePresetTuning(ctx context.Context, workspaceObj *kaitov1alpha1.Workspa
imagePullSecrets = append(imagePullSecrets, tuningImagePullSecrets...)
}

var envVars []corev1.EnvVar
presetName := strings.ToLower(string(workspaceObj.Tuning.Preset.Name))
// Append environment variable for default target modules if using Phi3 model
if strings.HasPrefix(presetName, "phi-3") {
envVars = append(envVars, corev1.EnvVar{
Name: "DEFAULT_TARGET_MODULES",
Value: "k_proj,q_proj,v_proj,o_proj,gate_proj,down_proj,up_proj",
})
}
jobObj := resources.GenerateTuningJobManifest(ctx, workspaceObj, tuningImage, imagePullSecrets, *workspaceObj.Resource.Count, commands,
containerPorts, nil, nil, resourceReq, tolerations, initContainers, sidecarContainers, volumes, volumeMounts)
containerPorts, nil, nil, resourceReq, tolerations, initContainers, sidecarContainers, volumes, volumeMounts, envVars)

err = resources.CreateResource(ctx, jobObj, kubeClient)
if client.IgnoreAlreadyExists(err) != nil {
Expand Down
34 changes: 33 additions & 1 deletion presets/models/supported_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,36 @@ models:
runtime: tfs
tag: 0.0.1
# Tag history:
# 0.0.1 - Initial Release
# 0.0.1 - Initial Release

- name: phi-3-small-8k-instruct
type: text-generation
version: https://huggingface.co/microsoft/Phi-3-small-8k-instruct/commit/69caae1f2acea34b26f535fecb1f2abb9a304695
runtime: tfs
tag: 0.0.1
# Tag history:
# 0.0.1 - Initial Release

- name: phi-3-small-128k-instruct
type: text-generation
version: https://huggingface.co/microsoft/Phi-3-small-128k-instruct/commit/f80aaa30bfc64c2b8ab214b541d9050e97163bc4
runtime: tfs
tag: 0.0.1
# Tag history:
# 0.0.1 - Initial Release

- name: phi-3-medium-4k-instruct
type: text-generation
version: https://huggingface.co/microsoft/Phi-3-medium-4k-instruct/commit/d194e4e74ffad5a5e193e26af25bcfc80c7f1ffc
runtime: tfs
tag: 0.0.1
# Tag history:
# 0.0.1 - Initial Release

- name: phi-3-medium-128k-instruct
type: text-generation
version: https://huggingface.co/microsoft/Phi-3-medium-128k-instruct/commit/cae1d42b5577398fd1be9f0746052562ae552886
runtime: tfs
tag: 0.0.1
# Tag history:
# 0.0.1 - Initial Release
5 changes: 4 additions & 1 deletion presets/tuning/text-generation/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from transformers import (BitsAndBytesConfig, DataCollatorForLanguageModeling,
PreTrainedTokenizer)

target_modules_env = os.environ.get('DEFAULT_TARGET_MODULES', None)
DEFAULT_TARGET_MODULES = [module.strip() for module in target_modules_env.split(",")] if target_modules_env else None

# Consider Future Support for other trainers
# class TrainerTypes(Enum):
# TRAINER = "Trainer"
Expand All @@ -35,7 +38,7 @@ class ExtLoraConfig(LoraConfig):
Lora Config
"""
init_lora_weights: bool = field(default=True, metadata={"help": "Enable initialization of LoRA weights"})
target_modules: Optional[List[str]] = field(default=None, metadata={"help": ("List of module names to replace with LoRA.")})
target_modules: Optional[List[str]] = field(default_factory=lambda: DEFAULT_TARGET_MODULES if DEFAULT_TARGET_MODULES else None, metadata={"help": "List of module names to replace with LoRA."})
layers_to_transform: Optional[List[int]] = field(default=None, metadata={"help": "Layer indices to apply LoRA"})
layers_pattern: Optional[List[str]] = field(default=None, metadata={"help": "Pattern to match layers for LoRA"})
loftq_config: Dict[str, any] = field(default_factory=dict, metadata={"help": "LoftQ configuration for quantization"})
Expand Down
Loading