From d226101d75cd3f12299486cd3ebade5039fe893b Mon Sep 17 00:00:00 2001
From: Sean Yang
Date: Mon, 27 Jan 2025 12:15:39 -0800
Subject: [PATCH] Remove duplicate templates, add to MANIFEST.in (#3183)

- Remove duplicate template files left behind by the provision refactor and update the paths that referenced them
- The build process did not include lighter/templates, so it is now added to MANIFEST.in

### Types of changes

- [x] Non-breaking change (fix or new feature that would not break existing functionality).
- [ ] Breaking change (fix or new feature that would cause existing functionality to change).
- [ ] New tests added to cover the changes.
- [ ] Quick tests passed locally by running `./runtest.sh`.
- [ ] In-line docstrings updated.
- [ ] Documentation updated.
---
 MANIFEST.in                              |    1 +
 nvflare/dashboard/application/blob.py    |    4 +-
 nvflare/dashboard/cli.py                 |    6 +-
 nvflare/lighter/impl/aws_template.yml    |  437 ---------
 nvflare/lighter/impl/azure_template.yml  |  517 -----------
 nvflare/lighter/impl/master_template.yml | 1048 ----------------------
 6 files changed, 6 insertions(+), 2007 deletions(-)
 delete mode 100644 nvflare/lighter/impl/aws_template.yml
 delete mode 100644 nvflare/lighter/impl/azure_template.yml
 delete mode 100644 nvflare/lighter/impl/master_template.yml

diff --git a/MANIFEST.in b/MANIFEST.in
index 046e779d28..4b74873322 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,3 +3,4 @@ include nvflare/_version.py
 include nvflare/libs/*.so
 include nvflare/fuel/utils/*.json
 include nvflare/private/fed/app/simulator/log_config.json
+include nvflare/lighter/templates
diff --git a/nvflare/dashboard/application/blob.py b/nvflare/dashboard/application/blob.py
index 5c6d02b69f..3f2275b75a 100644
--- a/nvflare/dashboard/application/blob.py
+++ b/nvflare/dashboard/application/blob.py
@@ -24,10 +24,10 @@
 from .models import Client, Project, User
 
 lighter_folder = os.path.dirname(utils.__file__)
-template = utils.load_yaml(os.path.join(lighter_folder, "impl", "master_template.yml"))
+template = utils.load_yaml(os.path.join(lighter_folder, "templates", "master_template.yml"))
 supported_csps = ["aws", "azure"]
 for csp in supported_csps:
-    csp_template_file = os.path.join(lighter_folder, "impl", f"{csp}_template.yml")
+    csp_template_file = os.path.join(lighter_folder, "templates", f"{csp}_template.yml")
     if os.path.exists(csp_template_file):
         template.update(utils.load_yaml(csp_template_file))
diff --git a/nvflare/dashboard/cli.py b/nvflare/dashboard/cli.py
index fef82525d6..80a94f612e 100644
--- a/nvflare/dashboard/cli.py
+++ b/nvflare/dashboard/cli.py
@@ -137,9 +137,9 @@ def stop():
 def cloud(args):
     lighter_folder = os.path.dirname(utils.__file__)
-    template = utils.load_yaml(os.path.join(lighter_folder, "impl", "master_template.yml"))
-    template.update(utils.load_yaml(os.path.join(lighter_folder, "impl", "aws_template.yml")))
-    template.update(utils.load_yaml(os.path.join(lighter_folder, "impl", "azure_template.yml")))
+    template = utils.load_yaml(os.path.join(lighter_folder, "templates", "master_template.yml"))
+    template.update(utils.load_yaml(os.path.join(lighter_folder, "templates", "aws_template.yml")))
+    template.update(utils.load_yaml(os.path.join(lighter_folder, "templates", "azure_template.yml")))
     tplt = tplt_utils.Template(template)
     cwd = os.getcwd()
     csp = args.cloud
diff --git a/nvflare/lighter/impl/aws_template.yml b/nvflare/lighter/impl/aws_template.yml
deleted file mode 100644
index bd6a918906..0000000000
--- a/nvflare/lighter/impl/aws_template.yml
+++ /dev/null
@@ -1,437 +0,0 @@
-aws_start_sh: |
-
-  function find_ec2_gpu_instance_type() {
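-    # Reads local/resources.json (via get_resources_file); when num_of_gpus > 0,
-    # queries EC2 for NVIDIA instance types that meet the GPU count and per-GPU
-    # memory, then switches EC2_TYPE to the match with the fewest vCPUs.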
local gpucnt=0 - local gpumem=0 - if rfile=$(get_resources_file) - then - # Parse the number of GPUs and memory per GPU from the resource_manager component in local/resources.json - gpucnt=$(jq -r '.components[] | select(.id == "resource_manager") | .args.num_of_gpus' "${rfile}") - if [ ${gpucnt} -gt 0 ] - then - gpumem=$(jq -r '.components[] | select(.id == "resource_manager") | .args.mem_per_gpu_in_GiB' "${rfile}") - if [ ${gpumem} -gt 0 ] - then - gpumem=$(( ${gpumem}*1024 )) - printf " finding smallest instance type with ${gpucnt} GPUs and ${gpumem} MiB VRAM ... " - gpu_types=$(aws ec2 describe-instance-types --region ${REGION} --query 'InstanceTypes[?GpuInfo.Gpus[?Manufacturer==`NVIDIA`]].{InstanceType: InstanceType, GPU: GpuInfo.Gpus[*].{Name: Name, GpuMemoryMiB: MemoryInfo.SizeInMiB, GpuCount: Count}, Architecture: ProcessorInfo.SupportedArchitectures, VCpuCount: VCpuInfo.DefaultVCpus, MemoryMiB: MemoryInfo.SizeInMiB}' --output json) - filtered_gpu_types=$(echo ${gpu_types} | jq "[.[] | select(.GPU | any(.GpuCount == ${gpucnt} and .GpuMemoryMiB >= ${gpumem})) | select(.Architecture | index(\"${ARCH}\"))]") - smallest_gpu_type=$(echo ${filtered_gpu_types} | jq -r 'min_by(.VCpuCount).InstanceType') - if [ ${smallest_gpu_type} = null ] - then - echo "failed finding a GPU instance, EC2_TYPE unchanged." - else - echo "${smallest_gpu_type} found" - EC2_TYPE=${smallest_gpu_type} - fi - fi - fi - fi - } - - VM_NAME=nvflare_{~~type~~} - SECURITY_GROUP=nvflare_{~~type~~}_sg_$RANDOM - DEST_FOLDER=/var/tmp/cloud - KEY_PAIR=NVFlare{~~type~~}KeyPair - KEY_FILE=${KEY_PAIR}.pem - AMI_IMAGE_OWNER="099720109477" # Owner account id=Amazon - AMI_NAME="ubuntu-*-22.04-amd64-pro-server" - ARCH=x86_64 - AMI_IMAGE=ami-03c983f9003cb9cd1 # 22.04 20.04:ami-04bad3c587fe60d89 24.04:ami-0406d1fdd021121cd - EC2_TYPE=t2.small - EC2_TYPE_ARM=t4g.small - TMPDIR="${TMPDIR:-/tmp}" - LOGFILE=$(mktemp "${TMPDIR}/nvflare-aws-XXX") - - echo "This script requires aws (AWS CLI), sshpass, dig and jq. Now checking if they are installed." - - check_binary aws "Please see https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html on how to install it on your system." - check_binary sshpass "Please install it first." - check_binary dig "Please install it first." - check_binary jq "Please install it first." - - REGION=$(aws configure get region 2>/dev/null) - : "${REGION:=us-west-2}" - : "${AWS_DEFAULT_REGION:=$REGION}" - : "${AWS_REGION:=$AWS_DEFAULT_REGION}" - REGION=${AWS_REGION} - - echo "Note: run this command first for a different AWS profile:" - echo " export AWS_PROFILE=your-profile-name." - - echo -e "\nChecking AWS identity ... \n" - aws_identity=$(aws sts get-caller-identity) - if [[ $? -ne 0 ]]; then - echo "" - exit 1 - fi - - if [ -z ${vpc_id+x} ] - then - using_default_vpc=true - else - using_default_vpc=false - fi - - if [ -z ${image_name+x} ] - then - container=false - else - container=true - fi - - if [ $container == "true" ] - then - AMI_IMAGE=ami-06b8d5099f3a8d79d - EC2_TYPE=t2.xlarge - fi - - if [ -z ${config_file+x} ] - then - useDefault=true - else - useDefault=false - . $config_file - report_status "$?" "Loading config file" - fi - - if [ $useDefault == true ] - then - while true - do - prompt REGION "* Cloud EC2 region, press ENTER to accept default" "${REGION}" - if [ ${container} = false ] - then - prompt AMI_NAME "* Cloud AMI image name (use amd64 or arm64), press ENTER to accept default" "${AMI_NAME}" - printf " retrieving AMI ID for ${AMI_NAME} ... 
" - IMAGES=$(aws ec2 describe-images --region ${REGION} --owners ${AMI_IMAGE_OWNER} --filters "Name=name,Values=*${AMI_NAME}*" --output json) - if [ "${#IMAGES}" -lt 30 ] - then - echo -e "\nNo images found, starting over\n" - continue - fi - AMI_IMAGE=$(echo $IMAGES | jq -r '.Images | sort_by(.CreationDate) | last(.[]).ImageId') - echo "${AMI_IMAGE} found" - if [[ "$AMI_NAME" == *"arm64"* ]] - then - ARCH="arm64" - EC2_TYPE=${EC2_TYPE_ARM} - fi - find_ec2_gpu_instance_type - fi - prompt AMI_IMAGE "* Cloud AMI image, press ENTER to accept default" - prompt EC2_TYPE "* Cloud EC2 type, press ENTER to accept default" "${EC2_TYPE}" - prompt ans "region = ${REGION}, ami image = ${AMI_IMAGE}, EC2 type = ${EC2_TYPE}, OK? (Y/n)" - if [[ $ans = "" ]] || [[ $ans =~ ^(y|Y)$ ]] - then - break - fi - done - fi - - if [ $container == false ] - then - echo "If the {~~type~~} requires additional Python packages, please add them to: " - echo " ${DIR}/requirements.txt" - prompt ans "Press ENTER when it's done or no additional dependencies. " - fi - - # Check if default VPC exists - if [ $using_default_vpc == true ] - then - echo "Checking if default VPC exists" - found_default_vpc=$(aws ec2 describe-vpcs --region ${REGION} | jq '.Vpcs[] | select(.IsDefault == true)') - if [ -z "${found_default_vpc}" ] - then - echo "No default VPC found. Please create one before running this script with the following command." - echo "aws ec2 create-default-vpc --region ${REGION}" - echo "or specify your own vpc and subnet with --vpc-id and --subnet-id" - exit - else - echo "Default VPC found" - fi - else - echo "Please check the vpc-id $vpc_id and subnet-id $subnet_id are correct and they support EC2 with public IP and internet gateway with proper routing." - echo "This script will use the above info to create EC2 instance." - fi - - cd $DIR/.. - # Generate key pair - - echo "Generating key pair for VM" - - aws ec2 delete-key-pair --region ${REGION} --key-name $KEY_PAIR > /dev/null 2>&1 - rm -rf $KEY_FILE - aws ec2 create-key-pair --region ${REGION} --key-name $KEY_PAIR --query 'KeyMaterial' --output text > $KEY_FILE - report_status "$?" "creating key pair" - chmod 400 $KEY_FILE - - # Generate Security Group - # Try not reusing existing security group because we have to modify it for our own need. - if [ $using_default_vpc == true ] - then - sg_id=$(aws ec2 create-security-group --region ${REGION} --group-name $SECURITY_GROUP --description "NVFlare security group" | jq -r .GroupId) - else - sg_id=$(aws ec2 create-security-group --region ${REGION} --group-name $SECURITY_GROUP --description "NVFlare security group" --vpc-id $vpc_id | jq -r .GroupId) - fi - report_status "$?" "creating security group" - my_public_ip=$(dig +short myip.opendns.com @resolver1.opendns.com) - if [ "$?" -eq 0 ] && [[ "$my_public_ip" =~ ^(([1-9]?[0-9]|1[0-9][0-9]|2([0-4][0-9]|5[0-5]))\.){3}([1-9]?[0-9]|1[0-9][0-9]|2([0-4][0-9]|5[0-5]))$ ]] - then - aws ec2 authorize-security-group-ingress --region ${REGION} --group-id $sg_id --protocol tcp --port 22 --cidr ${my_public_ip}/32 > ${LOGFILE}.sec_grp.log - else - echo "getting my public IP failed, please manually configure the inbound rule to limit SSH access" - aws ec2 authorize-security-group-ingress --region ${REGION} --group-id $sg_id --protocol tcp --port 22 --cidr 0.0.0.0/0 > ${LOGFILE}.sec_grp.log - fi - {~~inbound_rule~~} - report_status "$?" "creating security group rules" - - # Start provisioning - - echo "Creating VM at region ${REGION}, may take a few minutes." 
- - ami_info=$(aws ec2 describe-images --region ${REGION} --image-ids $AMI_IMAGE --output json) - amidevice=$(echo $ami_info | jq -r '.Images[0].BlockDeviceMappings[0].DeviceName') - block_device_mappings=$(echo $ami_info | jq -r '.Images[0].BlockDeviceMappings') - original_size=$(echo $block_device_mappings | jq -r '.[0].Ebs.VolumeSize') - original_volume_type=$(echo $block_device_mappings | jq -r '.[0].Ebs.VolumeType') - new_size=$((original_size + 8)) # increase disk size by 8GB for nvflare, torch, etc - bdmap='[{"DeviceName":"'${amidevice}'","Ebs":{"VolumeSize":'${new_size}',"VolumeType":"'${original_volume_type}'","DeleteOnTermination":true}}]' - - if [ $using_default_vpc == true ] - then - aws ec2 run-instances --region ${REGION} --image-id $AMI_IMAGE --count 1 --instance-type $EC2_TYPE --key-name $KEY_PAIR --block-device-mappings $bdmap --security-group-ids $sg_id > vm_create.json - else - aws ec2 run-instances --region ${REGION} --image-id $AMI_IMAGE --count 1 --instance-type $EC2_TYPE --key-name $KEY_PAIR --block-device-mappings $bdmap --security-group-ids $sg_id --subnet-id $subnet_id > vm_create.json - fi - report_status "$?" "creating VM" - instance_id=$(jq -r .Instances[0].InstanceId vm_create.json) - - longkeyfile="$(pwd)/${KEY_PAIR}_${instance_id}.pem" - cp -f ${KEY_FILE} "${longkeyfile}" - chmod 400 "${longkeyfile}" - KEY_FILE="${longkeyfile}" - - aws ec2 wait instance-status-ok --region ${REGION} --instance-ids $instance_id - aws ec2 describe-instances --region ${REGION} --instance-ids $instance_id > vm_result.json - - IP_ADDRESS=$(jq -r .Reservations[0].Instances[0].PublicIpAddress vm_result.json) - - echo "VM created with IP address: ${IP_ADDRESS}" - - echo "Copying files to $VM_NAME" - DEST_SITE=ubuntu@${IP_ADDRESS} - DEST=${DEST_SITE}:${DEST_FOLDER} - echo "Destination folder is ${DEST}" - scp -q -i "${KEY_FILE}" -r -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $PWD $DEST - report_status "$?" "copying startup kits to VM" - - rm -f ${LOGFILE}.log - if [ $container == true ] - then - echo "Launching container with docker option ${DOCKER_OPTION}." - ssh -f -i "${KEY_FILE}" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${DEST_SITE} \ - "docker run -d -v ${DEST_FOLDER}:${DEST_FOLDER} --network host ${DOCKER_OPTION} ${image_name} \ - /bin/bash -c \"python -u -m nvflare.private.fed.app.{~~type~~}.{~~type~~}_train -m ${DEST_FOLDER} \ - -s fed_{~~type~~}.json --set {~~cln_uid~~} secure_train=true config_folder=config org={~~ORG~~} \" " > /tmp/nvflare.log 2>&1 - report_status "$?" "launching container" - else - echo "Installing os packages as root in $VM_NAME, may take a few minutes ... " - ssh -f -i "${KEY_FILE}" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${DEST_SITE} \ - ' NVIDIA_OS_PKG="nvidia-driver-550-server" && sudo apt update && \ - sudo DEBIAN_FRONTEND=noninteractive apt install -y python3-dev gcc && \ - . /etc/os-release && if [ "${VERSION_ID}" \< "22.04" ]; then NVIDIA_OS_PKG="nvidia-driver-535-server"; fi && \ - if lspci | grep -i nvidia; then sudo DEBIAN_FRONTEND=noninteractive apt install -y ${NVIDIA_OS_PKG}; fi && \ - if lspci | grep -i nvidia; then sudo modprobe nvidia; fi && sleep 10 && \ - exit' >> ${LOGFILE}.log 2>&1 - report_status "$?" "installing os packages" - sleep 10 - echo "Installing user space packages in $VM_NAME, may take a few minutes ... 
" - ssh -f -i "${KEY_FILE}" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${DEST_SITE} \ - ' echo "export PATH=~/.local/bin:$PATH" >> ~/.bashrc && \ - export PATH=/home/ubuntu/.local/bin:$PATH && \ - pwd && wget -q https://bootstrap.pypa.io/get-pip.py && \ - timeout 300 sh -c """until [ -f /usr/bin/gcc ]; do sleep 3; done""" && \ - python3 get-pip.py --break-system-packages && python3 -m pip install --break-system-packages nvflare && \ - touch /var/tmp/cloud/startup/requirements.txt && \ - printf "installing from requirements.txt: " && \ - cat /var/tmp/cloud/startup/requirements.txt | tr "\n" " " && \ - python3 -m pip install --break-system-packages --no-cache-dir -r /var/tmp/cloud/startup/requirements.txt && \ - (crontab -l 2>/dev/null; echo "@reboot /var/tmp/cloud/startup/start.sh >> /var/tmp/nvflare-start.log 2>&1") | crontab && \ - NVIDIAMOD="nvidia.ko.zst" && . /etc/os-release && if [ "${VERSION_ID}" \< "24.04" -a "${VERSION_ID}" \> "16.04" ]; then NVIDIAMOD="nvidia.ko"; fi && \ - if lspci | grep -i nvidia; then timeout 900 sh -c """until [ -f /lib/modules/$(uname -r)/updates/dkms/${NVIDIAMOD} ]; do sleep 3; done"""; fi && \ - sleep 60 && nohup /var/tmp/cloud/startup/start.sh && sleep 20 && \ - exit' >> ${LOGFILE}.log 2>&1 - report_status "$?" "installing user space packages" - sleep 10 - fi - - echo "System was provisioned, packages may continue to install in the background." - echo "To terminate the EC2 instance, run the following command:" - echo " aws ec2 terminate-instances --region ${REGION} --instance-ids ${instance_id}" - echo "Other resources provisioned" - echo "security group: ${SECURITY_GROUP}" - echo "key pair: ${KEY_PAIR}" - echo "review install progress:" - echo " tail -f ${LOGFILE}.log" - echo "login to instance:" - echo " ssh -i \"${KEY_FILE}\" ubuntu@${IP_ADDRESS}" - -aws_start_dsb_sh: | - VM_NAME=nvflare_dashboard - AMI_IMAGE=ami-04c7330a29e61bbca # 22.04 from https://cloud-images.ubuntu.com/locator/ec2/ - EC2_TYPE=t2.small - SECURITY_GROUP=nvflare_dashboard_sg_$RANDOM - REGION=us-west-2 - ADMIN_USERNAME=ubuntu - DEST_FOLDER=/home/${ADMIN_USERNAME} - KEY_PAIR=NVFlareDashboardKeyPair - KEY_FILE=${KEY_PAIR}.pem - - echo "This script requires aws (AWS CLI), sshpass, dig and jq. Now checking if they are installed." - - check_binary aws "Please see https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html on how to install it on your system." - check_binary sshpass "Please install it first." - check_binary dig "Please install it first." - check_binary jq "Please install it first." - - if [ -z ${vpc_id+x} ] - then - using_default_vpc=true - else - using_default_vpc=false - fi - - echo "One initial user will be created when starting dashboard." - echo "Please enter the email address for this user." - read email - credential="${email}:$RANDOM" - - # Generate key pair - - echo "Generating key pair for VM" - - aws ec2 delete-key-pair --key-name $KEY_PAIR > /dev/null 2>&1 - rm -rf $KEY_FILE - aws ec2 create-key-pair --key-name $KEY_PAIR --query 'KeyMaterial' --output text > $KEY_FILE - report_status "$?" "creating key pair" - chmod 400 $KEY_FILE - - # Check if default VPC exists - if [ $using_default_vpc == true ] - then - echo "Checking if default VPC exists" - found_default_vpc=$(aws ec2 describe-vpcs | jq '.Vpcs[] | select(.IsDefault == true)') - if [ -z "${found_default_vpc}" ] - then - echo "No default VPC found. Please create one before running this script with the following command." 
- echo "aws ec2 create-default-vpc" - echo "or specify your own vpc and subnet with --vpc-id and --subnet-id" - exit - else - echo "Default VPC found" - fi - else - echo "Please check the vpc-id $vpc_id and subnet-id $subnet_id are correct and they support EC2 with public IP and internet gateway with proper routing." - echo "This script will use the above info to create EC2 instance." - fi - - # Generate Security Group - # Try not reusing existing security group because we have to modify it for our own need. - if [ $using_default_vpc == true ] - then - sg_id=$(aws ec2 create-security-group --group-name $SECURITY_GROUP --description "NVFlare security group" | jq -r .GroupId) - else - sg_id=$(aws ec2 create-security-group --group-name $SECURITY_GROUP --description "NVFlare security group" --vpc-id $vpc_id | jq -r .GroupId) - fi - report_status "$?" "creating security group" - echo "Security group id: ${sg_id}" - my_public_ip=$(dig +short myip.opendns.com @resolver1.opendns.com) - if [ "$?" -eq 0 ] && [[ "$my_public_ip" =~ ^(([1-9]?[0-9]|1[0-9][0-9]|2([0-4][0-9]|5[0-5]))\.){3}([1-9]?[0-9]|1[0-9][0-9]|2([0-4][0-9]|5[0-5]))$ ]] - then - aws ec2 authorize-security-group-ingress --group-id $sg_id --protocol tcp --port 22 --cidr ${my_public_ip}/32 > /tmp/sec_grp.log - else - echo "getting my public IP failed, please manually configure the inbound rule to limit SSH access" - aws ec2 authorize-security-group-ingress --group-id $sg_id --protocol tcp --port 22 --cidr 0.0.0.0/0 > /tmp/sec_grp.log - fi - aws ec2 authorize-security-group-ingress --group-id $sg_id --protocol tcp --port 443 --cidr 0.0.0.0/0 >> /tmp/sec_grp.log - report_status "$?" "creating security group rules" - - # Start provisioning - - echo "Creating VM at region ${REGION}, may take a few minutes." - if [ $using_default_vpc == true ] - then - aws ec2 run-instances --region ${REGION} --image-id $AMI_IMAGE --count 1 --instance-type $EC2_TYPE --key-name $KEY_PAIR --security-group-ids $sg_id > vm_create.json - else - aws ec2 run-instances --region ${REGION} --image-id $AMI_IMAGE --count 1 --instance-type $EC2_TYPE --key-name $KEY_PAIR --security-group-ids $sg_id --subnet-id $subnet_id > vm_create.json - fi - report_status "$?" "creating VM" - instance_id=$(jq -r .Instances[0].InstanceId vm_create.json) - - aws ec2 wait instance-status-ok --instance-ids $instance_id - aws ec2 describe-instances --instance-ids $instance_id > vm_result.json - - IP_ADDRESS=$(jq -r .Reservations[0].Instances[0].PublicIpAddress vm_result.json) - - echo "VM created with IP address: ${IP_ADDRESS}" - - echo "Installing docker engine in $VM_NAME, may take a few minutes." - DEST_SITE=${ADMIN_USERNAME}@${IP_ADDRESS} - scripts=$(cat << 'EOF' - sudo apt-get update && \ - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl gnupg lsb-release && \ - sudo mkdir -p /etc/apt/keyrings && \ - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \ - echo \ - "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null && \ - sudo apt-get update && \ - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io - EOF - ) - ssh -t -i $KEY_FILE -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${DEST_SITE} "$scripts" > /tmp/docker_engine.log - report_status "$?" 
"installing docker engine" - ssh -t -i $KEY_FILE -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${DEST_SITE} "sudo usermod -aG docker $ADMIN_USERNAME && exit" >> /tmp/docker_engine.log - report_status "$?" "installing docker engine" - - echo "Installing nvflare in $VM_NAME, may take a few minutes." - ssh -i $KEY_FILE -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${DEST_SITE} \ - "export PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin && \ - wget -q https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py && \ - python3 -m pip install {~~NVFLARE~~} && \ - mkdir -p ./cert && \ - exit" > /tmp/nvflare.json - report_status "$?" "installing nvflare" - - echo "Checking if certificate (web.crt) and private key (web.key) are available" - if [[ -f "web.crt" && -f "web.key" ]]; then - CERT_FOLDER=${DEST_SITE}:${DEST_FOLDER}/cert - echo "Cert folder is ${CERT_FOLDER}" - scp -i $KEY_FILE -r -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null web.{crt,key} $CERT_FOLDER - report_status "$?" "copying cert/key to VM ${CERT_FOLDER} folder" - secure=true - else - echo "No web.crt and web.key found" - secure=false - fi - - echo "Starting dashboard" - ssh -i $KEY_FILE -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ${DEST_SITE} \ - "export PATH=/home/ubuntu/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin && \ - python3 -m nvflare.dashboard.cli --start -f ${DEST_FOLDER} --cred ${credential} {~~START_OPT~~}" > /tmp/dashboard.json - - echo "Dashboard url is running at IP address ${IP_ADDRESS}, listening to port 443." - if [ "$secure" == true ] - then - echo "URL is https://${IP_ADDRESS}" - else - echo "URL is http://${IP_ADDRESS}:443" - fi - echo "Note: you may need to configure DNS server with your DNS hostname and the above IP address." - echo "Project admin credential (username:password) is ${credential} ." - echo "To terminate the EC2 instance, run the following command." - echo "aws ec2 terminate-instances --instance-ids ${instance_id}" - echo "Other resources provisioned" - echo "security group: ${SECURITY_GROUP}" - echo "key pair: ${KEY_PAIR}" diff --git a/nvflare/lighter/impl/azure_template.yml b/nvflare/lighter/impl/azure_template.yml deleted file mode 100644 index 8a5c100121..0000000000 --- a/nvflare/lighter/impl/azure_template.yml +++ /dev/null @@ -1,517 +0,0 @@ -azure_start_svr_header_sh: | - RESOURCE_GROUP=nvflare_rg - VM_NAME=nvflare_server - VM_IMAGE=Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest - VM_SIZE=Standard_B2ms - NSG_NAME=nvflare_nsgs - ADMIN_USERNAME=nvflare - PASSWORD="NVFl@r3_P@88"$RANDOM"w0rd" - DEST_FOLDER=/var/tmp/cloud - NIC_NAME=${VM_NAME}VMNic - SERVER_NAME={~~server_name~~} - FL_PORT=8002 - ADMIN_PORT=8003 - - echo "This script requires az (Azure CLI), sshpass and jq. Now checking if they are installed." - - check_binary az "Please see https://learn.microsoft.com/en-us/cli/azure/install-azure-cli on how to install it on your system." - check_binary sshpass "Please install it first." - check_binary jq "Please install it first." - - self_dns=true - if [[ "$SERVER_NAME" = *".cloudapp.azure.com"* ]] - then - DNS_TAG=$(echo $SERVER_NAME | cut -d "." -f 1) - DERIVED_LOCATION=$(echo $SERVER_NAME | cut -d "." -f 2) - LOCATION=$DERIVED_LOCATION - self_dns=false - else - echo "Warning: ${SERVER_NAME} does not end with .cloudapp.azure.com." - echo "The cloud launch process will not create the domain name for you." 
- echo "Please use your own DNS to set the information." - LOCATION=westus2 - fi - - if [ -z ${image_name+x} ] - then - container=false - else - container=true - fi - - if [ $container == true ] - then - VM_IMAGE=Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest - VM_SIZE=Standard_D8s_v3 - else - VM_IMAGE=Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest - VM_SIZE=Standard_B2ms - fi - - if [ -z ${config_file+x} ] - then - useDefault=true - else - useDefault=false - . $config_file - report_status "$?" "Loading config file" - if [ $self_dns == false ] && [ $DERIVED_LOCATION != $LOCATION ] - then - echo "Server name implies LOCATION=${DERIVED_LOCATION} but the config file specifies LOCATION=${LOCATION}. Unable to continue provisioning." - exit 1 - fi - fi - - if [ $useDefault == true ] - then - while true - do - prompt VM_IMAGE "Cloud VM image, press ENTER to accept default" "${VM_IMAGE}" - prompt VM_SIZE "Cloud VM size, press ENTER to accept default" "${VM_SIZE}" - if [ $self_dns == true ] - then - prompt LOCATION "Cloud location, press ENTER to accept default" "${LOCATION}" - prompt ans "VM image = ${VM_IMAGE}, VM size = ${VM_SIZE}, location = ${LOCATION}, OK? (Y/n)" - else - prompt ans "VM image = ${VM_IMAGE}, VM size = ${VM_SIZE}, OK? (Y/n)" - fi - if [[ $ans = "" ]] || [[ $ans =~ ^(y|Y)$ ]]; then break; fi - done - fi - - if [ $container == false ] - then - echo "If the client requires additional dependencies, please copy the requirements.txt to ${DIR}." - prompt ans "Press ENTER when it's done or no additional dependencies." - fi - - az login --use-device-code -o none - report_status "$?" "login" - - # Start provisioning - - if [ $(az group exists -n $RESOURCE_GROUP) == 'false' ] - then - echo "Creating Resource Group $RESOURCE_GROUP at Location $LOCATION" - az group create --output none --name $RESOURCE_GROUP --location $LOCATION - report_status "$?" "creating resource group" - elif [ $useDefault == true ] - then - report_status "1" "Only one NVFL server VM and its resource group is allowed. $RESOURCE_GROUP exists and thus creating duplicate resource group" - else - echo "Users require to reuse Resource Group $RESOURCE_GROUP. This script will modify the group and may not work always." - fi - - echo "Creating Virtual Machine, will take a few minutes" - if [ $self_dns == true ] - then - az vm create \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --location $LOCATION \ - --name $VM_NAME \ - --image $VM_IMAGE \ - --size $VM_SIZE \ - --admin-username $ADMIN_USERNAME \ - --admin-password $PASSWORD \ - --authentication-type password \ - --public-ip-address nvflare_server_ip \ - --public-ip-address-allocation static \ - --public-ip-sku Standard > /tmp/vm.json - else - az vm create \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --location $LOCATION \ - --name $VM_NAME \ - --image $VM_IMAGE \ - --size $VM_SIZE \ - --admin-username $ADMIN_USERNAME \ - --admin-password $PASSWORD \ - --authentication-type password \ - --public-ip-address nvflare_server_ip \ - --public-ip-address-allocation static \ - --public-ip-sku Standard \ - --public-ip-address-dns-name $DNS_TAG > /tmp/vm.json - fi - report_status "$?" "creating virtual machine" - - IP_ADDRESS=$(jq -r .publicIpAddress /tmp/vm.json) - echo "Setting up network related configuration" - az network nsg create \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --location $LOCATION \ - --name $NSG_NAME - report_status "$?" 
"creating network security group" - - az network nsg rule create \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --name SSH \ - --nsg-name $NSG_NAME \ - --priority 1000 \ - --protocol Tcp \ - --destination-port-ranges 22 - report_status "$?" "creating network security group rule for SSH" - - az network nsg rule create \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --name FL_PORT \ - --nsg-name $NSG_NAME \ - --priority 1001 \ - --protocol Tcp \ - --destination-port-ranges $FL_PORT - report_status "$?" "creating network security group rule for FL port" - - az network nsg rule create \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --name ADMIN_PORT \ - --nsg-name $NSG_NAME \ - --priority 1002 \ - --protocol Tcp \ - --destination-port-ranges $ADMIN_PORT - report_status "$?" "creating network security group rule for Admin port" - -azure_start_cln_header_sh: | - RESOURCE_GROUP=nvflare_client_rg_${RANDOM}_${RANDOM} - VM_NAME=nvflare_client - VM_IMAGE=Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest - VM_SIZE=Standard_B2ms - NSG_NAME=nvflare_nsgc - ADMIN_USERNAME=nvflare - PASSWORD="NVFl@r3_P@88"$RANDOM"w0rd" - DEST_FOLDER=/var/tmp/cloud - LOCATION=westus2 - NIC_NAME=${VM_NAME}VMNic - echo "This script requires az (Azure CLI), sshpass and jq. Now checking if they are installed." - - check_binary az "Please see https://learn.microsoft.com/en-us/cli/azure/install-azure-cli on how to install it on your system." - check_binary sshpass "Please install it first." - check_binary jq "Please install it first." - - - if [ -z ${image_name+x} ] - then - container=false - else - container=true - fi - - if [ $container == true ] - then - VM_IMAGE=Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest - VM_SIZE=Standard_D8s_v3 - else - VM_IMAGE=Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest - VM_SIZE=Standard_B2ms - fi - if [ -z ${config_file+x} ] - then - useDefault=true - else - useDefault=false - . $config_file - report_status "$?" "Loading config file" - fi - - if [ $useDefault == true ] - then - while true - do - prompt LOCATION "Cloud location, press ENTER to accept default" "${LOCATION}" - prompt VM_IMAGE "Cloud VM image, press ENTER to accept default" "${VM_IMAGE}" - prompt VM_SIZE "Cloud VM size, press ENTER to accept default" "${VM_SIZE}" - prompt ans "location = ${LOCATION}, VM image = ${VM_IMAGE}, VM size = ${VM_SIZE}, OK? (Y/n) " - if [[ $ans = "" ]] || [[ $ans =~ ^(y|Y)$ ]]; then break; fi - done - fi - - if [ $container == false ] - then - echo "If the client requires additional dependencies, please copy the requirements.txt to ${DIR}." - prompt ans "Press ENTER when it's done or no additional dependencies." - fi - - az login --use-device-code -o none - report_status "$?" "login" - - # Start provisioning - - if [ $(az group exists -n $RESOURCE_GROUP) == 'false' ] - then - echo "Creating Resource Group $RESOURCE_GROUP at Location $LOCATION" - az group create --output none --name $RESOURCE_GROUP --location $LOCATION - report_status "$?" "creating resource group" - else - echo "Resource Group $RESOURCE_GROUP exists, will reuse it." - fi - - echo "Creating Virtual Machine, will take a few minutes" - az vm create \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --location $LOCATION \ - --name $VM_NAME \ - --image $VM_IMAGE \ - --size $VM_SIZE \ - --admin-username $ADMIN_USERNAME \ - --admin-password $PASSWORD \ - --authentication-type password \ - --public-ip-sku Standard > /tmp/vm.json - report_status "$?" 
"creating virtual machine" - - IP_ADDRESS=$(jq -r .publicIpAddress /tmp/vm.json) - - echo "Setting up network related configuration" - - az network nsg create \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --location $LOCATION \ - --name $NSG_NAME - report_status "$?" "creating network security group" - - az network nsg rule create \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --name SSH \ - --nsg-name $NSG_NAME \ - --priority 1000 \ - --protocol Tcp \ - --destination-port-ranges 22 - report_status "$?" "creating network security group rule for SSH" - -azure_start_common_sh: | - az network nic update \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --name $NIC_NAME \ - --network-security-group $NSG_NAME - report_status "$?" "updating network interface card" - - echo "Copying files to $VM_NAME" - DEST=$ADMIN_USERNAME@${IP_ADDRESS}:$DEST_FOLDER - echo "Destination folder is ${DEST}" - cd $DIR/.. && sshpass -p $PASSWORD scp -r -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null $PWD $DEST - report_status "$?" "copying startup kits to VM" - - if [ $container == true ] - then - echo "Installing and lauching container in $VM_NAME, may take a few minutes." - scripts=$(cat << 'EOF' - sudo apt-get update && \ - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl gnupg lsb-release && \ - sudo mkdir -p /etc/apt/keyrings && \ - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \ - echo \ - "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null && \ - sudo apt-get update && \ - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io - EOF - ) - az vm run-command invoke \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --command-id RunShellScript \ - --name $VM_NAME \ - --scripts \ - "$scripts" > /tmp/docker_engine.json - report_status "$?" "installing docker engine" - az vm run-command invoke \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --command-id RunShellScript \ - --name $VM_NAME \ - --scripts \ - "sudo usermod -aG docker $ADMIN_USERNAME" >> /tmp/docker_engine.json - report_status "$?" "Setting user group" - az vm run-command invoke \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --command-id RunShellScript \ - --name $VM_NAME \ - --scripts \ - "docker run -d -v ${DEST_FOLDER}:${DEST_FOLDER} {~~docker_network~~} ${image_name} /bin/bash -c \"python -u -m nvflare.private.fed.app.{~~type~~}.{~~type~~}_train -m ${DEST_FOLDER} -s fed_{~~type~~}.json --set {~~cln_uid~~} secure_train=true config_folder=config org={~~ORG~~} \" " > /tmp/vm_create.json 2>&1 - report_status "$?" "launching container" - else - echo "Installing packages in $VM_NAME, may take a few minutes." - az vm run-command invoke \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --command-id RunShellScript \ - --name $VM_NAME \ - --scripts \ - "echo ${DEST_FOLDER} && wget -q https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py && python3 -m pip install --ignore-installed nvflare && touch ${DEST_FOLDER}/startup/requirements.txt && python3 -m pip install -r ${DEST_FOLDER}/startup/requirements.txt && ${DEST_FOLDER}/startup/start.sh && sleep 20 && cat ${DEST_FOLDER}/log.txt" > /tmp/vm_create.json - report_status "$?" 
"installing packages" - fi - echo "System was provisioned" - echo "To delete the resource group (also delete the VM), run the following command" - echo "az group delete -n ${RESOURCE_GROUP}" - echo "To login to the VM with SSH, use ${ADMIN_USERNAME} : ${PASSWORD}" > vm_credential.txt - -azure_start_dsb_sh: | - RESOURCE_GROUP=nvflare_dashboard_rg_${RANDOM}_${RANDOM} - VM_NAME=nvflare_dashboard - VM_IMAGE=Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest - VM_SIZE=Standard_B2ms - NSG_NAME=nvflare_nsgc - ADMIN_USERNAME=nvflare - PASSWORD="NVFl@r3_P@88"$RANDOM"w0rd" - DEST_FOLDER=/var/tmp/cloud - LOCATION=westus2 - NIC_NAME=${VM_NAME}VMNic - - echo "This script requires az (Azure CLI), sshpass and jq. Now checking if they are installed." - - check_binary az "Please see https://learn.microsoft.com/en-us/cli/azure/install-azure-cli on how to install it on your system." - check_binary sshpass "Please install it first." - check_binary jq "Please install it first." - - echo "One initial user will be created when starting dashboard." - echo "Please enter the email address for this user." - read email - credential="${email}:$RANDOM" - - az login --use-device-code -o none - report_status "$?" "login" - - # Start provisioning - if [ $(az group exists -n $RESOURCE_GROUP) == 'false' ] - then - echo "Creating Resource Group $RESOURCE_GROUP at Location $LOCATION" - az group create --output none --name $RESOURCE_GROUP --location $LOCATION - report_status "$?" "creating resource group" - else - echo "Resource Group $RESOURCE_GROUP exists, will reuse it." - fi - - echo "Creating Virtual Machine, will take a few minutes" - az vm create \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --location $LOCATION \ - --name $VM_NAME \ - --image $VM_IMAGE \ - --size $VM_SIZE \ - --admin-username $ADMIN_USERNAME \ - --admin-password $PASSWORD \ - --authentication-type password \ - --public-ip-sku Standard > /tmp/vm.json - report_status "$?" "creating virtual machine" - - IP_ADDRESS=$(jq -r .publicIpAddress /tmp/vm.json) - report_status "$?" "extracting ip address" - - echo "Setting up network related configuration" - az network nsg create \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --location $LOCATION \ - --name $NSG_NAME - report_status "$?" "creating network security group" - - az network nsg rule create \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --name SSH \ - --nsg-name $NSG_NAME \ - --priority 1000 \ - --protocol Tcp \ - --destination-port-ranges 22 - report_status "$?" "creating network security group rule for SSH" - - az network nsg rule create \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --name HTTPS \ - --nsg-name $NSG_NAME \ - --priority 1001 \ - --protocol Tcp \ - --destination-port-ranges 443 - report_status "$?" "creating network security group rule for HTTPS" - - az network nic update \ - --output none \ - --resource-group $RESOURCE_GROUP \ - --name $NIC_NAME \ - --network-security-group $NSG_NAME - report_status "$?" "updating network interface card" - - echo "Installing docker engine in $VM_NAME, may take a few minutes." 
- scripts=$(cat << 'EOF' - sudo apt-get update && \ - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates curl gnupg lsb-release && \ - sudo mkdir -p /etc/apt/keyrings && \ - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \ - echo \ - "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null && \ - sudo apt-get update && \ - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io - EOF - ) - az vm run-command invoke \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --command-id RunShellScript \ - --name $VM_NAME \ - --scripts \ - "$scripts" > /tmp/docker_engine.json - report_status "$?" "installing docker engine" - az vm run-command invoke \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --command-id RunShellScript \ - --name $VM_NAME \ - --scripts \ - "sudo usermod -aG docker $ADMIN_USERNAME" >> /tmp/docker_engine.json - report_status "$?" "installing docker engine" - - DEST_FOLDER=/home/${ADMIN_USERNAME} - echo "Installing nvflare in $VM_NAME, may take a few minutes." - az vm run-command invoke \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --command-id RunShellScript \ - --name $VM_NAME \ - --scripts \ - "echo ${DEST_FOLDER} && wget -q https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py && python3 -m pip install --ignore-installed {~~NVFLARE~~} && mkdir -p ${DEST_FOLDER}/cert && chown -R ${ADMIN_USERNAME} ${DEST_FOLDER}" > /tmp/nvflare.json - report_status "$?" "installing nvflare" - - echo "Checking if certificate (web.crt) and private key (web.key) are available" - if [[ -f "web.crt" && -f "web.key" ]]; then - DEST=$ADMIN_USERNAME@$IP_ADDRESS:${DEST_FOLDER}/cert - echo "Destination folder is ${DEST}" - sshpass -p $PASSWORD scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null web.{crt,key} $DEST - report_status "$?" "copying cert/key to VM ${DEST} folder" - secure=true - else - echo "No web.crt and web.key found" - secure=false - fi - - echo "Starting dashboard" - az vm run-command invoke \ - --output json \ - --resource-group $RESOURCE_GROUP \ - --command-id RunShellScript \ - --name $VM_NAME \ - --scripts \ - "cd ${DEST_FOLDER} && python3 -m nvflare.dashboard.cli --start -f ${DEST_FOLDER} --cred ${credential} {~~START_OPT~~}" > /tmp/dashboard.json - - # credential=$(jq -r .value[0].message /tmp/dashboard.json | grep "Project admin") - # echo "The VM was created with user: ${ADMIN_USERNAME} and password: ${PASSWORD}" - if [ "$secure" == true ] - then - echo "URL is https://${IP_ADDRESS}" - else - echo "URL is http://${IP_ADDRESS}:443" - fi - echo "Note: you may need to configure DNS server with your DNS hostname and the above IP address." - echo "Project admin credential (username:password) is ${credential} ." 
- echo "To stop the dashboard, run az group delete -n ${RESOURCE_GROUP}" - echo "To login to the VM with SSH, use ${ADMIN_USERNAME} : ${PASSWORD}" > vm_credential.txt diff --git a/nvflare/lighter/impl/master_template.yml b/nvflare/lighter/impl/master_template.yml deleted file mode 100644 index b183dbc28d..0000000000 --- a/nvflare/lighter/impl/master_template.yml +++ /dev/null @@ -1,1048 +0,0 @@ -readme_am: | - ********************************* - Admin Client package - ********************************* - The package includes at least the following files: - readme.txt - rootCA.pem - client.crt - client.key - fl_admin.sh - - Please install the nvflare package by 'python3 -m pip nvflare.' This will install a set of Python codes - in your environment. After installation, you can run the fl_admin.sh file to start communicating to the admin server. - - The rootCA.pem file is pointed by "ca_cert" in fl_admin.sh. If you plan to move/copy it to a different place, - you will need to modify fl_admin.sh. The same applies to the other two files, client.crt and client.key. - - The email in your submission to participate this Federated Learning project is embedded in the CN field of client - certificate, which uniquely identifies the participant. As such, please safeguard its private key, client.key. - -readme_fc: | - ********************************* - Federated Learning Client package - ********************************* - The package includes at least the following files: - readme.txt - rootCA.pem - client.crt - client.key - fed_client.json - start.sh - sub_start.sh - stop_fl.sh - - Run start.sh to start the client. - - The rootCA.pem file is pointed by "ssl_root_cert" in fed_client.json. If you plan to move/copy it to a different place, - you will need to modify fed_client.json. The same applies to the other two files, client.crt and client.key. - - The client name in your submission to participate this Federated Learning project is embedded in the CN field of client - certificate, which uniquely identifies the participant. As such, please safeguard its private key, client.key. - -readme_fs: | - ********************************* - Federated Learning Server package - ********************************* - The package includes at least the following files: - readme.txt - rootCA.pem - server.crt - server.key - authorization.json - fed_server.json - start.sh - sub_start.sh - stop_fl.sh - signature.json - - Run start.sh to start the server. - - The rootCA.pem file is pointed by "ssl_root_cert" in fed_server.json. If you plan to move/copy it to a different place, - you will need to modify fed_server.json. The same applies to the other two files, server.crt and server.key. - - Please always safeguard the server.key. 
- -gunicorn_conf_py: | - bind="0.0.0.0:{~~port~~}" - cert_reqs=2 - do_handshake_on_connect=True - timeout=30 - worker_class="nvflare.ha.overseer.worker.ClientAuthWorker" - workers=1 - wsgi_app="nvflare.ha.overseer.overseer:app" - -local_client_resources: | - { - "format_version": 2, - "client": { - "retry_timeout": 30, - "compression": "Gzip" - }, - "components": [ - { - "id": "resource_manager", - "path": "nvflare.app_common.resource_managers.gpu_resource_manager.GPUResourceManager", - "args": { - "num_of_gpus": 0, - "mem_per_gpu_in_GiB": 0 - } - }, - { - "id": "resource_consumer", - "path": "nvflare.app_common.resource_consumers.gpu_resource_consumer.GPUResourceConsumer", - "args": {} - }, - { - "id": "process_launcher", - "path": "nvflare.app_common.job_launcher.client_process_launcher.ClientProcessJobLauncher", - "args": {} - } - ] - } - -fed_client: | - { - "format_version": 2, - "servers": [ - { - "name": "spleen_segmentation", - "service": { - } - } - ], - "client": { - "ssl_private_key": "client.key", - "ssl_cert": "client.crt", - "ssl_root_cert": "rootCA.pem" - } - } - -sample_privacy: | - { - "scopes": [ - { - "name": "public", - "properties": { - "train_dataset": "/data/public/train", - "val_dataset": "/data/public/val" - }, - "task_result_filters": [ - { - "name": "AddNoiseToMinMax", - "args": { - "min_noise_level": 0.2, - "max_noise_level": 0.2 - } - }, - { - "name": "PercentilePrivacy", - "args": { - "percentile": 10, - "gamma": 0.02 - } - } - ], - "task_data_filters": [ - { - "name": "BadModelDetector" - } - ] - }, - { - "name": "private", - "properties": { - "train_dataset": "/data/private/train", - "val_dataset": "/data/private/val" - }, - "task_result_filters": [ - { - "name": "AddNoiseToMinMax", - "args": { - "min_noise_level": 0.1, - "max_noise_level": 0.1 - } - }, - { - "name": "SVTPrivacy", - "args": { - "fraction": 0.1, - "epsilon": 0.2 - } - } - ] - } - ], - "default_scope": "public" - } - -local_server_resources: | - { - "format_version": 2, - "servers": [ - { - "admin_storage": "transfer", - "max_num_clients": 100, - "heart_beat_timeout": 600, - "num_server_workers": 4, - "download_job_url": "http://download.server.com/", - "compression": "Gzip" - } - ], - "snapshot_persistor": { - "path": "nvflare.app_common.state_persistors.storage_state_persistor.StorageStatePersistor", - "args": { - "uri_root": "/", - "storage": { - "path": "nvflare.app_common.storages.filesystem_storage.FilesystemStorage", - "args": { - "root_dir": "/tmp/nvflare/snapshot-storage", - "uri_root": "/" - } - } - } - }, - "components": [ - { - "id": "job_scheduler", - "path": "nvflare.app_common.job_schedulers.job_scheduler.DefaultJobScheduler", - "args": { - "max_jobs": 4 - } - }, - { - "id": "job_manager", - "path": "nvflare.apis.impl.job_def_manager.SimpleJobDefManager", - "args": { - "uri_root": "/tmp/nvflare/jobs-storage", - "job_store_id": "job_store" - } - }, - { - "id": "job_store", - "path": "nvflare.app_common.storages.filesystem_storage.FilesystemStorage" - }, - { - "id": "process_launcher", - "path": "nvflare.app_common.job_launcher.server_process_launcher.ServerProcessJobLauncher", - "args": {} - } - ] - } - -fed_server: | - { - "format_version": 2, - "servers": [ - { - "name": "spleen_segmentation", - "service": { - "target": "localhost:8002" - }, - "admin_host": "localhost", - "admin_port": 5005, - "ssl_private_key": "server.key", - "ssl_cert": "server.crt", - "ssl_root_cert": "rootCA.pem" - } - ] - } - -fed_admin: | - { - "format_version": 1, - "admin": { - "with_file_transfer": 
true, - "upload_dir": "transfer", - "download_dir": "transfer", - "with_login": true, - "with_ssl": true, - "cred_type": "cert", - "client_key": "client.key", - "client_cert": "client.crt", - "ca_cert": "rootCA.pem", - "prompt": "> " - } - } - -default_authz: | - { - "format_version": "1.0", - "permissions": { - "project_admin": "any", - "org_admin": { - "submit_job": "none", - "clone_job": "none", - "manage_job": "o:submitter", - "download_job": "o:submitter", - "view": "any", - "operate": "o:site", - "shell_commands": "o:site", - "byoc": "none" - }, - "lead": { - "submit_job": "any", - "clone_job": "n:submitter", - "manage_job": "n:submitter", - "download_job": "n:submitter", - "view": "any", - "operate": "o:site", - "shell_commands": "o:site", - "byoc": "any" - }, - "member": { - "view": "any" - } - } - } - -fl_admin_sh: | - #!/usr/bin/env bash - DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - mkdir -p $DIR/../transfer - python3 -m nvflare.fuel.hci.tools.admin -m $DIR/.. -s fed_admin.json - -log_config: | - { - "version": 1, - "disable_existing_loggers": false, - "formatters": { - "baseFormatter": { - "()": "nvflare.fuel.utils.log_utils.BaseFormatter", - "fmt": "%(asctime)s - %(name)s - %(levelname)s - %(fl_ctx)s - %(message)s" - }, - "consoleFormatter": { - "()": "nvflare.fuel.utils.log_utils.ColorFormatter", - "fmt": "%(asctime)s - %(name)s - %(levelname)s - %(fl_ctx)s - %(message)s" - }, - "jsonFormatter": { - "()": "nvflare.fuel.utils.log_utils.JsonFormatter", - "fmt": "%(asctime)s - %(name)s - %(fullName)s - %(levelname)s - %(fl_ctx)s - %(message)s" - } - }, - "filters": { - "FLFilter": { - "()": "nvflare.fuel.utils.log_utils.LoggerNameFilter", - "logger_names": ["custom", "nvflare.app_common", "nvflare.app_opt"] - } - }, - "handlers": { - "consoleHandler": { - "class": "logging.StreamHandler", - "level": "DEBUG", - "formatter": "consoleFormatter", - "filters": [], - "stream": "ext://sys.stdout" - }, - "logFileHandler": { - "class": "logging.handlers.RotatingFileHandler", - "level": "DEBUG", - "formatter": "baseFormatter", - "filename": "log.txt", - "mode": "a", - "maxBytes": 20971520, - "backupCount": 10 - }, - "errorFileHandler": { - "class": "logging.handlers.RotatingFileHandler", - "level": "ERROR", - "formatter": "baseFormatter", - "filename": "log_error.txt", - "mode": "a", - "maxBytes": 20971520, - "backupCount": 10 - }, - "jsonFileHandler": { - "class": "logging.handlers.RotatingFileHandler", - "level": "DEBUG", - "formatter": "jsonFormatter", - "filename": "log.json", - "mode": "a", - "maxBytes": 20971520, - "backupCount": 10 - }, - "FLFileHandler": { - "class": "logging.handlers.RotatingFileHandler", - "level": "DEBUG", - "formatter": "baseFormatter", - "filters": ["FLFilter"], - "filename": "log_fl.txt", - "mode": "a", - "maxBytes": 20971520, - "backupCount": 10, - "delay": true - } - }, - "loggers": { - "root": { - "level": "INFO", - "handlers": ["consoleHandler", "logFileHandler", "errorFileHandler", "jsonFileHandler", "FLFileHandler"] - } - } - } - -start_ovsr_sh: | - #!/usr/bin/env bash - DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - NVFL_OVERSEER_HEARTBEAT_TIMEOUT=10 AUTHZ_FILE=$DIR/privilege.yml gunicorn -c $DIR/gunicorn.conf.py --keyfile $DIR/overseer.key --certfile $DIR/overseer.crt --ca-certs $DIR/rootCA.pem - -start_cln_sh: | - #!/usr/bin/env bash - DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - all_arguments="${@}" - doCloud=false - # parse arguments - while [[ $# -gt 0 ]] - do - 
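-      # one argument is consumed per pass; --cloud also consumes the CSP
-      # name (aws or azure) given as the next argument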
-    key="$1"
-    case $key in
-      --cloud)
-        doCloud=true
-        csp=$2
-        shift
-      ;;
-    esac
-    shift
-  done
-
-  if [ $doCloud == true ]
-  then
-    case $csp in
-      azure)
-        $DIR/azure_start.sh ${all_arguments}
-      ;;
-      aws)
-        $DIR/aws_start.sh ${all_arguments}
-      ;;
-      *)
-        echo "Only on-prem or azure or aws is currently supported."
-    esac
-  else
-    $DIR/sub_start.sh &
-  fi
-
-start_svr_sh: |
-  #!/usr/bin/env bash
-  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-  all_arguments="${@}"
-  doCloud=false
-  ha_mode={~~ha_mode~~}
-  # parse arguments
-  while [[ $# -gt 0 ]]
-  do
-    key="$1"
-    case $key in
-      --cloud)
-        if [ $ha_mode == false ]
-        then
-          doCloud=true
-          csp=$2
-          shift
-        else
-          echo "Cloud launch does not support NVFlare HA mode."
-          exit 1
-        fi
-      ;;
-    esac
-    shift
-  done
-
-  if [ $doCloud == true ]
-  then
-    case $csp in
-      azure)
-        $DIR/azure_start.sh ${all_arguments}
-      ;;
-      aws)
-        $DIR/aws_start.sh ${all_arguments}
-      ;;
-      *)
-        echo "Only on-prem or azure or aws is currently supported."
-    esac
-  else
-    $DIR/sub_start.sh &
-  fi
-
-stop_fl_sh: |
-  #!/usr/bin/env bash
-  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-  echo "Please use FL admin console to issue shutdown client command to properly stop this client."
-  echo "This stop_fl.sh script can only be used as the last resort to stop this client."
-  echo "It will not properly deregister the client to the server."
-  echo "The client status on the server after this shell script will be incorrect."
-  read -n1 -p "Would you like to continue (y/N)? " answer
-  case $answer in
-    y|Y)
-      echo
-      echo "Shutdown request created. Wait for local FL process to shutdown."
-      touch $DIR/../shutdown.fl
-      ;;
-    n|N|*)
-      echo
-      echo "Not continue"
-      ;;
-  esac
-
-sub_start_sh: |
-  #!/usr/bin/env bash
-  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-  echo "WORKSPACE set to $DIR/.."
-  mkdir -p $DIR/../transfer
-  export PYTHONPATH=/local/custom:$PYTHONPATH
-  echo "PYTHONPATH is $PYTHONPATH"
-
-  SECONDS=0
-  lst=-400
-  restart_count=0
-  start_fl() {
-    if [[ $(( $SECONDS - $lst )) -lt 300 ]]; then
-      ((restart_count++))
-    else
-      restart_count=0
-    fi
-    if [[ $(($SECONDS - $lst )) -lt 300 && $restart_count -ge 5 ]]; then
-      echo "System is in trouble and unable to start the task!!!!!"
-      rm -f $DIR/../pid.fl $DIR/../shutdown.fl $DIR/../restart.fl $DIR/../daemon_pid.fl
-      exit
-    fi
-    lst=$SECONDS
-    ((python3 -u -m nvflare.private.fed.app.{~~type~~}.{~~type~~}_train -m $DIR/.. -s fed_{~~type~~}.json --set secure_train=true {~~cln_uid~~} org={~~org_name~~} config_folder={~~config_folder~~} 2>&1 & echo $! >&3 ) 3>$DIR/../pid.fl )
-    pid=`cat $DIR/../pid.fl`
-    echo "new pid ${pid}"
-  }
-
-  stop_fl() {
-    if [[ ! -f "$DIR/../pid.fl" ]]; then
-      echo "No pid.fl. No need to kill process."
-      return
-    fi
-    pid=`cat $DIR/../pid.fl`
-    sleep 5
-    kill -0 ${pid} 2> /dev/null 1>&2
-    if [[ $? -ne 0 ]]; then
-      echo "Process already terminated"
-      return
-    fi
-    kill -9 $pid
-    rm -f $DIR/../pid.fl $DIR/../shutdown.fl $DIR/../restart.fl 2> /dev/null 1>&2
-  }
-
-  if [[ -f "$DIR/../daemon_pid.fl" ]]; then
-    dpid=`cat $DIR/../daemon_pid.fl`
-    kill -0 ${dpid} 2> /dev/null 1>&2
-    if [[ $? -eq 0 ]]; then
-      echo "There seems to be one instance, pid=$dpid, running."
-      echo "If you are sure it's not the case, please kill process $dpid and then remove daemon_pid.fl in $DIR/.."
-      exit
-    fi
-    rm -f $DIR/../daemon_pid.fl
-  fi
-
-  echo $BASHPID > $DIR/../daemon_pid.fl
-
-  while true
-  do
-    sleep 5
-    if [[ ! -f "$DIR/../pid.fl" ]]; then
-      echo "start fl because of no pid.fl"
-      start_fl
-      continue
-    fi
-    pid=`cat $DIR/../pid.fl`
-    kill -0 ${pid} 2> /dev/null 1>&2
-    if [[ $? -ne 0 ]]; then
-      if [[ -f "$DIR/../shutdown.fl" ]]; then
-        echo "Gracefully shutdown."
-        break
-      fi
-      echo "start fl because process of ${pid} does not exist"
-      start_fl
-      continue
-    fi
-    if [[ -f "$DIR/../shutdown.fl" ]]; then
-      echo "About to shutdown."
-      stop_fl
-      break
-    fi
-    if [[ -f "$DIR/../restart.fl" ]]; then
-      echo "About to restart."
-      stop_fl
-    fi
-  done
-
-  rm -f $DIR/../pid.fl $DIR/../shutdown.fl $DIR/../restart.fl $DIR/../daemon_pid.fl
-
-docker_cln_sh: |
-  #!/usr/bin/env bash
-  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-  # docker run script for FL client
-  # local data directory
-  : ${MY_DATA_DIR:="/home/flclient/data"}
-  # The syntax above is to set MY_DATA_DIR to /home/flcient/data if this
-  # environment variable is not set previously.
-  # Therefore, users can set their own MY_DATA_DIR with
-  # export MY_DATA_DIR=$SOME_DIRECTORY
-  # before running docker.sh
-
-  # for all gpus use line below
-  #GPU2USE='--gpus=all'
-  # for 2 gpus use line below
-  #GPU2USE='--gpus=2'
-  # for specific gpus as gpu#0 and gpu#2 use line below
-  #GPU2USE='--gpus="device=0,2"'
-  # to use host network, use line below
-  NETARG="--net=host"
-  # FL clients do not need to open ports, so the following line is not needed.
-  #NETARG="-p 443:443 -p 8003:8003"
-  DOCKER_IMAGE={~~docker_image~~}
-  echo "Starting docker with $DOCKER_IMAGE"
-  mode="${1:--r}"
-  if [ $mode = "-d" ]
-  then
-    docker run -d --rm --name={~~client_name~~} $GPU2USE -u $(id -u):$(id -g) \
-    -v /etc/passwd:/etc/passwd -v /etc/group:/etc/group -v $DIR/..:/workspace/ \
-    -v $MY_DATA_DIR:/data/:ro -w /workspace/ --ipc=host $NETARG $DOCKER_IMAGE \
-    /bin/bash -c "python -u -m nvflare.private.fed.app.client.client_train -m /workspace -s fed_client.json --set uid={~~client_name~~} secure_train=true config_folder=config org={~~org_name~~}"
-  else
-    docker run --rm -it --name={~~client_name~~} $GPU2USE -u $(id -u):$(id -g) \
-    -v /etc/passwd:/etc/passwd -v /etc/group:/etc/group -v $DIR/..:/workspace/ \
-    -v $MY_DATA_DIR:/data/:ro -w /workspace/ --ipc=host $NETARG $DOCKER_IMAGE /bin/bash
-  fi
-
-docker_svr_sh: |
-  #!/usr/bin/env bash
-  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-  # docker run script for FL server
-  # to use host network, use line below
-  NETARG="--net=host"
-  # or to expose specific ports, use line below
-  #NETARG="-p {~~admin_port~~}:{~~admin_port~~} -p {~~fed_learn_port~~}:{~~fed_learn_port~~}"
-  DOCKER_IMAGE={~~docker_image~~}
-  echo "Starting docker with $DOCKER_IMAGE"
-  svr_name="${SVR_NAME:-flserver}"
-  mode="${1:-r}"
-  if [ $mode = "-d" ]
-  then
-    docker run -d --rm --name=$svr_name -v $DIR/..:/workspace/ -w /workspace \
-    --ipc=host $NETARG $DOCKER_IMAGE /bin/bash -c \
-    "python -u -m nvflare.private.fed.app.server.server_train -m /workspace -s fed_server.json --set secure_train=true config_folder=config org={~~org_name~~}"
-  else
-    docker run --rm -it --name=$svr_name -v $DIR/..:/workspace/ -w /workspace/ --ipc=host $NETARG $DOCKER_IMAGE /bin/bash
-  fi
-
-docker_adm_sh: |
-  #!/usr/bin/env bash
-  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-  # docker run script for FL admin
-  # to use host network, use line below
-  #NETARG="--net=host"
-  # Admin clients do not need to open ports, so the following line is not needed.
-  #NETARG="-p 8003:8003"
-  DOCKER_IMAGE={~~docker_image~~}
-  echo "Starting docker with $DOCKER_IMAGE"
-  docker run --rm -it --name=fladmin -v $DIR/..:/workspace/ -w /workspace/ $DOCKER_IMAGE /bin/bash
-
-compose_yaml: |
-  services:
-    __overseer__:
-      build: ./nvflare
-      image: ${IMAGE_NAME}
-      volumes:
-        - .:/workspace
-      command: ["${WORKSPACE}/startup/start.sh"]
-      ports:
-        - "8443:8443"
-
-    __flserver__:
-      image: ${IMAGE_NAME}
-      ports:
-        - "8002:8002"
-        - "8003:8003"
-      volumes:
-        - .:/workspace
-        - nvflare_svc_persist:/tmp/nvflare/
-      command: ["${PYTHON_EXECUTABLE}",
-        "-u",
-        "-m",
-        "nvflare.private.fed.app.server.server_train",
-        "-m",
-        "${WORKSPACE}",
-        "-s",
-        "fed_server.json",
-        "--set",
-        "secure_train=true",
-        "config_folder=config",
-        "org=__org_name__",
-      ]
-
-    __flclient__:
-      image: ${IMAGE_NAME}
-      volumes:
-        - .:/workspace
-      command: ["${PYTHON_EXECUTABLE}",
-        "-u",
-        "-m",
-        "nvflare.private.fed.app.client.client_train",
-        "-m",
-        "${WORKSPACE}",
-        "-s",
-        "fed_client.json",
-        "--set",
-        "secure_train=true",
-        "uid=__flclient__",
-        "org=__org_name__",
-        "config_folder=config",
-      ]
-
-  volumes:
-    nvflare_svc_persist:
-
-dockerfile: |
-  RUN pip install -U pip
-  RUN pip install nvflare
-  COPY requirements.txt requirements.txt
-  RUN pip install -r requirements.txt
-
-helm_chart_chart: |
-  apiVersion: v2
-  name: nvflare
-  description: A Helm chart for NVFlare overseer and servers
-  type: application
-  version: 0.1.0
-  appVersion: "2.2.0"
-
-helm_chart_service_overseer: |
-  apiVersion: v1
-  kind: Service
-  metadata:
-    name: overseer
-  spec:
-    selector:
-      system: overseer
-    ports:
-      - protocol: TCP
-        port: 8443
-        targetPort: overseer-port
-
-helm_chart_service_server: |
-  apiVersion: v1
-  kind: Service
-  metadata:
-    name: server
-    labels:
-      system: server
-  spec:
-    selector:
-      system: server
-    ports:
-      - name: fl-port
-        protocol: TCP
-        port: 8002
-        targetPort: fl-port
-      - name: admin-port
-        protocol: TCP
-        port: 8003
-        targetPort: admin-port
-
-helm_chart_deployment_overseer: |
-  apiVersion: apps/v1
-  kind: Deployment
-  metadata:
-    name: overseer
-    labels:
-      system: overseer
-  spec:
-    replicas: 1
-    selector:
-      matchLabels:
-        system: overseer
-    template:
-      metadata:
-        labels:
-          system: overseer
-      spec:
-        volumes:
-          - name: workspace
-            hostPath:
-              path:
-              type: Directory
-        containers:
-          - name: overseer
-            image: nvflare-min:2.2.0
-            imagePullPolicy: IfNotPresent
-            volumeMounts:
-              - name: workspace
-                mountPath: /workspace
-            command: ["/workspace/overseer/startup/start.sh"]
-            ports:
-              - name: overseer-port
-                containerPort: 8443
-                protocol: TCP
-helm_chart_deployment_server: |
-  apiVersion: apps/v1
-  kind: Deployment
-  metadata:
-    name: server
-    labels:
-      system: server
-  spec:
-    replicas: 1
-    selector:
-      matchLabels:
-        system: server
-    template:
-      metadata:
-        labels:
-          system: server
-      spec:
-        volumes:
-          - name: workspace
-            hostPath:
-              path:
-              type: Directory
-          - name: persist
-            hostPath:
-              path: /tmp/nvflare
-              type: Directory
-        containers:
-          - name: server1
-            image: nvflare-min:2.2.0
-            imagePullPolicy: IfNotPresent
-            volumeMounts:
-              - name: workspace
-                mountPath: /workspace
-              - name: persist
-                mountPath: /tmp/nvflare
-            command: ["/usr/local/bin/python3"]
-            args:
-              [
-                "-u",
-                "-m",
-                "nvflare.private.fed.app.server.server_train",
-                "-m",
-                "/workspace/server",
-                "-s",
-                "fed_server.json",
-                "--set",
-                "secure_train=true",
-                "config_folder=config",
-                "org=__org_name__",
-
-              ]
-            ports:
-              - containerPort: 8002
-                protocol: TCP
-              - containerPort: 8003
-                protocol: TCP
-helm_chart_values: |
-  workspace: /home/nvflare
-  persist: /home/nvflare
-
-
-cloud_script_header: |
-  #!/usr/bin/env bash
-
-  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-  function report_status() {
-    status="$1"
-    if [ "${status}" -ne 0 ]
-    then
-      echo "$2 failed"
-      exit "${status}"
-    fi
-  }
-
-  function check_binary() {
-    echo -n "Checking if $1 exists. => "
-    if ! command -v $1 &> /dev/null
-    then
-      echo "not found. $2"
-      exit 1
-    else
-      echo "found"
-    fi
-  }
-
-  function prompt() {
-    # usage: prompt NEW_VAR "Prompt message" ["${PROMPT_VALUE}"]
-    local __resultvar=$1
-    local __prompt=$2
-    local __default=${3:-}
-    local __result
-    if [[ ${BASH_VERSINFO[0]} -ge 4 && -n "$__default" ]]
-    then
-      read -e -i "$__default" -p "$__prompt: " __result
-    else
-      __default=${3:-${!__resultvar:-}}
-      if [[ -n $__default ]]
-      then
-        printf "%s [%s]: " "$__prompt" "$__default"
-      else
-        printf "%s: " "$__prompt"
-      fi
-      IFS= read -r __result
-      if [[ -z "$__result" && -n "$__default" ]]
-      then
-        __result="$__default"
-      fi
-    fi
-    eval $__resultvar="'$__result'"
-  }
-
-  function get_resources_file() {
-    local rfile="${DIR}/../local/resources.json"
-    if [ -f "${rfile}" ]
-    then
-      echo "${rfile}"
-    elif [ -f "${rfile}.default" ]
-    then
-      echo "${rfile}.default"
-    else
-      echo ""
-      exit 1
-    fi
-  }
-
-  # parse arguments
-  while [[ $# -gt 0 ]]
-  do
-    key="$1"
-    case $key in
-      --config)
-        config_file=$2
-        shift
-      ;;
-      --image)
-        image_name=$2
-        shift
-      ;;
-      --vpc-id)
-        vpc_id=$2
-        shift
-      ;;
-      --subnet-id)
-        subnet_id=$2
-        shift
-      ;;
-    esac
-    shift
-  done
-
-adm_notebook: |
-  {
-   "cells": [
-    {
-     "cell_type": "markdown",
-     "id": "b758695b",
-     "metadata": {},
-     "source": [
-      "# System Info"
-     ]
-    },
-    {
-     "cell_type": "markdown",
-     "id": "9f7cd9e6",
-     "metadata": {},
-     "source": [
-      "In this notebook, System Info is checked with the FLARE API."
-     ]
-    },
-    {
-     "cell_type": "markdown",
-     "id": "ea50ba28",
-     "metadata": {},
-     "source": [
-      "#### 1. Connect to the FL System with the FLARE API\n",
-      "\n",
-      "Use `new_secure_session()` to initiate a session connecting to the FL Server with the FLARE API. The necessary arguments are the username of the admin user you are using and the corresponding startup kit location.\n",
-      "\n",
-      "In the code example below, we get the `admin_user_dir` by concatenating the workspace root with the default directories that are created if you provision a project with a given project name. You can change the values to what applies to your system if needed.\n",
-      "\n",
-      "Note that if debug mode is not enabled, there is no output after initiating a session successfully, so instead we print the output of `get_system_info()`. If you are unable to connect and initiate a session, make sure that your FL Server is running and that the configurations are correct with the right path to the admin startup kit directory."
-     ]
-    },
-    {
-     "cell_type": "code",
-     "execution_count": null,
-     "id": "0166942d",
-     "metadata": {
-      "collapsed": true
-     },
-     "outputs": [],
-     "source": [
-      "# Run this pip install if NVFlare is not installed in your Jupyter Notebook\n",
-      "\n",
-      "# !python3 -m pip install -U nvflare"
-     ]
-    },
-    {
-     "cell_type": "code",
-     "execution_count": null,
-     "id": "c3dbde69",
-     "metadata": {},
-     "outputs": [],
-     "source": [
-      "import os\n",
-      "from nvflare.fuel.flare_api.flare_api import new_secure_session\n",
-      "\n",
-      "username = \"{~~admin_name~~}\"  # change this to your own username\n",
-      "\n",
-      "sess = new_secure_session(\n",
-      "    username=username,\n",
-      "    startup_kit_location=os.getcwd()\n",
-      ")\n",
-      "print(sess.get_system_info())"
-     ]
-    },
-    {
-     "cell_type": "markdown",
-     "id": "31ccb6a6",
-     "metadata": {},
-     "source": [
-      "### 2. Shutting Down the FL System\n",
-      "\n",
-      "As of now, there is no specific FLARE API command for shutting down the FL system, but the FLARE API can use the `do_command()` function of the underlying AdminAPI to submit any commands that the FLARE Console supports including shutdown commands to the clients and server:"
-     ]
-    },
-    {
-     "cell_type": "code",
-     "execution_count": null,
-     "id": "b0d8aa9c",
-     "metadata": {},
-     "outputs": [],
-     "source": [
-      "print(sess.api.do_command(\"shutdown client\"))\n",
-      "print(sess.api.do_command(\"shutdown server\"))\n",
-      "\n",
-      "sess.close()"
-     ]
-    }
-   ],
-   "metadata": {
-    "kernelspec": {
-     "display_name": "Python 3 (ipykernel)",
-     "language": "python",
-     "name": "python3"
-    },
-    "language_info": {
-     "codemirror_mode": {
-      "name": "ipython",
-      "version": 3
-     },
-     "file_extension": ".py",
-     "mimetype": "text/x-python",
-     "name": "python",
-     "nbconvert_exporter": "python",
-     "pygments_lexer": "ipython3",
-     "version": "3.8.13"
-    },
-    "vscode": {
-     "interpreter": {
-      "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
-     }
-    }
-   },
-   "nbformat": 4,
-   "nbformat_minor": 5
-  }
-