Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for configurable JDKs #316

Merged
merged 5 commits into from
Jan 8, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions flintrock/config.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,6 @@ launch:
num-slaves: 1
# install-hdfs: True
# install-spark: False
# java-version: 8

debug: false
63 changes: 51 additions & 12 deletions flintrock/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def stop(self):
def add_slaves_check(self):
pass

def add_slaves(self, *, user: str, identity_file: str, new_hosts: list):
def add_slaves(self, *, user: str, identity_file: str, java_version: int, new_hosts: list):
"""
Add new slaves to the cluster.

Expand All @@ -285,6 +285,7 @@ def add_slaves(self, *, user: str, identity_file: str, new_hosts: list):
services=self.services,
user=user,
identity_file=identity_file,
java_version=java_version,
cluster=self,
new_hosts=new_hosts)
run_against_hosts(partial_func=partial_func, hosts=hosts)
Expand Down Expand Up @@ -506,42 +507,73 @@ def get_java_major_version(client: paramiko.client.SSHClient):
client=client,
command=command)
tokens = output.split()
# First line of the output is like: 'java version "1.8.0_20"'
# First line of the output is like: 'openjdk version "1.8.0_252"' or 'openjdk version "11.0.7" 2020-04-14'
# Get the version string and strip out the first two parts of the
# version as a tuple: (1, 8)
# version as an int: 7, 8, 9, 10...
if len(tokens) >= 3:
version_parts = tokens[2].strip('"').split(".")
if len(version_parts) >= 2:
return tuple(int(part) for part in version_parts[:2])
if version_parts[0] == "1":
# Java 6, 7 or 8
return int(version_parts[1])
else:
# Java 9+
return int(version_parts[0])
except SSHError:
pass

return None


def ensure_java8(client: paramiko.client.SSHClient):
def ensure_java(client: paramiko.client.SSHClient, java_version: int):
host = client.get_transport().getpeername()[0]
java_major_version = get_java_major_version(client)

if not java_major_version or java_major_version < (1, 8):
logger.info("[{h}] Installing Java 1.8...".format(h=host))
if java_major_version and java_major_version == java_version:
nchammas marked this conversation as resolved.
Show resolved Hide resolved
logger.info("Java {j} is already installed, skipping Java install".format(j=java_major_version))
elif java_major_version and java_major_version > java_version:
logger.warning("Downgrading existing Java {j} installation".format(j=java_major_version))

# sudo yum install -y java-1.8.0-openjdk
# sudo amazon-linux-extras install -y java-openjdk11
elif not java_major_version or java_major_version < java_version:
logger.info("[{h}] Installing Java {j}...".format(h=host, j=java_version))
nchammas marked this conversation as resolved.
Show resolved Hide resolved

install_adoptopenjdk_repo(client)
java_package = "adoptopenjdk-{j}-hotspot".format(j=java_version)
ssh_check_output(
client=client,
command="""
set -e

# Install Java 1.8 first to protect packages that depend on Java from being removed.
sudo yum install -y java-1.8.0-openjdk
# Install Java first to protect packages that depend on Java from being removed.
sudo yum install -q -y {jp}

# Remove any older versions of Java to force the default Java to 1.8.
# We don't use /etc/alternatives because it does not seem to update links in /usr/lib/jvm correctly,
# and we don't just rely on JAVA_HOME because some programs use java directly in the PATH.
sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk

sudo sh -c "echo export JAVA_HOME=/usr/lib/jvm/jre >> /etc/environment"
sudo sh -c "echo export JAVA_HOME=/usr/lib/jvm/{jp} >> /etc/environment"
source /etc/environment
""")
""".format(jp=java_package))


def install_adoptopenjdk_repo(client):
"""
Installs the adoptopenjdk.repo file into /etc/yum.repos.d/
"""
with client.open_sftp() as sftp:
sftp.put(
localpath=os.path.join(SCRIPTS_DIR, 'adoptopenjdk.repo'),
remotepath='/tmp/adoptopenjdk.repo')
ssh_check_output(
client=client,
command="""
# Use sudo to install the repo file
sudo mv /tmp/adoptopenjdk.repo /etc/yum.repos.d/
"""
)


def setup_node(
Expand All @@ -551,6 +583,7 @@ def setup_node(
# can be looked up by host and reused?
ssh_client: paramiko.client.SSHClient,
services: list,
java_version: int,
cluster: FlintrockCluster):
"""
Setup a new node.
Expand Down Expand Up @@ -592,7 +625,7 @@ def setup_node(
cluster.storage_dirs.root = storage_dirs['root']
cluster.storage_dirs.ephemeral = storage_dirs['ephemeral']

ensure_java8(ssh_client)
ensure_java(ssh_client, java_version)

for service in services:
try:
Expand All @@ -610,6 +643,7 @@ def setup_node(
def provision_cluster(
*,
cluster: FlintrockCluster,
java_version: int,
services: list,
user: str,
identity_file: str):
Expand All @@ -618,6 +652,7 @@ def provision_cluster(
"""
partial_func = functools.partial(
provision_node,
java_version=java_version,
services=services,
user=user,
identity_file=identity_file,
Expand Down Expand Up @@ -658,6 +693,7 @@ def provision_cluster(

def provision_node(
*,
java_version: int,
services: list,
user: str,
host: str,
Expand All @@ -680,6 +716,7 @@ def provision_node(
setup_node(
ssh_client=client,
services=services,
java_version=java_version,
cluster=cluster)
for service in services:
service.configure(
Expand Down Expand Up @@ -730,6 +767,7 @@ def add_slaves_node(
user: str,
host: str,
identity_file: str,
java_version: int,
services: list,
cluster: FlintrockCluster,
new_hosts: list):
Expand All @@ -753,6 +791,7 @@ def add_slaves_node(
setup_node(
ssh_client=client,
services=services,
java_version=java_version,
cluster=cluster)

for service in services:
Expand Down
6 changes: 6 additions & 0 deletions flintrock/ec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,9 @@ def add_slaves(
user: str,
identity_file: str,
num_slaves: int,
java_version: int,
spot_price: float,
spot_request_valid_until: str,
min_root_ebs_size_gb: int,
tags: list,
assume_yes: bool):
Expand Down Expand Up @@ -294,6 +296,7 @@ def add_slaves(
num_instances=num_slaves,
region=self.region,
spot_price=spot_price,
spot_request_valid_until=spot_request_valid_until,
ami=self.master_instance.image_id,
assume_yes=assume_yes,
key_name=self.master_instance.key_name,
Expand Down Expand Up @@ -331,6 +334,7 @@ def add_slaves(
super().add_slaves(
user=user,
identity_file=identity_file,
java_version=java_version,
new_hosts=new_slaves)
except (Exception, KeyboardInterrupt) as e:
if isinstance(e, InterruptedEC2Operation):
Expand Down Expand Up @@ -787,6 +791,7 @@ def launch(
*,
cluster_name,
num_slaves,
java_version,
services,
assume_yes,
key_name,
Expand Down Expand Up @@ -933,6 +938,7 @@ def launch(

provision_cluster(
cluster=cluster,
java_version=java_version,
services=services,
user=user,
identity_file=identity_file)
Expand Down
10 changes: 10 additions & 0 deletions flintrock/flintrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ def cli(cli_context, config, provider, debug):
@cli.command()
@click.argument('cluster-name')
@click.option('--num-slaves', type=click.IntRange(min=1), required=True)
@click.option('--java-version', type=click.IntRange(min=8), default=8)
@click.option('--install-hdfs/--no-install-hdfs', default=False)
@click.option('--hdfs-version', default='2.8.5')
@click.option('--hdfs-download-source',
Expand Down Expand Up @@ -323,6 +324,7 @@ def launch(
cli_context,
cluster_name,
num_slaves,
java_version,
install_hdfs,
hdfs_version,
hdfs_download_source,
Expand Down Expand Up @@ -436,6 +438,7 @@ def launch(
cluster = ec2.launch(
cluster_name=cluster_name,
num_slaves=num_slaves,
java_version=java_version,
services=services,
assume_yes=assume_yes,
key_name=ec2_key_name,
Expand Down Expand Up @@ -714,6 +717,7 @@ def stop(cli_context, cluster_name, ec2_region, ec2_vpc_id, assume_yes):

@cli.command(name='add-slaves')
@click.argument('cluster-name')
@click.option('--java-version', type=click.IntRange(min=8), default=8)
@click.option('--num-slaves', type=click.IntRange(min=1), required=True)
@click.option('--ec2-region', default='us-east-1', show_default=True)
@click.option('--ec2-vpc-id', default='', help="Leave empty for default VPC.")
Expand All @@ -722,6 +726,8 @@ def stop(cli_context, cluster_name, ec2_region, ec2_vpc_id, assume_yes):
help="Path to SSH .pem file for accessing nodes.")
@click.option('--ec2-user')
@click.option('--ec2-spot-price', type=float)
@click.option('--ec2-spot-request-duration', default='7d',
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @nchammas ,
I suspect that "add-slaves" in master is not working right now because this "ec2_spot_request_duration" parameter is missing.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. I'll report this over on #315.

help="Duration a spot request is valid (e.g. 3d 2h 1m).")
nchammas marked this conversation as resolved.
Show resolved Hide resolved
@click.option('--ec2-min-root-ebs-size-gb', type=int, default=30)
@click.option('--assume-yes/--no-assume-yes', default=False)
@click.option('--ec2-tag', 'ec2_tags',
Expand All @@ -733,12 +739,14 @@ def stop(cli_context, cluster_name, ec2_region, ec2_vpc_id, assume_yes):
def add_slaves(
cli_context,
cluster_name,
java_version,
num_slaves,
ec2_region,
ec2_vpc_id,
ec2_identity_file,
ec2_user,
ec2_spot_price,
ec2_spot_request_duration,
ec2_min_root_ebs_size_gb,
ec2_tags,
assume_yes):
Expand Down Expand Up @@ -769,6 +777,7 @@ def add_slaves(
provider_options = {
'min_root_ebs_size_gb': ec2_min_root_ebs_size_gb,
'spot_price': ec2_spot_price,
'spot_request_valid_until': ec2_spot_request_duration,
'tags': ec2_tags
}
else:
Expand All @@ -790,6 +799,7 @@ def add_slaves(
cluster.add_slaves(
user=user,
identity_file=identity_file,
java_version=java_version,
num_slaves=num_slaves,
assume_yes=assume_yes,
**provider_options)
Expand Down
6 changes: 6 additions & 0 deletions flintrock/scripts/adoptopenjdk.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[AdoptOpenJDK]
name=AdoptOpenJDK
baseurl=http://adoptopenjdk.jfrog.io/adoptopenjdk/rpm/centos/8/$basearch
enabled=1
gpgcheck=1
gpgkey=https://adoptopenjdk.jfrog.io/adoptopenjdk/api/gpg/key/public