From bac251471a9cb0756b540a381b73da690486f42c Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Sun, 18 Apr 2021 18:11:53 +0200 Subject: [PATCH 1/4] add support for installing extensions in parallel (WIP) --- easybuild/easyblocks/generic/rpackage.py | 91 ++++++++++++++++++++++-- easybuild/easyblocks/r/r.py | 4 ++ easybuild/easyblocks/r/rmpi.py | 5 +- easybuild/easyblocks/r/rserve.py | 4 +- easybuild/easyblocks/x/xml.py | 4 +- 5 files changed, 95 insertions(+), 13 deletions(-) diff --git a/easybuild/easyblocks/generic/rpackage.py b/easybuild/easyblocks/generic/rpackage.py index fdf5e6bae6..20e68203f5 100644 --- a/easybuild/easyblocks/generic/rpackage.py +++ b/easybuild/easyblocks/generic/rpackage.py @@ -33,6 +33,7 @@ @author: Balazs Hajgato (Vrije Universiteit Brussel) """ import os +import re from easybuild.easyblocks.r import EXTS_FILTER_R_PACKAGES, EB_R from easybuild.easyblocks.generic.configuremake import check_config_guess, obtain_config_guess @@ -85,6 +86,7 @@ def __init__(self, *args, **kwargs): self.configurevars = [] self.configureargs = [] self.ext_src = None + self._required_deps = None def make_r_cmd(self, prefix=None): """Create a command to run in R to install an R package.""" @@ -162,10 +164,16 @@ def build_step(self): def install_R_package(self, cmd, inp=None): """Install R package as specified, and check for errors.""" - cmdttdouterr, _ = run_cmd(cmd, log_all=True, simple=False, inp=inp, regexp=False) + output, _ = run_cmd(cmd, log_all=True, simple=False, inp=inp, regexp=False) + self.check_install_output(output) - cmderrors = parse_log_for_error(cmdttdouterr, regExp="^ERROR:") - if cmderrors: + def check_install_output(self, output): + """ + Check output of installation command, and clean up installation if needed. 
+ """ + errors = parse_log_for_error(output, regExp="^ERROR:") + if errors: + self.handle_installation_errors() cmd = "R -q --no-save" stdin = """ remove.library(%s) @@ -175,7 +183,7 @@ def install_R_package(self, cmd, inp=None): run_cmd(cmd, log_all=False, log_ok=False, simple=False, inp=stdin, regexp=False) raise EasyBuildError("Errors detected during installation of R package %s!", self.name) else: - self.log.debug("R package %s installed succesfully" % self.name) + self.log.debug("R package %s installed succesfully", self.name) def update_config_guess(self, path): """Update any config.guess found in specified directory""" @@ -197,13 +205,65 @@ def install_step(self): cmd, stdin = self.make_cmdline_cmd(prefix=os.path.join(self.installdir, self.cfg['exts_subdir'])) self.install_R_package(cmd, inp=stdin) - def run(self): + @property + def required_deps(self): + """Return list of required dependencies for this extension.""" + + if self._required_deps is None: + if self.src: + cmd = "tar --wildcards --extract --file %s --to-stdout '*/DESCRIPTION'" % self.src + out, _ = run_cmd(cmd, simple=False, trace=False) + + # lines that start with whitespace are merged with line above + lines = [] + for line in out.splitlines(): + if line and line[0] in (' ', '\t'): + lines[-1] = lines[-1] + line + else: + lines.append(line) + out = '\n'.join(lines) + + pkg_key = 'Package:' + deps_map = {} + deps = [] + pkg = None + + for line in out.splitlines(): + if pkg_key in line: + if pkg is not None: + deps = [] + + pkg_name_regex = re.compile(r'Package:\s*([^ ]+)') + res = pkg_name_regex.search(line) + if res: + pkg = res.group(1) + if pkg in deps_map: + deps = deps_map[pkg] + else: + raise EasyBuildError("Failed to determine package name from line '%s'", line) + + deps_map[pkg] = deps + + elif any(line.startswith(x) for x in ('Depends:', 'Imports:', 'LinkingTo:')): + # entries may specify version requirements between brackets (which we don't care about here) + dep_names = 
[x.split('(')[0].strip() for x in line.split(':', 1)[1].split(',')] + deps.extend([d for d in dep_names if d not in ('', 'R', self.name)]) + + self._required_deps = deps_map.get(self.name, []) + self.log.info("Required dependencies for %s: %s", self.name, self._required_deps) + else: + # no source => no required dependencies assumed + self._required_deps = [] + + return self._required_deps + + def run(self, asynchronous=False): """Install R package as an extension.""" # determine location if isinstance(self.master, EB_R): # extension is being installed as part of an R installation/module - (out, _) = run_cmd("R RHOME", log_all=True, simple=False) + (out, _) = run_cmd("R RHOME", log_all=True, simple=False, trace=False) rhome = out.strip() lib_install_prefix = os.path.join(rhome, 'library') else: @@ -223,7 +283,24 @@ def run(self): self.log.debug("Installing most recent version of R package %s (source not found)." % self.name) cmd, stdin = self.make_r_cmd(prefix=lib_install_prefix) - self.install_R_package(cmd, inp=stdin) + if asynchronous: + self.async_cmd_start(cmd, inp=stdin) + else: + self.install_R_package(cmd, inp=stdin) + + def async_cmd_check(self): + """ + Check progress of installation command that was started asynchronously. + + Output is checked for errors on completion. 
+ + :return: True if command completed, False otherwise + """ + done = super(RPackage, self).async_cmd_check() + if done: + self.check_install_output(self.async_cmd_output) + + return done def sanity_check_step(self, *args, **kwargs): """ diff --git a/easybuild/easyblocks/r/r.py b/easybuild/easyblocks/r/r.py index a165752540..a23bfaf7a6 100644 --- a/easybuild/easyblocks/r/r.py +++ b/easybuild/easyblocks/r/r.py @@ -106,6 +106,10 @@ def configure_step(self): self.log.warning(warn_msg) print_warning(warn_msg) + def install_extensions(self, **kwargs): + """Install extensions (asynchronously).""" + super(EB_R, self).install_extensions(parallel=True, **kwargs) + def make_module_req_guess(self): """ Add extra paths to modulefile diff --git a/easybuild/easyblocks/r/rmpi.py b/easybuild/easyblocks/r/rmpi.py index 489a8f0031..c856693217 100644 --- a/easybuild/easyblocks/r/rmpi.py +++ b/easybuild/easyblocks/r/rmpi.py @@ -40,7 +40,7 @@ class EB_Rmpi(RPackage): """Build and install Rmpi R library.""" - def run(self): + def run(self, **kwargs): """Set various configure arguments prior to building.""" mpi_types = { @@ -63,4 +63,5 @@ def run(self): "--with-mpi=%s" % self.toolchain.get_software_root(self.toolchain.MPI_MODULE_NAME)[0], "--with-Rmpi-type=%s" % Rmpi_type, ] - super(EB_Rmpi, self).run() # it might be needed to get the R cmd and run it with mympirun... + # it might be needed to get the R cmd and run it with mympirun... 
+ super(EB_Rmpi, self).run(**kwargs) diff --git a/easybuild/easyblocks/r/rserve.py b/easybuild/easyblocks/r/rserve.py index ba6bfca607..1603501642 100644 --- a/easybuild/easyblocks/r/rserve.py +++ b/easybuild/easyblocks/r/rserve.py @@ -37,8 +37,8 @@ class EB_Rserve(RPackage): """Build and install Rserve R library.""" - def run(self): + def run(self, **kwargs): """Set LIBS environment variable correctly prior to building.""" self.configurevars = ['LIBS="$LIBS -lpthread"'] - super(EB_Rserve, self).run() + super(EB_Rserve, self).run(**kwargs) diff --git a/easybuild/easyblocks/x/xml.py b/easybuild/easyblocks/x/xml.py index 26258090cc..24d7cc940b 100644 --- a/easybuild/easyblocks/x/xml.py +++ b/easybuild/easyblocks/x/xml.py @@ -39,7 +39,7 @@ class EB_XML(RPackage): """Support for installing the XML R package.""" - def install_R_package(self, cmd, inp=None): + def install_R_package(self, *args, **kwargs): """Customized install procedure for XML R package, add zlib lib path to LIBS.""" libs = os.getenv('LIBS', '') @@ -52,4 +52,4 @@ def install_R_package(self, cmd, inp=None): else: raise EasyBuildError("zlib module not loaded (required)") - super(EB_XML, self).install_R_package(cmd, inp) + return super(EB_XML, self).install_R_package(*args, **kwargs) From 14692c2726bf6fb5b9dd1eeec6ff03528d5dc2f1 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Tue, 26 Oct 2021 14:58:59 +0200 Subject: [PATCH 2/4] drop custom override of install_extensions in R easyblock, no longer needed --- easybuild/easyblocks/r/r.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/easybuild/easyblocks/r/r.py b/easybuild/easyblocks/r/r.py index a23bfaf7a6..a165752540 100644 --- a/easybuild/easyblocks/r/r.py +++ b/easybuild/easyblocks/r/r.py @@ -106,10 +106,6 @@ def configure_step(self): self.log.warning(warn_msg) print_warning(warn_msg) - def install_extensions(self, **kwargs): - """Install extensions (asynchronously).""" - super(EB_R, self).install_extensions(parallel=True, **kwargs) - def 
make_module_req_guess(self): """ Add extra paths to modulefile From b51ab1f646c4ce6fa3379810a2992b58777b7986 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Tue, 26 Oct 2021 15:25:39 +0200 Subject: [PATCH 3/4] implement run_async for RPackage --- easybuild/easyblocks/generic/rpackage.py | 27 ++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/easybuild/easyblocks/generic/rpackage.py b/easybuild/easyblocks/generic/rpackage.py index 20e68203f5..ca1b0a5bcf 100644 --- a/easybuild/easyblocks/generic/rpackage.py +++ b/easybuild/easyblocks/generic/rpackage.py @@ -257,8 +257,12 @@ def required_deps(self): return self._required_deps - def run(self, asynchronous=False): - """Install R package as an extension.""" + def prepare_r_ext_install(self): + """ + Prepare installation of R package as extension. + + :return: Shell command to run + string to pass to stdin. + """ # determine location if isinstance(self.master, EB_R): @@ -283,10 +287,21 @@ def run(self, asynchronous=False): self.log.debug("Installing most recent version of R package %s (source not found)." % self.name) cmd, stdin = self.make_r_cmd(prefix=lib_install_prefix) - if asynchronous: - self.async_cmd_start(cmd, inp=stdin) - else: - self.install_R_package(cmd, inp=stdin) + return cmd, stdin + + def run(self): + """ + Install R package as an extension. + """ + cmd, stdin = self.prepare_r_ext_install() + self.install_R_package(cmd, inp=stdin) + + def run_async(self): + """ + Start installation of R package as an extension asynchronously. 
+ """ + cmd, stdin = self.prepare_r_ext_install() + self.async_cmd_start(cmd, inp=stdin) def async_cmd_check(self): """ From 1bddb6513dd11360857e639b9916905ce2aaa7d4 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Tue, 26 Oct 2021 21:23:30 +0200 Subject: [PATCH 4/4] redefine run_async in custom easyblocks for Rmpi and Rserve --- easybuild/easyblocks/r/rmpi.py | 28 ++++++++++++++++++++++------ easybuild/easyblocks/r/rserve.py | 8 ++++++-- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/easybuild/easyblocks/r/rmpi.py b/easybuild/easyblocks/r/rmpi.py index c856693217..9d7f4f6083 100644 --- a/easybuild/easyblocks/r/rmpi.py +++ b/easybuild/easyblocks/r/rmpi.py @@ -40,8 +40,10 @@ class EB_Rmpi(RPackage): """Build and install Rmpi R library.""" - def run(self, **kwargs): - """Set various configure arguments prior to building.""" + def prepare_rmpi_configureargs(self): + """ + Prepare configure arguments for installing Rmpi. + """ mpi_types = { toolchain.MPI_TYPE_OPENMPI: "OPENMPI", @@ -51,17 +53,31 @@ def run(self, **kwargs): # type of MPI # MPI_TYPE does not distinguish between MPICH and IntelMPI, which is why we also check mpi_family() mpi_type = self.toolchain.mpi_family() - Rmpi_type = mpi_types[self.toolchain.MPI_TYPE] + rmpi_type = mpi_types[self.toolchain.MPI_TYPE] # Rmpi versions 0.6-4 and up support INTELMPI (using --with-Rmpi-type=INTELMPI) if ((LooseVersion(self.version) >= LooseVersion('0.6-4')) and (mpi_type == toolchain.INTELMPI)): - Rmpi_type = 'INTELMPI' + rmpi_type = 'INTELMPI' self.log.debug("Setting configure args for Rmpi") self.configureargs = [ "--with-Rmpi-include=%s" % self.toolchain.get_variable('MPI_INC_DIR'), "--with-Rmpi-libpath=%s" % self.toolchain.get_variable('MPI_LIB_DIR'), "--with-mpi=%s" % self.toolchain.get_software_root(self.toolchain.MPI_MODULE_NAME)[0], - "--with-Rmpi-type=%s" % Rmpi_type, + "--with-Rmpi-type=%s" % rmpi_type, ] + + def run(self): + """ + Install Rmpi as extension, after setting various configure 
arguments. + """ + self.prepare_rmpi_configureargs() + # it might be needed to get the R cmd and run it with mympirun... + super(EB_Rmpi, self).run() + + def run_async(self): + """ + Asynchronously install Rmpi as extension, after setting various configure arguments. + """ + self.prepare_rmpi_configureargs() # it might be needed to get the R cmd and run it with mympirun... - super(EB_Rmpi, self).run(**kwargs) + super(EB_Rmpi, self).run_async() diff --git a/easybuild/easyblocks/r/rserve.py b/easybuild/easyblocks/r/rserve.py index 1603501642..32ac5c11ae 100644 --- a/easybuild/easyblocks/r/rserve.py +++ b/easybuild/easyblocks/r/rserve.py @@ -37,8 +37,12 @@ class EB_Rserve(RPackage): """Build and install Rserve R library.""" - def run(self, **kwargs): + def run(self): """Set LIBS environment variable correctly prior to building.""" + self.configurevars = ['LIBS="$LIBS -lpthread"'] + super(EB_Rserve, self).run() + def run_async(self): + """Set LIBS environment variable correctly prior to building.""" self.configurevars = ['LIBS="$LIBS -lpthread"'] - super(EB_Rserve, self).run(**kwargs) + super(EB_Rserve, self).run_async()