diff --git a/.gitignore b/.gitignore index e1d8b168fe..40ba943252 100644 --- a/.gitignore +++ b/.gitignore @@ -158,10 +158,7 @@ src/tools/prun/prun src/tools/prte_info/prte_info src/tools/prted/prted src/tools/prte/prte -src/tools/pcc/pcc -src/tools/pcc/pcc-wrapper-data.txt src/tools/pterm/pterm -src/tools/psched/psched src/util/hostfile/hostfile_lex.c src/util/keyval/keyval_lex.c diff --git a/Makefile.am b/Makefile.am index 8768750743..fd76d080fa 100644 --- a/Makefile.am +++ b/Makefile.am @@ -27,7 +27,6 @@ SUBDIRS = config contrib src include docs EXTRA_DIST = README.md VERSION LICENSE autogen.pl -include examples/Makefile.include # Check for common symbols. Use a "-hook" to increase the odds that a # developer will see it at the end of their installation process. diff --git a/config/prte_config_files.m4 b/config/prte_config_files.m4 index ef3f2eb2e1..ef604c165f 100644 --- a/config/prte_config_files.m4 +++ b/config/prte_config_files.m4 @@ -20,12 +20,10 @@ AC_DEFUN([PRTE_CONFIG_FILES],[ src/etc/Makefile src/util/Makefile src/util/hostfile/Makefile - src/tools/pcc/Makefile src/tools/prted/Makefile src/tools/prun/Makefile src/tools/prte_info/Makefile src/tools/prte/Makefile src/tools/pterm/Makefile - src/tools/psched/Makefile ]) ]) diff --git a/docs/Makefile.am b/docs/Makefile.am index 6ef9e92e61..ce4651bb8f 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -68,7 +68,6 @@ PRTE_MAN1 = \ prted.1 \ prterun.1 \ prun.1 \ - psched.1 \ pterm.1 PRTE_MAN5 = \ diff --git a/docs/man/man1/index.rst b/docs/man/man1/index.rst index da27174695..a1d2e7beb0 100644 --- a/docs/man/man1/index.rst +++ b/docs/man/man1/index.rst @@ -9,5 +9,4 @@ Commands (section 1) prted.1.rst prterun.1.rst prun.1.rst - psched.1.rst pterm.1.rst diff --git a/docs/man/man1/psched.1.rst b/docs/man/man1/psched.1.rst deleted file mode 100644 index 2f44d3c548..0000000000 --- a/docs/man/man1/psched.1.rst +++ /dev/null @@ -1,189 +0,0 @@ -.. _man1-psched: - -psched -====== - -psched |mdash| a modest scheduler for PRRTE - -SYNOPSIS --------- - -.. code:: sh - - shell$ psched ...options... - -DESCRIPTION ------------ - -``psched`` is a standalone daemon that acts as a dynamic -scheduler for PRRTE. - -Extensive help documentation for this command is provided through -``psched --help [topic]``. - -COMMAND LINE OPTIONS --------------------- -The following command line options are recognized by ``psched``. - -General command line options -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. include:: /prrte-rst-content/cli-general.rst - -MCA parameters -^^^^^^^^^^^^^^ - -* ``--pmixmca ``: Pass context-specific PMIx MCA - parameters (``>`` is the parameter name; ```` is the - parameter value). - :ref:`See below for details `. - -* ``--prtemca ``: Pass context-specific PRRTE MCA - parameters to the scheduler. - :ref:`See below for details `. - -* ``--tune ``: File(s) containing MCA params for tuning - scheduler operations. - :ref:`See below for details `. - -Output options -^^^^^^^^^^^^^^ - -* ``--output ``: Comma-delimited list of options that control - how output is generated. :ref:`See below for details - `. - -* ``--stream-buffering ``: Control how output is buffered. - :ref:`See below for details `. - -Resource options -^^^^^^^^^^^^^^^^ - -* ``--default-hostfile ``: Provide a default hostfile. - -* ``-H`` | ``--host ``: Comma-delimited list of hosts to be - included in scheduler queues - :ref:`See below for details `. - -* ``--hostfile ``: Provide a hostfile. - :ref:`See below for details `. - -* ``--machinefile ``: Synonym for ``--hostfile``. - - -Specific options -^^^^^^^^^^^^^^^^ - -* ``--allow-run-as-root``: Allow execution as root **(STRONGLY - DISCOURAGED)**. :ref:`See below for details - `. - -* ``--daemonize``: Daemonize the scheduler into the background. - -* ``--no-ready-msg``: Do not output a "ready" message when the - scheduler has completed initializing. - -* ``--set-sid``: Direct the scheduler to separate from the current - session. - -* ``--tmpdir ``: Set the root for the session directory tree. - -* ``--report-pid ``: Print out PID on stdout (``-``), stderr - (``+``), or a filename (anything else) - -* ``--report-uri ``: Print out URI on stdout (``-``), stderr - (``+``), or a filename (anything else) - -* ``--keepalive ``: Named pipe filename to monitor |mdash| - ``psched`` will terminate upon closure - - -Debug options -^^^^^^^^^^^^^ - -* ``--debug``: Synonym for ``--leave-session-attached`` - -* ``--leave-session-attached``: Do not discard stdout/stderr of remote - PRTE daemons. - :ref:`See below for details `. - - -Details of individual command line options -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The sections below offer more detail than the abbreviated lists, -above. - -.. _label-psched-pmixmca: - -The ``--pmixmca`` option -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /prrte-rst-content/cli-pmixmca.rst - -.. _label-psched-prtemca: - -The ``--prtemca`` option -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /prrte-rst-content/cli-prtemca.rst - -.. _label-psched-tune: - -The ``--tune`` option -~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /prrte-rst-content/cli-tune.rst - -.. _label-psched-allow-run-as-root: - -The ``--allow-run-as-root`` option -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /prrte-rst-content/cli-allow-run-as-root.rst - -.. _label-psched-output: - -The ``--output`` option -~~~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /prrte-rst-content/cli-output.rst - -.. _label-psched-stream-buffering: - -The ``--stream-buffering`` option -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /prrte-rst-content/cli-stream-buffering.rst - -.. _label-psched-host: - -The ``--host`` option -~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /prrte-rst-content/cli-dash-host.rst - -.. _label-psched-hostfile: - -The ``--hostfile`` option -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /prrte-rst-content/cli-dvm-hostfile.rst - -.. _label-psched-leave-session-attached: - -The ``--leave-session-attached`` option -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /prrte-rst-content/cli-leave-session-attached.rst - -.. _label-psched-display: - -The ``--display`` option -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. include:: /prrte-rst-content/cli-display.rst - - -.. seealso:: - :ref:`prte(1) ` diff --git a/examples/Makefile b/examples/Makefile deleted file mode 100644 index 07b3d06d18..0000000000 --- a/examples/Makefile +++ /dev/null @@ -1,71 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013 Mellanox Technologies, Inc. All rights reserved. -# Copyright (c) 2016-2020 Intel, Inc. All rights reserved. -# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the PRRTE-provided wrapper compiler - -CC = pcc - -# Using -g is not necessary, but it is helpful for example programs, -# especially if users want to examine them with debuggers. - -CFLAGS = -g - -# Example programs to build - -EXAMPLES = \ - client \ - client2 \ - debugger/direct \ - debugger/direct-multi \ - debugger/indirect \ - debugger/indirect-multi \ - debugger/attach \ - debugger/daemon \ - debugger/hello \ - debugger/stdincheck \ - dmodex \ - dynamic \ - fault \ - pub \ - tool \ - alloc \ - probe \ - target \ - hello \ - log \ - bad_exit \ - jctrl \ - launcher \ - showkeys \ - legacy \ - colocate \ - pset \ - nodeid - -all: $(EXAMPLES) - -# The usual "clean" target - -clean: - rm -f $(EXAMPLES) *~ *.o diff --git a/examples/Makefile.include b/examples/Makefile.include deleted file mode 100644 index d971d304bc..0000000000 --- a/examples/Makefile.include +++ /dev/null @@ -1,72 +0,0 @@ -# -*- makefile -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2013 Mellanox Technologies, Inc. All rights reserved. -# Copyright (c) 2016-2020 Intel, Inc. All rights reserved. -# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Note that this file does not stand on its own. It is included by a -# higher-level Makefile so that Automake features such as "make dist" -# work properly (and include all the relevant files in this directory -# in the distribution tarball). - -# If you are looking for the file that builds these examples, look at -# "Makefile" in this same directory (it is *NOT* generated by -# Automake). - -EXTRA_DIST += \ - examples/README \ - examples/Makefile \ - examples/examples.h \ - examples/alloc.c \ - examples/bad_exit.c \ - examples/client.c \ - examples/client2.c\ - examples/daemon_error_notify.c \ - examples/debugger/debugger.h \ - examples/debugger/attach.c \ - examples/debugger/daemon.c \ - examples/debugger/direct.c \ - examples/debugger/direct-multi.c \ - examples/debugger/indirect.c \ - examples/debugger/indirect-multi.c \ - examples/debugger/hello.c \ - examples/debugger/stdincheck.c \ - examples/debugger/mpihello.c \ - examples/dmodex.c \ - examples/dynamic.c \ - examples/error_notify.c \ - examples/fault.c \ - examples/hello.c \ - examples/jctrl.c \ - examples/launcher.c \ - examples/legacy.c \ - examples/log.c \ - examples/pmi1client.c \ - examples/probe.c \ - examples/pub.c \ - examples/server.c \ - examples/showkeys.c \ - examples/target.c \ - examples/tool.c \ - examples/colocate.c \ - examples/pset.c \ - examples/nodeid.c diff --git a/examples/README b/examples/README deleted file mode 100644 index a7fd5aae50..0000000000 --- a/examples/README +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2016 Intel, Inc. All rights reserved. -# $COPYRIGHT$ - -The files in this directory are sample PMIx applications provided both -as a trivial primer to PMIx as well as simple tests to ensure that your -PMIx Reference Server (PSVR) installation is working properly: - -client.c: -A simple PMIx client that attaches to the server and performs a few -simple PMIx_Get calls following a blocking fence operation that returns -all collected data from calls to PMIx_Put. - -debugger.c: -Mimics a debugger tool. Contacts the PSVR to obtain -nspace information and displays it. Takes user input as to the application -to be "debugged", and then instructs PSVR to launch the companion -"debuggerd" daemons on the nodes where the application is running. The -daemons then connect to the local PSVR daemon and issue a "breakpoint" -notification so the application knows the debugger has attached and is -released from its barrier. The debugger then cleans up and quits. - -dmodex.c: -A simple PMIx client that attaches to the server and performs a few -simple PMIx_Get calls following a blocking fence operation that has -been directed _not_ to return any collected data from calls to PMIx_Put. - -dynamic.c: - - -The Makefile assumes that the pcc wrapper compiler is in your path. - -Make today a PMIx day! diff --git a/examples/alloc.c b/examples/alloc.c deleted file mode 100644 index 3933dc2a9e..0000000000 --- a/examples/alloc.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include -#include -#include -#include -#include - -#include "examples.h" -#include - -/* this is a callback function for the PMIx_Query and - * PMIx_Allocate APIs. The query will callback with a status indicating - * if the request could be fully satisfied, partially - * satisfied, or completely failed. The info parameter - * contains an array of the returned data, with the - * info->key field being the key that was provided in - * the query call. Thus, you can correlate the returned - * data in the info->value field to the requested key. - * - * Once we have dealt with the returned data, we must - * call the release_fn so that the PMIx library can - * cleanup */ -static void infocbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, void *cbdata, - pmix_release_cbfunc_t release_fn, void *release_cbdata) -{ - myquery_data_t *mq = (myquery_data_t *) cbdata; - size_t n; - - fprintf(stderr, "Allocation request returned %s\n", PMIx_Error_string(status)); - - /* save the returned info - the PMIx library "owns" it - * and will release it and perform other cleanup actions - * when release_fn is called */ - if (0 < ninfo) { - PMIX_INFO_CREATE(mq->info, ninfo); - mq->ninfo = ninfo; - for (n = 0; n < ninfo; n++) { - fprintf(stderr, "Transferring %s\n", info[n].key); - PMIX_INFO_XFER(&mq->info[n], &info[n]); - } - } - /* the status returned here indicates whether the requested - * information was found or not - preserve it */ - mq->lock.status = status; - - /* let the library release the data and cleanup from - * the operation */ - if (NULL != release_fn) { - release_fn(release_cbdata); - } - - /* release the block */ - DEBUG_WAKEUP_THREAD(&mq->lock); -} - -/* this is an event notification function that we explicitly request - * be called when the PMIX_ERR_ALLOC_COMPLETE notification is issued. - * We could catch it in the general event notification function and test - * the status to see if it was "alloc complete", but it often is simpler - * to declare a use-specific notification callback point. In this case, - * we are asking to know when the allocation request completes */ -static void release_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - size_t n; - - /* find the return object */ - lock = NULL; - for (n = 0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { - lock = (myrel_t *) info[n].value.data.ptr; - break; - } - } - /* if the object wasn't returned, then that is an error */ - if (NULL == lock) { - fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); - /* let the event handler progress */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } - return; - } - - /* tell the event handler state machine that we are the last step */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - /* the status will be PMIX_ERR_ALLOC_COMPLETE since that is the code - * we registered to receive. The result of the allocation request is - * in the info array - for now, just assume success */ - lock->lock.status = PMIX_SUCCESS; - /* release the lock */ - DEBUG_WAKEUP_THREAD(&lock->lock); -} - -/* event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it sees us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - if (PMIX_SUCCESS != status) { - fprintf(stderr, "EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", status, - (unsigned long) evhandler_ref); - } - lock->status = status; - lock->evhandler_ref = evhandler_ref; - DEBUG_WAKEUP_THREAD(lock); -} - -int main(int argc, char **argv) -{ - pmix_proc_t myproc; - int rc; - pmix_value_t value; - pmix_value_t *val = &value; - pmix_proc_t proc; - uint32_t nprocs; - pmix_info_t *info; - uint64_t nnodes = 12; - myquery_data_t mydata; - pmix_query_t *query; - char *myallocation = "MYALLOCATION"; - mylock_t mylock; - pmix_status_t code; - myrel_t myrel; - - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); - - /* get our job size */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - nprocs = val->data.uint32; - PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs); - - if (0 == myproc.rank) { - /* try to get an allocation */ - DEBUG_CONSTRUCT_MYQUERY(&mydata); - PMIX_INFO_CREATE(info, 2); - PMIX_INFO_LOAD(&info[0], PMIX_ALLOC_NUM_NODES, &nnodes, PMIX_UINT64); - PMIX_INFO_LOAD(&info[0], PMIX_ALLOC_ID, myallocation, PMIX_STRING); - if (PMIX_SUCCESS - != (rc = PMIx_Allocation_request_nb(PMIX_ALLOC_NEW, info, 2, infocbfunc, &mydata))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Allocation_request_nb failed: %d\n", - myproc.nspace, myproc.rank, rc); - goto done; - } - DEBUG_WAIT_THREAD(&mydata.lock); - PMIX_INFO_FREE(info, 2); - fprintf(stderr, "Client ns %s rank %d: Allocation returned status: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(mydata.lock.status)); - DEBUG_DESTRUCT_MYQUERY(&mydata); - /* if it didn't succeed and we have peers out there, then we better wake - * them up */ - - } else if (1 == myproc.rank) { - /* demonstrate a notification based approach - register a handler - * specifically for when the allocation operation completes */ - DEBUG_CONSTRUCT_MYREL(&myrel); - PMIX_INFO_CREATE(info, 2); - PMIX_INFO_LOAD(&info[0], PMIX_ALLOC_ID, myallocation, PMIX_STRING); - PMIX_INFO_LOAD(&info[1], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - DEBUG_CONSTRUCT_LOCK(&mylock); - code = PMIX_NOTIFY_ALLOC_COMPLETE; - PMIx_Register_event_handler(&code, 1, info, 2, release_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - PMIX_INFO_FREE(info, 2); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - - /* now wait to hear that the request is complete */ - DEBUG_WAIT_THREAD(&myrel.lock); - fprintf(stderr, "[%s:%d] Allocation returned status: %s\n", myproc.nspace, myproc.rank, - PMIx_Error_string(myrel.lock.status)); - DEBUG_DESTRUCT_MYREL(&myrel); - - } else { - /* demonstrate a query-based approach - wait a little while and ask to - * see if it was done */ - usleep(10); - DEBUG_CONSTRUCT_MYQUERY(&mydata); - - PMIX_QUERY_CREATE(query, 1); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_ALLOC_STATUS); - PMIX_INFO_CREATE(query[0].qualifiers, 1); - PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_ALLOC_ID, myallocation, PMIX_STRING); - - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, 1, infocbfunc, (void *) &mydata))) { - fprintf(stderr, "PMIx_Query_info failed: %d\n", rc); - goto done; - } - DEBUG_WAIT_THREAD(&mydata.lock); - PMIX_QUERY_FREE(query, 1); - fprintf(stderr, "[%s:%d] Allocation returned status: %s\n", myproc.nspace, myproc.rank, - PMIx_Error_string(mydata.lock.status)); - DEBUG_DESTRUCT_MYQUERY(&mydata); - } - -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/bad_exit.c b/examples/bad_exit.c deleted file mode 100644 index 440cf60a1a..0000000000 --- a/examples/bad_exit.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -/* this is a callback function for the PMIx_Query - * API. The query will callback with a status indicating - * if the request could be fully satisfied, partially - * satisfied, or completely failed. The info parameter - * contains an array of the returned data, with the - * info->key field being the key that was provided in - * the query call. Thus, you can correlate the returned - * data in the info->value field to the requested key. - * - * Once we have dealt with the returned data, we must - * call the release_fn so that the PMIx library can - * cleanup */ -static void cbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, void *cbdata, - pmix_release_cbfunc_t release_fn, void *release_cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - size_t n; - char *tmp; - pmix_status_t rc; - - lock->status = status; - - fprintf(stderr, "Query returned %d values status %s\n", (int) ninfo, PMIx_Error_string(status)); - /* print out the returned keys and pmix_info_t structs */ - for (n = 0; n < ninfo; n++) { - fprintf(stderr, "KEY: %s\n", info[n].key); - rc = PMIx_Data_print(&tmp, NULL, &info[n].value, info[n].value.type); - if (PMIX_SUCCESS != rc) { - lock->status = rc; - goto done; - } - rc = PMIx_Data_print(&tmp, NULL, &info[n].value, info[n].value.type); - if (PMIX_SUCCESS != rc) { - lock->status = rc; - goto done; - } - fprintf(stderr, "Key %s Type %s(%d)\n", info[n].key, - PMIx_Data_type_string(info[n].value.type), info[n].value.type); - free(tmp); - } - -done: - /* let the library release the data and cleanup from - * the operation */ - if (NULL != release_fn) { - release_fn(release_cbdata); - } - - /* release the block */ - DEBUG_WAKEUP_THREAD(lock); -} - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pid_t pid; - char hostname[1024]; - pmix_value_t *val; - uint16_t localrank; - size_t n; - pmix_query_t query; - mylock_t mylock; - - pid = getpid(); - gethostname(hostname, 1024); - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes any - * debugger flag instructing us to stop-in-init. If such a directive - * is included, then the process will be stopped in this call until - * the "debugger release" notification arrives */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %s\n", myproc.nspace, myproc.rank, - PMIx_Error_string(rc)); - exit(0); - } - /* get our local rank */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_LOCAL_RANK, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get local rank failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - goto done; - } - localrank = val->data.uint16; - PMIX_VALUE_RELEASE(val); - - fprintf(stderr, "Client ns %s rank %d pid %lu: Running on host %s localrank %d\n", - myproc.nspace, myproc.rank, (unsigned long) pid, hostname, (int) localrank); - -done: - if (0 == myproc.rank) { - exit(1); - } else { - sleep(3); - } - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/client.c b/examples/client.c deleted file mode 100644 index 24ebc2d4ab..0000000000 --- a/examples/client.c +++ /dev/null @@ -1,374 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -/* this is the event notification function we pass down below - * when registering for general events - i.e.,, the default - * handler. We don't technically need to register one, but it - * is usually good practice to catch any events that occur */ -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } -} - -/* this is an event notification function that we explicitly request - * be called when the PMIX_ERR_DEBUGGER_RELEASE notification is issued. - * We could catch it in the general event notification function and test - * the status to see if it was "debugger release", but it often is simpler - * to declare a use-specific notification callback point. In this case, - * we are asking to know when we are told the debugger released us */ -static void release_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - size_t n; - - /* find the return object */ - lock = NULL; - for (n = 0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { - lock = (myrel_t *) info[n].value.data.ptr; - break; - } - } - /* if the object wasn't returned, then that is an error */ - if (NULL == lock) { - fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); - /* let the event handler progress */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } - return; - } - - /* tell the event handler state machine that we are the last step */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - /* the status will be PMIX_ERR_DEBUGGER_RELEASE since that is the code - * we registered to receive, so just return success */ - lock->lock.status = PMIX_SUCCESS; - /* release the lock */ - DEBUG_WAKEUP_THREAD(&lock->lock); -} - -/* event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - lock->status = status; - lock->evhandler_ref = evhandler_ref; - DEBUG_WAKEUP_THREAD(lock); -} - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_value_t value; - pmix_value_t *val = &value; - char *tmp; - pmix_proc_t proc; - uint32_t nprocs, n, sid; - pmix_info_t *info; - bool flag; - mylock_t mylock; - myrel_t myrel; - pmix_status_t dbg = PMIX_ERR_DEBUGGER_RELEASE; - pid_t pid; - - pid = getpid(); - fprintf(stderr, "Client %lu: Running\n", (unsigned long) pid); - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes the - * location of all procs in our job */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - - /* register our default event handler - again, this isn't strictly - * required, but is generally good practice */ - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, - myproc.rank); - goto done; - } - - /* job-related info is found in our nspace, assigned to the - * wildcard rank as it doesn't relate to a specific rank. Setup - * a name to retrieve such values */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - - /* check to see if we have been instructed to wait for a debugger - * to attach to us. We won't get both a stop-in-init AND a - * wait-for-notify directive, so we should never stop twice. This - * directive is provided so that something like an MPI implementation - * can do some initial setup in MPI_Init prior to pausing for the - * debugger */ - if (PMIX_SUCCESS == (rc = PMIx_Get(&proc, PMIX_DEBUG_STOP_IN_APP, NULL, 0, &val))) { - /* register for debugger release */ - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIX_INFO_CREATE(info, 1); - DEBUG_CONSTRUCT_MYREL(&myrel); - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - PMIx_Register_event_handler(&dbg, 1, info, 1, release_fn, evhandler_reg_callbk, - (void *) &mylock); - /* wait for registration to complete */ - DEBUG_WAIT_THREAD(&mylock); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_INFO_FREE(info, 1); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "[%s:%d] Debug handler registration failed\n", myproc.nspace, - myproc.rank); - goto done; - } - /* wait for debugger release */ - DEBUG_WAIT_THREAD(&myrel.lock); - DEBUG_DESTRUCT_MYREL(&myrel); - } - - /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - PMIX_VALUE_GET_NUMBER(rc, val, n, uint32_t); - fprintf(stderr, "Client %s:%d universe size %u\n", myproc.nspace, myproc.rank, n); - - /* get the number of procs in our job - univ size is the total number of allocated - * slots, not the number of procs in the job */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - PMIX_VALUE_GET_NUMBER(rc, val, nprocs, uint32_t); - PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d num procs %d\n", myproc.nspace, myproc.rank, nprocs); - - /* get out sessionID */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_SESSION_ID, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get sessionID failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - PMIX_VALUE_GET_NUMBER(rc, val, sid, uint32_t); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Session ID was not a number: %s\n", PMIx_Error_string(rc)); - goto done; - } - fprintf(stderr, "Client %s:%d sessionID %u\n", myproc.nspace, myproc.rank, sid); - PMIX_VALUE_RELEASE(val); - } - - /* get out jobID */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOBID, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get jobID failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client %s:%d jobID %s\n", myproc.nspace, myproc.rank, val->data.string); - PMIX_VALUE_RELEASE(val); - } - - /* get the number of local procs in our job */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get local size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - PMIX_VALUE_GET_NUMBER(rc, val, n, uint32_t); - PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d num local procs %d\n", myproc.nspace, myproc.rank, n); - - /* put a few values */ - if (0 > asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank)) { - exit(1); - } - value.type = PMIX_UINT32; - value.data.uint32 = 1234; - if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Store_internal failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - free(tmp); - - if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) { - exit(1); - } - value.type = PMIX_UINT64; - value.data.uint64 = 1234; - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - free(tmp); - - if (0 > asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) { - exit(1); - } - value.type = PMIX_STRING; - value.data.string = "1234"; - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, tmp, &value))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - free(tmp); - - /* push the data to our PMIx server */ - if (PMIX_SUCCESS != (rc = PMIx_Commit())) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - - /* call fence to synchronize with our peers - instruct - * the fence operation to collect and return all "put" - * data from our peers */ - PMIX_INFO_CREATE(info, 1); - flag = true; - PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - PMIX_INFO_FREE(info, 1); - - /* check the returned data */ - for (n = 0; n < nprocs; n++) { - if (0 > asprintf(&tmp, "%s-%d-local", myproc.nspace, myproc.rank)) { - exit(1); - } - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, - myproc.rank, tmp, rc); - goto done; - } - if (PMIX_UINT64 != val->type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", - myproc.nspace, myproc.rank, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - goto done; - } - if (1234 != val->data.uint64) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", - myproc.nspace, myproc.rank, tmp, (int) val->data.uint64); - PMIX_VALUE_RELEASE(val); - free(tmp); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, - myproc.rank, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); - if (0 > asprintf(&tmp, "%s-%d-remote", myproc.nspace, myproc.rank)) { - exit(1); - } - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, tmp, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, - myproc.rank, tmp, rc); - goto done; - } - if (PMIX_STRING != val->type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", - myproc.nspace, myproc.rank, tmp, val->type); - PMIX_VALUE_RELEASE(val); - free(tmp); - goto done; - } - if (0 != strcmp(val->data.string, "1234")) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %s\n", - myproc.nspace, myproc.rank, tmp, val->data.string); - PMIX_VALUE_RELEASE(val); - free(tmp); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, - myproc.rank, tmp); - PMIX_VALUE_RELEASE(val); - free(tmp); - } - -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/client2.c b/examples/client2.c deleted file mode 100644 index 026f03bba9..0000000000 --- a/examples/client2.c +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -/* this is the event notification function we pass down below - * when registering for general events - i.e.,, the default - * handler. We don't technically need to register one, but it - * is usually good practice to catch any events that occur */ -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } -} - -/* event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - lock->status = status; - lock->evhandler_ref = evhandler_ref; - DEBUG_WAKEUP_THREAD(lock); -} - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_value_t value; - pmix_value_t *val, *vptr; - pmix_proc_t proc; - uint32_t nprocs, n, k; - pmix_info_t *info; - bool flag; - mylock_t mylock; - pmix_data_array_t da, *dptr; - size_t q; - pmix_device_distance_t *dist; - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes any - * debugger flag instructing us to stop-in-init. If such a directive - * is included, then the process will be stopped in this call until - * the "debugger release" notification arrives */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); - - /* register our default event handler - again, this isn't strictly - * required, but is generally good practice */ - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, - myproc.rank); - goto done; - } - - /* job-related info is found in our nspace, assigned to the - * wildcard rank as it doesn't relate to a specific rank. Setup - * a name to retrieve such values */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - - /* get our job size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - nprocs = val->data.uint32; - PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs); - - /* put a data array of pmix_value's */ - val = (pmix_value_t *) malloc(32 * sizeof(pmix_value_t)); - for (n = 0; n < 32; n++) { - val[n].type = PMIX_UINT64; - val[n].data.uint64 = 2 * n; - } - da.type = PMIX_VALUE; - da.size = 32; - da.array = val; - value.type = PMIX_DATA_ARRAY; - value.data.darray = &da; - rc = PMIx_Put(PMIX_GLOBAL, "test-key", &value); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Put failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - free(val); - - /* push the data to our PMIx server */ - if (PMIX_SUCCESS != (rc = PMIx_Commit())) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - - /* call fence to synchronize with our peers - instruct - * the fence operation to collect and return all "put" - * data from our peers */ - PMIX_INFO_CREATE(info, 1); - flag = true; - PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - PMIX_INFO_FREE(info, 1); - - PMIX_INFO_CREATE(info, 1); - PMIX_INFO_LOAD(info, PMIX_IMMEDIATE, NULL, PMIX_BOOL); - /* check the returned data */ - for (n = 0; n < nprocs; n++) { - proc.rank = n; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, "test-key", info, 1, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get on rank %u failed: %d\n", myproc.nspace, - myproc.rank, proc.rank, rc); - goto done; - } - if (PMIX_DATA_ARRAY != val->type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get on rank %u returned wrong type: %d\n", - myproc.nspace, myproc.rank, proc.rank, val->type); - PMIX_VALUE_RELEASE(val); - goto done; - } - dptr = val->data.darray; - if (NULL == dptr) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %d returned NULL array\n", - myproc.nspace, myproc.rank, proc.rank); - PMIX_VALUE_RELEASE(val); - goto done; - } - if (PMIX_VALUE != dptr->type) { - fprintf(stderr, - "Client ns %s rank %d: PMIx_Get %d returned wrong array value type %d\n", - myproc.nspace, myproc.rank, proc.rank, dptr->type); - PMIX_VALUE_RELEASE(val); - goto done; - } - if (32 != dptr->size) { - fprintf(stderr, - "Client ns %s rank %d: PMIx_Get %d returned wrong array value size %d\n", - myproc.nspace, myproc.rank, proc.rank, (int) dptr->size); - PMIX_VALUE_RELEASE(val); - goto done; - } - vptr = (pmix_value_t *) dptr->array; - for (k = 0; k < 32; k++) { - if (PMIX_UINT64 != vptr[k].type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %d returned wrong type: %d\n", - myproc.nspace, myproc.rank, proc.rank, vptr[k].type); - PMIX_VALUE_RELEASE(val); - goto done; - } - if (2 * k != vptr[k].data.uint64) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %d returned wrong value: %lu\n", - myproc.nspace, myproc.rank, proc.rank, (unsigned long) vptr[k].data.uint64); - PMIX_VALUE_RELEASE(val); - goto done; - } - } - PMIX_VALUE_RELEASE(val); - } - - /* get our device distances, if available */ - rc = PMIx_Get(&myproc, PMIX_DEVICE_DISTANCES, NULL, 0, &val); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get for device distances failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - goto done; - } - if (PMIX_DATA_ARRAY != val->type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get device distances returned wrong type: %s\n", - myproc.nspace, myproc.rank, PMIx_Data_type_string(val->type)); - PMIX_VALUE_RELEASE(val); - goto done; - } - dptr = val->data.darray; - if (NULL == dptr) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get device distances returned NULL array\n", - myproc.nspace, myproc.rank); - PMIX_VALUE_RELEASE(val); - goto done; - } - if (PMIX_DEVICE_DIST != dptr->type) { - fprintf(stderr, - "Client ns %s rank %d: PMIx_Get device distances returned wrong array value type %s\n", - myproc.nspace, myproc.rank, PMIx_Data_type_string(dptr->type)); - PMIX_VALUE_RELEASE(val); - goto done; - } - fprintf(stderr, - "Client ns %s rank %d: PMIx_Get device distances succeeded\n", - myproc.nspace, myproc.rank); - dist = (pmix_device_distance_t*)dptr->array; - for (q=0; q < dptr->size; q++) { - fprintf(stderr, "UUID: %s OSNAME: %s TYPE: %s MIND: %u MAXD: %u\n", - dist[q].uuid, dist[q].osname, - PMIx_Device_type_string(dist[q].type), - dist[q].mindist, dist[q].maxdist); - } - -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/colocate.c b/examples/colocate.c deleted file mode 100644 index 259565cdfc..0000000000 --- a/examples/colocate.c +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -/* this is an event notification function that we explicitly request - * be called when the PMIX_EVENT_JOB_END notification is issued. - * We could catch it in the general event notification function and test - * the status to see if it was "job terminated", but it often is simpler - * to declare a use-specific notification callback point. In this case, - * we are asking to know whenever a job terminates, and we will then - * know we can exit */ -static void release_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - bool found; - int exit_code; - size_t n; - pmix_proc_t *affected = NULL; - - /* find the return object */ - lock = NULL; - found = false; - for (n = 0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { - lock = (myrel_t *) info[n].value.data.ptr; - /* not every RM will provide an exit code, but check if one was given */ - } else if (0 == strncmp(info[n].key, PMIX_EXIT_CODE, PMIX_MAX_KEYLEN)) { - exit_code = info[n].value.data.integer; - found = true; - } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { - affected = info[n].value.data.proc; - } - } - /* if the object wasn't returned, then that is an error */ - if (NULL == lock) { - fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); - /* let the event handler progress */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - return; - } - - if (found) { - if (!lock->exit_code_given) { - lock->exit_code = exit_code; - lock->exit_code_given = true; - } - } - - DEBUG_WAKEUP_THREAD(&lock->lock); - - /* tell the event handler state machine that we are the last step */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - return; -} - -/* event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - lock->status = status; - DEBUG_WAKEUP_THREAD(lock); -} - -static void printusage(void) -{ - fprintf(stderr, "Usage: colocate [options]\n"); - fprintf(stderr, "\t--cmd foo : spawn the foo executable\n"); - fprintf(stderr, "\t-n/--np/-np N : number of procs to spawn\n"); -} - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_value_t *val; - pmix_proc_t proc, *pptr; - uint32_t nprocs, n; - pmix_info_t jinfo[2]; - pid_t pid; - int exitcode = 0; - pmix_data_array_t darray; - pmix_app_t app; - pmix_nspace_t nsp2; - uint16_t np = 1; - mylock_t mylock; - myrel_t myrel; - void *dirs; - pmix_info_t *dinfo; - size_t dninfo; - pmix_status_t code = PMIX_EVENT_JOB_END; - char *cmd = "hostname"; - - pid = getpid(); - - for (n=1; n < argc; n++) { - if (0 == strcmp(argv[n], "--cmd") || - 0 == strcmp(argv[n], "-cmd")) { - if (NULL == argv[n+1]) { - printusage(); - exit(1); - } - cmd = argv[n+1]; - } else if (0 == strcmp(argv[n], "--np") || - 0 == strcmp(argv[n], "-np") || - 0 == strcmp(argv[n], "-n")) { - if (NULL == argv[n+1]) { - printusage(); - exit(1); - } - np = strtol(argv[n+1], NULL, 10); - } else if (0 == strcmp(argv[n], "--help") || - 0 == strcmp(argv[n], "-h")) { - printusage(); - exit(0); - } - } - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes the - * location of all procs in our job */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", - myproc.nspace, myproc.rank, rc); - exit(1); - } - fprintf(stderr, "Client %s:%u pid %lu: Running\n", - myproc.nspace, myproc.rank, (unsigned long) pid); - - /* job-related info is found in our nspace, assigned to the - * wildcard rank as it doesn't relate to a specific rank. Setup - * a name to retrieve such values */ - PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD); - - /* get the number of procs in our job */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %u: PMIx_Get job size failed: %d\n", - myproc.nspace, myproc.rank, rc); - goto done; - } - PMIX_VALUE_GET_NUMBER(rc, val, nprocs, uint32_t); - PMIX_VALUE_RELEASE(val); - if (1 < nprocs) { - fprintf(stderr, "Please only run one proc for this example\n"); - exitcode = 1; - goto done; - } - - PMIX_APP_CONSTRUCT(&app); - app.cmd = strdup("hostname"); - PMIX_ARGV_APPEND(rc, app.argv, app.cmd); - PMIX_INFO_CONSTRUCT(&jinfo[0]); - PMIX_DATA_ARRAY_CONSTRUCT(&darray, 1, PMIX_PROC); - pptr = (pmix_proc_t*)darray.array; - PMIX_LOAD_PROCID(&pptr[0], myproc.nspace, PMIX_RANK_WILDCARD); - PMIX_INFO_LOAD(&jinfo[0], PMIX_COLOCATE_PROCS, &darray, PMIX_DATA_ARRAY); - PMIX_INFO_CONSTRUCT(&jinfo[1]); - PMIX_INFO_LOAD(&jinfo[1], PMIX_COLOCATE_NPERNODE, &np, PMIX_UINT16); - - fprintf(stderr, "Client %s:%u: calling PMIx_Spawn\n", - myproc.nspace, myproc.rank); - rc = PMIx_Spawn(jinfo, 2, &app, 1, nsp2); - PMIX_APP_DESTRUCT(&app); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Client %s:%u: PMIx_Spawn failed: %s(%d)\n", - myproc.nspace, myproc.rank, PMIx_Error_string(rc), rc); - exitcode = rc; - goto done; - } - fprintf(stderr, "Spawn success.\n"); - - /* Register callback for when this job terminates */ - DEBUG_CONSTRUCT_LOCK(&myrel.lock); - myrel.nspace = strdup(nsp2); - PMIX_LOAD_PROCID(&proc, nsp2, PMIX_RANK_WILDCARD); - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - /* Only call me back when this specific job terminates */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_AFFECTED_PROC, &proc, PMIX_PROC); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - dinfo = darray.array; - dninfo = darray.size; - - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(&code, 1, dinfo, dninfo, release_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - - if (PMIX_SUCCESS != rc) { - // hang around a while - sleep(2); - } else { - DEBUG_WAIT_THREAD(&myrel.lock); - } - -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %u: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %u:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %u:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (exitcode); -} diff --git a/examples/daemon_error_notify.c b/examples/daemon_error_notify.c deleted file mode 100644 index 59e7afaa50..0000000000 --- a/examples/daemon_error_notify.c +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2018-2020 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include -#include -#include -#include -#include - -#include - -static pmix_proc_t myproc; -struct timeval start, end; -static bool completed; -double sec; - -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - gettimeofday(&end, NULL); - int i; - char name[255]; - gethostname(name, 255); - if ((info[0].value.data.proc != NULL) - && strcmp(info[0].value.data.proc->nspace, myproc.nspace) == 0) { - for (i = 0; i < ninfo; i++) { - fprintf(stderr, - "%s Client %s:%d NOTIFIED with status %d and error proc %s:%d key %s \n", name, - myproc.nspace, myproc.rank, status, info[i].value.data.proc->nspace, - info[i].value.data.proc->rank, info[i].key); - } - completed = true; - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - } else - fprintf(stderr, "Not from my namespace"); -} - -static void op_callbk(pmix_status_t status, void *cbdata) -{ - fprintf(stderr, "Client %s:%d OP CALLBACK CALLED WITH STATUS %d\n", myproc.nspace, myproc.rank, - status); -} - -static void errhandler_reg_callbk(pmix_status_t status, size_t errhandler_ref, void *cbdata) -{ - fprintf(stderr, - "Client %s:%d ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) errhandler_ref); -} - -int main(int argc, char **argv) -{ - int rc; - pmix_value_t value; - pmix_value_t *val = &value; - pmix_proc_t proc; - uint32_t nprocs; - pid_t pid; - - char name[255]; - - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - gethostname(name, 255); - fprintf(stderr, "%s Client ns %s rank %d: Running\n", name, myproc.nspace, myproc.rank); - - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - - /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - nprocs = val->data.uint32; - PMIX_VALUE_RELEASE(val); - // fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); - completed = false; - - pmix_status_t status; - status = PMIX_ERR_PROC_ABORTED; - /* register our errhandler */ - PMIx_Register_event_handler(&status, 1, NULL, 0, notification_fn, errhandler_reg_callbk, NULL); - - /* call fence to sync */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - sleep(3); - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - - if (myproc.rank == 3) { - fprintf(stderr, "\nClient ns %s:%d kill its host \n", myproc.nspace, myproc.rank); - completed = true; - pid = getppid(); - } - gettimeofday(&start, NULL); - if (myproc.rank == 3) { - kill(pid, 9); - } - while (!completed) { - struct timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = 100000; - nanosleep(&ts, NULL); - } -done: - sec = end.tv_sec + (double) end.tv_usec / 1000000.0 - start.tv_sec - - (double) start.tv_usec / 1000000.0; - fprintf(stderr, "Client ns %s rank %d takes %f Finalizing\n", myproc.nspace, myproc.rank, sec); - /* finalize us */ - PMIx_Deregister_event_handler(1, op_callbk, NULL); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/debugger/attach.c b/examples/debugger/attach.c deleted file mode 100644 index 4f01ce4419..0000000000 --- a/examples/debugger/attach.c +++ /dev/null @@ -1,510 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 IBM Corporation. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include - -#include "debugger.h" -#include - -static int attach_to_running_job(char *nspace); -static int query_application_namespace(char *nspace); - -static pmix_proc_t myproc; -static char application_namespace[PMIX_MAX_NSLEN + 1]; -static char *iof_data; -static int iof_size; -static int iof_registered; -static size_t iof_handler_id; -static int daemon_colocate_per_proc = 0; -static int daemon_colocate_per_node = 0; -static char *hostfile = NULL; - - - -/* This is the event notification function we pass down below - * when registering for general events - i.e.,, the default - * handler. We don't technically need to register one, but it - * is usually good practice to catch any events that occur */ -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - printf("%s called as callback for event=%s\n", __FUNCTION__, PMIx_Error_string(status)); - /* This example doesn't do anything with default events */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } -} - -/* This is the handler function to capture stdio data from the daemon process. - * It accumulates stdio data in a buffer. That buffer is displayed at the end - * of this program's execution, instead of as it is received, so the output - * does not get randomly interspersed with other output. */ -static void stdio_callback(size_t iofhdlr, pmix_iof_channel_t channel, pmix_proc_t *source, - pmix_byte_object_t *payload, pmix_info_t info[], size_t ninfo) -{ - if (NULL == iof_data) { - /* Allocate initial string plus trailing '\0' that is not in - * payload->bytes then copy data and append '\0' */ - iof_size = payload->size; - /* iof_size counts number of bytes sent, need one more for trailing - * '\0' */ - iof_data = malloc(iof_size + 1); - if (NULL == iof_data) { - fprintf(stderr, "Unable to allocate I/O buffer, terminating\n"); - exit(1); - } - memcpy(iof_data, payload->bytes, payload->size); - iof_data[payload->size] = '\0'; - } else { - /* Reallocate buffer to hold additional data, copy data and append - * '\0' at end of buffer. */ - iof_data = realloc(iof_data, iof_size + payload->size + 1); - if (NULL == iof_data) { - fprintf(stderr, "Unable to allocate I/O buffer, terminating\n"); - exit(1); - } - memcpy(&iof_data[iof_size], payload->bytes, payload->size); - iof_size = iof_size + payload->size; - iof_data[iof_size] = '\0'; - } -} - -/* This is an event notification function that we explicitly request - * be called when the PMIX_EVENT_JOB_END notification is issued. - * We could catch it in the general event notification function and test - * the status to see if it was "job terminated", but it often is simpler - * to declare a use-specific notification callback point. In this case, - * we are asking to know whenever a job terminates, and we will then - * know we can exit */ -static void release_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - bool found; - int exit_code; - size_t n; - pmix_proc_t *affected = NULL; - - printf("%s called as callback for event=%s\n", __FUNCTION__, PMIx_Error_string(status)); - /* Find our return object */ - lock = NULL; - found = false; - for (n = 0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { - lock = (myrel_t *) info[n].value.data.ptr; - /* Not every RM will provide an exit code, but check if one was - * given */ - } else if (0 == strncmp(info[n].key, PMIX_EXIT_CODE, PMIX_MAX_KEYLEN)) { - exit_code = info[n].value.data.integer; - found = true; - } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { - affected = info[n].value.data.proc; - } - } - /* If the lock object wasn't returned, then that is an error */ - if (NULL == lock) { - fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); - /* Let the event handler progress */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } - return; - } - - printf("DEBUGGER NOTIFIED THAT JOB %s TERMINATED - AFFECTED %s\n", lock->nspace, - (NULL == affected) ? "NULL" : affected->nspace); - if (found) { - lock->exit_code = exit_code; - lock->exit_code_given = true; - } - DEBUG_WAKEUP_THREAD(&lock->lock); - - /* Tell the event handler state machine that we are the last step */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - - DEBUG_WAKEUP_THREAD(&lock->lock); - return; -} - -/* Event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - printf("%s called to register callback refid=%ld\n", __FUNCTION__, evhandler_ref); - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - if (NULL == lock) { - return; - } - lock->status = status; - DEBUG_WAKEUP_THREAD(lock); -} - -/* Registration callback for IOF handler. This funcion gets called both when - * the IOF handler is registered, and when it gets de-registered. - */ -static void iof_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - printf("%s called to register IOF handler refid=%ld\n", __FUNCTION__, - evhandler_ref); - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - iof_handler_id = evhandler_ref; - if (SIZE_MAX == iof_handler_id) { - fprintf(stderr, "REGISTRATION RETURNED ERROR HANDLER ID"); - } - if (NULL == lock) { - return; - } - /* Only post the lock when handler is being registered */ - if (iof_registered) { - printf("IOF registration handler called for de-registration\n"); - return; - } - iof_registered = 1; - lock->status = status; - DEBUG_WAKEUP_THREAD(lock); -} - -int parse_tool_options(int argc, char **argv) -{ - char *endp; - int i = 1; - - while ((i < (argc - 1)) && (strncmp(argv[i], "--", 2) == 0)) { - if (0 == strcmp(argv[i], "--daemon-colocate-per-proc")) { - daemon_colocate_per_proc = strtol(argv[i + 1], &endp, 10); - if ('\0' != *endp) { - fprintf(stderr, "Invalid tool option parameter %s\n", argv[i + 1]); - return -1; - } - } else if (0 == strcmp(argv[i], "--daemon-colocate-per-node")) { - daemon_colocate_per_node = strtol(argv[i + 1], &endp, 10); - if ('\0' != *endp) { - fprintf(stderr, "Invalid tool option parameter %s\n", argv[i + 1]); - return -1; - } - } else if (0 == strcmp(argv[i], "--hostfile")) { - hostfile = strdup(argv[i + 1]); - } - else { - fprintf(stderr, "Invalid tool option %s\n", argv[i]); - return -1; - } - i = i + 2; - } - if ((0 < daemon_colocate_per_node) && (0 < daemon_colocate_per_proc)) { - fprintf(stderr, "Cannot specify daemon tasks per node and daemon tasks per proc\n"); - return -1; - } - if ((NULL != hostfile) && - ((0 != daemon_colocate_per_node) || (0 != daemon_colocate_per_proc))) { - fprintf(stderr, - "hostfile and daemons per node or daemons per proc cannot be combined\n"); - return -1; - } - return i; -} - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_info_t *info; - void *attr_list; - size_t ninfo; - char *nspace = NULL; - mylock_t mylock; - pid_t pid; - int ns_index; - pmix_data_array_t attr_array; - - pid = getpid(); - - ns_index = parse_tool_options(argc, argv); - if (0 > ns_index) { - exit(1); - } else if (ns_index >= argc) { - printf("Usage: %s [OPTIONS] app-launcher-namespace\n", argv[0]); - printf("OPTIONS:\n"); - printf(" --daemon-colocate-per-proc Test Colaunch with Daemons Per Process (Default: " - "0 = off)\n"); - printf(" --daemon-colocate-per-node Test Colaunch with Daemons Per Node (Default: 0 = " - "off)\n"); - printf(" --hostfile Hostfile specifying where daemons will be loaded\n"); - exit(0); - } - nspace = strdup(argv[ns_index]); - info = NULL; - ninfo = 1; - - /* Initialize as a tool */ - PMIX_INFO_LIST_START(attr_list); - PMIX_INFO_LIST_ADD(rc, attr_list, PMIX_LAUNCHER, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, attr_list, PMIX_IOF_LOCAL_OUTPUT, NULL, PMIX_BOOL); - PMIX_INFO_LIST_CONVERT(rc, attr_list, &attr_array); - PMIX_INFO_LIST_RELEASE(attr_list); - info = attr_array.array; - ninfo = attr_array.size; - if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { - fprintf(stderr, "PMIx_tool_init failed: %s(%d)\n", PMIx_Error_string(rc), rc); - exit(rc); - } - PMIX_DATA_ARRAY_DESTRUCT(&attr_array); - - printf("Debugger ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - - /* Register a default event handler */ - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - if (PMIX_SUCCESS != (rc = attach_to_running_job(nspace))) { - fprintf(stderr, "Failed to attach to nspace %s: error code %d\n", nspace, rc); - } - PMIx_tool_finalize(); - if (NULL != iof_data) { - printf("Forwarded stdio data:\n%s", iof_data); - printf("End forwarded stdio\n"); - } - return (rc); -} - -static int attach_to_running_job(char *nspace) -{ - void *dirs; - pmix_status_t rc; - pmix_proc_t daemon_proc, target_proc; - pmix_info_t *info; - pmix_app_t *app; - pmix_status_t code = PMIX_ERR_LOST_CONNECTION; - size_t ninfo; - int n; - mylock_t mylock, iof_lock; - myrel_t myrel; - char cwd[_POSIX_PATH_MAX]; - char dspace[PMIX_MAX_NSLEN + 1]; - pmix_data_array_t darray; - - printf("%s called to attach to application with namespace=%s\n", __FUNCTION__, nspace); - /* This is where a debugger tool would process the proctable to - * create whatever blob it needs to provide to its daemons */ - - /* We are given the namespace of the launcher. The debugger daemon needs - * the namespace of the application so it can interact with and control - * execution of the application tasks. - * - * Query the namespaces known to the launcher to get the application - * namespace. */ - query_application_namespace(nspace); - printf("Spawn debugger daemon\n"); - /* Set up the debugger daemon spawn request */ - PMIX_APP_CREATE(app, 1); - /* Set up the name of the daemon executable to launch */ - app->cmd = strdup("./daemon"); - app->argv = (char **) malloc(2 * sizeof(char *)); - /* Set up the debuger daemon arguments, in this case, just argv[0] */ - app->argv[0] = strdup("./daemon"); - app->argv[1] = NULL; - /* No environment variables */ - app->env = NULL; - /* Set the daemon's working directory to our current directory */ - getcwd(cwd, _POSIX_PATH_MAX); - app->cwd = strdup(cwd); - /* No attributes set in the pmix_app_t structure */ - app->info = NULL; - app->ninfo = 0; - if ((0 < daemon_colocate_per_node) || (0 < daemon_colocate_per_proc)) { - app->maxprocs = 0; - } - else { - app->maxprocs = 1; - } - /* Provide directives so the daemon goes where we want, and - * let the RM know this is a debugger daemon */ - PMIX_LOAD_PROCID(&target_proc, application_namespace, PMIX_RANK_WILDCARD); - PMIX_INFO_LIST_START(dirs); - /* Indicate the requestor is a tool process */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_REQUESTOR_IS_TOOL, NULL, PMIX_BOOL); - /* Indicate this is a debugger daemon */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUGGER_DAEMONS, NULL, PMIX_BOOL); - /* Set the application namespace to attach to */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUG_TARGET, &target_proc, PMIX_PROC); - /* Forward stdout and stderr to this process */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_FWD_STDOUT, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_FWD_STDERR, NULL, PMIX_BOOL); - /* Set up daemon mapping based on options */ - if (0 < daemon_colocate_per_node) { - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUG_DAEMONS_PER_NODE, - &daemon_colocate_per_node, PMIX_UINT16); - } else if (0 < daemon_colocate_per_proc) { - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUG_DAEMONS_PER_PROC, - &daemon_colocate_per_proc, PMIX_UINT16); - } else if (NULL != hostfile) { - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_HOSTFILE, hostfile, PMIX_STRING); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_MAPBY, "ppr:1:node", PMIX_STRING); - } - else { - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_MAPBY, "ppr:1:node", PMIX_STRING); - } - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - /* Spawn the daemon */ - rc = PMIx_Spawn(info, ninfo, app, 1, dspace); - - PMIX_APP_FREE(app, 1); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - - printf("Debugger daemon namespace '%s'\n", dspace); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Error spawning debugger daemon, %s\n", PMIx_Error_string(rc)); - return -1; - } - PMIX_PROC_LOAD(&daemon_proc, dspace, PMIX_RANK_WILDCARD); - DEBUG_CONSTRUCT_LOCK(&iof_lock); - /* Register a handler to handle daemon's stdout and stderr */ - ninfo = 1; - n = 0; - PMIX_INFO_CREATE(info, ninfo); - PMIX_INFO_LOAD(&info[n], PMIX_IOF_REDIRECT, NULL, PMIX_BOOL); - rc = PMIx_IOF_pull(&daemon_proc, 1, info, ninfo, - PMIX_FWD_STDOUT_CHANNEL | PMIX_FWD_STDERR_CHANNEL, stdio_callback, - iof_reg_callbk, &iof_lock); - PMIX_INFO_FREE(info, ninfo); - DEBUG_WAIT_THREAD(&iof_lock); - rc = iof_lock.status; - /* Don't destroy the iof_lock since evhandler_reg_callback gets called - * multiple times */ - /* This is where a debugger tool would wait until the debug operation is - * complete */ - /* Register callback for when the debugger daemon terminates */ - DEBUG_CONSTRUCT_LOCK(&myrel.lock); - myrel.nspace = strdup(dspace); - - /* Register a handler for PMIX_ERR_LOST_CONNECTION, which will occur when the - * daemon terminates. */ - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(&code, 1, info, ninfo, release_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - printf("Waiting for debugger daemon namespace %s to complete\n", dspace); - DEBUG_WAIT_THREAD(&myrel.lock); - printf("Debugger daemon namespace %s terminated\n", dspace); - return rc; -} - -int query_application_namespace(char *nspace) -{ - pmix_info_t *namespace_query_data; - char *p; - size_t namespace_query_size; - pmix_status_t rc; - pmix_query_t namespace_query; - int wildcard_rank = PMIX_RANK_WILDCARD; - int ninfo; - int n; - int len; - - printf("%s called to get application namespace\n", __FUNCTION__); - PMIX_QUERY_CONSTRUCT(&namespace_query); - PMIX_ARGV_APPEND(rc, namespace_query.keys, PMIX_QUERY_NAMESPACES); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "An error occurred creating namespace query."); - PMIX_QUERY_DESTRUCT(&namespace_query); - return -1; - } - PMIX_INFO_CREATE(namespace_query.qualifiers, 2); - ninfo = 2; - n = 0; - PMIX_INFO_LOAD(&namespace_query.qualifiers[n], PMIX_NSPACE, nspace, PMIX_STRING); - n++; - PMIX_INFO_LOAD(&namespace_query.qualifiers[n], PMIX_RANK, &wildcard_rank, PMIX_INT32); - namespace_query.nqual = ninfo; - rc = PMIx_Query_info(&namespace_query, 1, &namespace_query_data, &namespace_query_size); - PMIX_QUERY_DESTRUCT(&namespace_query); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "An error occurred querying application namespace: %s.\n", - PMIx_Error_string(rc)); - return -1; - } - if ((1 != namespace_query_size) || (PMIX_STRING != namespace_query_data->value.type)) { - fprintf(stderr, "The response to namespace query has wrong format.\n"); - return -1; - } - /* The query retruns a comma-delimited list of namespaces. If there are - * multple namespaces in the list, then assume the first is the - * application namespace and the second is the daemon namespace. - * Copy only the application namespace and terminate the name with '\0' */ - p = strchr(namespace_query_data->value.data.string, ','); - if (NULL == p) { - len = strlen(namespace_query_data->value.data.string); - } else { - len = p - namespace_query_data->value.data.string; - } - strncpy(application_namespace, namespace_query_data->value.data.string, len); - application_namespace[len] = '\0'; - printf("Application namespace is '%s'\n", application_namespace); - return 0; -} diff --git a/examples/debugger/daemon.c b/examples/debugger/daemon.c deleted file mode 100644 index 33156f8042..0000000000 --- a/examples/debugger/daemon.c +++ /dev/null @@ -1,475 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * Copyright (c) 2021 IBM Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include - -#include "debugger.h" -#include -/* - * This module is an example of a PMIx debugger daemon. The debugger daemon - * handles interactions with application processes on a node in behalf of the - * front end debugger process. - */ - -static pmix_proc_t myproc; -static char *target_namespace = NULL; - -/* This is a callback function for the PMIx_Query - * API. The query will callback with a status indicating - * if the request could be fully satisfied, partially - * satisfied, or completely failed. The info parameter - * contains an array of the returned data, with the - * info->key field being the key that was provided in - * the query call. Thus, you can correlate the returned - * data in the info->value field to the requested key. - * - * Once we have dealt with the returned data, we must - * call the release_fn so that the PMIx library can - * cleanup */ -static void cbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, void *cbdata, - pmix_release_cbfunc_t release_fn, void *release_cbdata) -{ - myquery_data_t *mq = (myquery_data_t *) cbdata; - size_t n; - - mq->status = status; - - printf("%s called as daemon callback for PMIx_Query\n", __FUNCTION__); - /* Save the returned info - it will be * released in the release_fn */ - if (0 < ninfo) { - PMIX_INFO_CREATE(mq->info, ninfo); - mq->ninfo = ninfo; - for (n = 0; n < ninfo; n++) { - printf("Transferring %s\n", info[n].key); - PMIX_INFO_XFER(&mq->info[n], &info[n]); - } - } - - /* Let the library release the data */ - if (NULL != release_fn) { - release_fn(release_cbdata); - } - - /* Release the lock */ - DEBUG_WAKEUP_THREAD(&mq->lock); -} - -/* This is the event notification function we pass down below - * when registering for general events - i.e.,, the default - handler. We don't technically need to register one, but it - * is usually good practice to catch any events that occur */ -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - printf("%s called as daemon default event handler for event=%s\n", __FUNCTION__, - PMIx_Error_string(status)); - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } -} - -/* This is an event notification function that we explicitly request - * be called when the PMIX_EVENT_JOB_END notification is issued. - * We could catch it in the general event notification function and test - * the status to see if it was "job terminated", but it often is simpler - * to declare a use-specific notification callback point. In this case, - * we are asking to know whenever a job terminates, and we will then - * know we can exit */ -static void release_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - bool found; - int exit_code; - size_t n; - pmix_proc_t *affected = NULL; - - printf("%s called as daemon callback for event=%s\n", __FUNCTION__, PMIx_Error_string(status)); - - /* Be sure notification is for our application process namespace */ - if (0 != strcmp(target_namespace, source->nspace)) { - printf("Ignoring termination notification for '%s'\n", source->nspace); - /* Tell the event handler state machine that we are the last step */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - return; - } - /* Find our return object */ - lock = NULL; - found = false; - for (n = 0; n < ninfo; n++) { - /* Retrieve the lock that needs to be released by this callback. */ - if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { - lock = (myrel_t *) info[n].value.data.ptr; - /* Not every RM will provide an exit code, but check if one was - * given */ - } else if (0 == strncmp(info[n].key, PMIX_EXIT_CODE, PMIX_MAX_KEYLEN)) { - exit_code = info[n].value.data.integer; - found = true; - } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { - affected = info[n].value.data.proc; - } - } - /* if the lock object wasn't returned, then that is an error */ - if (NULL == lock) { - fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); - /* let the event handler progress */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } - return; - } - - printf("DEBUGGER DAEMON NAMESPACE %s NOTIFIED THAT JOB TERMINATED - AFFECTED %s\n", - lock->nspace, (NULL == affected) ? "NULL" : affected->nspace); - - /* If the lock object was found then store return status in the lock - * object. */ - if (found) { - lock->exit_code = exit_code; - lock->exit_code_given = true; - } - - /* Tell the event handler state machine that we are the last step */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - - /* Wake up the thread that is waiting for this callback to complete */ - DEBUG_WAKEUP_THREAD(&lock->lock); -} - -/* Event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - printf("%s called by daemon as registration callback\n", __FUNCTION__); - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - lock->status = status; - DEBUG_WAKEUP_THREAD(lock); -} - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_value_t *val; - void *dirs; - pmix_proc_t proc; - pmix_info_t *info; - size_t ninfo; - pmix_query_t *query; - pmix_proc_info_t *proctable; - size_t nq; - size_t n; - myquery_data_t myquery_data; - pid_t pid; - pmix_status_t code = PMIX_EVENT_JOB_END; - mylock_t mylock; - myrel_t myrel; - uint16_t localrank; - int i; - pmix_data_array_t darray; - int cospawned_namespace = 0; - char hostname[256]; - - pid = getpid(); - gethostname(hostname, sizeof hostname); - - /* Initialize this daemon - since we were launched by the RM, our - * connection info * will have been provided at startup. */ - if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, NULL, 0))) { - fprintf(stderr, "Debugger daemon: PMIx_tool_init failed: %s\n", PMIx_Error_string(rc)); - exit(0); - } - printf("Debugger daemon ns %s on host %s rank %d pid %lu: Running\n", myproc.nspace, - hostname, myproc.rank, (unsigned long) pid); - - /* Register our default event handler */ - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIX_INFO_CREATE(info, 1); - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "DEFAULT", PMIX_STRING); - PMIx_Register_event_handler(NULL, 0, info, 1, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - PMIX_INFO_FREE(info, 1); - if (PMIX_SUCCESS != mylock.status) { - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - goto done; - } - DEBUG_DESTRUCT_LOCK(&mylock); - - /* - * Get the namespace of the job we are to debug. If the application and the - * debugger daemons are spawned separately or if the debugger is attaching - * to a running application, the debugger will set the application - * namespace in the PMIX_DEBUG_TARGET attribute, and the daemon retrieves - * it by calling PMIx_Get. - * - * If the application processes and debugger daemons are spawned together - * (cospawn), then the debugger cannot pass the application namespace since - * that is not known until after the PMIx_Spawn call completes. However, - * the applicaton processes and the debugger daemons have the same - * namespace, so this module uses the debugger namespace, which it knows. - */ - PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD); - rc = PMIx_Get(&proc, PMIX_DEBUG_TARGET, NULL, 0, &val); - if (PMIX_ERR_NOT_FOUND == rc) { - /* Save the application namespace for later */ - // NOTE: This is a bug. The cospawned namespace should be more distinct. - /* - fprintf(stderr, - "[%s:%d:%lu] Warning: Could not find PMIX_DEBUG_TARGET. Assume Cospawn.\n", - myproc.nspace, myproc.rank); - */ - target_namespace = strdup(myproc.nspace); - cospawned_namespace = 1; - } else if (rc != PMIX_SUCCESS) { - fprintf(stderr, "[%s:%d:%lu] Failed to get job being debugged - error %s\n", myproc.nspace, - myproc.rank, (unsigned long) pid, PMIx_Error_string(rc)); - goto done; - } else { - /* Verify that the expected data structures were returned */ - if (NULL == val || PMIX_PROC != val->type) { - fprintf(stderr, "[%s:%d:%lu] Failed to get job being debugged - NULL data returned\n", - myproc.nspace, myproc.rank, (unsigned long) pid); - goto done; - } - printf("[%s:%d:%lu] PMIX_DEBUG_JOB is '%s'\n", proc.nspace, proc.rank, (unsigned long) pid, - val->data.proc->nspace); - /* Save the application namespace for later */ - target_namespace = strdup(val->data.proc->nspace); - PMIX_VALUE_RELEASE(val); - } - - printf("[%s:%d:%lu] Debugging '%s'\n", myproc.nspace, myproc.rank, (unsigned long) pid, - target_namespace); - - /* Get my local rank so I can determine which local proc is "mine" to - * debug */ - val = NULL; - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_LOCAL_RANK, NULL, 0, &val))) { - fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - error %s\n", myproc.nspace, - myproc.rank, (unsigned long) pid, PMIx_Error_string(rc)); - goto done; - } - - /* Verify the expected data object was returned */ - if (NULL == val) { - fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - NULL data returned\n", - myproc.nspace, myproc.rank, (unsigned long) pid); - goto done; - } - if (PMIX_UINT16 != val->type) { - fprintf(stderr, "[%s:%d:%lu] Failed to get my local rank - returned wrong type %s\n", - myproc.nspace, myproc.rank, (unsigned long) pid, PMIx_Data_type_string(val->type)); - goto done; - } - - /* Save the rank */ - localrank = val->data.uint16; - PMIX_VALUE_RELEASE(val); - printf("[%s:%d:%lu] my local rank %d\n", myproc.nspace, myproc.rank, (unsigned long) pid, - (int) localrank); - - /* Register an event handler specifically for when the target job - * completes */ - DEBUG_CONSTRUCT_LOCK(&myrel.lock); - myrel.nspace = strdup(proc.nspace); - - PMIX_LOAD_PROCID(&proc, target_namespace, PMIX_RANK_WILDCARD); - - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_HDLR_NAME, "APP-TERMINATION", PMIX_STRING); - /* Pass the lock we will use to wait for notification of the - * PMIX_EVENT_JOB_END event */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - /* Only call me back when this specific job terminates */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_AFFECTED_PROC, &proc, PMIX_PROC); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - ninfo = darray.size; - info = darray.array; - printf("[%s:%d:%lu] registering for termination of '%s'\n", myproc.nspace, myproc.rank, - (unsigned long) pid, proc.nspace); - - /* Create a lock to wait for completion of the event registration - * callback */ - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(&code, 1, info, ninfo, release_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - if (PMIX_SUCCESS != mylock.status) { - fprintf(stderr, "Failed to register handler for PMIX_EVENT_JOB_END: %s\n", - PMIx_Error_string(mylock.status)); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - goto done; - } - DEBUG_DESTRUCT_LOCK(&mylock); - - /* Get our local proctable - for scalability reasons, we don't want to - * have our "root" debugger process get the proctable for everybody and - * send it out to us. So ask the local PMIx server for the pid's of - * our local target processes - */ - nq = 1; - PMIX_QUERY_CREATE(query, nq); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_LOCAL_PROC_TABLE); - n = 0; - ninfo = 1; - query[0].nqual = ninfo; - PMIX_INFO_CREATE(query[0].qualifiers, ninfo); - /* Set the namespace to query */ - PMIX_INFO_LOAD(&query[0].qualifiers[n], PMIX_NSPACE, target_namespace, PMIX_STRING); - - /* Create the lock used to wait for query completion */ - DEBUG_CONSTRUCT_LOCK(&myquery_data.lock); - myquery_data.info = NULL; - myquery_data.ninfo = 0; - - /* Execute the query */ - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void *) &myquery_data))) { - fprintf(stderr, "PMIx_Query_info failed: %d\n", rc); - goto done; - } - - /* Wait for the query to complete */ - DEBUG_WAIT_THREAD(&myquery_data.lock); - DEBUG_DESTRUCT_LOCK(&myquery_data.lock); - PMIX_QUERY_FREE(query, nq); - if (PMIX_SUCCESS != myquery_data.status) { - rc = myquery_data.status; - fprintf(stderr, "Error querying proc table for '%s': %s\n", target_namespace, - PMIx_Error_string(myquery_data.status)); - goto done; - } - - /* Display the process table */ - printf("[%s:%d:%lu] Local proctable received for nspace '%s' has %d entries\n", myproc.nspace, - myproc.rank, (unsigned long) pid, target_namespace, - (int) myquery_data.info[0].value.data.darray->size); - - proctable = myquery_data.info[0].value.data.darray->array; - for (i = 0; i < myquery_data.info[0].value.data.darray->size; i++) { - printf("Proctable[%d], namespace %s rank %d exec %s\n", i, proctable[i].proc.nspace, - proctable[i].proc.rank, basename(proctable[i].executable_name)); - } - - /* Now that we have the proctable for our local processes, this daemon can - * interact with application processes, such as setting initial breakpoints, - * or other setup for the debugging * session. - * If the application was launched by the debugger, then all application - * tasks should be suspended in PMIx_Init, usually within the application's - * MPI_Init call. - * Once initial setup is complete, the daemon sends a release event to the - * application processes and those processes resume execution. - */ - (void) strncpy(proc.nspace, target_namespace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - // Since we are using the 'wildcard' only one daemon should send - // the release message. - // If we are 'cospawned' then the daemons are not ranked separately - // from the application (this is a bug) so just have everyone - // send the release. - if (0 == myproc.rank || 1 == cospawned_namespace) { - - PMIX_INFO_LIST_START(dirs); - /* Send release notification to application namespace */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_CUSTOM_RANGE, &proc, PMIX_PROC); - /* Don't send notification to default event handlers */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - - // Todo: Move this to the main tool - // https://github.com/openpmix/prrte/pull/857#discussion_r600849033 - sleep(1); - printf("[%s:%u:%lu] Sending release\n", myproc.nspace, myproc.rank, (unsigned long) pid); - rc = PMIx_Notify_event(PMIX_DEBUGGER_RELEASE, NULL, PMIX_RANGE_CUSTOM, info, ninfo, NULL, - NULL); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "[%s:%u:%lu] Sending release failed with error %s(%d)\n", myproc.nspace, - myproc.rank, (unsigned long) pid, PMIx_Error_string(rc), rc); - goto done; - } - } - - /* At this point the application processes should be running under debugger - * control. The daemons can interact further with application processes as - * needed, or just wait for the application * termination. - * This example just waits for application termination. - * Note that if the application processes and daemon processes are spawned - * by the same PMIx_Spawn call, then no PMIX_EVENT_JOB_END - * notifications are sent since the daemons are part of the same namespace - * and are still running. - */ - if (0 == cospawned_namespace) { - printf("Waiting for application namespace %s to terminate\n", proc.nspace); - DEBUG_WAIT_THREAD(&myrel.lock); - printf("Application namespace %s terminated\n", proc.nspace); - } - -done: - if (NULL != target_namespace) { - free(target_namespace); - } - /* Call PMIx_tool_finalize to shut down the PMIx runtime */ - printf("Debugger daemon ns %s rank %d pid %lu: Finalizing\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - rc = PMIx_tool_finalize(); - fclose(stdout); - fclose(stderr); - sleep(1); - return (rc); -} diff --git a/examples/debugger/debugger.h b/examples/debugger/debugger.h deleted file mode 100644 index 5de2baae4b..0000000000 --- a/examples/debugger/debugger.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include -#include - -typedef struct { - pthread_mutex_t mutex; - pthread_cond_t cond; - volatile bool active; - pmix_status_t status; - int count; -} mylock_t; - -#define DEBUG_CONSTRUCT_LOCK(l) \ - do { \ - pthread_mutex_init(&(l)->mutex, NULL); \ - pthread_cond_init(&(l)->cond, NULL); \ - (l)->active = true; \ - (l)->status = PMIX_SUCCESS; \ - (l)->count = 0; \ - } while (0) - -#define DEBUG_DESTRUCT_LOCK(l) \ - do { \ - pthread_mutex_destroy(&(l)->mutex); \ - pthread_cond_destroy(&(l)->cond); \ - } while (0) - -#define DEBUG_WAIT_THREAD(lck) \ - do { \ - pthread_mutex_lock(&(lck)->mutex); \ - while ((lck)->active) { \ - pthread_cond_wait(&(lck)->cond, &(lck)->mutex); \ - } \ - pthread_mutex_unlock(&(lck)->mutex); \ - } while (0) - -#define DEBUG_WAKEUP_THREAD(lck) \ - do { \ - pthread_mutex_lock(&(lck)->mutex); \ - (lck)->active = false; \ - pthread_cond_broadcast(&(lck)->cond); \ - pthread_mutex_unlock(&(lck)->mutex); \ - } while (0) - -/* define a structure for collecting returned - * info from a query */ -typedef struct { - mylock_t lock; - pmix_status_t status; - pmix_info_t *info; - size_t ninfo; - pmix_app_t *apps; - size_t napps; -} myquery_data_t; - -/* define a structure for releasing when a given - * nspace terminates */ -typedef struct { - mylock_t lock; - pmix_status_t status; - char *nspace; - int exit_code; - bool exit_code_given; -} myrel_t; - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_JOIN_COMPAT(a, b) \ - pmix_argv_join(a, b) -#else -#define PMIX_ARGV_JOIN_COMPAT(a, b) \ - PMIx_Argv_join(a, b) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_SPLIT_COMPAT(a, b) \ - pmix_argv_split(a, b) -#else -#define PMIX_ARGV_SPLIT_COMPAT(a, b) \ - PMIx_Argv_split(a, b) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_SPLIT_WITH_EMPTY_COMPAT(a, b) \ - pmix_argv_split_with_empty(a, b) -#else -#define PMIX_ARGV_SPLIT_WITH_EMPTY_COMPAT(a, b) \ - PMIx_Argv_split_with_empty(a, b) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_COUNT_COMPAT(a) \ - pmix_argv_count(a) -#else -#define PMIX_ARGV_COUNT_COMPAT(a) \ - PMIx_Argv_count(a) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_FREE_COMPAT(a) \ - pmix_argv_free(a) -#else -#define PMIX_ARGV_FREE_COMPAT(a) \ - PMIx_Argv_free(a) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_APPEND_UNIQUE_COMPAT(a, b) \ - pmix_argv_append_unique_nosize(a, b) -#else -#define PMIX_ARGV_APPEND_UNIQUE_COMPAT(a, b) \ - PMIx_Argv_append_unique_nosize(a, b) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_APPEND_NOSIZE_COMPAT(a, b) \ - pmix_argv_append_nosize(a, b) -#else -#define PMIX_ARGV_APPEND_NOSIZE_COMPAT(a, b) \ - PMIx_Argv_append_nosize(a, b) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_COPY_COMPAT(a) \ - pmix_argv_copy(a) -#else -#define PMIX_ARGV_COPY_COMPAT(a) \ - PMIx_Argv_copy(a) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_SETENV_COMPAT(a, b, c, d) \ - pmix_setenv(a, b, c, d) -#else -#define PMIX_SETENV_COMPAT(a, b, c, d) \ - PMIx_Setenv(a, b, c, d) -#endif diff --git a/examples/debugger/direct-multi.c b/examples/debugger/direct-multi.c deleted file mode 100644 index 9db400f2f3..0000000000 --- a/examples/debugger/direct-multi.c +++ /dev/null @@ -1,781 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * Copyright (c) 2021 IBM Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include - -#include "debugger.h" -#include - -static pmix_proc_t myproc; -static char client_nspace[PMIX_MAX_NSLEN + 1]; -static char daemon_nspace[PMIX_MAX_NSLEN + 1]; - -static bool stop_in_init = true; -static bool stop_on_exec = false; -static bool stop_in_init_supported = false; -static bool stop_on_exec_supported = false; -static int app_npernode = 2; // > 0. Default 2 ppn -static int app_np = 2; // <= 0 means use default from prte. Default to single node. Must be multiple of npernode -static int daemon_colocate_per_proc = 0; // 0 = disable -static int daemon_colocate_per_node = 0; // 0 = disable - -/* this is a callback function for the PMIx_Query - * API. The query will callback with a status indicating - * if the request could be fully satisfied, partially - * satisfied, or completely failed. The info parameter - * contains an array of the returned data, with the - * info->key field being the key that was provided in - * the query call. Thus, you can correlate the returned - * data in the info->value field to the requested key. - * - * Once we have dealt with the returned data, we must - * call the release_fn so that the PMIx library can - * cleanup */ -static void cbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, void *cbdata, - pmix_release_cbfunc_t release_fn, void *release_cbdata) -{ - myquery_data_t *mq = (myquery_data_t *) cbdata; - size_t n; - - printf("Called %s as callback for PMIx_Query\n", __FUNCTION__); - mq->status = status; - /* save the returned info - the PMIx library "owns" it - * and will release it and perform other cleanup actions - * when release_fn is called */ - if (0 < ninfo) { - PMIX_INFO_CREATE(mq->info, ninfo); - mq->ninfo = ninfo; - for (n = 0; n < ninfo; n++) { - printf("Key %s Type %s(%d)\n", info[n].key, PMIx_Data_type_string(info[n].value.type), - info[n].value.type); - PMIX_INFO_XFER(&mq->info[n], &info[n]); - } - } - - /* let the library release the data and cleanup from - * the operation */ - if (NULL != release_fn) { - release_fn(release_cbdata); - } - - /* release the block */ - DEBUG_WAKEUP_THREAD(&mq->lock); -} - -/* this is the event notification function we pass down below - * when registering for general events - i.e.,, the default - * handler. We don't technically need to register one, but it - * is usually good practice to catch any events that occur */ -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - size_t n; - - printf("%s called as callback for event=%s\n", __FUNCTION__, PMIx_Error_string(status)); - lock = NULL; - if (PMIX_ERR_UNREACH == status || PMIX_ERR_LOST_CONNECTION == status) { - /* we should always have info returned to us - if not, there is - * nothing we can do */ - if (NULL != info) { - for (n = 0; n < ninfo; n++) { - if (PMIX_CHECK_KEY(&info[n], PMIX_EVENT_RETURN_OBJECT)) { - lock = (myrel_t *) info[n].value.data.ptr; - } - } - } - - /* If a pointer to a lock was passed then save status and - * release the lock */ - if (NULL != lock) { - lock->exit_code = status; - lock->exit_code_given = true; - DEBUG_WAKEUP_THREAD(&lock->lock); - } - } - - /* this example doesn't do anything with default events */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } -} - -/* this is an event notification function that we explicitly request - * be called when the PMIX_EVENT_JOB_END notification is issued. - * We could catch it in the general event notification function and test - * the status to see if it was "job terminated", but it often is simpler - * to declare a use-specific notification callback point. In this case, - * we are asking to know whenever a job terminates, and we will then - * know we can exit */ -static void release_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - bool found; - int exit_code; - size_t n; - pmix_proc_t *affected = NULL; - - printf("%s called as callback for event=%s source=%s:%d\n", __FUNCTION__, - PMIx_Error_string(status), source->nspace, source->rank); - /* find the return object */ - lock = NULL; - found = false; - for (n = 0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { - lock = (myrel_t *) info[n].value.data.ptr; - /* not every RM will provide an exit code, but check if one was given */ - } else if (0 == strncmp(info[n].key, PMIX_EXIT_CODE, PMIX_MAX_KEYLEN)) { - exit_code = info[n].value.data.integer; - found = true; - } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { - affected = info[n].value.data.proc; - } - } - /* if the object wasn't returned, then that is an error */ - if (NULL == lock) { - fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); - /* let the event handler progress */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - return; - } - - printf("DEBUGGER NOTIFIED THAT JOB %s TERMINATED \n", - (NULL == affected) ? "NULL" : affected->nspace); - if (found) { - if (!lock->exit_code_given) { - lock->exit_code = exit_code; - lock->exit_code_given = true; - } - } - - /* A system PMIx daemon may have kept track of notifications for - * termination of previous application runs, and may send those - * notifications to this process, which has registered a callback for - * application terminations. Those notifcations need to be ignored. - * - * Therefore, in the co-spawn case, we expect one termination notification, - * which is for the combined application/daemon namespace when the daemon - * terminates. - * - * In the separate spawn case, we expect two terminations, the application - * and the daemon. */ - if ((0 == strcmp(daemon_nspace, source->nspace)) - || (0 == strcmp(client_nspace, source->nspace))) { - lock->lock.count--; - if (0 == lock->lock.count) { - DEBUG_WAKEUP_THREAD(&lock->lock); - } - } - - /* tell the event handler state machine that we are the last step */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - return; -} - -/* Handle READY-FOR_DEBUG notifications from each application task. The waiting for - * debug ready state is not complete until all application processes are ready */ -static void app_ready(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - int n; - printf("All expected READY-FOR-DEBUG notifications received\n"); - lock = NULL; - for (n = 0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { - lock = (myrel_t *) info[n].value.data.ptr; - break; - } - } - /* if the lock wasn't returned, then that is an error */ - if (NULL == lock) { - fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); - /* let the event handler progress */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - return; - } - DEBUG_WAKEUP_THREAD(&lock->lock); -} - -/* event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - printf("%s called to register callback\n", __FUNCTION__); - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - lock->status = status; - DEBUG_WAKEUP_THREAD(lock); -} - -/* Register a callback for an application event notification */ -void register_app_notification(pmix_status_t code, myrel_t *myrel, pmix_notification_fn_t callback) -{ - void *tinfo; - pmix_info_t *info; - pmix_status_t rc; - pmix_proc_t proc; - size_t ninfo; - pmix_data_array_t darray; - mylock_t mylock; - - PMIX_INFO_LIST_START(tinfo); - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_EVENT_RETURN_OBJECT, myrel, PMIX_POINTER); - /* Only call me back when this specific job terminates */ - PMIX_LOAD_PROCID(&proc, client_nspace, PMIX_RANK_WILDCARD); - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_EVENT_AFFECTED_PROC, &proc, PMIX_PROC); - PMIX_INFO_LIST_CONVERT(rc, tinfo, &darray); - info = (pmix_info_t*)darray.array; - ninfo = darray.size; - PMIX_INFO_LIST_RELEASE(tinfo); - - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(&code, 1, info, ninfo, callback, - evhandler_reg_callbk, (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - printf("Debugger: Registered for event %s on nspace %s\n", - PMIx_Error_string(code), client_nspace); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); -} - -/* Parse command line flags */ -int parse_command_line(int argc, char **argv) -{ - int i; - - for (i = 1; i < argc; i++) { - if (0 == strcmp(argv[i], "-h") || 0 == strcmp(argv[i], "--help")) { - /* print the usage message and exit */ - printf("Direct Launch Example\n"); - printf("$ prte --daemonize\n"); - printf("$ %s [OPTIONS]\n", argv[0]); - printf("\n"); - printf(" --stop-in-init Stop application in PMIx_Init (Default)\n"); - printf(" --stop-on-exec Stop application on exec\n"); - printf(" --app-npernode Number of processes per node (Default: 2)\n"); - printf(" --app-np Number of total processes. Must be multiple of " - "--app-npernode (Default: 2)\n"); - printf(" --daemon-colocate-per-proc Test Colaunch with Daemons Per Process (Default: " - "0 = off)\n"); - printf(" --daemon-colocate-per-node Test Colaunch with Daemons Per Node (Default: 0 = " - "off)\n"); - return -1; - } else if (0 == strcmp(argv[i], "--stop-in-init")) { - stop_in_init = true; - stop_on_exec = false; - break; - } else if (0 == strcmp(argv[i], "--stop-on-exec")) { - stop_in_init = false; - stop_on_exec = true; - break; - } else if (0 == strcmp(argv[i], "--app-npernode")) { - ++i; - if (i >= argc && isdigit(argv[i][0])) { - fprintf(stderr, "Error: --app-npernode requires a positive integer argument\n"); - return -1; - } - app_npernode = atoi(argv[i]); - if (app_npernode <= 0) { - fprintf(stderr, "Error: --app-npernode requires a positive integer argument\n"); - return -1; - } - } else if (0 == strcmp(argv[i], "--app-np")) { - ++i; - if (i >= argc && isdigit(argv[i][0])) { - fprintf(stderr, "Error: --app-np requires a positive integer argument\n"); - return -1; - } - app_np = atoi(argv[i]); - if (app_np < 0) { - fprintf(stderr, "Error: --app-np requires a positive integer argument\n"); - return -1; - } - } else if (0 == strcmp(argv[i], "--daemon-colocate-per-proc")) { - ++i; - if (i >= argc && isdigit(argv[i][0])) { - fprintf(stderr, - "Error: --daemon-colocate-per-proc requires a positive integer argument\n"); - return -1; - } - daemon_colocate_per_proc = atoi(argv[i]); - if (daemon_colocate_per_proc < 0) { - fprintf(stderr, - "Error: --daemon-colocate-per-proc requires a positive integer argument\n"); - return -1; - } - } else if (0 == strcmp(argv[i], "--daemon-colocate-per-node")) { - ++i; - if (i >= argc && isdigit(argv[i][0])) { - fprintf(stderr, - "Error: --daemon-colocate-per-node requires a positive integer argument\n"); - return -1; - } - daemon_colocate_per_node = atoi(argv[i]); - if (daemon_colocate_per_node < 0) { - fprintf(stderr, - "Error: --daemon-colocate-per-node requires a positive integer argument\n"); - return -1; - } - } - } - - if (daemon_colocate_per_node > 0 && daemon_colocate_per_proc > 0) { - fprintf(stderr, "Error: Both --daemon-colocate-per-node and --daemon-colocate-per-node " - "options present, but are exclusive\n"); - return -1; - } - if (app_np < app_npernode || app_np % app_npernode != 0) { - fprintf(stderr, "Error: --app-np must be a multiple of --app-npernode\n"); - return -1; - } - return 0; -} - -/* Determine what services are available from the RM */ -int query_capabilities(void) -{ - /* This is an initial launch - we need to launch the application - * plus the debugger daemons, letting the RM know we are debugging - * so that it will "pause" the app procs until we are ready. First - * we need to know if this RM supports co-spawning of daemons with - * the application, or if we need to launch the daemons as a separate - * spawn command. The former is faster and more scalable, but not - * every RM may support it. We also need to ask for debug support - * so we know if the RM can stop-on-exec, or only supports stop-in-init */ - - pmix_query_t *query; - pmix_status_t rc; - int n, nq = 1; - myquery_data_t myquery_data; - - PMIX_QUERY_CREATE(query, nq); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_SPAWN_SUPPORT); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_DEBUG_SUPPORT); - /* setup the caddy to retrieve the data */ - DEBUG_CONSTRUCT_LOCK(&myquery_data.lock); - myquery_data.info = NULL; - myquery_data.ninfo = 0; - /* execute the query */ - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void *) &myquery_data))) { - fprintf(stderr, "PMIx_Query_info failed: %d\n", rc); - return -1; - } - DEBUG_WAIT_THREAD(&myquery_data.lock); - DEBUG_DESTRUCT_LOCK(&myquery_data.lock); - - /* We should have received back two info structs, one containing - * a comma-delimited list of PMIx spawn attributes the RM supports, - * and the other containing a comma-delimited list of PMIx debugger - * attributes it supports */ - if (2 != myquery_data.ninfo) { - /* this is an error */ - fprintf(stderr, "PMIx Query returned an incorrect number of results: %lu\n", - myquery_data.ninfo); - PMIX_INFO_FREE(myquery_data.info, myquery_data.ninfo); - return -1; - } - - /* We will check to see if "stop_on_exec" is supported. Few RMs - * do so, which is why we have to check. The reference server sadly is - * not one of them, so we shouldn't find it here - * - * Note that the PMIx reference server always returns the query results - * in the same order as the query keys. However, this is not guaranteed, - * so we should search the returned info structures to find the desired key - */ - for (n = 0; n < myquery_data.ninfo; n++) { - if (0 == strcmp(myquery_data.info[n].key, PMIX_QUERY_DEBUG_SUPPORT)) { - /* See if stop on exec is included */ - if (NULL != strstr(myquery_data.info[n].value.data.string, PMIX_DEBUG_STOP_ON_EXEC)) { - stop_on_exec_supported = true; - } - /* See if stop in init is included */ - if (NULL != strstr(myquery_data.info[n].value.data.string, PMIX_DEBUG_STOP_IN_INIT)) { - stop_in_init_supported = true; - } - } - } - - if (!stop_on_exec_supported && stop_on_exec) { - fprintf(stderr, "Error: Stop-on-exec requested but the RM does not support it\n"); - return -1; - } - - if (!stop_in_init_supported && stop_in_init) { - fprintf(stderr, "Error: Stop-in-init requested but the RM does not support it\n"); - return -1; - } - return 0; -} - -/* Query the entire application proctable */ -static int query_proctable(void) -{ - pmix_query_t *query; - pmix_status_t rc; - myquery_data_t myquery_data; - - /* Get the proctable for this nspace */ - PMIX_QUERY_CREATE(query, 1); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_PROC_TABLE); - query[0].nqual = 1; - PMIX_INFO_CREATE(query->qualifiers, query[0].nqual); - PMIX_INFO_LOAD(&query->qualifiers[0], PMIX_NSPACE, client_nspace, PMIX_STRING); - - DEBUG_CONSTRUCT_LOCK(&myquery_data.lock); - myquery_data.info = NULL; - myquery_data.ninfo = 0; - - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, 1, cbfunc, (void *) &myquery_data))) { - fprintf(stderr, "Debugger[%s:%d] Proctable query failed: %d\n", myproc.nspace, - myproc.rank, rc); - return -1; - } - /* Wait to get a response */ - DEBUG_WAIT_THREAD(&myquery_data.lock); - DEBUG_DESTRUCT_LOCK(&myquery_data.lock); - /* we should have gotten a response */ - if (PMIX_SUCCESS != myquery_data.status) { - fprintf(stderr, "Debugger[%s:%d] Proctable query failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(myquery_data.status)); - return -1; - } - /* There should have been data */ - if (NULL == myquery_data.info || 0 == myquery_data.ninfo) { - fprintf(stderr, "Debugger[%s:%d] Proctable query return no results\n", myproc.nspace, - myproc.rank); - return -1; - } - /* the query should have returned a data_array */ - if (PMIX_DATA_ARRAY != myquery_data.info[0].value.type) { - fprintf(stderr, "Debugger[%s:%d] Query returned incorrect data type: %s(%d)\n", - myproc.nspace, myproc.rank, - PMIx_Data_type_string(myquery_data.info[0].value.type), - (int) myquery_data.info[0].value.type); - return -1; - } - if (NULL == myquery_data.info[0].value.data.darray->array) { - fprintf(stderr, "Debugger[%s:%d] Query returned no proctable info\n", myproc.nspace, - myproc.rank); - return -1; - } - /* The data array consists of a struct: - * size_t size; - * void* array; - * - * In this case, the array is composed of pmix_proc_info_t structs: - * pmix_proc_t proc; // contains the nspace,rank of this proc - * char* hostname; - * char* executable_name; - * pid_t pid; - * int exit_code; - * pmix_proc_state_t state; - */ - printf("Received proc table for %d procs\n", - (int) myquery_data.info[0].value.data.darray->size); - return 0; -} - -/* Spawn the application processes */ -static pmix_status_t spawn_app(void) -{ - void *tinfo; - pmix_info_t *info; - size_t ninfo, napps; - pmix_status_t rc; - pmix_rank_t all_ranks = PMIX_RANK_WILDCARD; - pmix_data_array_t darray; - pmix_app_t app; - char map_str[30]; - char cwd[_POSIX_PATH_MAX + 1]; - - napps = 1; - PMIX_APP_CONSTRUCT(&app); - /* Setup the executable */ - app.cmd = strdup("hello"); - PMIX_ARGV_APPEND(rc, app.argv, "./hello"); - getcwd(cwd, _POSIX_PATH_MAX); // point us to our current directory - app.cwd = strdup(cwd); - if (app_np > 0) { - app.maxprocs = app_np; - } - app.ninfo = 0; - /* Provide job-level directives so the apps do what the user requested */ - PMIX_INFO_LIST_START(tinfo); - if (stop_on_exec) { - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_DEBUG_STOP_ON_EXEC, NULL, PMIX_BOOL); // All procs stop at first instruction - } else { - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_DEBUG_STOP_IN_INIT, NULL, PMIX_BOOL); // All procs stop in PMIx_Init - } - sprintf(map_str, "ppr:%d:node", app_npernode); - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_MAPBY, map_str, PMIX_STRING); // app procs/node - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_RANKBY, "slot", PMIX_STRING); // match baseline - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_FWD_STDOUT, NULL, PMIX_BOOL); // forward stdout to me - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_FWD_STDERR, NULL, PMIX_BOOL); // forward stderr to me - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_NOTIFY_COMPLETION, NULL, - PMIX_BOOL); // notify us when the job completes - PMIX_INFO_LIST_CONVERT(rc, tinfo, &darray); - info = (pmix_info_t*)darray.array; - ninfo = darray.size; - PMIX_INFO_LIST_RELEASE(tinfo); - /* Spawn the job - the function will return when the app - * has been launched */ - printf("Debugger: spawning %s\n", app.cmd); - if (PMIX_SUCCESS != (rc = PMIx_Spawn(info, ninfo, &app, napps, client_nspace))) { - fprintf(stderr, "Application failed to launch with error: %s(%d)\n", - PMIx_Error_string(rc), rc); - } - PMIX_DATA_ARRAY_DESTRUCT(&darray); - return rc; -} - -/* Spawn the debuger daemons */ -static pmix_status_t spawn_debugger(char *appspace, myrel_t *myrel) -{ - pmix_info_t *dinfo; - pmix_app_t *debugger; - void *tinfo; - size_t dninfo; - pmix_status_t rc; - pmix_status_t code = PMIX_EVENT_JOB_END; - pmix_proc_t proc; - int n; - mylock_t mylock; - pmix_data_array_t darray; - char cwd[_POSIX_PATH_MAX]; - - printf("Calling %s to spawn the debugger daemon\n", __FUNCTION__); - /* Setup the debugger spawn parameters*/ - PMIX_APP_CREATE(debugger, 1); - debugger[0].cmd = strdup("./daemon"); - /* Set up debugger command arguments, in this example, just argv[0] */ - PMIX_ARGV_APPEND(rc, debugger[0].argv, "./daemon"); - /* No environment variables */ - debugger[0].env = NULL; - /* Set the working directory to our current directory */ - getcwd(cwd, _POSIX_PATH_MAX); - debugger[0].cwd = strdup(cwd); - /* Spawn daemon processes - 1 per node if not colocating */ - if (daemon_colocate_per_proc < 0 && daemon_colocate_per_node < 0) { - debugger[0].maxprocs = app_np / app_npernode; - } - /* No spawn attributes set here, all are set in dinfo array */ - debugger[0].ninfo = 0; - debugger[0].info = NULL; - /* Set attributes for debugger daemon launch and let the RM know these are - * debugger daemons */ - PMIX_INFO_LIST_START(tinfo); - /* Indicate a debugger daemon is being spawned */ - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_DEBUGGER_DAEMONS, NULL, PMIX_BOOL); - /* Set the name of the namespace being debugged */ - PMIX_LOAD_PROCID(&proc, appspace, PMIX_RANK_WILDCARD); - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_DEBUG_TARGET, &proc, PMIX_PROC); - /* Number of daemons per node in the application allocation */ - if (daemon_colocate_per_node > 0) { - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_DEBUG_DAEMONS_PER_NODE, &daemon_colocate_per_node, - PMIX_UINT16); - } - /* Number of daemons per proc in the application allocation */ - else if (daemon_colocate_per_proc > 0) { - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_DEBUG_DAEMONS_PER_PROC, &daemon_colocate_per_proc, - PMIX_UINT16); - } - /* Launch one daemon per node -- only needed if co-launch is not supported */ - else { - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_MAPBY, "ppr:1:node", PMIX_STRING); - } - /* Notify this process when the job completes */ - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_NOTIFY_COMPLETION, NULL, PMIX_BOOL); - /* Forward stdout to this process */ - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_FWD_STDOUT, NULL, PMIX_BOOL); - /* Forward stderr to this process */ - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_FWD_STDERR, NULL, PMIX_BOOL); - - PMIX_INFO_LIST_CONVERT(rc, tinfo, &darray); - dinfo = (pmix_info_t*)darray.array; - dninfo = darray.size; - PMIX_INFO_LIST_RELEASE(tinfo); - - /* Spawn the daemons */ - printf("Debugger: spawning %s\n", debugger[0].cmd); - rc = PMIx_Spawn(dinfo, dninfo, debugger, 1, daemon_nspace); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - PMIX_APP_FREE(debugger, 1); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Debugger daemons failed to launch with error: %s\n", - PMIx_Error_string(rc)); - return rc; - } - - /* Register callback for when the daemons terminate */ - myrel->nspace = strdup(daemon_nspace); - dninfo = 2; - n = 0; - PMIX_INFO_CREATE(dinfo, dninfo); - PMIX_INFO_LOAD(&dinfo[n], PMIX_EVENT_RETURN_OBJECT, myrel, PMIX_POINTER); - n++; - /* Only call me back when the daemon job terminates */ - PMIX_LOAD_PROCID(&proc, daemon_nspace, PMIX_RANK_WILDCARD); - PMIX_INFO_LOAD(&dinfo[n], PMIX_EVENT_AFFECTED_PROC, &proc, PMIX_PROC); - /* Track that we need both jobs to terminate */ - myrel->lock.count++; - - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(&code, 1, dinfo, dninfo, release_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - printf("Debugger: Registered for daemon termination on nspace %s\n", daemon_nspace); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_INFO_FREE(dinfo, 2); - - return rc; -} - -int main(int argc, char **argv) -{ - pmix_info_t *info; - void *tinfo; - size_t ninfo; - pmix_status_t rc; - mylock_t mylock; - myrel_t myrel, ready_rel; - pid_t pid; - pmix_data_array_t darray; - - pid = getpid(); - - /* Process any arguments we were given */ - if (0 != parse_command_line(argc, argv)) { - exit(1); - } - - /* Use the system connection first, if available */ - PMIX_INFO_LIST_START(tinfo); - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_CONNECT_SYSTEM_FIRST, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_LAUNCHER, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_IOF_LOCAL_OUTPUT, NULL, PMIX_BOOL); - PMIX_INFO_LIST_CONVERT(rc, tinfo, &darray); - info = (pmix_info_t*)darray.array; - ninfo = darray.size; - PMIX_INFO_LIST_RELEASE(tinfo); - /* Init as a tool */ - if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { - fprintf(stderr, "PMIx_tool_init failed: %s(%d)\n", PMIx_Error_string(rc), rc); - exit(rc); - } - PMIX_DATA_ARRAY_DESTRUCT(&darray); - - printf("Debugger ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - - /* Construct my own release first */ - DEBUG_CONSTRUCT_LOCK(&myrel.lock); - - /* Register a default event handler */ - ninfo = 1; - PMIX_INFO_CREATE(info, ninfo); - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(NULL, 0, info, ninfo, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_INFO_FREE(info, ninfo); - - if (0 != query_capabilities()) { - goto done; - } - - if (PMIX_SUCCESS != spawn_app()) { - goto done; - } - - /* Register for notification application is ready for debug - * (paused on exec or paused in PMIx_Init) */ - DEBUG_CONSTRUCT_LOCK(&ready_rel.lock); - register_app_notification(PMIX_DEBUG_WAITING_FOR_NOTIFY, &ready_rel, app_ready); - - /* Register callback for when the app terminates */ - register_app_notification(PMIX_EVENT_JOB_END, &myrel, release_fn); - - /* track number of jobs to terminate */ - myrel.lock.count++; - - /* Wait for all app tasks to be paused, ready for debug */ - DEBUG_WAIT_THREAD(&ready_rel.lock); - DEBUG_DESTRUCT_LOCK(&ready_rel.lock); - - if (0 != query_proctable()) { - goto done; - } - - /* now launch the debugger daemons */ - if (PMIX_SUCCESS != (rc = spawn_debugger(client_nspace, &myrel))) { - fprintf(stderr, "Debugger daemons failed to spawn: %s\n", PMIx_Error_string(rc)); - goto done; - } - - /* This is where a debugger tool would wait until the debug operation is complete */ - DEBUG_WAIT_THREAD(&myrel.lock); - -done: - DEBUG_DESTRUCT_LOCK(&myrel.lock); - PMIx_tool_finalize(); - return (rc); -} diff --git a/examples/debugger/direct.c b/examples/debugger/direct.c deleted file mode 100644 index 391b4afe69..0000000000 --- a/examples/debugger/direct.c +++ /dev/null @@ -1,946 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * Copyright (c) 2021 IBM Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include - -#include "debugger.h" -#include - -static pmix_proc_t myproc; -static char client_nspace[PMIX_MAX_NSLEN + 1]; -static char daemon_nspace[PMIX_MAX_NSLEN + 1]; -static pmix_proc_t *connected_servers; - -static bool stop_in_init = true; -static bool stop_on_exec = false; -static bool stop_in_init_supported = false; -static bool stop_on_exec_supported = false; -static bool cospawn_supported = false; -static bool cospawn_reqd = false; -static bool dbactive = true; -static int app_npernode = 2; // > 0. Default 2 ppn -static int app_np - = 2; // <= 0 means use default from prte. Default to single node. Must be multiple of npernode -static int daemon_colocate_per_proc = 0; // 0 = disable -static int daemon_colocate_per_node = 0; // 0 = disable - -/* this is a callback function for the PMIx_Query - * API. The query will callback with a status indicating - * if the request could be fully satisfied, partially - * satisfied, or completely failed. The info parameter - * contains an array of the returned data, with the - * info->key field being the key that was provided in - * the query call. Thus, you can correlate the returned - * data in the info->value field to the requested key. - * - * Once we have dealt with the returned data, we must - * call the release_fn so that the PMIx library can - * cleanup */ -static void cbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, void *cbdata, - pmix_release_cbfunc_t release_fn, void *release_cbdata) -{ - myquery_data_t *mq = (myquery_data_t *) cbdata; - size_t n; - - printf("Called %s as callback for PMIx_Query\n", __FUNCTION__); - mq->status = status; - /* save the returned info - the PMIx library "owns" it - * and will release it and perform other cleanup actions - * when release_fn is called */ - if (0 < ninfo) { - PMIX_INFO_CREATE(mq->info, ninfo); - mq->ninfo = ninfo; - for (n = 0; n < ninfo; n++) { - printf("Key %s Type %s(%d)\n", info[n].key, PMIx_Data_type_string(info[n].value.type), - info[n].value.type); - PMIX_INFO_XFER(&mq->info[n], &info[n]); - } - } - - /* let the library release the data and cleanup from - * the operation */ - if (NULL != release_fn) { - release_fn(release_cbdata); - } - - /* release the block */ - DEBUG_WAKEUP_THREAD(&mq->lock); -} - -/* this is the event notification function we pass down below - * when registering for general events - i.e.,, the default - * handler. We don't technically need to register one, but it - * is usually good practice to catch any events that occur */ -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - size_t n; - - printf("%s called as callback for event=%s\n", __FUNCTION__, PMIx_Error_string(status)); - lock = NULL; - if (PMIX_ERR_UNREACH == status || PMIX_ERR_LOST_CONNECTION == status) { - /* we should always have info returned to us - if not, there is - * nothing we can do */ - if (NULL != info) { - for (n = 0; n < ninfo; n++) { - if (PMIX_CHECK_KEY(&info[n], PMIX_EVENT_RETURN_OBJECT)) { - lock = (myrel_t *) info[n].value.data.ptr; - } - } - } - - /* If a pointer to a lock was passed then save status and - * release the lock */ - if (NULL != lock) { - lock->exit_code = status; - lock->exit_code_given = true; - DEBUG_WAKEUP_THREAD(&lock->lock); - } - } - - /* this example doesn't do anything with default events */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } -} - -/* this is an event notification function that we explicitly request - * be called when the PMIX_EVENT_JOB_END notification is issued. - * We could catch it in the general event notification function and test - * the status to see if it was "job terminated", but it often is simpler - * to declare a use-specific notification callback point. In this case, - * we are asking to know whenever a job terminates, and we will then - * know we can exit */ -static void release_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - bool found; - int exit_code; - size_t n; - pmix_proc_t *affected = NULL; - - printf("%s called as callback for event=%s source=%s:%d\n", __FUNCTION__, - PMIx_Error_string(status), source->nspace, source->rank); - /* find the return object */ - lock = NULL; - found = false; - for (n = 0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { - lock = (myrel_t *) info[n].value.data.ptr; - /* not every RM will provide an exit code, but check if one was given */ - } else if (0 == strncmp(info[n].key, PMIX_EXIT_CODE, PMIX_MAX_KEYLEN)) { - exit_code = info[n].value.data.integer; - found = true; - } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { - affected = info[n].value.data.proc; - } - } - /* if the object wasn't returned, then that is an error */ - if (NULL == lock) { - fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); - /* let the event handler progress */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - return; - } - - printf("DEBUGGER NOTIFIED THAT JOB %s TERMINATED \n", - (NULL == affected) ? "NULL" : affected->nspace); - if (found) { - if (!lock->exit_code_given) { - lock->exit_code = exit_code; - lock->exit_code_given = true; - } - } - - /* A system PMIx daemon may have kept track of notifications for - * termination of previous application runs, and may send those - * notifications to this process, which has registered a callback for - * application terminations. Those notifcations need to be ignored. - * - * Therefore, in the co-spawn case, we expect one termination notification, - * which is for the combined application/daemon namespace when the daemon - * terminates. - * - * In the separate spawn case, we expect two terminations, the application - * and the daemon. */ - if ((0 == strcmp(daemon_nspace, source->nspace)) - || (0 == strcmp(client_nspace, source->nspace))) { - lock->lock.count--; - if (0 == lock->lock.count) { - DEBUG_WAKEUP_THREAD(&lock->lock); - } - } - - /* tell the event handler state machine that we are the last step */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - return; -} - -/* event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - printf("%s called to register callback\n", __FUNCTION__); - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - lock->status = status; - DEBUG_WAKEUP_THREAD(lock); -} - -static void debug_ready_cb(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], - size_t ninfo, pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, - void *cbdata) -{ - size_t n; - printf("%s called for event notification %s from nspace %s\n", __FUNCTION__, - PMIx_Error_string(status), source->nspace); - for (n = 0; n < ninfo; n++) { - if (PMIX_CHECK_KEY(&info[n], PMIX_NSPACE)) { - printf("Got %s notification for target nspace %s\n", - PMIx_Error_string(status), info[n].value.data.string); - break; - } - } - dbactive = false; - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } -} - -/* Register for a PMIX_READY_FOR_DEBUG event issued by the system server then - * wait for that event to be issued or until the timeout limit is reached */ -static int wait_for_ready(myrel_t *myrel) -{ - void *dirs; - pmix_info_t *info; - pmix_status_t rc; - size_t ninfo; - int n; - mylock_t mylock; - pmix_status_t code = PMIX_READY_FOR_DEBUG; - pmix_data_array_t darray; - - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - /* Register for PMIX_READY_FOR_DEBUG event. This is sent from system server once all - * application processes are ready for debug. */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_AFFECTED_PROC, &connected_servers[0], PMIX_PROC); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - PMIx_Register_event_handler(&code, 1, info, ninfo, debug_ready_cb, - evhandler_reg_callbk, (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - printf("Debugger: Registered for READY_FOR_DEBUG event for nspace %s\n", - connected_servers[0].nspace); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Registration for PMIX_READY_FOR_DEBUG failed: %s\n", - PMIx_Error_string(rc)); - return -1; - } - - n = 0; - printf("Waiting for PMIX_READY_FOR_DEBUG event to be posted\n"); - while (dbactive) { - struct timespec tp = {0, 500000000}; - nanosleep(&tp, NULL); - ++n; - if (n > 10) { - fprintf(stderr, "Error: Target not ready for debug by timeout limit\n"); - return -1; - } - } - return 0; -} - -static int cospawn_launch(myrel_t *myrel) -{ - void *dirs; - pmix_info_t *info; - pmix_app_t *app; - size_t ninfo; - int code = PMIX_EVENT_JOB_END; - pmix_status_t rc; - int n; - pmix_data_array_t data_array; - mylock_t mylock; - pmix_proc_t daemon_proc; - pmix_rank_t all_ranks = PMIX_RANK_WILDCARD; - char cwd[_POSIX_PATH_MAX + 1]; - char map_str[128]; - pmix_data_array_t darray, daemon_darray; - - printf("Calling %s to spawn application processes and debugger daemon\n", __FUNCTION__); - /* Provide job-level directives so the apps do what the user requested. - * These attributes apply to both the application and daemon processes. */ - PMIX_INFO_LIST_START(dirs); - /* Forward stdout to this process */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_FWD_STDOUT, NULL, PMIX_BOOL); - /* Forward stderr to this process */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_FWD_STDERR, NULL, PMIX_BOOL); - /* Process that is spawning processes is a tool process */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_REQUESTOR_IS_TOOL, NULL, PMIX_BOOL); - /* Map spawned processes by slot */ - sprintf(map_str, "ppr:%d:node", app_npernode); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_MAPBY, map_str, PMIX_STRING); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - - /* The application and daemon processes are being spawned together - * so create 2 pmix_app_t structures. The first is parameters for - * the application and the second is parameters for the daemon. */ - PMIX_APP_CREATE(app, 2); - /* setup the executable */ - app[0].cmd = strdup("./hello"); - /* Set up the executable command arguments, For the co-spawn case - * the daemon needs to know the namespace of the tool process - * in addition to setting the application (argv[0]) */ - PMIX_ARGV_APPEND(rc, app->argv, app[0].cmd); - app[0].env = NULL; - /* Set the working directory */ - getcwd(cwd, _POSIX_PATH_MAX); - app[0].cwd = strdup(cwd); - /* Two application processes */ - if (app_np > 0) { - app[0].maxprocs = app_np; - } - - if (stop_on_exec || stop_in_init) { - app[0].ninfo = 1; - PMIX_INFO_CREATE(app[0].info, app[0].ninfo); - n = 0; - if (stop_on_exec) { - /* Stop application at first instruction */ - PMIX_INFO_LOAD(&app[n].info[0], PMIX_DEBUG_STOP_ON_EXEC, NULL, PMIX_BOOL); - } else if (stop_in_init) { - /* Stop application in PMIx_Init */ - PMIX_INFO_LOAD(&app[n].info[0], PMIX_DEBUG_STOP_IN_INIT, NULL, PMIX_BOOL); - } - } else { - app[0].ninfo = 0; - app[0].info = NULL; - } - - /* Set up the daemon executable */ - app[1].cmd = strdup("./daemon"); - /* Set up daemon arguments, in this case just the executable (argv[0]) */ - PMIX_ARGV_APPEND(rc, app[1].argv, app[1].cmd); - PMIX_ARGV_APPEND(rc, app[1].argv, myproc.nspace); - app[1].env = NULL; - /* Set the working directory */ - app[1].cwd = strdup(cwd); - /* One daemon process */ - app[1].maxprocs = app_np / app_npernode; - /* Provide directives so the daemons go where we want, and - * let the RM know these are debugger daemons */ - PMIX_INFO_LIST_START(dirs); - /* This process is a debugger daemon */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUGGER_DAEMONS, NULL, PMIX_BOOL); - /* Notify this process when debugger job completes */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_NOTIFY_COMPLETION, NULL, PMIX_BOOL); - PMIX_INFO_LIST_CONVERT(rc, dirs, &daemon_darray); - PMIX_INFO_LIST_RELEASE(dirs); - app[1].info = daemon_darray.array; - app[1].ninfo = daemon_darray.size; - - /* Spawn the job - the function will return when the app - * has been launched */ - rc = PMIx_Spawn(info, ninfo, app, 2, client_nspace); - myrel->lock.count = 1; // app[0].maxprocs + app[1].maxprocs; - myrel->nspace = strdup(client_nspace); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - PMIX_DATA_ARRAY_DESTRUCT(&daemon_darray); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Application failed to launch with error: %s(%d)\n", - PMIx_Error_string(rc), rc); - return rc; - } - /* Daemon and application are in same namespace */ - printf("Application namespace is %s\n", client_nspace); - /* Register the termination event handler here with the intent to - * filter out non-daemon notifcations . - * Since the daemon is in the same namespace as the application, it's - * rank is assigned one higher than the last application process. In - * this example,the daemon's rank is 2. - */ - strcpy(daemon_proc.nspace, client_nspace); - strcpy(daemon_nspace, client_nspace); - daemon_proc.rank = 2; - data_array.size = 1; - data_array.type = PMIX_PROC; - data_array.array = &daemon_proc; - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_CUSTOM_RANGE, &data_array, PMIX_DATA_ARRAY); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_RETURN_OBJECT, myrel, PMIX_POINTER); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(&code, 1, info, ninfo, release_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - return rc; -} - -static pmix_status_t spawn_debugger(char *appspace, myrel_t *myrel) -{ - void *dirs; - pmix_status_t rc; - pmix_info_t *dinfo; - pmix_app_t *debugger; - size_t dninfo; - char cwd[_POSIX_PATH_MAX]; - mylock_t mylock; - pmix_status_t code = PMIX_EVENT_JOB_END; - pmix_proc_t proc; - void *tinfo; - pmix_data_array_t darray; - - printf("Calling %s to spawn the debugger daemon\n", __FUNCTION__); - /* Setup the debugger spawn parameters*/ - PMIX_APP_CREATE(debugger, 1); - debugger[0].cmd = strdup("./daemon"); - /* Set up debugger command arguments, in this example, just argv[0] */ - PMIX_ARGV_APPEND(rc, debugger[0].argv, "./daemon"); - /* No environment variables */ - debugger[0].env = NULL; - /* Set the working directory to our current directory */ - getcwd(cwd, _POSIX_PATH_MAX); - debugger[0].cwd = strdup(cwd); - /* Spawn daemon processes - 1 per node if not colocating */ - if (daemon_colocate_per_proc < 0 && daemon_colocate_per_node < 0) { - debugger[0].maxprocs = app_np / app_npernode; - } - /* No spawn attributes set here, all are set in dinfo array */ - debugger[0].ninfo = 0; - debugger[0].info = NULL; - /* Set attributes for debugger daemon launch and let the RM know these are - * debugger daemons */ - PMIX_INFO_LIST_START(tinfo); - /* Indicate a debugger daemon is being spawned */ - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_DEBUGGER_DAEMONS, NULL, PMIX_BOOL); - /* Set the name of the namespace being debugged */ - PMIX_LOAD_PROCID(&proc, appspace, PMIX_RANK_WILDCARD); - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_DEBUG_TARGET, &proc, PMIX_PROC); - /* Number of daemons per node in the application allocation */ - if (daemon_colocate_per_node > 0) { - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_DEBUG_DAEMONS_PER_NODE, &daemon_colocate_per_node, - PMIX_UINT16); - } - /* Number of daemons per proc in the application allocation */ - else if (daemon_colocate_per_proc > 0) { - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_DEBUG_DAEMONS_PER_PROC, &daemon_colocate_per_proc, - PMIX_UINT16); - } - /* Launch one daemon per node -- only needed if co-launch is not supported */ - else { - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_MAPBY, "ppr:1:node", PMIX_STRING); - } - /* Notify this process when the job completes */ - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_NOTIFY_COMPLETION, NULL, PMIX_BOOL); - /* Forward stdout to this process */ - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_FWD_STDOUT, NULL, PMIX_BOOL); - /* Forward stderr to this process */ - PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_FWD_STDERR, NULL, PMIX_BOOL); - - PMIX_INFO_LIST_CONVERT(rc, tinfo, &darray); - dinfo = (pmix_info_t*)darray.array; - dninfo = darray.size; - PMIX_INFO_LIST_RELEASE(tinfo); - - /* Spawn the daemons */ - printf("Debugger: spawning %s\n", debugger[0].cmd); - rc = PMIx_Spawn(dinfo, dninfo, debugger, 1, daemon_nspace); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - PMIX_APP_FREE(debugger, 1); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Debugger daemons failed to launch with error: %s\n", - PMIx_Error_string(rc)); - return rc; - } - /* Cleanup */ - - /* Register callback for when this job terminates */ - myrel->nspace = strdup(daemon_nspace); - PMIX_LOAD_PROCID(&proc, daemon_nspace, PMIX_RANK_WILDCARD); - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_RETURN_OBJECT, myrel, PMIX_POINTER); - /* Only call me back when this specific job terminates */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_AFFECTED_PROC, &proc, PMIX_PROC); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - dinfo = darray.array; - dninfo = darray.size; - /* Track that we need both jobs to terminate */ - myrel->lock.count++; - - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(&code, 1, dinfo, dninfo, release_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - printf("Debugger: Registered for termination on nspace %s\n", daemon_nspace); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - - return rc; -} - -int main(int argc, char **argv) -{ - void *dirs; - pmix_status_t rc; - pmix_info_t *info; - pmix_app_t *app; - size_t ninfo, napps; - int i, n; - pmix_query_t *query; - size_t nq, num_servers; - myquery_data_t myquery_data; - pmix_status_t code = PMIX_EVENT_JOB_END; - mylock_t mylock; - myrel_t myrel; - pid_t pid; - pmix_proc_t proc; - pmix_data_array_t darray; - char cwd[_POSIX_PATH_MAX]; - char map_str[128]; - pmix_rank_t all_ranks = PMIX_RANK_WILDCARD; - - pid = getpid(); - - /* Process any arguments we were given */ - for (i = 1; i < argc; i++) { - if (0 == strcmp(argv[i], "-h") || 0 == strcmp(argv[i], "--help")) { - /* print the usage message and exit */ - printf("Direct Launch Example\n"); - printf("$ prte --daemonize\n"); - printf("$ %s [OPTIONS]\n", argv[0]); - printf("\n"); - printf(" -c | --cospawn Test Cospawn\n"); - printf(" --stop-in-init Stop application in PMIx_Init (Default)\n"); - printf(" --stop-on-exec Stop application on exec\n"); - printf(" --app-npernode Number of processes per node (Default: 2)\n"); - printf(" --app-np Number of total processes. Must be multiple of " - "--app-npernode (Default: 2)\n"); - printf(" --daemon-colocate-per-proc Test Colaunch with Daemons Per Process (Default: " - "0 = off)\n"); - printf(" --daemon-colocate-per-node Test Colaunch with Daemons Per Node (Default: 0 = " - "off)\n"); - exit(0); - } else if (0 == strcmp(argv[i], "-c") || 0 == strcmp(argv[i], "--cospawn")) { - cospawn_reqd = true; - break; - } else if (0 == strcmp(argv[i], "--stop-in-init")) { - stop_in_init = true; - stop_on_exec = false; - break; - } else if (0 == strcmp(argv[i], "--stop-on-exec")) { - stop_in_init = false; - stop_on_exec = true; - break; - } else if (0 == strcmp(argv[i], "--app-npernode")) { - ++i; - if (i >= argc && isdigit(argv[i][0])) { - fprintf(stderr, "Error: --app-npernode requires a positive integer argument\n"); - exit(1); - } - app_npernode = atoi(argv[i]); - if (app_npernode <= 0) { - fprintf(stderr, "Error: --app-npernode requires a positive integer argument\n"); - exit(1); - } - } else if (0 == strcmp(argv[i], "--app-np")) { - ++i; - if (i >= argc && isdigit(argv[i][0])) { - fprintf(stderr, "Error: --app-np requires a positive integer argument\n"); - exit(1); - } - app_np = atoi(argv[i]); - if (app_np < 0) { - fprintf(stderr, "Error: --app-np requires a positive integer argument\n"); - exit(1); - } - } else if (0 == strcmp(argv[i], "--daemon-colocate-per-proc")) { - ++i; - if (i >= argc && isdigit(argv[i][0])) { - fprintf(stderr, - "Error: --daemon-colocate-per-proc requires a positive integer argument\n"); - exit(1); - } - daemon_colocate_per_proc = atoi(argv[i]); - if (daemon_colocate_per_proc < 0) { - fprintf(stderr, - "Error: --daemon-colocate-per-proc requires a positive integer argument\n"); - exit(1); - } - } else if (0 == strcmp(argv[i], "--daemon-colocate-per-node")) { - ++i; - if (i >= argc && isdigit(argv[i][0])) { - fprintf(stderr, - "Error: --daemon-colocate-per-node requires a positive integer argument\n"); - exit(1); - } - daemon_colocate_per_node = atoi(argv[i]); - if (daemon_colocate_per_node < 0) { - fprintf(stderr, - "Error: --daemon-colocate-per-node requires a positive integer argument\n"); - exit(1); - } - } - } - - if (daemon_colocate_per_node > 0 && daemon_colocate_per_proc > 0) { - fprintf(stderr, "Error: Both --daemon-colocate-per-node and --daemon-colocate-per-node " - "options present, but are exclusive\n"); - exit(1); - } - if (cospawn_reqd && (daemon_colocate_per_node > 0 || daemon_colocate_per_proc > 0)) { - fprintf(stderr, "Error: Cospawn and Colaunch are not supported at the same time\n"); - exit(1); - } - if (app_np < app_npernode || app_np % app_npernode != 0) { - fprintf(stderr, "Error: --app-np must be a multiple of --app-npernode\n"); - exit(1); - } - - PMIX_INFO_LIST_START(dirs); - - /* Use the system connection first, if available */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_CONNECT_SYSTEM_FIRST, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_LAUNCHER, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_IOF_LOCAL_OUTPUT, NULL, PMIX_BOOL); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - /* Init as a tool */ - if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { - fprintf(stderr, "PMIx_tool_init failed: %s(%d)\n", PMIx_Error_string(rc), rc); - exit(rc); - } - PMIX_DATA_ARRAY_DESTRUCT(&darray); - - printf("Debugger ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - - /* We need to know the server we connected to so we can register for - * PMIX_READY_FOR_DEBUG notifications from that server when target processes - * are ready for debug. There should be only one server */ - if (PMIX_SUCCESS != PMIx_tool_get_servers(&connected_servers, &num_servers)) { - fprintf(stderr, "Unable to get connected servers: %s\n", - PMIx_Error_string(rc)); - exit(1); - } - printf("Connected system server is %s:%d\n", connected_servers[0].nspace, - connected_servers[0].rank); - /* Construct my own release first */ - DEBUG_CONSTRUCT_LOCK(&myrel.lock); - - /* Register a default event handler */ - ninfo = 1; - PMIX_INFO_CREATE(info, ninfo); - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(NULL, 0, info, ninfo, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_INFO_FREE(info, ninfo); - - /* This is an initial launch - we need to launch the application - * plus the debugger daemons, letting the RM know we are debugging - * so that it will "pause" the app procs until we are ready. First - * we need to know if this RM supports co-spawning of daemons with - * the application, or if we need to launch the daemons as a separate - * spawn command. The former is faster and more scalable, but not - * every RM may support it. We also need to ask for debug support - * so we know if the RM can stop-on-exec, or only supports stop-in-init */ - nq = 1; - PMIX_QUERY_CREATE(query, nq); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_SPAWN_SUPPORT); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_DEBUG_SUPPORT); - /* setup the caddy to retrieve the data */ - DEBUG_CONSTRUCT_LOCK(&myquery_data.lock); - myquery_data.info = NULL; - myquery_data.ninfo = 0; - /* execute the query */ - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void *) &myquery_data))) { - fprintf(stderr, "PMIx_Query_info failed: %d\n", rc); - goto done; - } - DEBUG_WAIT_THREAD(&myquery_data.lock); - DEBUG_DESTRUCT_LOCK(&myquery_data.lock); - - /* We should have received back two info structs, one containing - * a comma-delimited list of PMIx spawn attributes the RM supports, - * and the other containing a comma-delimited list of PMIx debugger - * attributes it supports */ - if (2 != myquery_data.ninfo) { - /* this is an error */ - fprintf(stderr, "PMIx Query returned an incorrect number of results: %lu\n", - myquery_data.ninfo); - PMIX_INFO_FREE(myquery_data.info, myquery_data.ninfo); - goto done; - } - - /* We would like to co-spawn the debugger daemons with the app, but - * let's first check to see if this RM supports that operation by - * looking for the PMIX_COSPAWN_APP attribute in the spawn support - * - * We will also check to see if "stop_on_exec" is supported. Few RMs - * do so, which is why we have to check. The reference server sadly is - * not one of them, so we shouldn't find it here - * - * Note that the PMIx reference server always returns the query results - * in the same order as the query keys. However, this is not guaranteed, - * so we should search the returned info structures to find the desired key - */ - for (n = 0; n < myquery_data.ninfo; n++) { - if (0 == strcmp(myquery_data.info[n].key, PMIX_QUERY_SPAWN_SUPPORT)) { - /* See if the cospawn attribute is included */ - if (NULL != strstr(myquery_data.info[n].value.data.string, PMIX_COSPAWN_APP)) { - cospawn_supported = true; - } - } else if (0 == strcmp(myquery_data.info[n].key, PMIX_QUERY_DEBUG_SUPPORT)) { - /* See if stop on exec is included */ - if (NULL != strstr(myquery_data.info[n].value.data.string, PMIX_DEBUG_STOP_ON_EXEC)) { - stop_on_exec_supported = true; - } - /* See if stop in init is included */ - if (NULL != strstr(myquery_data.info[n].value.data.string, PMIX_DEBUG_STOP_IN_INIT)) { - stop_in_init_supported = true; - } - } - } - - if (!stop_on_exec_supported && stop_on_exec) { - fprintf(stderr, "Error: Stop-on-exec requested but the RM does not support it\n"); - goto done; - } - - if (!stop_in_init_supported && stop_in_init) { - fprintf(stderr, "Error: Stop-in-init requested but the RM does not support it\n"); - goto done; - } - - if (!cospawn_supported && cospawn_reqd) { - fprintf(stderr, "Error: Cospawn requested but the RM does not support it\n"); - goto done; - } - - /* If cospawn is available and they requested it, then we launch both - * the app and the debugger daemons at the same time */ - if (cospawn_supported && cospawn_reqd) { - cospawn_launch(&myrel); - } else { - /* We must do these as separate launches, so do the app first */ - napps = 1; - PMIX_APP_CREATE(app, napps); - /* Setup the executable */ - app[0].cmd = strdup("hello"); - PMIX_ARGV_APPEND(rc, app[0].argv, "./hello"); - getcwd(cwd, _POSIX_PATH_MAX); // point us to our current directory - app[0].cwd = strdup(cwd); - if (app_np > 0) { - app[0].maxprocs = app_np; - } - app[0].ninfo = 0; - /* Provide job-level directives so the apps do what the user requested */ - PMIX_INFO_LIST_START(dirs); - if (stop_on_exec) { - // procs are to stop on first instruction - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUG_STOP_ON_EXEC, NULL, PMIX_BOOL); - } else { - // procs are to pause in PMIx_Init for debugger attach - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUG_STOP_IN_INIT, NULL, PMIX_BOOL); - } - sprintf(map_str, "ppr:%d:node", app_npernode); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_MAPBY, map_str, PMIX_STRING); // 1 per node - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_FWD_STDOUT, NULL, PMIX_BOOL); // forward stdout to me - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_FWD_STDERR, NULL, PMIX_BOOL); // forward stderr to me - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_NOTIFY_COMPLETION, NULL, - PMIX_BOOL); // notify us when the job completes - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - - /* Spawn the job - the function will return when the app - * has been launched */ - printf("Debugger: spawning %s\n", app[0].cmd); - if (PMIX_SUCCESS != (rc = PMIx_Spawn(info, ninfo, app, napps, client_nspace))) { - fprintf(stderr, "Application failed to launch with error: %s(%d)\n", - PMIx_Error_string(rc), rc); - goto done; - } - PMIX_DATA_ARRAY_DESTRUCT(&darray); - PMIX_APP_FREE(app, napps); - - /* Only call me back when this specific job terminates */ - PMIX_LOAD_PROCID(&proc, client_nspace, PMIX_RANK_WILDCARD); - /* Register callback for when the app terminates */ - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_AFFECTED_PROC, &proc, PMIX_PROC); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - /* track number of jobs to terminate */ - myrel.lock.count++; - - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(&code, 1, info, ninfo, release_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - printf("Debugger: Registered for termination on nspace %s\n", client_nspace); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Registration for PMIX_EVENT_JOB_END failed: %s\n", - PMIx_Error_string(rc)); - goto done; - } - - rc = wait_for_ready(&myrel); - if (0 != rc) { - goto done; - } - /* Get the proctable for this nspace */ - PMIX_QUERY_CREATE(query, 1); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_PROC_TABLE); - query[0].nqual = 1; - PMIX_INFO_CREATE(query->qualifiers, query[0].nqual); - PMIX_INFO_LOAD(&query->qualifiers[0], PMIX_NSPACE, client_nspace, PMIX_STRING); - - DEBUG_CONSTRUCT_LOCK(&myquery_data.lock); - myquery_data.info = NULL; - myquery_data.ninfo = 0; - - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, 1, cbfunc, (void *) &myquery_data))) { - fprintf(stderr, "Debugger[%s:%d] Proctable query failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - /* Wait to get a response */ - DEBUG_WAIT_THREAD(&myquery_data.lock); - DEBUG_DESTRUCT_LOCK(&myquery_data.lock); - /* we should have gotten a response */ - if (PMIX_SUCCESS != myquery_data.status) { - fprintf(stderr, "Debugger[%s:%d] Proctable query failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(myquery_data.status)); - goto done; - } - /* There should have been data */ - if (NULL == myquery_data.info || 0 == myquery_data.ninfo) { - fprintf(stderr, "Debugger[%s:%d] Proctable query return no results\n", myproc.nspace, - myproc.rank); - goto done; - } - /* the query should have returned a data_array */ - if (PMIX_DATA_ARRAY != myquery_data.info[0].value.type) { - fprintf(stderr, "Debugger[%s:%d] Query returned incorrect data type: %s(%d)\n", - myproc.nspace, myproc.rank, - PMIx_Data_type_string(myquery_data.info[0].value.type), - (int) myquery_data.info[0].value.type); - return -1; - } - if (NULL == myquery_data.info[0].value.data.darray->array) { - fprintf(stderr, "Debugger[%s:%d] Query returned no proctable info\n", myproc.nspace, - myproc.rank); - goto done; - } - /* The data array consists of a struct: - * size_t size; - * void* array; - * - * In this case, the array is composed of pmix_proc_info_t structs: - * pmix_proc_t proc; // contains the nspace,rank of this proc - * char* hostname; - * char* executable_name; - * pid_t pid; - * int exit_code; - * pmix_proc_state_t state; - */ - printf("Received proc table for %d procs\n", - (int) myquery_data.info[0].value.data.darray->size); - /* now launch the debugger daemons */ - if (PMIX_SUCCESS != (rc = spawn_debugger(client_nspace, &myrel))) { - fprintf(stderr, "Debugger daemons failed to spawn: %s\n", PMIx_Error_string(rc)); - goto done; - } - } - - /* This is where a debugger tool would wait until the debug operation is complete */ - DEBUG_WAIT_THREAD(&myrel.lock); - -done: - DEBUG_DESTRUCT_LOCK(&myrel.lock); - PMIx_tool_finalize(); - return (rc); -} diff --git a/examples/debugger/hello.c b/examples/debugger/hello.c deleted file mode 100644 index b9b0954a78..0000000000 --- a/examples/debugger/hello.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include - -static pmix_proc_t myproc; - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pid_t pid; - char hostname[1024]; - pmix_value_t *val; - uint16_t localrank; - int spin = 0; - - pid = getpid(); - gethostname(hostname, 1024); - - if (1 < argc) { - spin = strtoul(argv[1], NULL, 10); - } - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes any - * debugger flag instructing us to stop-in-init. If such a directive - * is included, then the process will be stopped in this call until - * the "debugger release" notification arrives */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %s\n", myproc.nspace, myproc.rank, - PMIx_Error_string(rc)); - exit(0); - } - /* get our local rank */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_LOCAL_RANK, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get local rank failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - goto done; - } - localrank = val->data.uint16; - PMIX_VALUE_RELEASE(val); - - printf("Client ns %s rank %d pid %lu: Running on host %s localrank %d\n", myproc.nspace, - myproc.rank, (unsigned long) pid, hostname, (int) localrank); - - if (0 < spin) { - sleep(spin); - } - -done: - /* finalize us */ - printf("Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - } else { - printf("Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, - myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/debugger/indirect-multi.c b/examples/debugger/indirect-multi.c deleted file mode 100644 index 4c37573d69..0000000000 --- a/examples/debugger/indirect-multi.c +++ /dev/null @@ -1,559 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * Copyright (c) 2021 IBM Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "debugger.h" -#include - -static pmix_proc_t myproc; -static volatile bool ilactive = true; -static volatile bool dbactive = true; -static volatile bool regpending = true; -static volatile char *appnspace = NULL; -static pmix_nspace_t clientspace; -static int daemon_colocate_per_proc = 0; -static int daemon_colocate_per_node = 0; -static int num_nodes = 1; -static char *hostfile = NULL; - - -/* this is the event notification function we pass down below - * when registering for general events - i.e.,, the default - * handler. We don't technically need to register one, but it - * is usually good practice to catch any events that occur */ -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - printf("Default event handler called with status %s\n", PMIx_Error_string(status)); - - /* this example doesn't do anything with default events */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - ilactive = false; - printf("\tComplete\n"); -} - -/* this is the event notification function we pass down below - * when registering for LOST_CONNECTION, thereby indicating - * that the intermediate launcher we started has terminated */ -static void terminate_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - printf("%s called with status %s\n", __FUNCTION__, PMIx_Error_string(status)); - /* this example doesn't do anything further */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - ilactive = false; -} -static void dbgr_complete_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - printf("%s called with status %s\n", __FUNCTION__, PMIx_Error_string(status)); - /* this example doesn't do anything further */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - ilactive = false; -} - - -/* event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - printf("%s called with status %s\n", __FUNCTION__, PMIx_Error_string(status)); - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d event handler registration failed with status %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - lock->status = status; - regpending = false; - DEBUG_WAKEUP_THREAD(lock); -} - -static void spawn_cbfunc(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - size_t n; - - for (n = 0; n < ninfo; n++) { - if (PMIX_CHECK_KEY(&info[n], PMIX_NSPACE)) { - appnspace = strdup(info[n].value.data.string); - printf("Got READY-FOR-DEBUG event from nspace %s@%d\n", source->nspace, - source->rank); - break; - } - } - printf("Debugger daemon job: %s\n", appnspace); - dbactive = false; - - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } -} - -#define DBGR_LOOP_LIMIT 10 - -int parse_tool_options(int argc, char **argv) -{ - char *endp; - int i = 1; - - while ((i < (argc - 1)) && (strncmp(argv[i], "--", 2) == 0)) { - if (0 == strcmp(argv[i], "--daemon-colocate-per-proc")) { - daemon_colocate_per_proc = strtol(argv[i + 1], &endp, 10); - if ('\0' != *endp) { - fprintf(stderr, "Invalid tool option parameter %s\n", argv[i + 1]); - return -1; - } - } else if (0 == strcmp(argv[i], "--daemon-colocate-per-node")) { - daemon_colocate_per_node = strtol(argv[i + 1], &endp, 10); - if ('\0' != *endp) { - fprintf(stderr, "Invalid tool option parameter %s\n", argv[i + 1]); - return -1; - } - } else if (0 == strcmp(argv[i], "--num-nodes")) { - num_nodes = strtol(argv[i + 1], &endp, 10); - if ('\0' != *endp) { - fprintf(stderr, "Invalid num-nodes value %s\n", argv[i + 1]); - return -1; - } - } else if (0 == strcmp(argv[i], "--hostfile")) { - hostfile = strdup(argv[i + 1]); - } - else { - fprintf(stderr, "Invalid tool option %s\n", argv[i]); - return -1; - } - i = i + 2; - } - if ((0 < daemon_colocate_per_node) && (0 < daemon_colocate_per_proc)) { - fprintf(stderr, "Cannot specify daemon tasks per node and daemon tasks per proc\n"); - return -1; - } - if ((NULL != hostfile) && - ((0 != daemon_colocate_per_node) || (0 != daemon_colocate_per_proc))) { - fprintf(stderr, - "hostfile and daemons per node or daemons per proc cannot be combined\n"); - return -1; - } - return i; -} - -static pmix_status_t spawn_daemons(char **dbgrs) -{ - void *dirs; - pmix_info_t *info; - size_t ninfo; - pmix_status_t rc; - pmix_app_t app; - pmix_proc_t target_proc; - pmix_data_array_t darray; - pmix_nspace_t dbnspace; - char cwd[_POSIX_PATH_MAX]; - - PMIX_APP_CONSTRUCT(&app); - app.cmd = strdup("./daemon"); - PMIX_ARGV_APPEND(rc, app.argv, "./daemon"); - getcwd(cwd, _POSIX_PATH_MAX - 1); // point us to our current directory - app.cwd = strdup(cwd); - if ((0 < daemon_colocate_per_node) || (0 < daemon_colocate_per_proc)) { - app.maxprocs = 0; - } - else { - app.maxprocs = 1; - } - PMIX_LOAD_PROCID(&target_proc, (void *) appnspace, PMIX_RANK_WILDCARD); - /* provide directives so the daemons go where we want, and - * let the RM know these are debugger daemons */ - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUGGER_DAEMONS, NULL, PMIX_BOOL); // these are debugger daemons - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUG_TARGET, &target_proc, PMIX_PROC); // the nspace being debugged - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_NOTIFY_COMPLETION, NULL, PMIX_BOOL); // notify us when the debugger job completes - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_FWD_STDOUT, NULL, PMIX_BOOL); // forward stdout to me - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_FWD_STDERR, NULL, PMIX_BOOL); // forward stderr to me - if (0 < daemon_colocate_per_proc) { - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUG_DAEMONS_PER_PROC, &daemon_colocate_per_proc, PMIX_UINT16); - } - else if (0 < daemon_colocate_per_node) { - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUG_DAEMONS_PER_NODE, &daemon_colocate_per_node, PMIX_UINT16); - } - else { - // instruct the RM to launch one copy of the daemon on each node - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_MAPBY, "ppr:1:node:oversubscribe", PMIX_STRING); - if (NULL != hostfile) { - app.maxprocs = num_nodes; - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_HOSTFILE, hostfile, PMIX_STRING); - } - } - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - - /* spawn the daemons */ - printf("Debugger: spawning %s\n", app.cmd); - rc = PMIx_Spawn(info, ninfo, &app, 1, dbnspace); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Debugger daemons failed to launch with error: %s\n", - PMIx_Error_string(rc)); - } - *dbgrs = strdup(dbnspace); - return rc; -} - -static pmix_status_t spawn_app(char *myuri, int argc, char **argv, - pmix_nspace_t clientnspace) -{ - void *jinfo; - void *linfo; - pmix_info_t *info; - size_t ninfo; - int n; - pmix_status_t rc; - pmix_rank_t rank; - pmix_app_t app; - pmix_data_array_t darray; - char cwd[_POSIX_PATH_MAX]; - - /* we are using an intermediate launcher - we will either use the - * reference server to start it or will fork/exec it ourselves, - * but either way tell it to wait after launch for directives */ - PMIX_APP_CONSTRUCT(&app); - /* setup the executable */ - app.cmd = strdup(argv[0]); - PMIX_ARGV_APPEND(rc, app.argv, argv[0]); - /* pass it the rest of the cmd line as we don't know - * how to parse it */ - for (n = 1; n < argc; n++) { - PMIX_ARGV_APPEND(rc, app.argv, argv[n]); - } - getcwd(cwd, _POSIX_PATH_MAX - 1); // point us to our current directory - app.cwd = strdup(cwd); - app.maxprocs = 1; // only start one instance of the IL - - /* tell the IL how to connect back to us */ - PMIX_SETENV(rc, PMIX_LAUNCHER_RNDZ_URI, myuri, &app.env); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Failed to set URI in app environment: %s\n", PMIx_Error_string(rc)); - PMIx_tool_finalize(); - return rc; - } - - /* provide launch directives so the launcher does what we want - * when it spawns the actual job */ - PMIX_INFO_LIST_START(jinfo); - - /* create the launch directives to tell the launcher what - * to do with the app it is going to spawn for us */ - PMIX_INFO_LIST_START(linfo); - rank = PMIX_RANK_WILDCARD; - PMIX_INFO_LIST_ADD(rc, linfo, PMIX_DEBUG_STOP_IN_INIT, NULL, PMIX_BOOL); // stop all procs in PMIx_Init - PMIX_INFO_LIST_ADD(rc, linfo, PMIX_NOTIFY_JOB_EVENTS, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, linfo, PMIX_FWD_STDOUT, NULL, PMIX_BOOL); // forward stdout to me - PMIX_INFO_LIST_ADD(rc, linfo, PMIX_FWD_STDERR, NULL, PMIX_BOOL); // forward stderr to me - PMIX_INFO_LIST_CONVERT(rc, linfo, &darray); - PMIX_INFO_LIST_ADD(rc, jinfo, PMIX_LAUNCH_DIRECTIVES, &darray, PMIX_DATA_ARRAY); - PMIX_INFO_LIST_RELEASE(linfo); - - /* convert job info to array */ - PMIX_INFO_LIST_CONVERT(rc, jinfo, &darray); - PMIX_INFO_LIST_RELEASE(jinfo); - info = (pmix_info_t *) darray.array; - ninfo = darray.size; - - /* spawn the launcher - the function will return when the launcher - * has been started. */ - printf("Spawning launcher\n"); - rc = PMIx_Spawn(info, ninfo, &app, 1, clientnspace); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Launcher %s failed to start with error: %s(%d)\n", argv[1], - PMIx_Error_string(rc), rc); - } - printf("Launcher namespace is %s\n", clientnspace); - return rc; -} - -int main(int argc, char **argv) -{ - pmix_info_t *info; - pmix_value_t *val; - char *myuri = NULL; - void *dirs; - char *requested_launcher; - char *launchers[] = {"prun", "mpirun", "mpiexec", "prterun", NULL}; - size_t ninfo; - pmix_status_t rc; - int i, launcher_idx, icount; - size_t n; - pmix_status_t code; - bool found; - pid_t pid; - mylock_t mylock; - pmix_proc_t proc; - pmix_data_array_t darray; - char *dbgrs; - - /* need to provide args */ - if (2 > argc) { - printf("Usage: %s [OPTIONS] [launcher] [app]\n", argv[0]); - printf("OPTIONS:\n"); - printf(" --daemon-colocate-per-proc Test Colaunch with Daemons Per Process (Default: " - "0 = off)\n"); - printf(" --daemon-colocate-per-node Test Colaunch with Daemons Per Node (Default: 0 = " - "off)\n"); - printf(" --hostfile Hostfile specifying where daemons will be loaded\n"); - printf(" --num-nodes Number of nodes to use in non-colaunch mode\n"); - exit(0); - } - launcher_idx = parse_tool_options(argc, argv); - if (0 > launcher_idx) { - exit(1); - } - /* check to see if we are using an intermediate launcher - we only - * support those we recognize */ - found = false; - requested_launcher = basename(argv[launcher_idx]); - for (n = 0; NULL != launchers[n]; n++) { - if (0 == strcmp(requested_launcher, launchers[n])) { - found = true; - } - } - if (!found) { - fprintf(stderr, "Wrong test, dude\n"); - exit(1); - } - - pid = getpid(); - - /* do not connect to anyone */ - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_TOOL_DO_NOT_CONNECT, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_LAUNCHER, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_IOF_LOCAL_OUTPUT, NULL, PMIX_BOOL); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = (pmix_info_t *) darray.array; - ninfo = darray.size; - - /* init as a tool */ - if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { - fprintf(stderr, "PMIx_tool_init failed: %s(%d)\n", PMIx_Error_string(rc), rc); - exit(rc); - } - PMIX_DATA_ARRAY_DESTRUCT(&darray); - - printf("Debugger ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - - /* get server URI as we will need it later */ -#ifdef PMIX_MYSERVER_URI - rc = PMIx_Get(&myproc, PMIX_MYSERVER_URI, NULL, 0, &val); -#else - rc = PMIx_Get(&myproc, PMIX_SERVER_URI, NULL, 0, &val); -#endif - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Failed to retrieve server URI: %s\n", PMIx_Error_string(rc)); - PMIx_tool_finalize(); - exit(rc); - } - myuri = strdup(val->data.string); - PMIX_VALUE_RELEASE(val); - printf("Debugger URI: %s\n", myuri); - - /* register an event handler to pickup when the IL - * we spawned dies */ - DEBUG_CONSTRUCT_LOCK(&mylock); - code = PMIX_ERR_LOST_CONNECTION; - PMIX_INFO_CREATE(info, 1); - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "LOST-CONNECTION", PMIX_STRING); - PMIx_Register_event_handler(&code, 1, info, 1, terminate_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_INFO_FREE(info, 1); - - /* register a default event handler */ - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIX_INFO_CREATE(info, 1); - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "DEFAULT", PMIX_STRING); - PMIx_Register_event_handler(NULL, 0, info, 1, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_INFO_FREE(info, 1); - - rc = spawn_app(myuri, argc - launcher_idx, &argv[launcher_idx], clientspace); - if (PMIX_SUCCESS != rc) { - goto done; - } - printf("Reconnect to IL at %s\n", clientspace); - /* set the spawned launcher as our primary server - wait for - * it to connect to us but provide a timeout so we don't hang - * waiting forever. The launcher shall connect to us prior - * to spawning the job we provided it */ - PMIX_LOAD_PROCID(&proc, clientspace, 0); - i = 2; - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_WAIT_FOR_CONNECTION, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_TIMEOUT, &i, PMIX_INT); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = (pmix_info_t*)darray.array; - ninfo = darray.size; - rc = PMIx_tool_set_server(&proc, info, ninfo); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - if (PMIX_SUCCESS != rc) { - /* connection failed */ - fprintf(stderr, "Failed to set spawned launcher as primary server: %s\n", - PMIx_Error_string(rc)); - goto done; - } - - /* register to receive the ready-for-debug event telling us the - * nspace of the child job and alerting us that things are ready - * for us to spawn the debugger daemons - this will be registered - * with the IL we started */ - printf("Registering READY-FOR-DEBUG handler\n"); - DEBUG_CONSTRUCT_LOCK(&mylock); - code = PMIX_READY_FOR_DEBUG; - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_HDLR_NAME, "READY-FOR-DEBUG", PMIX_STRING); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = (pmix_info_t*)darray.array; - ninfo = darray.size; - PMIx_Register_event_handler(&code, 1, info, ninfo, spawn_cbfunc, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - if (!ilactive) { - fprintf(stderr, "Error: Launcher not active\n"); - goto done; - } - - /* release the IL to spawn its job */ - printf("Releasing %s [%s,%d]\n", argv[launcher_idx], proc.nspace, proc.rank); - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_CUSTOM_RANGE, &proc, PMIX_PROC); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = (pmix_info_t*)darray.array; - ninfo = darray.size; - PMIx_Notify_event(PMIX_DEBUGGER_RELEASE, &myproc, PMIX_RANGE_CUSTOM, info, ninfo, NULL, NULL); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - - printf("Waiting for application launch\n"); - /* wait for the IL to have launched its application */ - icount = 0; - while (dbactive && ilactive) { - struct timespec tp = {0, 500000000}; - nanosleep(&tp, NULL); - ++icount; - if (icount > 10) { - fprintf(stderr, "Error: Failed to launch by the timeout\n"); - goto done; - } - } - if ((!ilactive) || (NULL == appnspace)) { - /* the launcher failed */ - fprintf(stderr, "Error: Launcher failed\n"); - goto done; - } - - printf("Application has launched: %s\n", (char *) appnspace); - - /* setup the debugger */ - dbgrs = NULL; - rc = spawn_daemons(&dbgrs); - printf("Debugger nspace: %s\n", dbgrs); - - /* wait for the debuggers to terminate */ - printf("Registering handler for debugger termination\n"); - DEBUG_CONSTRUCT_LOCK(&mylock); - code = PMIX_EVENT_JOB_END; - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_HDLR_NAME, "DEBUGGGER-COMPLETE", PMIX_STRING); - PMIX_LOAD_PROCID(&proc, dbgrs, PMIX_RANK_WILDCARD); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_AFFECTED_PROC, &proc, PMIX_PROC); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = (pmix_info_t*)darray.array; - ninfo = darray.size; - PMIx_Register_event_handler(&code, 1, info, ninfo, dbgr_complete_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - - /* wait for the IL to terminate */ - printf("Waiting for IL to terminate\n"); - while (ilactive) { - struct timespec tp = {0, 500000}; - nanosleep(&tp, NULL); - } - -done: - PMIx_tool_finalize(); - - if (NULL != myuri) { - free(myuri); - } - - return (rc); -} diff --git a/examples/debugger/indirect.c b/examples/debugger/indirect.c deleted file mode 100644 index 96a05525a7..0000000000 --- a/examples/debugger/indirect.c +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * Copyright (c) 2021 IBM Corporation. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include - -#include "debugger.h" -#include - -static pmix_proc_t myproc; -static volatile bool ilactive = true; -static volatile bool dbactive = true; -static volatile char *appnspace = NULL; -static volatile bool regpending = true; - - -/* this is the event notification function we pass down below - * when registering for general events - i.e.,, the default - * handler. We don't technically need to register one, but it - * is usually good practice to catch any events that occur */ -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - printf("DEFAULT EVENT HANDLER CALLED WITH STATUS %s\n", PMIx_Error_string(status)); - - /* this example doesn't do anything with default events */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - ilactive = false; - printf("\tCOMPLETE\n"); -} - -/* this is the event notification function we pass down below - * when registering for LOST_CONNECTION, thereby indicating - * that the intermediate launcher we started has terminated */ -static void terminate_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - printf("%s called with status %s\n", __FUNCTION__, PMIx_Error_string(status)); - /* this example doesn't do anything further */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - ilactive = false; -} - -/* event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - printf("%s called with status %s\n", __FUNCTION__, PMIx_Error_string(status)); - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - lock->status = status; - regpending = false; - DEBUG_WAKEUP_THREAD(lock); -} - -static void opcbfunc(pmix_status_t status, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - lock->status = status; - DEBUG_WAKEUP_THREAD(lock); -} - -static void spawn_cbfunc(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - size_t n; - - for (n = 0; n < ninfo; n++) { - if (PMIX_CHECK_KEY(&info[n], PMIX_NSPACE)) { - appnspace = strdup(info[n].value.data.string); - printf("GOT NSPACE %s\n", appnspace); - break; - } - } - printf("Debugger daemon job: %s\n", appnspace); - dbactive = false; - - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } -} - -#define DBGR_LOOP_LIMIT 10 - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_info_t *info, iofinfo; - pmix_app_t *app; - size_t ninfo, napps; - char *requested_launcher; - int timeout; - size_t n; - char cwd[1024]; - pmix_status_t code = PMIX_EVENT_JOB_END; - mylock_t mylock; - pid_t pid; - char *launchers[] = {"prun", "mpirun", "mpiexec", "prterun", NULL}; - pmix_proc_t proc, target_proc; - bool found; - pmix_data_array_t darray, darray2; - pmix_nspace_t clientspace, dbnspace; - pmix_value_t *val; - char *myuri = NULL; - void *jinfo, *linfo, *dirs; - myquery_data_t *mydata = NULL; - pmix_rank_t rank; - - /* need to provide args */ - if (2 > argc) { - fprintf(stderr, "Usage: %s [launcher] [app]\n", argv[0]); - exit(0); - } - - /* check to see if we are using an intermediate launcher - we only - * support those we recognize */ - found = false; - requested_launcher = basename(argv[1]); - for (n = 0; NULL != launchers[n]; n++) { - if (0 == strcmp(requested_launcher, launchers[n])) { - found = true; - } - } - if (!found) { - char *tmp = PMIX_ARGV_JOIN_COMPAT(launchers, ','); - fprintf(stderr, "Wrong test, dude - unknown launcher\n"); - fprintf(stderr, "Known launchers: %s\n", tmp); - free(tmp); - exit(1); - } - - pid = getpid(); - - info = NULL; - ninfo = 0; - - /* do not connect to anyone */ - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_TOOL_DO_NOT_CONNECT, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_LAUNCHER, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_IOF_LOCAL_OUTPUT, NULL, PMIX_BOOL); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - - /* init as a tool */ - if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { - fprintf(stderr, "PMIx_tool_init failed: %s(%d)\n", PMIx_Error_string(rc), rc); - exit(rc); - } - PMIX_DATA_ARRAY_DESTRUCT(&darray); - - printf("Debugger ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - - /* get our URI as we will need it later */ -#ifdef PMIX_MYSERVER_URI - rc = PMIx_Get(&myproc, PMIX_MYSERVER_URI, NULL, 0, &val); -#else - rc = PMIx_Get(&myproc, PMIX_SERVER_URI, NULL, 0, &val); -#endif - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Failed to retrieve our URI: %s\n", PMIx_Error_string(rc)); - PMIx_tool_finalize(); - exit(rc); - } - myuri = strdup(val->data.string); - PMIX_VALUE_RELEASE(val); - printf("DEBUGGER URI: %s\n", myuri); - - /* register an event handler to pickup when the IL - * we spawned dies */ - DEBUG_CONSTRUCT_LOCK(&mylock); - code = PMIX_ERR_LOST_CONNECTION; - PMIX_INFO_CREATE(info, 1); - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "LOST-CONNECTION", PMIX_STRING); - PMIx_Register_event_handler(&code, 1, info, 1, terminate_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_INFO_FREE(info, 1); - - /* register a default event handler */ - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIX_INFO_CREATE(info, 1); - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_NAME, "DEFAULT", PMIX_STRING); - PMIx_Register_event_handler(NULL, 0, info, 1, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_INFO_FREE(info, 1); - - /* we are using an intermediate launcher - we will either use the - * reference server to start it or will fork/exec it ourselves, - * but either way tell it to wait after launch for directives */ - napps = 1; - PMIX_APP_CREATE(app, napps); - /* setup the executable */ - app[0].cmd = strdup(argv[1]); - PMIX_ARGV_APPEND(rc, app[0].argv, argv[1]); - /* pass it the rest of the cmd line as we don't know - * how to parse it */ - for (n = 2; n < argc; n++) { - PMIX_ARGV_APPEND(rc, app[0].argv, argv[n]); - } - getcwd(cwd, 1024); // point us to our current directory - app[0].cwd = strdup(cwd); - app[0].maxprocs = 1; // only start one instance of the IL - - /* tell the IL how to connect back to us */ - PMIX_SETENV(rc, PMIX_LAUNCHER_RNDZ_URI, myuri, &app[0].env); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Failed to set URI in app environment: %s\n", PMIx_Error_string(rc)); - PMIx_tool_finalize(); - exit(rc); - } - - /* provide job-level directives so the launcher does what we want - * when it spawns the actual job - note that requesting the stdout - * and stderr of the launcher will automatically get us the output - * from the application as the launcher will have had it forwarded - * to itself */ - PMIX_INFO_LIST_START(jinfo); - /* create the launch directives to tell the launcher what - * to do with the app it is going to spawn for us */ - PMIX_INFO_LIST_START(linfo); - rank = PMIX_RANK_WILDCARD; - if (NULL != strstr(argv[1], "mpi")) { - PMIX_INFO_LIST_ADD(rc, linfo, PMIX_DEBUG_STOP_IN_APP, NULL, PMIX_BOOL); // stop all procs in MPI_Init - } else { - PMIX_INFO_LIST_ADD(rc, linfo, PMIX_DEBUG_STOP_IN_INIT, NULL, PMIX_BOOL); // stop all procs in PMIx_Init - } - PMIX_INFO_LIST_ADD(rc, linfo, PMIX_NOTIFY_JOB_EVENTS, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, linfo, PMIX_FWD_STDOUT, NULL, PMIX_BOOL); // forward stdout to me - PMIX_INFO_LIST_ADD(rc, linfo, PMIX_FWD_STDERR, NULL, PMIX_BOOL); // forward stderr to me - PMIX_INFO_LIST_CONVERT(rc, linfo, &darray2); - PMIX_INFO_LIST_ADD(rc, jinfo, PMIX_LAUNCH_DIRECTIVES, &darray2, PMIX_DATA_ARRAY); - PMIX_INFO_LIST_RELEASE(linfo); - - /* convert job info to array */ - PMIX_INFO_LIST_CONVERT(rc, jinfo, &darray); - PMIX_INFO_LIST_RELEASE(jinfo); - info = (pmix_info_t *) darray.array; - ninfo = darray.size; - - /* spawn the launcher - the function will return when the launcher - * has been started. */ - printf("SPAWNING LAUNCHER\n"); - rc = PMIx_Spawn(info, ninfo, app, napps, clientspace); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - PMIX_DATA_ARRAY_DESTRUCT(&darray2); - PMIX_APP_FREE(app, napps); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Launcher %s failed to start with error: %s(%d)\n", argv[1], - PMIx_Error_string(rc), rc); - goto done; - } - - printf("RECONNECT TO IL AT %s\n", clientspace); - /* set the spawned launcher as our primary server - wait for - * it to connect to us but provide a timeout so we don't hang - * waiting forever. The launcher shall connect to us prior - * to spawning the job we provided it */ - PMIX_LOAD_PROCID(&proc, clientspace, 0); - PMIX_INFO_LIST_START(dirs); - timeout = 2; - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_WAIT_FOR_CONNECTION, NULL, PMIX_BOOL); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_TIMEOUT, &timeout, PMIX_INT); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - rc = PMIx_tool_set_server(&proc, info, ninfo); - if (PMIX_SUCCESS != rc) { - /* connection failed */ - fprintf(stderr, "Failed to set spawned launcher as primary server: %s\n", - PMIx_Error_string(rc)); - goto done; - } - PMIX_DATA_ARRAY_DESTRUCT(&darray); - - /* register to receive the ready-for-debug event alerting us that things are ready - * for us to spawn the debugger daemons - this will be registered - * with the IL we started */ - printf("REGISTERING READY-FOR-DEBUG HANDLER\n"); - DEBUG_CONSTRUCT_LOCK(&mylock); - code = PMIX_READY_FOR_DEBUG; - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_HDLR_NAME, "READY-FOR-DEBUG", PMIX_STRING); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - PMIx_Register_event_handler(&code, 1, info, ninfo, spawn_cbfunc, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - if (!ilactive) { - fprintf(stderr, "Error: Launcher not active\n"); - goto done; - } - - printf("RELEASING %s [%s,%d]\n", argv[1], proc.nspace, proc.rank); - /* release the IL to spawn its job */ - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_NON_DEFAULT, NULL, PMIX_BOOL); - /* target this notification solely to that one tool */ - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_EVENT_CUSTOM_RANGE, &proc, PMIX_PROC); - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - info = darray.array; - ninfo = darray.size; - PMIx_Notify_event(PMIX_DEBUGGER_RELEASE, &myproc, PMIX_RANGE_CUSTOM, info, ninfo, NULL, NULL); - PMIX_DATA_ARRAY_DESTRUCT(&darray); - printf("WAITING FOR APPLICATION LAUNCH\n"); - /* wait for the IL to have launched its application */ - int icount = 0; - while (dbactive && ilactive) { - struct timespec tp = {0, 500000000}; - nanosleep(&tp, NULL); - ++icount; - if (icount > 20) { - fprintf(stderr, "Error: Failed to launch by the timeout\n"); - goto done; - } - } - if (!ilactive) { - /* the launcher failed */ - fprintf(stderr, "Error: Launcher failed\n"); - goto done; - } - - if (NULL == appnspace) { - fprintf(stderr, "Error: The application has failed to launch\n"); - goto done; - } - printf("APPLICATION HAS LAUNCHED: %s\n", (char *) appnspace); - - /* we want to forward our stdin to the launcher we - * started - it will know what to do with its stdin */ - PMIX_LOAD_PROCID(&proc, (char*)clientspace, PMIX_RANK_WILDCARD); - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIX_INFO_LOAD(&iofinfo, PMIX_IOF_PUSH_STDIN, NULL, PMIX_BOOL); - rc = PMIx_IOF_push(&proc, 1, NULL, &iofinfo, 1, opcbfunc, &mylock); - if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { - fprintf(stderr, "IOF push of stdin failed: %s\n", PMIx_Error_string(rc)); - DEBUG_DESTRUCT_LOCK(&mylock); - goto done; - } else if (PMIX_SUCCESS == rc) { - DEBUG_WAIT_THREAD(&mylock); - if (PMIX_SUCCESS != mylock.status) { - fprintf(stderr, "IOF push of stdin failed: %s\n", PMIx_Error_string(rc)); - DEBUG_DESTRUCT_LOCK(&mylock); - goto done; - } - } - DEBUG_DESTRUCT_LOCK(&mylock); - - /* setup the debugger */ - mydata = (myquery_data_t *) malloc(sizeof(myquery_data_t)); - mydata->napps = 1; - PMIX_APP_CREATE(mydata->apps, mydata->napps); - mydata->apps[0].cmd = strdup("./daemon"); - PMIX_ARGV_APPEND(rc, mydata->apps[0].argv, "./daemon"); - getcwd(cwd, 1024); // point us to our current directory - mydata->apps[0].cwd = strdup(cwd); - mydata->apps[0].maxprocs = 1; - PMIX_LOAD_PROCID(&target_proc, (void *) appnspace, PMIX_RANK_WILDCARD); - /* provide directives so the daemons go where we want, and - * let the RM know these are debugger daemons */ - PMIX_INFO_LIST_START(dirs); - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_MAPBY, "ppr:1:node", - PMIX_STRING); // instruct the RM to launch one copy of the executable on each - // node - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUGGER_DAEMONS, NULL, - PMIX_BOOL); // these are debugger daemons - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_DEBUG_TARGET, &target_proc, - PMIX_PROC); // the nspace being debugged - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_NOTIFY_COMPLETION, NULL, - PMIX_BOOL); // notify us when the debugger job completes - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_FWD_STDOUT, NULL, PMIX_BOOL); // forward stdout to me - PMIX_INFO_LIST_ADD(rc, dirs, PMIX_FWD_STDERR, NULL, PMIX_BOOL); // forward stderr to me - PMIX_INFO_LIST_CONVERT(rc, dirs, &darray); - PMIX_INFO_LIST_RELEASE(dirs); - mydata->info = darray.array; - mydata->ninfo = darray.size; - darray.array = NULL; - darray.size = 0; - PMIX_DATA_ARRAY_DESTRUCT(&darray); - - /* spawn the daemons */ - printf("Debugger: spawning %s\n", mydata->apps[0].cmd); - rc = PMIx_Spawn(mydata->info, mydata->ninfo, mydata->apps, mydata->napps, dbnspace); - PMIX_INFO_FREE(mydata->info, mydata->ninfo); - PMIX_APP_FREE(mydata->apps, mydata->napps); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Debugger daemons failed to launch with error: %s\n", - PMIx_Error_string(rc)); - free(mydata); - goto done; - } - free(mydata); - - /* wait for the IL to terminate */ - printf("WAITING FOR IL TO TERMINATE\n"); - while (ilactive) { - struct timespec tp = {0, 500000}; - nanosleep(&tp, NULL); - } - -done: - PMIx_tool_finalize(); - - if (NULL != myuri) { - free(myuri); - } - - return (rc); -} diff --git a/examples/debugger/mpihello.c b/examples/debugger/mpihello.c deleted file mode 100644 index ba30e16706..0000000000 --- a/examples/debugger/mpihello.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * - * Sample MPI "hello world" application in C - */ - -#include "mpi.h" -#include - -int main(int argc, char *argv[]) -{ - int rank, size, len; - char version[MPI_MAX_LIBRARY_VERSION_STRING]; - - MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - MPI_Get_library_version(version, &len); - printf("Hello, world, I am %d of %d, (%s, %d)\n", rank, size, version, len); - MPI_Finalize(); - - return 0; -} diff --git a/examples/debugger/stdincheck.c b/examples/debugger/stdincheck.c deleted file mode 100644 index 910c77fea1..0000000000 --- a/examples/debugger/stdincheck.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#include -#include -#include -#include - -#include "debugger.h" -#include - -/* This program reads stdin until it closes, and then exits. It first - * calls PMIx_Init so it can "hold" for a debugger, thereby allowing - * the indirect and direct tests in this directory to function. - */ - -static pmix_proc_t myproc; -char msg[8192]; - -int main(int argc, char **argv) -{ - pmix_status_t rc; - int msgsize; - pid_t pid; - char hostname[1024]; - - pid = getpid(); - gethostname(hostname, 1024); - - fprintf(stderr, "Proc %d on host %s running\n", (int)pid, hostname); - - rc = PMIx_Init(&myproc, NULL, 0); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "PMIx_Init failed: %s\n", PMIx_Error_string(rc)); - exit(1); - } - fprintf(stderr, "Proc %d on host %s RELEASED FROM INIT\n", (int)pid, hostname); - - if (0 == myproc.rank) { - while (1) { - msgsize = read(0, msg, 8192); - if (msgsize < 0) { - if (EAGAIN == errno || EINTR == errno) { - continue; - } - break; - } - if (0 == msgsize) { - /* end of input */ - break; - } - msg[msgsize] = '\n'; - write(1, msg, msgsize); - } - } - fprintf(stderr, "Proc %d on host %s finalizing\n", (int)pid, hostname); - - PMIx_Finalize(NULL, 0); - return 0; -} diff --git a/examples/dmodex.c b/examples/dmodex.c deleted file mode 100644 index 286b6c3e6e..0000000000 --- a/examples/dmodex.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2019-2022 IBM Corporation. All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. - * Copyright (c) 2023 Triad National Security, LLC. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include -#include -#include -#include -#include - -#include "examples.h" - -static uint32_t nprocs; -static pmix_proc_t myproc; - -int main(int argc, char **argv) -{ - int rc, np; - pmix_value_t value; - pmix_value_t *val = NULL; - char tmp[1024]; - pmix_proc_t proc; - uint32_t n, k, nlocal; - bool local, all_local = false; - char **peers; - pmix_rank_t *locals = NULL; - uint8_t j; - pmix_info_t timeout; - int tlimit = 240; - - EXAMPLES_HIDE_UNUSED_PARAMS(argc, argv); - - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(EXIT_FAILURE); - } - - /* get our job size */ - PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - goto done; - } - nprocs = val->data.uint32; - PMIX_VALUE_RELEASE(val); - - if(0 == myproc.rank) { - fprintf(stderr, "Client ns %s rank %d: Running. World size %d\n", myproc.nspace, myproc.rank, nprocs); - } - - /* put a few values */ - (void) snprintf(tmp, 1024, "%s-%d-internal", myproc.nspace, myproc.rank); - value.type = PMIX_UINT32; - value.data.uint32 = 1234; - if (PMIX_SUCCESS != (rc = PMIx_Store_internal(&myproc, tmp, &value))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Store_internal failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - - (void) snprintf(tmp, 1024, "%s-%d-local", myproc.nspace, myproc.rank); - value.type = PMIX_UINT64; - value.data.uint64 = 1234; - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, tmp, &value))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - - (void) snprintf(tmp, 1024, "%s-%d-remote", myproc.nspace, myproc.rank); - value.type = PMIX_STRING; - value.data.string = "1234"; - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, tmp, &value))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - - value.type = PMIX_BYTE_OBJECT; - value.data.bo.bytes = (char *) malloc(128); - for (j = 0; j < 128; j++) { - value.data.bo.bytes[j] = j; - } - value.data.bo.size = 128; - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, "ghex", &value))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Put ghex failed: %d\n", myproc.nspace, - myproc.rank, rc); - PMIX_VALUE_DESTRUCT(&value); - goto done; - } - PMIX_VALUE_DESTRUCT(&value); - - /* commit the data to the server */ - if (PMIX_SUCCESS != (rc = PMIx_Commit())) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - - /* get a list of our local peers */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_LOCAL_PEERS, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get local peers failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - goto done; - } - /* split the returned string to get the rank of each local peer */ - peers = PMIX_ARGV_SPLIT_COMPAT(val->data.string, ','); - PMIX_VALUE_RELEASE(val); - nlocal = PMIX_ARGV_COUNT_COMPAT(peers); - if (nprocs == nlocal) { - all_local = true; - } else { - all_local = false; - locals = (pmix_rank_t *) malloc(PMIX_ARGV_COUNT_COMPAT(peers) * sizeof(pmix_rank_t)); - for (n = 0; NULL != peers[n]; n++) { - locals[n] = strtoul(peers[n], NULL, 10); - } - } - PMIX_ARGV_FREE(peers); - - np = snprintf(proc.nspace, PMIX_MAX_NSLEN, "%s", myproc.nspace); - if (np >= PMIX_MAX_NSLEN) { - fprintf(stderr, "Client ns %s rank %d: snprintf failed\n", myproc.nspace, myproc.rank); - exit(EXIT_FAILURE); - } - - PMIX_INFO_LOAD(&timeout, PMIX_TIMEOUT, &tlimit, PMIX_INT); - /* get the committed data - ask for someone who doesn't exist as well */ - for (n = 0; n < nprocs; n++) { - if (n == myproc.rank) { - /* local peers doesn't include us, so check for - * ourselves separately */ - local = true; - } else if (all_local) { - local = true; - } else { - local = false; - /* see if this proc is local to us */ - for (k = 0; k < nlocal; k++) { - if (n == locals[k]) { - local = true; - break; - } - } - } - proc.rank = n; - if (local) { - (void)snprintf(tmp, 1024, "%s-%d-local", proc.nspace, n); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, &timeout, 1, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %s\n", myproc.nspace, myproc.rank, - tmp, PMIx_Error_string(rc)); - goto done; - } - if (PMIX_UINT64 != val->type) { - fprintf(stderr, "%s:%d: PMIx_Get Key %s failed - returned wrong type: %s\n", myproc.nspace, - myproc.rank, tmp, PMIx_Data_type_string(val->type)); - PMIX_VALUE_RELEASE(val); - goto done; - } - if (1234 != val->data.uint64) { - fprintf(stderr, "%s:%d: PMIx_Get Key %s failed - returned wrong value: %d\n", myproc.nspace, - myproc.rank, tmp, (int) val->data.uint64); - PMIX_VALUE_RELEASE(val); - goto done; - } - PMIX_VALUE_RELEASE(val); - } else { - (void)snprintf(tmp, 1024, "%s-%d-remote", myproc.nspace, n); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, tmp, &timeout, 1, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %s\n", myproc.nspace, myproc.rank, - tmp, PMIx_Error_string(rc)); - goto done; - } - if (PMIX_STRING != val->type) { - fprintf(stderr, "%s:%d: PMIx_Get Key %s failed - returned wrong type: %s\n", myproc.nspace, - myproc.rank, tmp, PMIx_Data_type_string(val->type)); - PMIX_VALUE_RELEASE(val); - goto done; - } - if (0 != strcmp(val->data.string, "1234")) { - fprintf(stderr, "%s:%d: PMIx_Get Key %s failed - returned wrong value: %s\n", myproc.nspace, - myproc.rank, tmp, val->data.string); - PMIX_VALUE_RELEASE(val); - goto done; - } - PMIX_VALUE_RELEASE(val); - } - /* if this isn't us, then get the ghex key */ - if (n != myproc.rank) { - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, "ghex", &timeout, 1, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get ghex failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - goto done; - } - if (PMIX_BYTE_OBJECT != val->type) { - fprintf(stderr, "%s:%d: PMIx_Get ghex failed - returned wrong type: %s\n", myproc.nspace, - myproc.rank, PMIx_Data_type_string(val->type)); - PMIX_VALUE_RELEASE(val); - goto done; - } - if (128 != val->data.bo.size) { - fprintf(stderr, "%s:%d: PMIx_Get ghex failed - returned wrong size: %d\n", myproc.nspace, - myproc.rank, (int) val->data.bo.size); - PMIX_VALUE_RELEASE(val); - goto done; - } - PMIX_VALUE_RELEASE(val); - } - } - -done: - /* finalize us */ - - /* call fence so everyone waits before leaving */ - if (PMIX_SUCCESS != (rc = PMIx_Fence(NULL, 0, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(EXIT_FAILURE); - } - - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - if(0 == myproc.rank) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - } - fflush(stderr); - return (EXIT_SUCCESS); -} diff --git a/examples/dynamic.c b/examples/dynamic.c deleted file mode 100644 index e92f5075fd..0000000000 --- a/examples/dynamic.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2016 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -int main(int argc, char **argv) -{ - int rc, exitcode; - pmix_value_t value; - pmix_value_t *val = &value; - pmix_proc_t proc; - uint32_t nprocs; - char nsp2[PMIX_MAX_NSLEN + 1]; - pmix_app_t *app; - char hostname[1024], dir[1024]; - pmix_proc_t *peers; - size_t npeers, ntmp = 0; - char *nodelist; - char *cmd; - - if (0 > gethostname(hostname, sizeof(hostname))) { - exit(1); - } - if (NULL == getcwd(dir, 1024)) { - exit(1); - } - - if (1 < argc) { - if (0 == strcmp(argv[1], "fail")) { - cmd = "client-does-not-exist"; - } else { - fprintf(stderr, "usage: dynamic [fail]\n\tSpecify fail if you want the spawn command to fail\n"); - exit(1); - } - } else { - cmd = "client"; - } - - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(rc); - } - fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); - - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - - /* get our job size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, - myproc.rank, rc); - exitcode = rc; - goto done; - } - nprocs = val->data.uint32; - PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); - - /* call fence to sync */ - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - exitcode = rc; - goto done; - } - - /* rank=0 calls spawn */ - if (0 == myproc.rank) { - PMIX_APP_CREATE(app, 1); - if (0 > asprintf(&app->cmd, "%s/%s", dir, cmd)) { - exitcode = 1; - goto done; - } - app->maxprocs = 2; - PMIX_ARGV_APPEND(rc, app->argv, app->cmd); - PMIX_ARGV_APPEND(rc, app->env, "PMIX_ENV_VALUE=3"); - - fprintf(stderr, "Client ns %s rank %d: calling PMIx_Spawn\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Spawn failed: %s(%d)\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc), rc); - exitcode = rc; - /* terminate our peers */ - PMIx_Abort(rc, "FAILED TO START CHILD JOB", &proc, 1); - goto done; - } else { - fprintf(stderr, "Spawn success.\n"); - } - PMIX_APP_FREE(app, 1); - - /* get their job size */ - val = NULL; - (void) strncpy(proc.nspace, nsp2, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val)) || NULL == val) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - ntmp = val->data.uint32; - PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe %s size %d\n", myproc.nspace, myproc.rank, nsp2, - (int) ntmp); - } - - /* just cycle the connect/disconnect functions */ - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Connect(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Connect failed: %d\n", myproc.nspace, - myproc.rank, rc); - exitcode = rc; - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Connect succeeded\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Disconnect(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Disonnect failed: %d\n", myproc.nspace, - myproc.rank, rc); - exitcode = rc; - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Disconnect succeeded\n", myproc.nspace, - myproc.rank); - - /* finally, test the resolve functions */ - if (0 == myproc.rank) { - if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, NULL, &peers, &npeers))) { - fprintf(stderr, - "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %s(%d)\n", - myproc.nspace, myproc.rank, nsp2, PMIx_Error_string(rc), rc); - exitcode = rc; - goto done; - } - if ((nprocs + ntmp) != npeers) { - fprintf(stderr, - "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d\n", - myproc.nspace, myproc.rank, (int) (nprocs + ntmp), (int) npeers); - exitcode = 1; - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers\n", - myproc.nspace, myproc.rank, (int) npeers); - if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(nsp2, &nodelist))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes failed for nspace %s: %d\n", - myproc.nspace, myproc.rank, nsp2, rc); - exitcode = rc; - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes %s\n", myproc.nspace, myproc.rank, - nodelist); - } else { - if (PMIX_SUCCESS != (rc = PMIx_Resolve_peers(hostname, myproc.nspace, &peers, &npeers))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers failed for nspace %s: %d\n", - myproc.nspace, myproc.rank, myproc.nspace, rc); - exitcode = rc; - goto done; - } - if (nprocs != npeers) { - fprintf(stderr, - "Client ns %s rank %d: PMIx_Resolve_peers returned incorrect npeers: %d vs %d\n", - myproc.nspace, myproc.rank, nprocs, (int) npeers); - exitcode = rc; - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_peers returned %d npeers\n", - myproc.nspace, myproc.rank, (int) npeers); - if (PMIX_SUCCESS != (rc = PMIx_Resolve_nodes(NULL, &nodelist))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes failed: %d\n", myproc.nspace, - myproc.rank, rc); - exitcode = rc; - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Resolve_nodes %s\n", myproc.nspace, myproc.rank, - nodelist); - } - PMIX_PROC_FREE(peers, npeers); - free(nodelist); - -done: - /* call fence to sync */ - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - return(rc); - } - - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - printf("exit\n"); - return (exitcode); -} diff --git a/examples/error_notify.c b/examples/error_notify.c deleted file mode 100644 index fa96bd442f..0000000000 --- a/examples/error_notify.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2018-2020 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include -#include -#include -#include -#include - -#include - -static pmix_proc_t myproc; -static bool completed; -struct timeval start, end; -double sec; - -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - gettimeofday(&end, NULL); - fprintf(stderr, "New notification comes\n"); - if ((info[0].value.data.proc != NULL) - && strcmp(info[0].value.data.proc->nspace, myproc.nspace) == 0) { - fprintf(stderr, "Client %s:%d NOTIFIED with status %d and error proc %s:%d key %s \n", - myproc.nspace, myproc.rank, status, info[0].value.data.proc->nspace, - info[0].value.data.proc->rank, info[0].key); - completed = true; - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - } else - fprintf(stderr, "Not from my namespace \n"); -} - -static void op_callbk(pmix_status_t status, void *cbdata) -{ - fprintf(stderr, "Client %s:%d OP CALLBACK CALLED WITH STATUS %d\n", myproc.nspace, myproc.rank, - status); -} - -static void errhandler_reg_callbk(pmix_status_t status, size_t errhandler_ref, void *cbdata) -{ - fprintf(stderr, - "Client %s:%d ERRHANDLER REGISTRATION CALLBACK CALLED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) errhandler_ref); -} - -int main(int argc, char **argv) -{ - int rc; - pmix_value_t value; - pmix_value_t *val = &value; - pmix_proc_t proc; - uint32_t nprocs; - pid_t pid; - - char name[255]; - - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - gethostname(name, 255); - fprintf(stderr, "%s Client ns %s rank %d: Running\n", name, myproc.nspace, myproc.rank); - - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - - /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - nprocs = val->data.uint32; - PMIX_VALUE_RELEASE(val); - // fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); - completed = false; - - pmix_status_t status; - status = PMIX_ERR_PROC_ABORTED; - /* register our errhandler */ - PMIx_Register_event_handler(&status, 1, NULL, 0, notification_fn, errhandler_reg_callbk, NULL); - - /* call fence to sync */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - gethostname(name, 255); - sleep(3); - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - gettimeofday(&start, NULL); - if (myproc.rank == 2) // || myproc.rank == 4) - { - fprintf(stderr, "\nClient ns %s:%d kill self \n", myproc.nspace, myproc.rank); - completed = true; - pid = getpid(); - kill(pid, 1); - } - while (!completed) { - struct timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = 100000; - nanosleep(&ts, NULL); - } -done: - /* finalize us */ - sec = end.tv_sec + (double) end.tv_usec / 1000000.0 - start.tv_sec - - (double) start.tv_usec / 1000000.0; - fprintf(stderr, "Client ns %s rank %d takes %f: Finalizing\n", myproc.nspace, myproc.rank, sec); - PMIx_Deregister_event_handler(1, op_callbk, NULL); - - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/examples.h b/examples/examples.h deleted file mode 100644 index b42c4cbeb5..0000000000 --- a/examples/examples.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include - -#include -#include - -typedef struct { - pthread_mutex_t mutex; - pthread_cond_t cond; - volatile bool active; - pmix_status_t status; - int count; - size_t evhandler_ref; -} mylock_t; - -#define DEBUG_CONSTRUCT_LOCK(l) \ - do { \ - pthread_mutex_init(&(l)->mutex, NULL); \ - pthread_cond_init(&(l)->cond, NULL); \ - (l)->active = true; \ - (l)->status = PMIX_SUCCESS; \ - (l)->count = 0; \ - (l)->evhandler_ref = 0; \ - } while (0) - -#define DEBUG_DESTRUCT_LOCK(l) \ - do { \ - pthread_mutex_destroy(&(l)->mutex); \ - pthread_cond_destroy(&(l)->cond); \ - } while (0) - -#define DEBUG_WAIT_THREAD(lck) \ - do { \ - pthread_mutex_lock(&(lck)->mutex); \ - while ((lck)->active) { \ - pthread_cond_wait(&(lck)->cond, &(lck)->mutex); \ - } \ - pthread_mutex_unlock(&(lck)->mutex); \ - } while (0) - -#define DEBUG_WAKEUP_THREAD(lck) \ - do { \ - pthread_mutex_lock(&(lck)->mutex); \ - (lck)->active = false; \ - pthread_cond_broadcast(&(lck)->cond); \ - pthread_mutex_unlock(&(lck)->mutex); \ - } while (0) - -/* define a structure for collecting returned - * info from a query */ -typedef struct { - mylock_t lock; - pmix_info_t *info; - size_t ninfo; -} myquery_data_t; - -#define DEBUG_CONSTRUCT_MYQUERY(q) \ - do { \ - DEBUG_CONSTRUCT_LOCK(&((q)->lock)); \ - (q)->info = NULL; \ - (q)->ninfo = 0; \ - } while (0) - -#define DEBUG_DESTRUCT_MYQUERY(q) \ - do { \ - DEBUG_DESTRUCT_LOCK(&((q)->lock)); \ - if (NULL != (q)->info) { \ - PMIX_INFO_FREE((q)->info, (q)->ninfo); \ - } \ - } while (0) - -/* define a structure for releasing when a given - * nspace terminates */ -typedef struct { - mylock_t lock; - char *nspace; - int exit_code; - bool exit_code_given; -} myrel_t; - -#define DEBUG_CONSTRUCT_MYREL(r) \ - do { \ - DEBUG_CONSTRUCT_LOCK(&((r)->lock)); \ - (r)->nspace = NULL; \ - (r)->exit_code = 0; \ - (r)->exit_code_given = false; \ - } while (0) - -#define DEBUG_DESTRUCT_MYREL(r) \ - do { \ - DEBUG_DESTRUCT_LOCK(&((r)->lock)); \ - if (NULL != (r)->nspace) { \ - free((r)->nspace); \ - } \ - } while (0) - -#define EXAMPLES_HIDE_UNUSED_PARAMS(...) \ - do { \ - int __x = 3; \ - examples_hide_unused_params(__x, __VA_ARGS__); \ - } while(0) - -static inline void examples_hide_unused_params(int x, ...) -{ - va_list ap; - - va_start(ap, x); - va_end(ap); -} - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_JOIN_COMPAT(a, b) \ - pmix_argv_join(a, b) -#else -#define PMIX_ARGV_JOIN_COMPAT(a, b) \ - PMIx_Argv_join(a, b) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_SPLIT_COMPAT(a, b) \ - pmix_argv_split(a, b) -#else -#define PMIX_ARGV_SPLIT_COMPAT(a, b) \ - PMIx_Argv_split(a, b) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_SPLIT_WITH_EMPTY_COMPAT(a, b) \ - pmix_argv_split_with_empty(a, b) -#else -#define PMIX_ARGV_SPLIT_WITH_EMPTY_COMPAT(a, b) \ - PMIx_Argv_split_with_empty(a, b) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_COUNT_COMPAT(a) \ - pmix_argv_count(a) -#else -#define PMIX_ARGV_COUNT_COMPAT(a) \ - PMIx_Argv_count(a) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_FREE_COMPAT(a) \ - pmix_argv_free(a) -#else -#define PMIX_ARGV_FREE_COMPAT(a) \ - PMIx_Argv_free(a) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_APPEND_UNIQUE_COMPAT(a, b) \ - pmix_argv_append_unique_nosize(a, b) -#else -#define PMIX_ARGV_APPEND_UNIQUE_COMPAT(a, b) \ - PMIx_Argv_append_unique_nosize(a, b) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_APPEND_NOSIZE_COMPAT(a, b) \ - pmix_argv_append_nosize(a, b) -#else -#define PMIX_ARGV_APPEND_NOSIZE_COMPAT(a, b) \ - PMIx_Argv_append_nosize(a, b) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_ARGV_COPY_COMPAT(a) \ - pmix_argv_copy(a) -#else -#define PMIX_ARGV_COPY_COMPAT(a) \ - PMIx_Argv_copy(a) -#endif - -#if PMIX_NUMERIC_VERSION < 0x00040203 -#define PMIX_SETENV_COMPAT(a, b, c, d) \ - pmix_setenv(a, b, c, d) -#else -#define PMIX_SETENV_COMPAT(a, b, c, d) \ - PMIx_Setenv(a, b, c, d) -#endif diff --git a/examples/fault.c b/examples/fault.c deleted file mode 100644 index 7390df3db9..0000000000 --- a/examples/fault.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include -#include -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock; - bool found; - int exit_code = -1; - size_t n; - pmix_proc_t *affected = NULL; - - /* find our return object */ - lock = NULL; - found = false; - for (n = 0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { - lock = (myrel_t *) info[n].value.data.ptr; - /* not every RM will provide an exit code, but check if one was given */ - } else if (0 == strncmp(info[n].key, PMIX_EXIT_CODE, PMIX_MAX_KEYLEN)) { - exit_code = info[n].value.data.integer; - found = true; - } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { - affected = info[n].value.data.proc; - } - } - /* if the object wasn't returned, then that is an error */ - if (NULL == lock) { - fprintf(stderr, "LOCK WASN'T RETURNED IN RELEASE CALLBACK\n"); - /* let the event handler progress */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } - return; - } - - /* tell the event handler state machine that we are the last step */ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } - if (NULL == affected) { - fprintf(stderr, "CLIENT %s:%u NOTIFIED STATUS %s - AFFECTED NULL\n", - myproc.nspace, myproc.rank, PMIx_Error_string(status)); - } else { - fprintf(stderr, "CLIENT %s:%u NOTIFIED STATUS %s - AFFECTED %s:%u EXIT STATUS %d\n", - myproc.nspace, myproc.rank, PMIx_Error_string(status), - affected->nspace, affected->rank, exit_code); - } - - if (found) { - lock->exit_code = exit_code; - lock->exit_code_given = true; - } - DEBUG_WAKEUP_THREAD(&lock->lock); -} - -static void op_callbk(pmix_status_t status, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - DEBUG_WAKEUP_THREAD(lock); -} - -static void evhandler_reg_callbk(pmix_status_t status, size_t errhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - DEBUG_WAKEUP_THREAD(lock); -} - -int main(int argc, char **argv) -{ - int rc; - pmix_value_t value; - pmix_value_t *val = &value; - pmix_proc_t proc; - pmix_info_t *info; - mylock_t mylock; - myrel_t myrel; - pmix_status_t codes[4] = { - PMIX_ERR_PROC_ABORTED, - PMIX_ERR_EXIT_NONZERO_TERM, - PMIX_ERR_PROC_ABORTED_BY_SIG, - PMIX_EVENT_JOB_END - }; - - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); - - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - - /* register a handler specifically for when the target - * job completes */ - DEBUG_CONSTRUCT_MYREL(&myrel); - PMIX_INFO_CREATE(info, 2); - PMIX_INFO_LOAD(&info[0], PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - /* only call me back when one of us terminates */ - PMIX_INFO_LOAD(&info[1], PMIX_NSPACE, myproc.nspace, PMIX_STRING); - - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(codes, 4, info, 2, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - if (PMIX_SUCCESS != mylock.status) { - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_INFO_FREE(info, 2); - goto done; - } - DEBUG_DESTRUCT_LOCK(&mylock); - PMIX_INFO_FREE(info, 2); - - /* call fence to sync */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - - /* rank=0 calls abort */ - if (0 == myproc.rank) { - sleep(2); - fprintf(stderr, "Client ns %s rank %d: exiting with error\n", myproc.nspace, myproc.rank); - exit(1); - } - /* everyone simply waits */ - DEBUG_WAIT_THREAD(&myrel.lock); - DEBUG_DESTRUCT_MYREL(&myrel); - - /* rank 1 waits longer to check that we don't cleanup - * until all ranks are done */ - if (1 == myproc.rank) { - sleep(5); - } -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Deregister_event_handler(1, op_callbk, &mylock); - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/hello.c b/examples/hello.c deleted file mode 100644 index e921a3a414..0000000000 --- a/examples/hello.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -/* this is a callback function for the PMIx_Query - * API. The query will callback with a status indicating - * if the request could be fully satisfied, partially - * satisfied, or completely failed. The info parameter - * contains an array of the returned data, with the - * info->key field being the key that was provided in - * the query call. Thus, you can correlate the returned - * data in the info->value field to the requested key. - * - * Once we have dealt with the returned data, we must - * call the release_fn so that the PMIx library can - * cleanup */ -static void cbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, void *cbdata, - pmix_release_cbfunc_t release_fn, void *release_cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - size_t n; - char *tmp; - pmix_status_t rc; - - lock->status = status; - - fprintf(stderr, "Query returned %d values status %s\n", (int) ninfo, PMIx_Error_string(status)); - /* print out the returned keys and pmix_info_t structs */ - for (n = 0; n < ninfo; n++) { - fprintf(stderr, "KEY: %s\n", info[n].key); - rc = PMIx_Data_print(&tmp, NULL, &info[n].value, info[n].value.type); - if (PMIX_SUCCESS != rc) { - lock->status = rc; - goto done; - } - rc = PMIx_Data_print(&tmp, NULL, &info[n].value, info[n].value.type); - if (PMIX_SUCCESS != rc) { - lock->status = rc; - goto done; - } - fprintf(stderr, "Key %s Type %s(%d)\n", info[n].key, - PMIx_Data_type_string(info[n].value.type), info[n].value.type); - free(tmp); - } - -done: - /* let the library release the data and cleanup from - * the operation */ - if (NULL != release_fn) { - release_fn(release_cbdata); - } - - /* release the block */ - DEBUG_WAKEUP_THREAD(lock); -} - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pid_t pid; - char hostname[1024]; - pmix_value_t *val; - uint16_t localrank; - size_t n; - pmix_query_t query; - mylock_t mylock; - bool refresh = false; - - if (1 < argc) { - if (NULL != strstr(argv[1], "true")) { - refresh = true; - } - } - - pid = getpid(); - gethostname(hostname, 1024); - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes any - * debugger flag instructing us to stop-in-init. If such a directive - * is included, then the process will be stopped in this call until - * the "debugger release" notification arrives */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %s\n", myproc.nspace, myproc.rank, - PMIx_Error_string(rc)); - exit(0); - } - /* get our local rank */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_LOCAL_RANK, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get local rank failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - goto done; - } - localrank = val->data.uint16; - PMIX_VALUE_RELEASE(val); - - fprintf(stderr, "Client ns %s rank %d pid %lu: Running on host %s localrank %d\n", - myproc.nspace, myproc.rank, (unsigned long) pid, hostname, (int) localrank); - -#if PMIX_VERSION_MAJOR >= 0x00040000 - n = 1; - PMIX_QUERY_CONSTRUCT(&query); - PMIX_ARGV_APPEND(rc, query.keys, PMIX_QUERY_NUM_PSETS); - PMIX_ARGV_APPEND(rc, query.keys, PMIX_QUERY_PSET_NAMES); - if (refresh) { - PMIX_INFO_CREATE(query.qualifiers, 1); - query.nqual = 1; - PMIX_INFO_LOAD(&query.qualifiers[0], PMIX_QUERY_REFRESH_CACHE, &refresh, PMIX_BOOL); - } - /* setup the caddy to retrieve the data */ - DEBUG_CONSTRUCT_LOCK(&mylock); - /* execute the query */ - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(&query, 1, cbfunc, (void *) &mylock))) { - fprintf(stderr, "PMIx_Query_info failed: %d\n", rc); - goto done; - } - DEBUG_WAIT_THREAD(&mylock); - DEBUG_DESTRUCT_LOCK(&mylock); - -#endif - -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/jctrl.c b/examples/jctrl.c deleted file mode 100644 index c01a6f6443..0000000000 --- a/examples/jctrl.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -/* this is the event notification function we pass down below - * when registering for general events - i.e.,, the default - * handler. We don't technically need to register one, but it - * is usually good practice to catch any events that occur */ -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - if (NULL != cbfunc) { - cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); - } -} - -/* event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - lock->status = status; - lock->evhandler_ref = evhandler_ref; - DEBUG_WAKEUP_THREAD(lock); -} - -static void infocbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, void *cbdata, - pmix_release_cbfunc_t release_fn, void *release_cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - /* release the caller */ - if (NULL != release_fn) { - release_fn(release_cbdata); - } - - lock->status = status; - DEBUG_WAKEUP_THREAD(lock); -} - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_value_t value; - pmix_value_t *val = &value; - pmix_proc_t proc; - uint32_t nprocs, n; - pmix_info_t *info, *iptr; - bool flag; - mylock_t mylock; - pmix_data_array_t *dptr; - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); - - /* register our default event handler - again, this isn't strictly - * required, but is generally good practice */ - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIx_Register_event_handler(NULL, 0, NULL, 0, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - /* wait for registration to complete */ - DEBUG_WAIT_THREAD(&mylock); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, - myproc.rank); - goto done; - } - - /* job-related info is found in our nspace, assigned to the - * wildcard rank as it doesn't relate to a specific rank. Setup - * a name to retrieve such values */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - - /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - nprocs = val->data.uint32; - PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); - - /* inform the RM that we are preemptible, and that our checkpoint methods are - * "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */ - PMIX_INFO_CREATE(info, 2); - flag = true; - PMIX_INFO_LOAD(&info[0], PMIX_JOB_CTRL_PREEMPTIBLE, (void *) &flag, PMIX_BOOL); - /* can't use "load" to load a pmix_data_array_t */ - (void) strncpy(info[1].key, PMIX_JOB_CTRL_CHECKPOINT_METHOD, PMIX_MAX_KEYLEN); - info[1].value.type = PMIX_DATA_ARRAY; - dptr = (pmix_data_array_t *) malloc(sizeof(pmix_data_array_t)); - info[1].value.data.darray = dptr; - dptr->type = PMIX_INFO; - dptr->size = 2; - PMIX_INFO_CREATE(dptr->array, dptr->size); - rc = SIGUSR2; - iptr = (pmix_info_t *) dptr->array; - PMIX_INFO_LOAD(&iptr[0], PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, &rc, PMIX_INT); - rc = PMIX_JCTRL_CHECKPOINT; - PMIX_INFO_LOAD(&iptr[1], PMIX_JOB_CTRL_CHECKPOINT_EVENT, &rc, PMIX_STATUS); - - /* since this is informational and not a requested operation, the target parameter - * doesn't mean anything and can be ignored */ - DEBUG_CONSTRUCT_LOCK(&mylock); - if (PMIX_SUCCESS - != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void *) &mylock))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, - myproc.rank, rc); - DEBUG_DESTRUCT_LOCK(&mylock); - goto done; - } - DEBUG_WAIT_THREAD(&mylock); - PMIX_INFO_FREE(info, 2); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - - /* now request that this process be monitored using heartbeats */ - PMIX_INFO_CREATE(iptr, 1); - PMIX_INFO_LOAD(&iptr[0], PMIX_MONITOR_HEARTBEAT, NULL, PMIX_POINTER); - - PMIX_INFO_CREATE(info, 3); - PMIX_INFO_LOAD(&info[0], PMIX_MONITOR_ID, "MONITOR1", PMIX_STRING); - n = 5; // require a heartbeat every 5 seconds - PMIX_INFO_LOAD(&info[1], PMIX_MONITOR_HEARTBEAT_TIME, &n, PMIX_UINT32); - n = 2; // two heartbeats can be missed before declaring us "stalled" - PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_HEARTBEAT_DROPS, &n, PMIX_UINT32); - - /* make the request */ - DEBUG_CONSTRUCT_LOCK(&mylock); - if (PMIX_SUCCESS - != (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_HEARTBEAT_ALERT, info, 3, infocbfunc, - (void *) &mylock))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, - myproc.rank, rc); - DEBUG_DESTRUCT_LOCK(&mylock); - goto done; - } - DEBUG_WAIT_THREAD(&mylock); - PMIX_INFO_FREE(iptr, 1); - PMIX_INFO_FREE(info, 3); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - - /* send a heartbeat */ - PMIx_Heartbeat(); - - /* call fence to synchronize with our peers - no need to - * collect any info as we didn't "put" anything */ - PMIX_INFO_CREATE(info, 1); - flag = false; - PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - PMIX_INFO_FREE(info, 1); - -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/launcher.c b/examples/launcher.c deleted file mode 100644 index 9455ee7aa2..0000000000 --- a/examples/launcher.c +++ /dev/null @@ -1,170 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2016-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -static void notification_fn(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - myrel_t *lock = NULL; - size_t n; - pmix_status_t jobstatus = 0; - pmix_proc_t affected; - char *msg = NULL; - - memset(&affected, 0, sizeof(pmix_proc_t)); - - /* we should always have info returned to us - if not, there is - * nothing we can do */ - if (NULL != info) { - for (n = 0; n < ninfo; n++) { - if (0 == strncmp(info[n].key, PMIX_JOB_TERM_STATUS, PMIX_MAX_KEYLEN)) { - jobstatus = info[n].value.data.status; - } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { - memcpy(&affected, info[n].value.data.proc, sizeof(pmix_proc_t)); - } else if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { - lock = (myrel_t *) info[n].value.data.ptr; - } else if (0 == strncmp(info[n].key, PMIX_EVENT_TEXT_MESSAGE, PMIX_MAX_KEYLEN)) { - msg = info[n].value.data.string; - } - } - } - if (NULL == lock) { - fprintf(stderr, "LOCK WAS NOT RETURNED IN EVENT NOTIFICATION\n"); - goto done; - } - /* save the status */ - lock->lock.status = jobstatus; - if (NULL != msg) { - lock->nspace = strdup(msg); - } - /* release the lock */ - DEBUG_WAKEUP_THREAD(&lock->lock); - -done: - /* we _always_ have to execute the evhandler callback or - * else the event progress engine will hang */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, NULL, 0, NULL, NULL, cbdata); - } -} - -/* event handler registration is done asynchronously because it - * may involve the PMIx server registering with the host RM for - * external events. So we provide a callback function that returns - * the status of the request (success or an error), plus a numerical index - * to the registered event. The index is used later on to deregister - * an event handler - if we don't explicitly deregister it, then the - * PMIx server will do so when it see us exit */ -static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) -{ - mylock_t *lock = (mylock_t *) cbdata; - - if (PMIX_SUCCESS != status) { - fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", - myproc.nspace, myproc.rank, status, (unsigned long) evhandler_ref); - } - lock->status = status; - lock->evhandler_ref = evhandler_ref; - DEBUG_WAKEUP_THREAD(lock); -} - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_info_t info; - pmix_app_t *app; - size_t ninfo, napps; - bool flag; - myrel_t myrel; - mylock_t mylock; - pmix_status_t code[6] = {PMIX_ERR_PROC_ABORTING, - PMIX_ERR_PROC_ABORTED, - PMIX_ERR_PROC_REQUESTED_ABORT, - PMIX_EVENT_JOB_END, - PMIX_ERR_UNREACH, - PMIX_ERR_LOST_CONNECTION}; - pmix_nspace_t appspace; - - /* we need to attach to a "system" PMIx server so we - * can ask it to spawn applications for us. There can - * only be one such connection on a node, so we will - * instruct the tool library to only look for it */ - flag = true; - PMIX_INFO_LOAD(&info, PMIX_CONNECT_TO_SYSTEM, &flag, PMIX_BOOL); - - /* initialize the library and make the connection */ - if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, &info, 1))) { - fprintf(stderr, "PMIx_tool_init failed: %d\n", rc); - exit(rc); - } - - DEBUG_CONSTRUCT_MYREL(&myrel); - - /* register an event handler so we can be notified when - * our spawned job completes, or if it fails (even at launch) */ - DEBUG_CONSTRUCT_LOCK(&mylock); - PMIX_INFO_LOAD(&info, PMIX_EVENT_RETURN_OBJECT, &myrel, PMIX_POINTER); - PMIx_Register_event_handler(code, 6, &info, 1, notification_fn, evhandler_reg_callbk, - (void *) &mylock); - DEBUG_WAIT_THREAD(&mylock); - rc = mylock.status; - DEBUG_DESTRUCT_LOCK(&mylock); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, - myproc.rank); - goto done; - } - - /* parse the cmd line and create our array of app structs - * describing the application we want launched */ - napps = 1; - PMIX_APP_CREATE(app, napps); - /* setup the executable */ - app[0].cmd = strdup("app"); - app[0].argv = (char **) malloc(2 * sizeof(char *)); - app[0].argv[0] = strdup("app"); - app[0].argv[1] = NULL; - app[0].maxprocs = 128; - /* can also provide environmental params in the app.env field */ - - /* provide directives so the apps do what the user requested - just - * some random examples provided here*/ - app[0].ninfo = 2; - PMIX_INFO_CREATE(app[0].info, app[0].ninfo); - PMIX_INFO_LOAD(&app[0].info[0], PMIX_MAPBY, "slot", PMIX_STRING); - /* include a directive that we be notified upon completion of the job */ - PMIX_INFO_LOAD(&app[0].info[1], PMIX_NOTIFY_COMPLETION, &flag, PMIX_BOOL); - - /* spawn the application */ - PMIx_Spawn(NULL, 0, app, napps, appspace); - /* cleanup */ - PMIX_APP_FREE(app, napps); - - DEBUG_WAIT_THREAD(&myrel.lock); - DEBUG_DESTRUCT_MYREL(&myrel); - -done: - PMIx_tool_finalize(); - - return (0); -} diff --git a/examples/legacy.c b/examples/legacy.c deleted file mode 100644 index 7834dcb154..0000000000 --- a/examples/legacy.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_value_t value; - pmix_value_t *val = &value; - char *tmp; - pmix_proc_t proc; - uint32_t nprocs, n; - pmix_info_t *info; - bool flag; - mylock_t mylock; - myrel_t myrel; - pmix_status_t dbg = PMIX_ERR_DEBUGGER_RELEASE; - pid_t pid; - char hostname[1024]; - - pid = getpid(); - gethostname(hostname, 1024); - fprintf(stderr, "Client %lu: Running on node %s\n", (unsigned long) pid, hostname); - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes any - * debugger flag instructing us to stop-in-init. If such a directive - * is included, then the process will be stopped in this call until - * the "debugger release" notification arrives */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - - /* job-related info is found in our nspace, assigned to the - * wildcard rank as it doesn't relate to a specific rank. Setup - * a name to retrieve such values */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - - /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - PMIX_VALUE_GET_NUMBER(rc, val, n, uint32_t); - fprintf(stderr, "Client %s:%d universe size %u\n", myproc.nspace, myproc.rank, n); - - /* get the number of procs in our job - univ size is the total number of allocated - * slots, not the number of procs in the job */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - nprocs = val->data.uint32; - PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d num procs %d\n", myproc.nspace, myproc.rank, nprocs); - - if (0 > asprintf(&tmp, "uniq-key")) { - exit(1); - } - - if (myproc.rank == 0) { - /* put a few values */ - value.type = PMIX_UINT64; - value.data.uint64 = 1234; - if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, tmp, &value))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Put internal failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - - /* push the data to our PMIx server */ - if (PMIX_SUCCESS != (rc = PMIx_Commit())) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Commit failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - } - - /* call fence to synchronize with our peers - instruct - * the fence operation to collect and return all "put" - * data from our peers */ - PMIX_INFO_CREATE(info, 1); - flag = true; - PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - PMIX_INFO_FREE(info, 1); - - proc.rank = PMIX_RANK_UNDEF; - - rc = PMIx_Get(&proc, tmp, NULL, 0, &val); - - if (rc != PMIX_SUCCESS) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s failed: %d\n", myproc.nspace, - myproc.rank, tmp, rc); - goto done; - } - if (PMIX_UINT64 != val->type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong type: %d\n", - myproc.nspace, myproc.rank, tmp, val->type); - PMIX_VALUE_RELEASE(val); - goto done; - } - if (1234 != val->data.uint64) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned wrong value: %d\n", - myproc.nspace, myproc.rank, tmp, (int) val->data.uint64); - PMIX_VALUE_RELEASE(val); - goto done; - } - fprintf(stderr, "Client ns %s rank %d: PMIx_Get %s returned correct\n", myproc.nspace, - myproc.rank, tmp); - PMIX_VALUE_RELEASE(val); - - -done: - free(tmp); - - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/log.c b/examples/log.c deleted file mode 100644 index 30916744d8..0000000000 --- a/examples/log.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_info_t *info, *directives; - bool flag; - pmix_proc_t proc; - bool syslog = false, global = false; - - /* check for CLI directives */ - if (1 < argc) { - if (0 == strcmp(argv[argc - 1], "--syslog")) { - syslog = true; - } else if (0 == strcmp(argv[argc - 1], "--global-syslog")) { - global = true; - } - } - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); - - /* have rank 0 do the logs - doesn't really matter who does it */ - if (0 == myproc.rank) { - /* always output a log message to stderr */ - PMIX_INFO_CREATE(info, 1); - PMIX_INFO_LOAD(&info[0], PMIX_LOG_STDERR, "stderr log message\n", PMIX_STRING); - PMIX_INFO_CREATE(directives, 1); - PMIX_INFO_LOAD(&directives[0], PMIX_LOG_GENERATE_TIMESTAMP, NULL, PMIX_BOOL); - rc = PMIx_Log(info, 1, directives, 1); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Log stderr failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - goto fence; - } - /* if requested, output one to syslog */ - if (syslog) { - fprintf(stderr, "LOG TO LOCAL SYSLOG\n"); - PMIX_INFO_CREATE(info, 1); - PMIX_INFO_LOAD(&info[0], PMIX_LOG_LOCAL_SYSLOG, "SYSLOG message\n", PMIX_STRING); - rc = PMIx_Log(info, 1, NULL, 0); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Log syslog failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - goto fence; - } - } - if (global) { - fprintf(stderr, "LOG TO GLOBAL SYSLOG\n"); - PMIX_INFO_CREATE(info, 1); - PMIX_INFO_LOAD(&info[0], PMIX_LOG_GLOBAL_SYSLOG, "GLOBAL SYSLOG message\n", - PMIX_STRING); - rc = PMIx_Log(info, 1, NULL, 0); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Log GLOBAL syslog failed: %s\n", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); - goto fence; - } - } - } - -fence: - fprintf(stderr, "%s:%d Calling Fence\n", myproc.nspace, myproc.rank); - /* call fence to synchronize with our peers - no need to - * collect any info as we didn't "put" anything */ - PMIX_INFO_CREATE(info, 1); - flag = false; - PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - PMIX_PROC_LOAD(&proc, myproc.nspace, PMIX_RANK_WILDCARD); - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - PMIX_INFO_FREE(info, 1); - -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/nodeid.c b/examples/nodeid.c deleted file mode 100644 index da4a5a126a..0000000000 --- a/examples/nodeid.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * Copyright (c) 2023 Triad National Security, LLC. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pid_t pid; - char hostname[1024]; - pmix_value_t *val; - uint32_t jobsize, nodeid; - size_t n; - pmix_proc_t proc, wildcard; - - pid = getpid(); - gethostname(hostname, 1024); - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "[%s:%lu] PMIx_Init failed: %s\n", - hostname, (unsigned long)pid, PMIx_Error_string(rc)); - exit(0); - } - PMIX_LOAD_PROCID(&wildcard, myproc.nspace, PMIX_RANK_WILDCARD); - - /* get our job size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&wildcard, PMIX_JOB_SIZE, NULL, 0, &val))) { - fprintf(stderr, "[%s:%u] PMIx_Get job size failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - goto done; - } - PMIX_VALUE_GET_NUMBER(rc, val, jobsize, uint32_t); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "[%s:%u] Got bad job size: %s\n", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); - goto done; - } - PMIX_VALUE_RELEASE(val); - - /* get the nodeid of all our peers */ - PMIX_LOAD_NSPACE(proc.nspace, myproc.nspace); - for (n=0; n < jobsize; n++) { - proc.rank = n; - rc = PMIx_Get(&proc, PMIX_NODEID, NULL, 0, &val); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "[%s:%u] PMIx_Get failed for nodeid on rank %zd: %s\n", - myproc.nspace, myproc.rank, n, PMIx_Error_string(rc)); - break; - } - PMIX_VALUE_GET_NUMBER(rc, val, nodeid, uint32_t); - if (PMIX_SUCCESS != rc) { - fprintf(stderr, "[%s:%u] Got bad nodeid for rank %zd: %s\n", - myproc.nspace, myproc.rank, n, PMIx_Error_string(rc)); - goto done; - } - if (0 == myproc.rank) { - fprintf(stderr, "[%s:%u] Peer %zd is running on node %u\n", - myproc.nspace, myproc.rank, n, nodeid); - } - PMIX_VALUE_RELEASE(val); - } - - fprintf(stderr, "[%s:%u]: Successfully retrieved all nodeids\n", - myproc.nspace, myproc.rank); - -done: - /* finalize us */ - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - } - fflush(stderr); - return (0); -} diff --git a/examples/pmi1client.c b/examples/pmi1client.c deleted file mode 100644 index 78eb163b1d..0000000000 --- a/examples/pmi1client.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include - -int main(int argc, char **argv) -{ - int rc, spawned; - char *tmp; - int nprocs, rank; - bool flag; - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes any - * debugger flag instructing us to stop-in-init. If such a directive - * is included, then the process will be stopped in this call until - * the "debugger release" notification arrives */ - if (PMI_SUCCESS != (rc = PMI_Init(&spawned))) { - fprintf(stderr, "Client: PMI_Init failed: %d\n", rc); - exit(0); - } - fprintf(stderr, "Client: Running\n"); - - /* check to see if we have been instructed to wait for a debugger - * to attach to us. We won't get both a stop-in-init AND a - * wait-for-notify directive, so we should never stop twice. This - * directive is provided so that something like an MPI implementation - * can do some initial setup in MPI_Init prior to pausing for the - * debugger */ - if (PMI_SUCCESS != (rc = PMI_Get_size(&nprocs))) { - fprintf(stderr, "PMI_Get_size failed: %d\n", rc); - exit(1); - } - - /* get our universe size */ - if (PMI_SUCCESS != (rc = PMI_Get_rank(&rank))) { - fprintf(stderr, "PMI_Get_rank failed: %d\n", rc); - exit(1); - } - - /* finalize us */ - fprintf(stderr, "Client %d: Finalizing\n", rank); - if (PMI_SUCCESS != (rc = PMI_Finalize())) { - fprintf(stderr, "Client %d: PMI_Finalize failed: %d\n", rank, rc); - } - fflush(stderr); - return (0); -} diff --git a/examples/probe.c b/examples/probe.c deleted file mode 100644 index fb978f18a0..0000000000 --- a/examples/probe.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include - -int main(int argc, char **argv) -{ - int rc; - pmix_value_t value; - pmix_value_t *val = &value; - pmix_proc_t proc; - pid_t pid; - pmix_proc_t myproc; - - if (2 != argc) { - fprintf(stderr, "Usage: probe \n"); - exit(1); - } - pid = getpid(); - fprintf(stderr, "Client %lu: Running\n", (unsigned long) pid); - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes any - * debugger flag instructing us to stop-in-init. If such a directive - * is included, then the process will be stopped in this call until - * the "debugger release" notification arrives */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - - /* get our universe size */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, - val->data.uint32); - - /* now get the universe size of the specified nspace */ - (void) strncpy(proc.nspace, argv[1], PMIX_MAX_NSLEN); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, - "Client ns %s rank %d: PMIx_Get of target nspace %s universe size failed: %d\n", - myproc.nspace, myproc.rank, argv[1], rc); - goto done; - } - fprintf(stderr, "Client %s:%d target nspace %s universe size %d\n", myproc.nspace, myproc.rank, - argv[1], val->data.uint32); - -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/pset.c b/examples/pset.c deleted file mode 100644 index 9bf5eb2cf1..0000000000 --- a/examples/pset.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. - * Copyright (c) 2023 Triad National Security, LLC. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include - -int main(int argc, char **argv) -{ - int rc; - size_t n; - pmix_value_t value; - pmix_value_t *val = &value; - pmix_proc_t proc, *pptr; - pid_t pid; - pmix_proc_t myproc; - - pid = getpid(); - - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - - /* get our pset name */ - PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD); - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_PSET_NAME, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get pset name failed: %s\n", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); - goto done; - } - fprintf(stderr, "Client %s:%d pset name %s\n", - myproc.nspace, myproc.rank, val->data.string); - PMIX_VALUE_FREE(val, 1); - - /* since this is our pset, get our membership */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_PSET_MEMBERS, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get of pset members failed: %s\n", - myproc.nspace, myproc.rank, PMIx_Error_string(rc)); - goto done; - } - /* must return a pmix_data_array_t of members */ - if (PMIX_DATA_ARRAY != val->type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get of pset members returned incorrect data type: %s\n", - myproc.nspace, myproc.rank, PMIx_Data_type_string(val->type)); - goto done; - } - fprintf(stderr, "Client %s:%d PMIx_Get returned %zd members\n", myproc.nspace, myproc.rank, - val->data.darray->size); - pptr = (pmix_proc_t*)val->data.darray->array; - for (n=0; n < val->data.darray->size; n++) { - fprintf(stderr, "\t%s:%d\n", pptr[n].nspace, pptr[n].rank); - } - PMIX_VALUE_FREE(val, 1); - -done: - return (rc); -} diff --git a/examples/pub.c b/examples/pub.c deleted file mode 100644 index b2e038b345..0000000000 --- a/examples/pub.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include -#include -#include -#include -#include - -#include - -int main(int argc, char **argv) -{ - pmix_proc_t myproc; - int rc; - pmix_value_t value; - pmix_value_t *val = &value; - pmix_proc_t proc; - uint32_t nprocs; - pmix_info_t *info; - pmix_pdata_t *pdata; - - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); - - /* get our universe size */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - nprocs = val->data.uint32; - PMIX_VALUE_RELEASE(val); - fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs); - - /* call fence to ensure the data is received */ - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - - /* publish something */ - if (0 == myproc.rank) { - fprintf(stderr, "%s:%d publishing two keys\n", myproc.nspace, myproc.rank); - PMIX_INFO_CREATE(info, 2); - (void) strncpy(info[0].key, "FOOBAR", PMIX_MAX_KEYLEN); - info[0].value.type = PMIX_UINT8; - info[0].value.data.uint8 = 1; - (void) strncpy(info[1].key, "PANDA", PMIX_MAX_KEYLEN); - info[1].value.type = PMIX_SIZE; - info[1].value.data.size = 123456; - if (PMIX_SUCCESS != (rc = PMIx_Publish(info, 2))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Publish failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - fprintf(stderr, "%s:%d publish complete\n", myproc.nspace, myproc.rank); - PMIX_INFO_FREE(info, 2); - } - - /* call fence again so all procs know the data - * has been published */ - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - - /* lookup something */ - if (0 != myproc.rank) { - PMIX_PDATA_CREATE(pdata, 1); - fprintf(stderr, "%s:%d looking up key FOOBAR\n", myproc.nspace, myproc.rank); - (void) strncpy(pdata[0].key, "FOOBAR", PMIX_MAX_KEYLEN); - if (PMIX_SUCCESS != (rc = PMIx_Lookup(pdata, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Lookup failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - /* check the return for value and source */ - if (0 != strncmp(myproc.nspace, pdata[0].proc.nspace, PMIX_MAX_NSLEN)) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Lookup returned wrong nspace: %s\n", - myproc.nspace, myproc.rank, pdata[0].proc.nspace); - goto done; - } - if (0 != pdata[0].proc.rank) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Lookup returned wrong rank: %d\n", - myproc.nspace, myproc.rank, pdata[0].proc.rank); - goto done; - } - if (PMIX_UINT8 != pdata[0].value.type) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Lookup returned wrong type: %d\n", - myproc.nspace, myproc.rank, pdata[0].value.type); - goto done; - } - if (1 != pdata[0].value.data.uint8) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Lookup returned wrong value: %d\n", - myproc.nspace, myproc.rank, (int) pdata[0].value.data.uint8); - goto done; - } - PMIX_PDATA_FREE(pdata, 1); - fprintf(stderr, "PUBLISH-LOOKUP SUCCEEDED\n"); - } - - /* call fence again so rank 0 waits before leaving */ - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - - if (0 == myproc.rank) { - char **keys; - keys = (char **) malloc(3 * sizeof(char *)); - keys[0] = "FOOBAR"; - keys[1] = "PANDA"; - keys[2] = NULL; - - fprintf(stderr, "%s:%d unpublishing two keys\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Unpublish(keys, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Unpublish failed: %d\n", myproc.nspace, - myproc.rank, rc); - free(keys); - goto done; - } - free(keys); - fprintf(stderr, "UNPUBLISH SUCCEEDED\n"); - } - - /* call fence again so everyone waits for rank 0 before leaving */ - if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, - rc); - goto done; - } - -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/server.c b/examples/server.c deleted file mode 100644 index bd31854cbb..0000000000 --- a/examples/server.c +++ /dev/null @@ -1,851 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 IBM Corporation. All rights reserved. - * Copyright (c) 2021-2022 Nanook Consulting All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include PMIX_EVENT_HEADER - -#include "src/class/pmix_list.h" -#include "src/util/argv.h" -#include "src/util/output.h" -#include "src/util/pmix_environ.h" -#include "src/util/pmix_printf.h" - -static pmix_status_t connected(const pmix_proc_t *proc, void *server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata); -static pmix_status_t finalized(const pmix_proc_t *proc, void *server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata); -static pmix_status_t abort_fn(const pmix_proc_t *proc, void *server_object, int status, - const char msg[], pmix_proc_t procs[], size_t nprocs, - pmix_op_cbfunc_t cbfunc, void *cbdata); -static pmix_status_t fencenb_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], - size_t ninfo, char *data, size_t ndata, pmix_modex_cbfunc_t cbfunc, - void *cbdata); -static pmix_status_t dmodex_fn(const pmix_proc_t *proc, const pmix_info_t info[], size_t ninfo, - pmix_modex_cbfunc_t cbfunc, void *cbdata); -static pmix_status_t publish_fn(const pmix_proc_t *proc, const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); -static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, const pmix_info_t info[], - size_t ninfo, pmix_lookup_cbfunc_t cbfunc, void *cbdata); -static pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys, const pmix_info_t info[], - size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); -static pmix_status_t spawn_fn(const pmix_proc_t *proc, const pmix_info_t job_info[], size_t ninfo, - const pmix_app_t apps[], size_t napps, pmix_spawn_cbfunc_t cbfunc, - void *cbdata); -static pmix_status_t connect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], - size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata); -static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, - void *cbdata); -static pmix_status_t register_event_fn(pmix_status_t *codes, size_t ncodes, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); -static pmix_status_t deregister_events(pmix_status_t *codes, size_t ncodes, pmix_op_cbfunc_t cbfunc, - void *cbdata); -static pmix_status_t notify_event(pmix_status_t code, const pmix_proc_t *source, - pmix_data_range_t range, pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata); -static pmix_status_t query_fn(pmix_proc_t *proct, pmix_query_t *queries, size_t nqueries, - pmix_info_cbfunc_t cbfunc, void *cbdata); -static void tool_connect_fn(pmix_info_t *info, size_t ninfo, pmix_tool_connection_cbfunc_t cbfunc, - void *cbdata); -static void log_fn(const pmix_proc_t *client, const pmix_info_t data[], size_t ndata, - const pmix_info_t directives[], size_t ndirs, pmix_op_cbfunc_t cbfunc, - void *cbdata); - -static pmix_server_module_t mymodule = {.client_connected = connected, - .client_finalized = finalized, - .abort = abort_fn, - .fence_nb = fencenb_fn, - .direct_modex = dmodex_fn, - .publish = publish_fn, - .lookup = lookup_fn, - .unpublish = unpublish_fn, - .spawn = spawn_fn, - .connect = connect_fn, - .disconnect = disconnect_fn, - .register_events = register_event_fn, - .deregister_events = deregister_events, - .notify_event = notify_event, - .query = query_fn, - .tool_connected = tool_connect_fn, - .log = log_fn}; - -typedef struct { - pmix_list_item_t super; - pmix_pdata_t pdata; -} pmix_locdat_t; -PMIX_CLASS_INSTANCE(pmix_locdat_t, pmix_list_item_t, NULL, NULL); - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - PMIX_ACQUIRE_OBJECT((a)); \ - } while (0) - -typedef struct { - pmix_object_t super; - volatile bool active; - pmix_proc_t caller; - pmix_info_t *info; - size_t ninfo; - pmix_op_cbfunc_t cbfunc; - pmix_spawn_cbfunc_t spcbfunc; - void *cbdata; -} myxfer_t; -static void xfcon(myxfer_t *p) -{ - p->info = NULL; - p->ninfo = 0; - p->active = true; - p->cbfunc = NULL; - p->spcbfunc = NULL; - p->cbdata = NULL; -} -static void xfdes(myxfer_t *p) -{ - if (NULL != p->info) { - PMIX_INFO_FREE(p->info, p->ninfo); - } -} -PMIX_CLASS_INSTANCE(myxfer_t, pmix_object_t, xfcon, xfdes); - -typedef struct { - pmix_list_item_t super; - pid_t pid; -} wait_tracker_t; -PMIX_CLASS_INSTANCE(wait_tracker_t, pmix_list_item_t, NULL, NULL); - -static volatile int wakeup; -static pmix_list_t pubdata; -static pmix_event_t handler; -static pmix_list_t children; - -static void set_namespace(int nprocs, char *ranks, char *nspace, pmix_op_cbfunc_t cbfunc, - myxfer_t *x); -static void errhandler(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata); -static void wait_signal_callback(int fd, short event, void *arg); -static void errhandler_reg_callbk(pmix_status_t status, size_t errhandler_ref, void *cbdata); - -static void opcbfunc(pmix_status_t status, void *cbdata) -{ - myxfer_t *x = (myxfer_t *) cbdata; - - /* release the caller, if necessary */ - if (NULL != x->cbfunc) { - x->cbfunc(PMIX_SUCCESS, x->cbdata); - } - x->active = false; -} - -int main(int argc, char **argv) -{ - char **client_env = NULL; - char **client_argv = NULL; - char *tmp, **atmp, *executable = NULL, *tmpdir, *cleanup; - int rc, nprocs = 1, n, k; - uid_t myuid; - gid_t mygid; - pid_t pid; - myxfer_t *x; - pmix_proc_t proc; - wait_tracker_t *child; - char *tdir; - uid_t uid = geteuid(); - pmix_info_t *info; - struct stat buf; - - /* define and pass a personal tmpdir to protect the system */ - if (NULL == (tdir = getenv("TMPDIR"))) { - if (NULL == (tdir = getenv("TEMP"))) { - if (NULL == (tdir = getenv("TMP"))) { - tdir = "/tmp"; - } - } - } - if (0 > asprintf(&tmpdir, "%s/pmix.%lu", tdir, (long unsigned) uid)) { - fprintf(stderr, "Out of memory\n"); - exit(1); - } - /* create the directory */ - if (0 != stat(tmpdir, &buf)) { - /* try to make directory */ - if (0 != mkdir(tmpdir, S_IRWXU)) { - fprintf(stderr, "Cannot make tmpdir %s", tmpdir); - exit(1); - } - } - asprintf(&cleanup, "rm -rf %s", tmpdir); - PMIX_INFO_CREATE(info, 1); - PMIX_INFO_LOAD(&info[0], PMIX_SERVER_TMPDIR, tmpdir, PMIX_STRING); - - /* setup the server library */ - if (PMIX_SUCCESS != (rc = PMIx_server_init(&mymodule, info, 1))) { - fprintf(stderr, "Init failed with error %d\n", rc); - return rc; - } - PMIX_INFO_FREE(info, 1); - - /* register the errhandler */ - PMIx_Register_event_handler(NULL, 0, NULL, 0, errhandler, errhandler_reg_callbk, NULL); - - /* setup the pub data, in case it is used */ - PMIX_CONSTRUCT(&pubdata, pmix_list_t); - - /* setup to see sigchld on the forked tests */ - PMIX_CONSTRUCT(&children, pmix_list_t); - event_assign(&handler, pmix_globals.evbase, SIGCHLD, EV_SIGNAL | EV_PERSIST, - wait_signal_callback, &handler); - event_add(&handler, NULL); - - /* see if we were passed the number of procs to run or - * the executable to use */ - for (n = 1; n < (argc - 1); n++) { - if (0 == strcmp("-n", argv[n]) && NULL != argv[n + 1]) { - nprocs = strtol(argv[n + 1], NULL, 10); - ++n; // step over the argument - } else if (0 == strcmp("-e", argv[n]) && NULL != argv[n + 1]) { - executable = strdup(argv[n + 1]); - for (k = n + 2; NULL != argv[k]; k++) { - PMIX_ARGV_APPEND_COMPAT(&client_argv, argv[k]); - } - n += k; - } - } - if (NULL == executable) { - executable = strdup("./simpclient"); - } - - /* we have a single namespace for all clients */ - atmp = NULL; - for (n = 0; n < nprocs; n++) { - asprintf(&tmp, "%d", n); - PMIX_ARGV_APPEND_COMPAT(&atmp, tmp); - free(tmp); - } - tmp = PMIX_ARGV_JOIN_COMPAT(atmp, ','); - PMIX_ARGV_FREE_COMPAT(atmp); - /* register the nspace */ - x = PMIX_NEW(myxfer_t); - set_namespace(nprocs, tmp, "foobar", opcbfunc, x); - - /* set common argv and env */ - client_env = PMIX_ARGV_COPY_COMPAT(environ); - pmix_argv_prepend_nosize(&client_argv, executable); - - wakeup = nprocs; - myuid = getuid(); - mygid = getgid(); - - /* if the nspace registration hasn't completed yet, - * wait for it here */ - PMIX_WAIT_FOR_COMPLETION(x->active); - free(tmp); - PMIX_RELEASE(x); - - /* prep the local node for launch */ - x = PMIX_NEW(myxfer_t); - if (PMIX_SUCCESS != (rc = PMIx_server_setup_local_support("foobar", NULL, 0, opcbfunc, x))) { - fprintf(stderr, "Setup local support failed: %d\n", rc); - PMIx_server_finalize(); - system(cleanup); - return rc; - } - PMIX_WAIT_FOR_COMPLETION(x->active); - PMIX_RELEASE(x); - - /* fork/exec the test */ - (void) strncpy(proc.nspace, "foobar", PMIX_MAX_NSLEN); - for (n = 0; n < nprocs; n++) { - proc.rank = n; - if (PMIX_SUCCESS != (rc = PMIx_server_setup_fork(&proc, &client_env))) { // n - fprintf(stderr, "Server fork setup failed with error %d\n", rc); - PMIx_server_finalize(); - system(cleanup); - return rc; - } - x = PMIX_NEW(myxfer_t); - if (PMIX_SUCCESS - != (rc = PMIx_server_register_client(&proc, myuid, mygid, NULL, opcbfunc, x))) { - fprintf(stderr, "Server fork setup failed with error %d\n", rc); - PMIx_server_finalize(); - system(cleanup); - return rc; - } - /* don't fork/exec the client until we know it is registered - * so we avoid a potential race condition in the server */ - PMIX_WAIT_FOR_COMPLETION(x->active); - PMIX_RELEASE(x); - pid = fork(); - if (pid < 0) { - fprintf(stderr, "Fork failed\n"); - PMIx_server_finalize(); - system(cleanup); - return -1; - } - child = PMIX_NEW(wait_tracker_t); - child->pid = pid; - pmix_list_append(&children, &child->super); - - if (pid == 0) { - execve(executable, client_argv, client_env); - /* Does not return */ - exit(0); - } - } - free(executable); - PMIX_ARGV_FREE_COMPAT(client_argv); - PMIX_ARGV_FREE_COMPAT(client_env); - - /* hang around until the client(s) finalize */ - while (0 < wakeup) { - struct timespec ts; - ts.tv_sec = 0; - ts.tv_nsec = 100000; - nanosleep(&ts, NULL); - } - - /* deregister the errhandler */ - PMIx_Deregister_event_handler(0, NULL, NULL); - - /* release any pub data */ - PMIX_LIST_DESTRUCT(&pubdata); - - /* finalize the server library */ - if (PMIX_SUCCESS != (rc = PMIx_server_finalize())) { - fprintf(stderr, "Finalize failed with error %d\n", rc); - } - - fprintf(stderr, "Test finished OK!\n"); - system(cleanup); - - return rc; -} - -static void setup_cbfunc(pmix_status_t status, pmix_info_t info[], size_t ninfo, - void *provided_cbdata, pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - myxfer_t *myxfer = (myxfer_t *) provided_cbdata; - size_t i; - - if (PMIX_SUCCESS == status && 0 < ninfo) { - myxfer->ninfo = ninfo; - PMIX_INFO_CREATE(myxfer->info, ninfo); - for (i = 0; i < ninfo; i++) { - PMIX_INFO_XFER(&myxfer->info[i], &info[i]); - } - } - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - myxfer->active = false; -} - -static void set_namespace(int nprocs, char *ranks, char *nspace, pmix_op_cbfunc_t cbfunc, - myxfer_t *x) -{ - char *regex, *ppn; - char hostname[PMIX_MAXHOSTNAMELEN]; - pmix_status_t rc; - myxfer_t myxfer; - size_t i; - - gethostname(hostname, sizeof(hostname)); - - /* request application setup information - e.g., network - * security keys or endpoint info */ - PMIX_CONSTRUCT(&myxfer, myxfer_t); - myxfer.active = true; - if (PMIX_SUCCESS - != (rc = PMIx_server_setup_application(nspace, NULL, 0, setup_cbfunc, &myxfer))) { - PMIX_DESTRUCT(&myxfer); - fprintf(stderr, "Failed to setup application: %d\n", rc); - exit(1); - } - PMIX_WAIT_FOR_COMPLETION(myxfer.active); - x->ninfo = myxfer.ninfo + 7; - - PMIX_INFO_CREATE(x->info, x->ninfo); - if (0 < myxfer.ninfo) { - for (i = 0; i < myxfer.ninfo; i++) { - PMIX_INFO_XFER(&x->info[i], &myxfer.info[i]); - } - } - PMIX_DESTRUCT(&myxfer); - - (void) strncpy(x->info[i].key, PMIX_UNIV_SIZE, PMIX_MAX_KEYLEN); - x->info[i].value.type = PMIX_UINT32; - x->info[i].value.data.uint32 = nprocs; - - ++i; - (void) strncpy(x->info[i].key, PMIX_SPAWNED, PMIX_MAX_KEYLEN); - x->info[i].value.type = PMIX_UINT32; - x->info[i].value.data.uint32 = 0; - - ++i; - (void) strncpy(x->info[i].key, PMIX_LOCAL_SIZE, PMIX_MAX_KEYLEN); - x->info[i].value.type = PMIX_UINT32; - x->info[i].value.data.uint32 = nprocs; - - ++i; - (void) strncpy(x->info[i].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN); - x->info[i].value.type = PMIX_STRING; - x->info[i].value.data.string = strdup(ranks); - - ++i; - PMIx_generate_regex(hostname, ®ex); - (void) strncpy(x->info[i].key, PMIX_NODE_MAP, PMIX_MAX_KEYLEN); - x->info[i].value.type = PMIX_STRING; - x->info[i].value.data.string = regex; - - ++i; - PMIx_generate_ppn(ranks, &ppn); - (void) strncpy(x->info[i].key, PMIX_PROC_MAP, PMIX_MAX_KEYLEN); - x->info[i].value.type = PMIX_STRING; - x->info[i].value.data.string = ppn; - - ++i; - (void) strncpy(x->info[i].key, PMIX_JOB_SIZE, PMIX_MAX_KEYLEN); - x->info[i].value.type = PMIX_UINT32; - x->info[i].value.data.uint32 = nprocs; - - PMIx_server_register_nspace(nspace, nprocs, x->info, x->ninfo, cbfunc, x); -} - -static void errhandler(size_t evhdlr_registration_id, pmix_status_t status, - const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, - pmix_info_t results[], size_t nresults, - pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) -{ - pmix_output(0, "SERVER: ERRHANDLER CALLED WITH STATUS %d", status); -} - -static void errhandler_reg_callbk(pmix_status_t status, size_t errhandler_ref, void *cbdata) -{ - return; -} - -static pmix_status_t connected(const pmix_proc_t *proc, void *server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - return PMIX_SUCCESS; -} -static pmix_status_t finalized(const pmix_proc_t *proc, void *server_object, - pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - pmix_output(0, "SERVER: FINALIZED %s:%d", proc->nspace, proc->rank); - --wakeup; - /* ensure we call the cbfunc so the proc can exit! */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - return PMIX_SUCCESS; -} - -static void abcbfunc(pmix_status_t status, void *cbdata) -{ - myxfer_t *x = (myxfer_t *) cbdata; - - /* be sure to release the caller */ - if (NULL != x->cbfunc) { - x->cbfunc(status, x->cbdata); - } - PMIX_RELEASE(x); -} - -static pmix_status_t abort_fn(const pmix_proc_t *proc, void *server_object, int status, - const char msg[], pmix_proc_t procs[], size_t nprocs, - pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - pmix_status_t rc; - myxfer_t *x; - - if (NULL != procs) { - pmix_output(0, "SERVER: ABORT on %s:%d", procs[0].nspace, procs[0].rank); - } else { - pmix_output(0, "SERVER: ABORT OF ALL PROCS IN NSPACE %s", proc->nspace); - } - - /* instead of aborting the specified procs, notify them - * (if they have registered their errhandler) */ - - /* use the myxfer_t object to ensure we release - * the caller when notification has been queued */ - x = PMIX_NEW(myxfer_t); - (void) strncpy(x->caller.nspace, proc->nspace, PMIX_MAX_NSLEN); - x->caller.rank = proc->rank; - - PMIX_INFO_CREATE(x->info, 2); - (void) strncpy(x->info[0].key, "DARTH", PMIX_MAX_KEYLEN); - x->info[0].value.type = PMIX_INT8; - x->info[0].value.data.int8 = 12; - (void) strncpy(x->info[1].key, "VADER", PMIX_MAX_KEYLEN); - x->info[1].value.type = PMIX_DOUBLE; - x->info[1].value.data.dval = 12.34; - x->cbfunc = cbfunc; - x->cbdata = cbdata; - - if (PMIX_SUCCESS - != (rc = PMIx_Notify_event(status, &x->caller, PMIX_RANGE_NAMESPACE, x->info, 2, abcbfunc, - x))) { - pmix_output(0, "SERVER: FAILED NOTIFY ERROR %d", (int) rc); - } - - return PMIX_SUCCESS; -} - -static pmix_status_t fencenb_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], - size_t ninfo, char *data, size_t ndata, pmix_modex_cbfunc_t cbfunc, - void *cbdata) -{ - pmix_output(0, "SERVER: FENCENB"); - /* pass the provided data back to each participating proc */ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, data, ndata, cbdata, NULL, NULL); - } - return PMIX_SUCCESS; -} - -static pmix_status_t dmodex_fn(const pmix_proc_t *proc, const pmix_info_t info[], size_t ninfo, - pmix_modex_cbfunc_t cbfunc, void *cbdata) -{ - pmix_output(0, "SERVER: DMODEX"); - - /* we don't have any data for remote procs as this - * test only runs one server - so report accordingly */ - if (NULL != cbfunc) { - cbfunc(PMIX_ERR_NOT_FOUND, NULL, 0, cbdata, NULL, NULL); - } - return PMIX_SUCCESS; -} - -static pmix_status_t publish_fn(const pmix_proc_t *proc, const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - pmix_locdat_t *p; - size_t n; - pmix_status_t rc = PMIX_SUCCESS; - - pmix_output(0, "SERVER: PUBLISH"); - - for (n = 0; n < ninfo; n++) { - p = PMIX_NEW(pmix_locdat_t); - (void) strncpy(p->pdata.proc.nspace, proc->nspace, PMIX_MAX_NSLEN); - p->pdata.proc.rank = proc->rank; - (void) strncpy(p->pdata.key, info[n].key, PMIX_MAX_KEYLEN); - PMIX_VALUE_XFER(rc, &p->pdata.value, (pmix_value_t *) &info[n].value); - if (PMIX_SUCCESS != rc) { - break; - } - pmix_list_append(&pubdata, &p->super); - } - if (NULL != cbfunc) { - cbfunc(rc, cbdata); - } - return rc; -} - -static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys, const pmix_info_t info[], - size_t ninfo, pmix_lookup_cbfunc_t cbfunc, void *cbdata) -{ - pmix_locdat_t *p, *p2; - pmix_list_t results; - size_t i, n; - pmix_pdata_t *pd = NULL; - pmix_status_t ret = PMIX_ERR_NOT_FOUND; - - pmix_output(0, "SERVER: LOOKUP"); - - PMIX_CONSTRUCT(&results, pmix_list_t); - - for (n = 0; NULL != keys[n]; n++) { - PMIX_LIST_FOREACH (p, &pubdata, pmix_locdat_t) { - if (0 == strncmp(keys[n], p->pdata.key, PMIX_MAX_KEYLEN)) { - p2 = PMIX_NEW(pmix_locdat_t); - (void) strncpy(p2->pdata.proc.nspace, p->pdata.proc.nspace, PMIX_MAX_NSLEN); - p2->pdata.proc.rank = p->pdata.proc.rank; - (void) strncpy(p2->pdata.key, p->pdata.key, PMIX_MAX_KEYLEN); - PMIX_VALUE_XFER(ret, &p2->pdata.value, &p->pdata.value); - if (PMIX_SUCCESS != ret) { - break; - } - pmix_list_append(&results, &p2->super); - break; - } - } - } - if (0 < (n = pmix_list_get_size(&results))) { - ret = PMIX_SUCCESS; - PMIX_PDATA_CREATE(pd, n); - for (i = 0; i < n; i++) { - p = (pmix_locdat_t *) pmix_list_remove_first(&results); - if (p) { - (void) strncpy(pd[i].proc.nspace, p->pdata.proc.nspace, PMIX_MAX_NSLEN); - pd[i].proc.rank = p->pdata.proc.rank; - (void) strncpy(pd[i].key, p->pdata.key, PMIX_MAX_KEYLEN); - PMIX_VALUE_XFER(ret, &pd[i].value, &p->pdata.value); - if (PMIX_SUCCESS != ret) { - break; - } - } - } - } - PMIX_LIST_DESTRUCT(&results); - if (NULL != cbfunc) { - cbfunc(ret, pd, n, cbdata); - } - if (0 < n) { - PMIX_PDATA_FREE(pd, n); - } - return PMIX_SUCCESS; -} - -static pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys, const pmix_info_t info[], - size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - pmix_locdat_t *p, *p2; - size_t n; - - pmix_output(0, "SERVER: UNPUBLISH"); - - for (n = 0; NULL != keys[n]; n++) { - PMIX_LIST_FOREACH_SAFE (p, p2, &pubdata, pmix_locdat_t) { - if (0 == strncmp(keys[n], p->pdata.key, PMIX_MAX_KEYLEN)) { - pmix_list_remove_item(&pubdata, &p->super); - PMIX_RELEASE(p); - break; - } - } - } - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - return PMIX_SUCCESS; -} - -static void spcbfunc(pmix_status_t status, void *cbdata) -{ - myxfer_t *x = (myxfer_t *) cbdata; - - if (NULL != x->spcbfunc) { - x->spcbfunc(PMIX_SUCCESS, "DYNSPACE", x->cbdata); - } -} - -static pmix_status_t spawn_fn(const pmix_proc_t *proc, const pmix_info_t job_info[], size_t ninfo, - const pmix_app_t apps[], size_t napps, pmix_spawn_cbfunc_t cbfunc, - void *cbdata) -{ - myxfer_t *x; - - pmix_output(0, "SERVER: SPAWN"); - - /* in practice, we would pass this request to the local - * resource manager for launch, and then have that server - * execute our callback function. For now, we will fake - * the spawn and just pretend */ - - /* must register the nspace for the new procs before - * we return to the caller */ - x = PMIX_NEW(myxfer_t); - x->spcbfunc = cbfunc; - x->cbdata = cbdata; - - set_namespace(2, "0,1", "DYNSPACE", spcbfunc, x); - - return PMIX_SUCCESS; -} - -static pmix_status_t connect_fn(const pmix_proc_t procs[], size_t nprocs, const pmix_info_t info[], - size_t ninfo, pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - pmix_output(0, "SERVER: CONNECT"); - - /* in practice, we would pass this request to the local - * resource manager for handling */ - - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - - return PMIX_SUCCESS; -} - -static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs, - const pmix_info_t info[], size_t ninfo, pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix_output(0, "SERVER: DISCONNECT"); - - /* in practice, we would pass this request to the local - * resource manager for handling */ - - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - - return PMIX_SUCCESS; -} - -static pmix_status_t register_event_fn(pmix_status_t *codes, size_t ncodes, - const pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } - return PMIX_SUCCESS; -} - -static pmix_status_t deregister_events(pmix_status_t *codes, size_t ncodes, pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - return PMIX_SUCCESS; -} - -static pmix_status_t notify_event(pmix_status_t code, const pmix_proc_t *source, - pmix_data_range_t range, pmix_info_t info[], size_t ninfo, - pmix_op_cbfunc_t cbfunc, void *cbdata) -{ - return PMIX_SUCCESS; -} - -typedef struct query_data_t { - pmix_info_t *data; - size_t ndata; -} query_data_t; - -static pmix_status_t query_fn(pmix_proc_t *proct, pmix_query_t *queries, size_t nqueries, - pmix_info_cbfunc_t cbfunc, void *cbdata) -{ - size_t n; - pmix_info_t *info; - - pmix_output(0, "SERVER: QUERY"); - - if (NULL == cbfunc) { - return PMIX_ERROR; - } - /* keep this simple */ - PMIX_INFO_CREATE(info, nqueries); - for (n = 0; n < nqueries; n++) { - (void) strncpy(info[n].key, queries[n].keys[0], PMIX_MAX_KEYLEN); - info[n].value.type = PMIX_STRING; - if (0 > asprintf(&info[n].value.data.string, "%d", (int) n)) { - return PMIX_ERROR; - } - } - cbfunc(PMIX_SUCCESS, info, nqueries, cbdata, NULL, NULL); - return PMIX_SUCCESS; -} - -static void tool_connect_fn(pmix_info_t *info, size_t ninfo, pmix_tool_connection_cbfunc_t cbfunc, - void *cbdata) -{ - pmix_proc_t proc; - - pmix_output(0, "SERVER: TOOL CONNECT"); - - /* just pass back an arbitrary nspace */ - (void) strncpy(proc.nspace, "TOOL", PMIX_MAX_NSLEN); - proc.rank = 0; - - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, &proc, cbdata); - } -} - -static void log_fn(const pmix_proc_t *client, const pmix_info_t data[], size_t ndata, - const pmix_info_t directives[], size_t ndirs, pmix_op_cbfunc_t cbfunc, - void *cbdata) -{ - pmix_output(0, "SERVER: LOG"); - - if (NULL != cbfunc) { - cbfunc(PMIX_SUCCESS, cbdata); - } -} - -static void wait_signal_callback(int fd, short event, void *arg) -{ - pmix_event_t *sig = (pmix_event_t *) arg; - int status; - pid_t pid; - wait_tracker_t *t2; - - if (SIGCHLD != event_get_signal(sig)) { - return; - } - - /* we can have multiple children leave but only get one - * sigchild callback, so reap all the waitpids until we - * don't get anything valid back */ - while (1) { - pid = waitpid(-1, &status, WNOHANG); - if (-1 == pid && EINTR == errno) { - /* try it again */ - continue; - } - /* if we got garbage, then nothing we can do */ - if (pid <= 0) { - return; - } - - /* we are already in an event, so it is safe to access the list */ - PMIX_LIST_FOREACH (t2, &children, wait_tracker_t) { - if (pid == t2->pid) { - /* found it! */ - --wakeup; - break; - } - } - } -} diff --git a/examples/showkeys.c b/examples/showkeys.c deleted file mode 100644 index a65be192b2..0000000000 --- a/examples/showkeys.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include - -#include "examples.h" -#include - -static pmix_proc_t myproc; - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_value_t *val; - pmix_info_t *info; - size_t ninfo, n; - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes any - * debugger flag instructing us to stop-in-init. If such a directive - * is included, then the process will be stopped in this call until - * the "debugger release" notification arrives */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %s\n", myproc.nspace, myproc.rank, - PMIx_Error_string(rc)); - exit(0); - } - /* get all keys provided to us */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, NULL, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get local rank failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - exit(0); - } - fprintf(stderr, "Returned type %s\n", PMIx_Data_type_string(val->type)); - info = (pmix_info_t *) val->data.darray->array; - ninfo = val->data.darray->size; - - for (n = 0; n < ninfo; n++) { - fprintf(stderr, "KEY: %s\n", info[n].key); - } - - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_PACKAGE_RANK, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get numa rank failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - exit(0); - } - fprintf(stderr, "NUMA rank: %d\n", (int) val->data.uint16); - - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace, - myproc.rank, PMIx_Error_string(rc)); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/target.c b/examples/target.c deleted file mode 100644 index ca470c53c3..0000000000 --- a/examples/target.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -#include - -int main(int argc, char **argv) -{ - int rc; - pmix_value_t value; - pmix_value_t *val = &value; - pmix_proc_t proc; - pid_t pid; - pmix_proc_t myproc; - - pid = getpid(); - fprintf(stderr, "Client %lu: Running\n", (unsigned long) pid); - - /* init us - note that the call to "init" includes the return of - * any job-related info provided by the RM. This includes any - * debugger flag instructing us to stop-in-init. If such a directive - * is included, then the process will be stopped in this call until - * the "debugger release" notification arrives */ - if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, - rc); - exit(0); - } - fprintf(stderr, "Client ns %s rank %d pid %lu: Running\n", myproc.nspace, myproc.rank, - (unsigned long) pid); - - /* get our universe size */ - PMIX_PROC_CONSTRUCT(&proc); - (void) strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); - proc.rank = PMIX_RANK_WILDCARD; - - if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, - val->data.uint32); - - /* now just hang around for awhile */ - sleep(100); - -done: - /* finalize us */ - fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank); - if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, - myproc.rank, rc); - } else { - fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", - myproc.nspace, myproc.rank); - } - fflush(stderr); - return (0); -} diff --git a/examples/tool.c b/examples/tool.c deleted file mode 100644 index 4b99d485be..0000000000 --- a/examples/tool.c +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include -#include -#include -#include - -#include "examples.h" -#include - -static void cbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, void *cbdata, - pmix_release_cbfunc_t release_fn, void *release_cbdata) -{ - myquery_data_t *mq = (myquery_data_t *) cbdata; - size_t n; - - mq->lock.status = status; - - /* save the returned info - it will be - * released in the release_fn */ - if (0 < ninfo) { - PMIX_INFO_CREATE(mq->info, ninfo); - mq->ninfo = ninfo; - for (n = 0; n < ninfo; n++) { - PMIX_INFO_XFER(&mq->info[n], &info[n]); - } - } - - /* let the library release the data */ - if (NULL != release_fn) { - release_fn(release_cbdata); - } - - /* release the block */ - DEBUG_WAKEUP_THREAD(&mq->lock); -} - -int main(int argc, char **argv) -{ - pmix_status_t rc; - pmix_proc_t myproc; - pmix_query_t *query; - size_t nq, ninfo = 0, n, m; - myquery_data_t mydata; - pmix_info_t *info = NULL, *iptr; - char *server_uri = NULL; - char *nspace = NULL; - char *nodename = NULL; - pmix_data_array_t *darray, *dptr; - bool geturi = false; - char hostname[1024]; - - gethostname(hostname, 1024); - for (n = 1; n < (size_t) argc; n++) { - if (0 == strcmp("-u", argv[n]) || 0 == strcmp("--url", argv[n])) { - if (NULL == argv[n + 1]) { - fprintf(stderr, "Must provide URI argument to %s option\n", argv[n]); - exit(1); - } - server_uri = argv[n + 1]; - } else if (0 == strcmp("-nspace", argv[n]) || 0 == strcmp("--nspace", argv[n])) { - if (NULL == argv[n + 1]) { - fprintf(stderr, "Must provide nspace argument to %s option\n", argv[n]); - exit(1); - } - nspace = argv[n + 1]; - } else if (0 == strcmp("-uri", argv[n]) || 0 == strcmp("--uri", argv[n])) { - /* retrieve the PMIx server's uri from the indicated node */ - nodename = argv[n + 1]; - geturi = true; - } - } - - if (NULL != server_uri) { - ninfo = 1; - PMIX_INFO_CREATE(info, ninfo); - PMIX_INFO_LOAD(&info[0], PMIX_SERVER_URI, server_uri, PMIX_STRING); - fprintf(stderr, "Connecting to %s\n", server_uri); - } - - /* init us */ - if (PMIX_SUCCESS != (rc = PMIx_tool_init(&myproc, info, ninfo))) { - fprintf(stderr, "PMIx_tool_init failed: %d\n", rc); - exit(rc); - } - if (NULL != info) { - PMIX_INFO_FREE(info, ninfo); - } - - if (geturi) { - nq = 1; - PMIX_QUERY_CREATE(query, nq); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_SERVER_URI); - if (NULL != nodename) { - PMIX_QUERY_QUALIFIERS_CREATE(&query[0], 1); - PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_HOSTNAME, nodename, PMIX_STRING); - } - DEBUG_CONSTRUCT_MYQUERY(&mydata); - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void *) &mydata))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - DEBUG_WAIT_THREAD(&mydata.lock); - /* find the response */ - if (PMIX_SUCCESS == mydata.lock.status) { - /* should be in the first key */ - if (PMIX_CHECK_KEY(&mydata.info[0], PMIX_SERVER_URI)) { - fprintf(stderr, "PMIx server URI for node %s: %s\n", - (NULL == nodename) ? hostname : nodename, mydata.info[0].value.data.string); - } else { - fprintf(stderr, "Query returned wrong info key at first posn: %s\n", - mydata.info[0].key); - } - } else { - fprintf(stderr, "Query returned error: %s\n", PMIx_Error_string(mydata.lock.status)); - } - DEBUG_DESTRUCT_MYQUERY(&mydata); - goto done; - } - - if (NULL == nspace) { - /* query the list of active nspaces */ - nq = 1; - PMIX_QUERY_CREATE(query, nq); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_QUERY_NAMESPACE_INFO); - DEBUG_CONSTRUCT_MYQUERY(&mydata); - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void *) &mydata))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - DEBUG_WAIT_THREAD(&mydata.lock); - /* find the response */ - if (PMIX_SUCCESS == mydata.lock.status) { - /* should be in the first key */ - if (PMIX_CHECK_KEY(&mydata.info[0], PMIX_QUERY_NAMESPACE_INFO)) { - darray = mydata.info[0].value.data.darray; - fprintf(stderr, "ACTIVE NSPACES:\n"); - if (NULL == darray || 0 == darray->size || NULL == darray->array) { - fprintf(stderr, "\tNone\n"); - } else { - info = (pmix_info_t *) darray->array; - if (NULL == info) { - fprintf(stderr, "Error\n"); - } else { - for (n = 0; n < darray->size; n++) { - dptr = info[n].value.data.darray; - if (NULL == dptr || 0 == dptr->size || NULL == dptr->array) { - fprintf(stderr, "Error in array %s\n", - (NULL == dptr) ? "NULL" : "NON-NULL"); - break; - } - iptr = (pmix_info_t *) dptr->array; - for (m = 0; m < dptr->size; m++) { - fprintf(stderr, "\t%s", iptr[m].value.data.string); - } - fprintf(stderr, "\n"); - } - } - } - } else { - fprintf(stderr, "Query returned wrong info key at first posn: %s\n", - mydata.info[0].key); - } - } else { - fprintf(stderr, "Query returned error: %s\n", PMIx_Error_string(mydata.lock.status)); - } - DEBUG_DESTRUCT_MYQUERY(&mydata); - } else { - nq = 1; - PMIX_QUERY_CREATE(query, nq); - PMIX_ARGV_APPEND(rc, query[0].keys, PMIX_JOB_SIZE); - PMIX_INFO_CREATE(query[0].qualifiers, 1); - query[0].nqual = 1; - PMIX_INFO_LOAD(&query[0].qualifiers[0], PMIX_NSPACE, nspace, PMIX_STRING); - DEBUG_CONSTRUCT_MYQUERY(&mydata); - if (PMIX_SUCCESS != (rc = PMIx_Query_info_nb(query, nq, cbfunc, (void *) &mydata))) { - fprintf(stderr, "Client ns %s rank %d: PMIx_Query_info failed: %d\n", myproc.nspace, - myproc.rank, rc); - goto done; - } - DEBUG_WAIT_THREAD(&mydata.lock); - /* find the response */ - if (PMIX_SUCCESS == mydata.lock.status) { - /* should be in the first key */ - if (PMIX_CHECK_KEY(&mydata.info[0], PMIX_JOB_SIZE)) { - fprintf(stderr, "JOB SIZE FOR NSPACE %s: %lu\n", nspace, - (unsigned long) mydata.info[0].value.data.uint32); - } else { - fprintf(stderr, "Query returned wrong info key at first posn: %s\n", - mydata.info[0].key); - } - } else { - fprintf(stderr, "Query returned error: %s\n", PMIx_Error_string(mydata.lock.status)); - } - DEBUG_DESTRUCT_MYQUERY(&mydata); - } - -done: - /* finalize us */ - PMIx_Finalize(NULL, 0); - return (rc); -} diff --git a/src/docs/show-help-files/Makefile.am b/src/docs/show-help-files/Makefile.am index 0cedca735b..9cf3a3e3a3 100644 --- a/src/docs/show-help-files/Makefile.am +++ b/src/docs/show-help-files/Makefile.am @@ -40,7 +40,6 @@ RST_SOURCE_FILES = \ $(srcdir)/help-prted.rst \ $(srcdir)/help-prterun.rst \ $(srcdir)/help-prun.rst \ - $(srcdir)/help-psched.rst \ $(srcdir)/help-pterm.rst \ $(srcdir)/help-cli.rst \ $(srcdir)/help-dash-host.rst \ @@ -60,7 +59,6 @@ ALL_TXT_BUILT = \ $(TXT_OUTDIR)/help-prted.txt \ $(TXT_OUTDIR)/help-prterun.txt \ $(TXT_OUTDIR)/help-prun.txt \ - $(TXT_OUTDIR)/help-psched.txt \ $(TXT_OUTDIR)/help-pterm.txt \ $(TXT_OUTDIR)/help-cli.txt \ $(TXT_OUTDIR)/help-dash-host.txt \ diff --git a/src/docs/show-help-files/help-psched.rst b/src/docs/show-help-files/help-psched.rst deleted file mode 100644 index 39fbf62656..0000000000 --- a/src/docs/show-help-files/help-psched.rst +++ /dev/null @@ -1,204 +0,0 @@ -.. Copyright (c) 2021-2023 Nanook Consulting All rights reserved. - Copyright (c) 2022 IBM Corporation. All rights reserved. - Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. - $COPYRIGHT$ - - Additional copyrights may follow - - $HEADER$ - -[bogus section] - -This section is not used by PRTE code. But we have to put a RST -section title in this file somewhere, or Sphinx gets unhappy. So we -put it in a section that is ignored by PRTE code. - -Hello, world ------------- - -[usage] - -%s (%s) %s - -Usage: %s [OPTION]... - -The following list of command line options are available. Note that -more detailed help for any option can be obtained by adding that -option to the help request as "--help