Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/3687 #251

Merged
merged 20 commits into from
May 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
cd7a29e
3687: looking for 'JCL ERROR' in return code message, since that will…
richp405 Jan 27, 2021
672173f
Expanded changes for JCL error to 'fail fast'
richp405 Jan 28, 2021
d5d6c16
rebuilt timing delay loops for zos_job_submit
richp405 Feb 18, 2021
89410cb
Merge branch 'dev' into bugfix/3687
richp405 Feb 18, 2021
18b61e0
Merge branch 'dev' into bugfix/3687
richp405 Mar 8, 2021
4c41932
fixed variable usage, added more meaningful error for JCL ERROR and A…
richp405 Mar 8, 2021
a654371
simplified jcl error search string
richp405 Mar 8, 2021
bed3b34
added debug call/conv strings to return from job_submit, to understan…
richp405 Mar 9, 2021
72d4963
argh...typo in the tracking string
richp405 Mar 9, 2021
91ac19c
continuing to track non-response issue
richp405 Mar 9, 2021
3381ec8
corrected issue with temp_file.name (may need to add comment that it …
richp405 Mar 9, 2021
2371d87
eliminated duplicate temp file 2 removal
richp405 Mar 9, 2021
9f8e507
adding badjcl indicator, since the glitch moved
richp405 Mar 9, 2021
f78ed0a
added another fail_json to job_submit
richp405 Mar 9, 2021
d424b01
removed failed=true as an assertion
richp405 Mar 9, 2021
3f00c8a
removed tracking statements.
richp405 Mar 9, 2021
b2344d2
minor corrections on comments... still need to discuss return code an…
richp405 Mar 15, 2021
85ded1b
changed job to return msg_code=None on JCL error
richp405 Mar 18, 2021
e67abd8
Changed documentation to help clarify the wait/wait_time_s issue
richp405 May 6, 2021
2f1ad77
forgot to switch back to black editor...autopep8 moved the import sta…
richp405 May 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion plugins/module_utils/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ def _parse_jobs(output_str):
job["ret_code"]["code"] = _get_return_code_num(ret_code_msg)
job["ret_code"]["msg_code"] = _get_return_code_str(ret_code_msg)
job["ret_code"]["msg_txt"] = ""
if "JCL ERROR" in ret_code_msg:
job["ret_code"][
"msg_txt"
] = "JCL Error detected. Check the data dumps for more information."

if ret_code_msg == "":
job["ret_code"]["msg"] = "AC"

Expand Down Expand Up @@ -452,6 +457,7 @@ def _get_return_code_num(rc_str):
Returns:
Union[int, NoneType] -- Returns integer RC if possible, if not returns NoneType
"""

rc = None
match = re.search(r"\s*CC\s*([0-9]+)", rc_str)
if match:
Expand All @@ -470,7 +476,9 @@ def _get_return_code_str(rc_str):
Union[str, NoneType] -- Returns string RC or ABEND code if possible, if not returns NoneType
"""
rc = None
match = re.search(r"(?:\s*CC\s*([0-9]+))|(?:ABEND\s*((?:S|U)[0-9]+))", rc_str)
match = re.search(
r"(?:\s*CC\s*([0-9]+))|(?:ABEND\s*((?:S|U)[0-9]+)|(?:JCL ERROR))", rc_str
)
if match:
rc = match.group(1) or match.group(2)
return rc
Expand Down
165 changes: 107 additions & 58 deletions plugins/modules/zos_job_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,16 @@
type: bool
description:
- Wait for the Job to finish and capture the output. Default is false.
- User can specify the wait time, see option ``wait_time_s``.
- When I(wait) is false or absent, the module will wait up to 10 seconds for the job to start,
but will not wait for the job to complete.
- If I(wait) is true, the user can set the maximum wait time with the option ``wait_time_s``.
wait_time_s:
required: false
default: 60
type: int
description:
- When wait is true, the module will wait for a maximum of 60 seconds by
default.
- User can set the wait time manually in this option.
- When I(wait) is true, the module will wait up to this number of seconds for the job to complete.
- User can set the wait time manually with this option.
max_rc:
required: false
type: int
Expand Down Expand Up @@ -221,10 +222,11 @@
description:
Returns additional information related to the job.
type: str
sample: "No job can be located with this job name: HELLO"
sample: "JCL Error detected. Check the data dumps for more information."
code:
description:
Return code converted to integer value (when possible).
For JCL ERRORs, this will be None.
type: int
sample: 00
sample:
Expand Down Expand Up @@ -489,20 +491,22 @@
wait_time_s: 30
"""

from ansible.module_utils.basic import AnsibleModule
from time import sleep
from os import chmod, path, remove, stat
from tempfile import NamedTemporaryFile
import re
from ansible_collections.ibm.ibm_zos_core.plugins.module_utils.job import job_output
from ansible_collections.ibm.ibm_zos_core.plugins.module_utils.better_arg_parser import (
BetterArgParser,
)
from ansible.module_utils.six import PY3
from stat import S_IEXEC, S_IREAD, S_IWRITE
from ansible_collections.ibm.ibm_zos_core.plugins.module_utils.encode import (
Defaults,
)
from stat import S_IEXEC, S_IREAD, S_IWRITE
from ansible.module_utils.six import PY3
from ansible_collections.ibm.ibm_zos_core.plugins.module_utils.better_arg_parser import (
BetterArgParser,
)
from ansible_collections.ibm.ibm_zos_core.plugins.module_utils.job import job_output
from timeit import default_timer as timer
import re
from tempfile import NamedTemporaryFile
from os import chmod, path, remove, stat
from time import sleep
from ansible.module_utils.basic import AnsibleModule


if PY3:
from shlex import quote
Expand All @@ -513,7 +517,7 @@
POLLING_INTERVAL = 1
POLLING_COUNT = 60

JOB_COMPLETION_MESSAGES = ["CC", "ABEND", "SEC ERROR"]
JOB_COMPLETION_MESSAGES = ["CC", "ABEND", "SEC ERROR", "JCL ERROR"]


def submit_pds_jcl(src, module):
Expand Down Expand Up @@ -612,7 +616,7 @@ def query_jobs_status(module, jobId):
)
if not output and timeout == 0:
raise SubmitJCLError(
"THE JOB CAN NOT BE QUERIED FROM JES (TIMEOUT=10s). PLEASE CHECK THE ZOS SYSTEM. IT IS SLOW TO RESPONSE."
"THE JOB CAN NOT BE QUERIED FROM JES (TIMEOUT=10s). PLEASE CHECK THE ZOS SYSTEM. IT IS SLOW TO RESPOND."
)
return output

Expand Down Expand Up @@ -692,12 +696,13 @@ def run_module():
max_rc = parsed_args.get("max_rc")
# get temporary file names for copied files
temp_file = parsed_args.get("temp_file")
temp_file_2 = None
if temp_file:
temp_file_2 = NamedTemporaryFile(delete=True)

if wait_time_s <= 0:
module.fail_json(
msg="The option wait_time_s is not valid it just be greater than 0.",
msg="The option wait_time_s is not valid. It must be greater than 0.",
**result
)

Expand All @@ -724,13 +729,14 @@ def run_module():

# added -c to iconv to try and prevent \r from mis-mapping as invalid char to EBCDIC
to_encoding = encoding.get("to")
conv_str = "iconv -c -f {0} -t {1} {2} > {3}".format(
from_encoding,
to_encoding,
quote(temp_file),
quote(temp_file_2.name),
)
(conv_rc, stdout, stderr) = module.run_command(
"iconv -c -f {0} -t {1} {2} > {3}".format(
from_encoding,
to_encoding,
quote(temp_file),
quote(temp_file_2.name),
),
conv_str,
use_unsafe_shell=True,
)
if conv_rc == 0:
Expand All @@ -745,57 +751,100 @@ def run_module():
except SubmitJCLError as e:
module.fail_json(msg=repr(e), **result)
if jobId is None or jobId == "":
result["job_id"] = jobId
result["job_id"] = ""
module.fail_json(
msg="JOB ID RETURNED IS None. PLEASE CHECK WHETHER THE JCL IS CORRECT.",
msg="JOB ID Returned is None. Please check whether the JCL is valid.",
**result
)

result["job_id"] = jobId
duration = 0
if wait is True:
# calculate the job elapse time
if not wait:
wait_time_s = 10

# real time loop - will be used regardless of 'wait' to capture data
starttime = timer()
loopdone = False
foundissue = None
while not loopdone:
try:
waitJob = query_jobs_status(module, jobId)
job_msg = waitJob[0].get("ret_code").get("msg")
except SubmitJCLError as e:
job_output_txt = job_output(job_id=jobId)
except IndexError:
pass
except Exception as e:
result["err_detail"] = "{1} {2}.\n".format(
"Error during job submission. The output is:", job_output_txt or " "
)
module.fail_json(msg=repr(e), **result)
# while (job_msg.startswith("CC") or job_msg.startswith("ABEND")) is False:
while not re.search(
"^(?:{0})".format("|".join(JOB_COMPLETION_MESSAGES)), job_msg
):
sleep(1)
duration = duration + 1
waitJob = job_output(job_id=jobId)
job_msg = waitJob[0].get("ret_code").get("msg")
if re.search("^(?:{0})".format("|".join(JOB_COMPLETION_MESSAGES)), job_msg):
break
if duration == wait_time_s: # Long running task. timeout return
break

try:
result = get_job_info(module, jobId, return_output)
if bool(job_output_txt):
jot_retcode = job_output_txt[0].get("ret_code")
if bool(jot_retcode):
job_msg = jot_retcode.get("msg")
if re.search(
"^(?:{0})".format("|".join(JOB_COMPLETION_MESSAGES)), job_msg
):
loopdone = True
# if the message doesn't have a CC, it is an improper completion (error/abend)
if re.search("^(?:CC)", job_msg) is None:
foundissue = job_msg

if not loopdone:
checktime = timer()
duration = round(checktime - starttime)
if duration >= wait_time_s:
loopdone = True
result["message"] = {
"stdout": "Submit JCL operation succeeded but it is a long running job, exceeding the timeout of "
+ str(wait_time_s)
+ " seconds. JobID is "
+ str(jobId)
+ ". Consider using module zos_job_query to poll for long running jobs."
}
else:
sleep(0.5)

# End real time loop ^^^

if bool(job_output_txt):
result["jobs"] = job_output_txt
if wait is True and return_output is True and max_rc is not None:
assert_valid_return_code(
max_rc, result.get("jobs")[0].get("ret_code").get("code")
)
except SubmitJCLError as e:
module.fail_json(msg=repr(e), **result)
except Exception as e:
module.fail_json(msg=repr(e), **result)
finally:
if temp_file:
remove(temp_file)

if temp_file:
remove(temp_file)

checktime = timer()
duration = round(checktime - starttime)
result["duration"] = duration
if duration == wait_time_s:
result["changed"] = True

if duration >= wait_time_s:
# This is a duplicate message, to handle the edge-case where the timeout was crossed after the check
result["message"] = {
"stdout": "Submit JCL operation succeeded but it is a long running job. Timeout is "
"stdout": "Submit JCL operation succeeded but it is a long running job, exceeding the timeout of "
+ str(wait_time_s)
+ " seconds."
+ " seconds. JobID is "
+ str(jobId)
+ ". Consider using module zos_job_query to poll for long running jobs."
}
else:
result["message"] = {"stdout": "Submit JCL operation succeeded."}
result["changed"] = True
if foundissue is not None:
result["changed"] = False
result["message"] = {
"stderr": "Submit succeeded, but job failed: " + foundissue
}
result["failed"] = True
module.fail_json(msg=result["message"], **result)
else:
result["message"] = {
"stdout": "Submit JCL operation succeeded with id of "
+ str(jobId)
+ "."
}

module.exit_json(**result)


Expand Down
26 changes: 24 additions & 2 deletions tests/functional/modules/test_zos_job_submit_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,18 @@
//SYSUT2 DD SYSOUT=*
//
"""
# JCL sample written to a temp file and submitted by test_job_submit_LOCAL_BADJCL.
# NOTE(review): presumably the trailing "!!" on the SYSPRINT DD statement is the
# intentional syntax error that makes this job fail with a JCL error - confirm.
JCL_FILE_CONTENTS_BAD = """//HELLO JOB (T043JM,JM00,1,0,0,0),'HELLO WORLD - JRM',CLASS=R,
// MSGCLASS=X,MSGLEVEL=1,NOTIFY=S0JM
//STEP0001 EXEC PGM=IEBGENER
//SYSIN DD DUMMY
//SYSPRINT DD SYSOUT=*!!
//SYSUT1 DD *
HELLO, WORLD
/*
//SYSUT2 DD SYSOUT=*
//
"""


TEMP_PATH = "/tmp/ansible/jcl"
DATA_SET_NAME = "imstestl.ims1.test05"
Expand Down Expand Up @@ -127,7 +139,6 @@ def test_job_submit_LOCAL(ansible_zos_module):
results = hosts.all.zos_job_submit(src=tmp_file.name, location="LOCAL", wait=True)

for result in results.contacted.values():
print(result)
assert result.get("jobs")[0].get("ret_code").get("msg_code") == "0000"
assert result.get("jobs")[0].get("ret_code").get("code") == 0

Expand All @@ -142,13 +153,24 @@ def test_job_submit_LOCAL_extraR(ansible_zos_module):
results = hosts.all.zos_job_submit(src=tmp_file.name, location="LOCAL", wait=True)

for result in results.contacted.values():
print(result)
assert result.get("jobs")[0].get("ret_code").get("msg_code") == "0000"
assert result.get("jobs")[0].get("ret_code").get("code") == 0

assert result.get("changed") is True


def test_job_submit_LOCAL_BADJCL(ansible_zos_module):
    """Submit JCL_FILE_CONTENTS_BAD as a LOCAL source with wait=True.

    Every contacted host must report ``changed`` as False.
    """
    jcl_tmp = tempfile.NamedTemporaryFile(delete=True)
    with open(jcl_tmp.name, "w") as handle:
        handle.write(JCL_FILE_CONTENTS_BAD)

    submit_results = ansible_zos_module.all.zos_job_submit(
        src=jcl_tmp.name, location="LOCAL", wait=True
    )

    for outcome in submit_results.contacted.values():
        assert outcome.get("changed") is False


# * currently don't have volume support from ZOAU python API, so this will not be reproduceable
# * in CI/CD testing environment (for now)
# def test_job_submit_PDS_volume(ansible_zos_module):
Expand Down