Fixes for #664 and #678 for 1.4.1 #732

Merged: 3 commits, Apr 14, 2023
Changes from all commits
9 changes: 9 additions & 0 deletions changelogs/fragments/732-zos_copy-encoding-bugs.yml
@@ -0,0 +1,9 @@
bugfixes:
- zos_copy - Fixes a bug where files not encoded in IBM-1047
would trigger an error while computing the record length
for a new destination dataset. Issue 664.
(https://github.com/ansible-collections/ibm_zos_core/pull/732)
- zos_copy - Fixes a bug where the code for fixing an issue with
newlines in files (issue 599) would use the wrong encoding
for normalization. Issue 678.
(https://github.com/ansible-collections/ibm_zos_core/pull/732)
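
For context, the two fixes reduce to: measure record lengths with an explicit text encoding, and scan for the EBCDIC CR+NL pair in binary mode. Below is a minimal standalone sketch of both behaviors; the function names and the one-byte carry across chunk boundaries are ours for illustration, not the module's exact code.

def get_longest_record(path):
    # Read as UTF-8 instead of the locale default, which raised errors
    # for sources that were not IBM-1047 compatible (issue 664).
    with open(path, "r", encoding="utf-8") as src:
        return max((len(line.rstrip("\n")) for line in src), default=0)

def has_ebcdic_crlf(path, chunk_size=1024):
    # In EBCDIC (IBM-037), CR is 0x0D and NL is 0x15. Binary mode keeps
    # Python from translating line endings before we can inspect them.
    with open(path, "rb") as src:
        prev = b""
        chunk = src.read(chunk_size)
        while chunk:
            # Carry the previous chunk's last byte so a CR+NL pair that
            # straddles a chunk boundary is still detected.
            if b"\x0d\x15" in prev + chunk:
                return True
            prev = chunk[-1:]
            chunk = src.read(chunk_size)
    return False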
173 changes: 129 additions & 44 deletions plugins/modules/zos_copy.py
@@ -931,15 +931,20 @@ def file_has_crlf_endings(self, src):
{bool} -- True if the file uses CRLF endings, False if it uses LF
ones.
"""
+# Python has to read the file in binary mode to not mask CRLF
+# endings or enable universal newlines. If we used encoding="cp037",
+# we would get '\n' as the line ending even when the file uses '\r\n'.
with open(src, "rb") as src_file:
-    # readline() will read until it finds a \n.
-    content = src_file.readline()
+    # Reading the file in 1024-byte chunks.
+    content = src_file.read(1024)

-# In EBCDIC, \r\n are bytes 0d and 15, respectively.
-if content.endswith(b'\x0d\x15'):
-    return True
-else:
-    return False
+    while content:
+        # In EBCDIC, \r\n are bytes 0d and 15, respectively.
+        if b'\x0d\x15' in content:
+            return True
+        content = src_file.read(1024)
+
+    return False

def create_temp_with_lf_endings(self, src):
"""Creates a temporary file with the same content as src but without
@@ -960,10 +965,11 @@ def create_temp_with_lf_endings(self, src):

with open(converted_src, "wb") as converted_file:
    with open(src, "rb") as src_file:
-        current_line = src_file.read()
-        converted_file.write(current_line.replace(b'\x0d', b''))
+        chunk = src_file.read(1024)
+        # In IBM-037, \r is the byte 0d.
+        converted_file.write(chunk.replace(b'\x0d', b''))

-self._tag_file_encoding(converted_src, encode.Defaults.DEFAULT_EBCDIC_MVS_CHARSET)
+self._tag_file_encoding(converted_src, "IBM-037")

return converted_src
except Exception as err:
@@ -1319,6 +1325,7 @@ def copy_to_pdse(
src_ds_type,
src_member=None,
dest_member=None,
+encoding=None,
):
"""Copy source to a PDS/PDSE or PDS/PDSE member.

@@ -1328,12 +1335,13 @@
Arguments:
src {str} -- Path to USS file/directory or data set name.
    temp_path {str} -- Path to the location where the control node
-        transferred data to
+        transferred data to.
    conv_path {str} -- Path to the converted source file/directory
-    dest {str} -- Name of destination data set
-    src_ds_type {str} -- The type of source
+    dest {str} -- Name of destination data set.
+    src_ds_type {str} -- The type of source.
    src_member {bool, optional} -- Member of the source data set to copy.
-    dest_member {str, optional} -- Name of destination member in data set
+    dest_member {str, optional} -- Name of destination member in data set.
+    encoding {dict, optional} -- Dictionary with encoding options.
"""
new_src = conv_path or temp_path or src

@@ -1352,6 +1360,9 @@
else:
dest_copy_name = "{0}({1})".format(dest, data_set.DataSet.get_member_name_from_file(file))

+if not self.is_binary:
+    full_file_path = normalize_line_endings(full_file_path, encoding)
+
result = self.copy_to_member(full_file_path, dest_copy_name)

if result["rc"] != 0:
@@ -1459,7 +1470,7 @@ def get_file_record_length(file):
"""
max_line_length = 0

with open(file, "r") as src_file:
with open(file, "r", encoding="utf-8") as src_file:
current_line = src_file.readline()

while current_line:
@@ -2021,6 +2032,51 @@ def allocate_destination_data_set(
return True


def normalize_line_endings(src, encoding=None):
"""
Normalizes src's encoding to IBM-037 (a dataset's default) and then normalizes
its line endings to LF.
Arguments:
src (str) -- Path of a USS file.
encoding (dict, optional) -- Encoding options for the module.
Returns:
str -- Path to the normalized file.
"""
# Before copying into a destination dataset, we'll make sure that
# the source file doesn't contain any carriage returns that would
# result in empty records in the destination.
# Due to the differences between encodings, we'll normalize to IBM-037
# before checking the EOL sequence.
enc_utils = encode.EncodeUtils()
src_tag = enc_utils.uss_file_tag(src)
copy_handler = CopyHandler(AnsibleModuleHelper(dict()))

if src_tag == "untagged":
# This should only be true when src is a remote file and no encoding
# was specified by the user.
if not encoding:
encoding = {"from": encode.Defaults.get_default_system_charset()}
src_tag = encoding["from"]

if src_tag != "IBM-037":
fd, converted_src = tempfile.mkstemp()
os.close(fd)

enc_utils.uss_convert_encoding(
src,
converted_src,
src_tag,
"IBM-037"
)
copy_handler._tag_file_encoding(converted_src, "IBM-037")
src = converted_src

if copy_handler.file_has_crlf_endings(src):
src = copy_handler.create_temp_with_lf_endings(src)

return src
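
As a local illustration of what normalize_line_endings produces: re-encode the file to IBM-037, then strip carriage returns. The sketch below uses Python's cp037 codec as a stand-in; the module itself converts and tags files on z/OS through EncodeUtils.uss_convert_encoding and _tag_file_encoding, so treat this as an approximation, not the module's implementation.

import os
import tempfile

def normalize_line_endings_local(src, src_encoding="utf-8"):
    # Re-encode the content to IBM-037 (Python codec "cp037"), then drop
    # carriage returns (0x0D in IBM-037) so a destination data set would
    # not end up with empty records.
    with open(src, "rb") as f:
        data = f.read()
    ebcdic = data.decode(src_encoding).encode("cp037")
    fd, out_path = tempfile.mkstemp()
    with os.fdopen(fd, "wb") as out:
        out.write(ebcdic.replace(b"\x0d", b""))
    return out_path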


def run_module(module, arg_def):
# ********************************************************************
# Verify the validity of module args. BetterArgParser raises ValueError
@@ -2102,13 +2158,43 @@ def run_module(module, arg_def):
# and destination datasets, if needed.
# ********************************************************************
dest_member_exists = False
+converted_src = None
try:
# If temp_path, the plugin has copied a file from the controller to USS.
if temp_path or "/" in src:
src_ds_type = "USS"

if remote_src and os.path.isdir(src):
is_src_dir = True

if not is_uss:
new_src = temp_path or src
new_src = os.path.normpath(new_src)
# Normalizing encoding when src is a USS file (only).
encode_utils = encode.EncodeUtils()
src_tag = encode_utils.uss_file_tag(new_src)
# Normalizing to UTF-8.
if not is_src_dir and src_tag != "UTF-8":
# If untagged, assuming the encoding/tag is the system's default.
if src_tag == "untagged" or src_tag is None:
if encoding:
src_tag = encoding["from"]
else:
src_tag = encode.Defaults.get_default_system_charset()

# Converting the original src to a temporary one in UTF-8.
fd, converted_src = tempfile.mkstemp()
os.close(fd)
encode_utils.uss_convert_encoding(
new_src,
converted_src,
src_tag,
"UTF-8"
)

# Creating the handler just for tagging, we're not copying yet!
copy_handler = CopyHandler(module, is_binary=is_binary)
copy_handler._tag_file_encoding(converted_src, "UTF-8")
else:
if data_set.DataSet.data_set_exists(src_name):
if src_member and not data_set.DataSet.data_set_member_exists(src):
@@ -2284,6 +2370,14 @@
emergency_backup = data_set.DataSet.temp_name()
data_set.DataSet.allocate_model_data_set(emergency_backup, dest_name)

if converted_src:
if remote_src:
original_src = src
src = converted_src
else:
original_temp = temp_path
temp_path = converted_src

try:
if not is_uss:
res_args["changed"] = allocate_destination_data_set(
@@ -2300,8 +2394,19 @@
if dest_exists and not force:
restore_backup(dest_name, emergency_backup, dest_ds_type, use_backup)
erase_backup(emergency_backup, dest_ds_type)
if converted_src:
if remote_src:
src = original_src
else:
temp_path = original_temp
module.fail_json(msg="Unable to allocate destination data set: {0}".format(str(err)))

if converted_src:
if remote_src:
src = original_src
else:
temp_path = original_temp

# ********************************************************************
# Encoding conversion is only valid if the source is a local file,
# local directory or a USS file/directory.
@@ -2370,35 +2475,8 @@
# ---------------------------------------------------------------------
elif dest_ds_type in data_set.DataSet.MVS_SEQ:
if src_ds_type == "USS" and not is_binary:
-# Before copying into the destination dataset, we'll make sure that
-# the source file doesn't contain any carriage returns that would
-# result in empty records in the destination.
-# Due to the differences between encodings, we'll normalize to IBM-037
-# before checking the EOL sequence.
new_src = conv_path or temp_path or src
-enc_utils = encode.EncodeUtils()
-src_tag = enc_utils.uss_file_tag(new_src)
-
-if src_tag == "untagged":
-    src_tag = encode.Defaults.DEFAULT_EBCDIC_USS_CHARSET
-
-if src_tag not in encode.Defaults.DEFAULT_EBCDIC_MVS_CHARSET:
-    fd, converted_src = tempfile.mkstemp()
-    os.close(fd)
-
-    enc_utils.uss_convert_encoding(
-        new_src,
-        converted_src,
-        src_tag,
-        encode.Defaults.DEFAULT_EBCDIC_MVS_CHARSET
-    )
-    copy_handler._tag_file_encoding(converted_src, encode.Defaults.DEFAULT_EBCDIC_MVS_CHARSET)
-    new_src = converted_src
-
-if copy_handler.file_has_crlf_endings(new_src):
-    new_src = copy_handler.create_temp_with_lf_endings(new_src)
-
-conv_path = new_src
+conv_path = normalize_line_endings(new_src, encoding)

copy_handler.copy_to_seq(
src,
@@ -2421,7 +2499,14 @@
)

pdse_copy_handler.copy_to_pdse(
-    src, temp_path, conv_path, dest_name, src_ds_type, src_member=src_member, dest_member=dest_member
+    src,
+    temp_path,
+    conv_path,
+    dest_name,
+    src_ds_type,
+    src_member=src_member,
+    dest_member=dest_member,
+    encoding=encoding
)
res_args["changed"] = True
dest = dest.upper()
90 changes: 90 additions & 0 deletions tests/functional/modules/test_zos_copy_func.py
@@ -35,6 +35,8 @@
DUMMY DATA ---- LINE 007 ------
"""

+DUMMY_DATA_CRLF = b"00000001 DUMMY DATA\r\n00000002 DUMMY DATA\r\n"

VSAM_RECORDS = """00000001A record
00000002A record
00000003A record
@@ -109,6 +111,12 @@ def populate_dir(dir_path):
infile.write(DUMMY_DATA)


def populate_dir_crlf_endings(dir_path):
for i in range(5):
with open(os.path.join(dir_path, "file{0}".format(i)), "wb") as infile:
infile.write(DUMMY_DATA_CRLF)


def populate_partitioned_data_set(hosts, name, ds_type, members=None):
"""Creates a new partitioned data set and inserts records into various
members of it.
@@ -1058,6 +1066,56 @@ def test_copy_file_record_length_to_sequential_data_set(ansible_zos_module):
os.remove(src)


@pytest.mark.uss
@pytest.mark.seq
def test_copy_file_crlf_endings_to_sequential_data_set(ansible_zos_module):
hosts = ansible_zos_module
dest = "USER.TEST.SEQ.FUNCTEST"

fd, src = tempfile.mkstemp()
os.close(fd)
with open(src, "wb") as infile:
infile.write(DUMMY_DATA_CRLF)

try:
hosts.all.zos_data_set(name=dest, state="absent")

copy_result = hosts.all.zos_copy(
src=src,
dest=dest,
remote_src=False,
is_binary=False
)

verify_copy = hosts.all.shell(
cmd="cat \"//'{0}'\"".format(dest),
executable=SHELL_EXECUTABLE,
)

verify_recl = hosts.all.shell(
cmd="dls -l {0}".format(dest),
executable=SHELL_EXECUTABLE,
)

for cp_res in copy_result.contacted.values():
assert cp_res.get("msg") is None
assert cp_res.get("changed") is True
assert cp_res.get("dest") == dest
for v_cp in verify_copy.contacted.values():
assert v_cp.get("rc") == 0
assert len(v_cp.get("stdout_lines")) == 2
for v_recl in verify_recl.contacted.values():
assert v_recl.get("rc") == 0
stdout = v_recl.get("stdout").split()
assert len(stdout) == 5
assert stdout[1] == "PS"
assert stdout[2] == "FB"
assert stdout[3] == "19"
finally:
hosts.all.zos_data_set(name=dest, state="absent")
os.remove(src)
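
The asserted record length of 19 follows from the fixture: each record of DUMMY_DATA_CRLF is 19 bytes once its CRLF pair is stripped. A quick standalone check of that arithmetic:

records = DUMMY_DATA_CRLF.split(b"\r\n")
# Two 19-byte records; the empty trailing element comes from the final CRLF.
assert max(len(r) for r in records) == 19
assert len([r for r in records if r]) == 2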


@pytest.mark.uss
@pytest.mark.seq
@pytest.mark.parametrize("src", [
@@ -1614,6 +1672,38 @@ def test_copy_dir_to_non_existing_pdse(ansible_zos_module):
hosts.all.zos_data_set(name=dest, state="absent")


@pytest.mark.uss
@pytest.mark.pdse
def test_copy_dir_crlf_endings_to_non_existing_pdse(ansible_zos_module):
hosts = ansible_zos_module
dest = "USER.TEST.PDSE.FUNCTEST"

temp_path = tempfile.mkdtemp()
src_basename = "source/"
source_path = "{0}/{1}".format(temp_path, src_basename)

try:
os.mkdir(source_path)
populate_dir_crlf_endings(source_path)

copy_res = hosts.all.zos_copy(src=source_path, dest=dest)
verify_copy = hosts.all.shell(
cmd="cat \"//'{0}({1})'\"".format(dest, "FILE2"),
executable=SHELL_EXECUTABLE,
)

for result in copy_res.contacted.values():
assert result.get("msg") is None
assert result.get("changed") is True
assert result.get("dest") == dest
for result in verify_copy.contacted.values():
assert result.get("rc") == 0
assert len(result.get("stdout_lines")) == 2
finally:
shutil.rmtree(temp_path)
hosts.all.zos_data_set(name=dest, state="absent")


@pytest.mark.uss
@pytest.mark.pdse
@pytest.mark.parametrize("src_type", ["pds", "pdse"])