diff --git a/.github/workflows/subscript.yml b/.github/workflows/subscript.yml
index a03d9b95d..450893543 100644
--- a/.github/workflows/subscript.yml
+++ b/.github/workflows/subscript.yml
@@ -67,7 +67,7 @@ jobs:
 
       - name: Run tests
         run: |
-          pytest tests
+          pytest -n auto tests
           # Check that repository is untainted by test code:
           git status --porcelain
           test -z "$(git status --porcelain)"
diff --git a/README.rst b/README.rst
index 2c36fa6c3..9b3249e40 100644
--- a/README.rst
+++ b/README.rst
@@ -48,7 +48,7 @@ In a fresh virtual environment you should be able to do::
 
 and all dependencies should be installed. Confirm your installation with::
 
-  pytest
+  pytest -n auto
 
 and this should run for some minutes without failures.
 
diff --git a/docs/contribution.rst b/docs/contribution.rst
index 7f429eeac..a708196ff 100644
--- a/docs/contribution.rst
+++ b/docs/contribution.rst
@@ -61,7 +61,7 @@ repository, which you can do by running:
 
 .. code-block:: console
 
-    pytest
+    pytest -n auto
 
 
 Repository conventions
diff --git a/setup.py b/setup.py
index 509dd33fa..7aa96f77a 100755
--- a/setup.py
+++ b/setup.py
@@ -75,6 +75,7 @@
     "scipy",
     "seaborn",
     "segyio",
+    "urllib3<2",
     "xlrd",
     "xtgeo",
 ]
diff --git a/src/subscript/pack_sim/pack_sim.py b/src/subscript/pack_sim/pack_sim.py
index a0bd2f448..fb7ecc547 100755
--- a/src/subscript/pack_sim/pack_sim.py
+++ b/src/subscript/pack_sim/pack_sim.py
@@ -7,7 +7,7 @@
 from io import StringIO
 from pathlib import Path
 from shutil import copy
-from typing import Dict, Optional, TextIO, Union
+from typing import Dict, List, Optional, TextIO, Union
 
 from subscript import __version__, getLogger
 
@@ -139,33 +139,59 @@ def _get_paths(filename: Path, org_sim_loc: Path) -> Dict[str, Path]:
     # Check if the filename can be found
     filename = _expand_filename(filename, org_sim_loc)
 
-    with open(filename, "r", encoding="utf8") as fhandle:
-        # Read through all lines of text
-        for line in fhandle:
-            line_strip = line.strip()
-
-            if line_strip.startswith("PATHS"):
-                logger.info("Found Eclipse PATHS keyword, creating a dictionary.")
-
-                # In the keyword, find the path definitions and ignore comments
-                for innerline in fhandle:
-                    line_strip = innerline.strip()
-                    if line_strip.startswith("--"):
-                        continue
+    try:
+        with open(filename, encoding="utf-8") as fin:
+            lines = fin.readlines()
+    except UnicodeDecodeError as e:
+        error_words = str(e).split(" ")
+        hex_str = error_words[error_words.index("byte") + 1]
+        try:
+            bad_char = chr(int(hex_str, 16))
+        except ValueError:
+            bad_char = f"hex:{hex_str}"
+        with open(filename, "rb") as fin:
+            byte_lines: List[bytes] = fin.readlines()
+
+        for i, byte_line in enumerate(byte_lines):
+            try:
+                byte_line.decode("utf-8")
+            except UnicodeDecodeError:
+                bad_line_num = i + 1
+                e.reason = (
+                    f"Unsupported non-UTF-8 character {bad_char!r} "
+                    f"found in file: {filename.name} on line {bad_line_num}"
+                )
+                break
+        raise e
 
-                    if innerline.split("--")[0].strip() == "/":
-                        # Finished reading the data for the PATHS keyword
-                        break
+    # Read through all lines of text. One shared iterator is used so that the
+    # inner PATHS loop resumes right after the keyword, not from the file top.
+    line_iter = iter(lines)
+    for line in line_iter:
+        line_strip = line.strip()
 
-                    # Assume we have found a PATHS definition line
-                    try:
-                        path_info = innerline.split("--")[0].strip().split("'")
-                        paths[path_info[1]] = Path(path_info[3])
-                    except IndexError:
-                        logger.warning(
-                            "Could not parse %s as a PATHS definition, skipping",
-                            line_strip,
-                        )
+        if line_strip.startswith("PATHS"):
+            logger.info("Found Eclipse PATHS keyword, creating a dictionary.")
+
+            # In the keyword, find the path definitions and ignore comments
+            for innerline in line_iter:
+                line_strip = innerline.strip()
+                if line_strip.startswith("--"):
+                    continue
+
+                if innerline.split("--")[0].strip() == "/":
+                    # Finished reading the data for the PATHS keyword
+                    break
+
+                # Assume we have found a PATHS definition line
+                try:
+                    path_info = innerline.split("--")[0].strip().split("'")
+                    paths[path_info[1]] = Path(path_info[3])
+                except IndexError:
+                    logger.warning(
+                        "Could not parse %s as a PATHS definition, skipping",
+                        line_strip,
+                    )
 
     logger.debug("Dictionary created: %s", str(paths))
     return paths
diff --git a/test_requirements.txt b/test_requirements.txt
index b2dfd72df..1645ea47e 100644
--- a/test_requirements.txt
+++ b/test_requirements.txt
@@ -6,6 +6,7 @@ mypy
 pytest
 pytest-cov
 pytest-mock
+pytest-xdist
 rstcheck
 rstcheck-core
 types-Jinja2
diff --git a/tests/test_pack_sim.py b/tests/test_pack_sim.py
index 77c510fdd..56a6b06b8 100644
--- a/tests/test_pack_sim.py
+++ b/tests/test_pack_sim.py
@@ -53,6 +53,35 @@ def test_main_fmu(tmp_path, mocker):
     assert Path("include/props/reek.pvt").exists()
 
 
+def test_helpful_latin1_encoding_exception(tmp_path, mocker):
+    """Test that a more helpful error message is given when a file with an
+    unsupported encoding is given"""
+    tmp_data_file = tmp_path / "TMP.DATA"
+    with open(tmp_data_file, "w", encoding="iso-8859-1") as fout:
+        fout.write("-- død")
+    mocker.patch("sys.argv", ["pack_sim", str(tmp_data_file), "."])
+    with pytest.raises(
+        UnicodeDecodeError, match=(f"'ø' found in file: {tmp_data_file.name} on line 1")
+    ):
+        pack_sim.main()
+
+    tmp_data_file2 = tmp_path / "TMP2.DATA"
+    with open(tmp_data_file2, "w", encoding="iso-8859-1") as fout:
+        fout.write("-- A\nRUNSPEC\n-- på sjøen")
+    mocker.patch("sys.argv", ["pack_sim", str(tmp_data_file2), "."])
+    with pytest.raises(
+        UnicodeDecodeError,
+        match=(f"'å' found in file: {tmp_data_file2.name} on line 3"),
+    ):
+        pack_sim.main()
+
+    tmp_data_file3 = tmp_path / "TMP3.DATA"
+    with open(tmp_data_file3, "w", encoding="utf-8") as fout:
+        fout.write(f"INCLUDE\n '{tmp_data_file.name}' /")
+    mocker.patch("sys.argv", ["pack_sim", str(tmp_data_file3), "."])
+    pack_sim.main()
+
+
 def test_repeated_run(tmp_path, mocker):
     """Test what happens on repeated incovations"""
     os.chdir(tmp_path)