Add a numpy engine for reading using numpy.genfromtxt() #452

Merged · 21 commits · Jun 28, 2021
Commits
7b4c45f  Replace data section reader with pandas.read_csv (kinverarity1, Apr 17, 2021)
63aa445  Add engine="pandas" kwarg to LASFile.read (kinverarity1, Apr 18, 2021)
4246ce1  Fix GH CI bug? (kinverarity1, Apr 18, 2021)
6dcf20a  Experimental exploration with numpy.genfromtxt (dcslagel, Apr 18, 2021)
57bf80f  Rebase and make separate engine for 'numpy' reader (dcslagel, Apr 20, 2021)
f5dc804  Remove pandas engine (kinverarity1, Apr 21, 2021)
b65a0dd  Use normal engine for wrapped files (kinverarity1, Apr 21, 2021)
0cbf3ed  Format code with black (kinverarity1, Apr 21, 2021)
d3cea21  Remove pandas reader code (kinverarity1, Apr 21, 2021)
6d4c0af  Update from branch 'master' to numpy-genfromtxt-explore (dcslagel, Apr 21, 2021)
a614c1c  Handle numpy-engine data read exceptions (dcslagel, Apr 22, 2021)
528bd81  Numpy-engine temp workarounds for failing tests (dcslagel, Apr 23, 2021)
db551e8  Change 'f' formatting to old style for Python 3.5 (dcslagel, Apr 24, 2021)
0cb8d5b  Merge-squash numpy-genfromtext-explore on merge-base (dcslagel, May 1, 2021)
e276941  Sync most of numpy-engine work to current master 91f8eab (dcslagel, May 1, 2021)
39117b3  Interim check-in: add numpy-engine, but it hangs on a test (dcslagel, May 1, 2021)
b8b5bcb  Fix merge issues (dcslagel, May 2, 2021)
ac9e125  Sync with master's 91f8eab commit (dcslagel, May 2, 2021)
058b1bf  Handle single-row data in np.genfromtxt (dcslagel, May 26, 2021)
b54f538  Return to exception fallback to old parser (dcslagel, Jun 2, 2021)
468a0c6  Add conditions for falling back to the normal parser (dcslagel, Jun 11, 2021)
108 changes: 83 additions & 25 deletions lasio/las.py
@@ -82,13 +82,15 @@ def __init__(self, file_ref=None, **read_kwargs):
     def read(
         self,
         file_ref,
-        ignore_data=False,
-        read_policy="default",
-        null_policy="strict",
         ignore_header_errors=False,
         ignore_comments=("#",),
         ignore_data_comments="#",
         mnemonic_case="upper",
+        ignore_data=False,
+        engine="numpy",
+        use_normal_engine_for_wrapped=True,
+        read_policy="default",
+        null_policy="strict",
         index_unit=None,
         dtypes="auto",
         **kwargs
@@ -100,10 +102,6 @@ def read(
             object, or a string containing the contents of a file.

         Keyword Arguments:
-            null_policy (str or list): see
-                http://lasio.readthedocs.io/en/latest/data-section.html#handling-invalid-data-indicators-automatically
-            ignore_data (bool): if True, do not read in any of the actual data,
-                just the header metadata. False by default.
             ignore_header_errors (bool): ignore LASHeaderErrors (False by
                 default)
             ignore_comments (sequence/str): ignore lines beginning with these
@@ -113,6 +111,19 @@
             mnemonic_case (str): 'preserve': keep the case of HeaderItem mnemonics
                 'upper': convert all HeaderItem mnemonics to uppercase
                 'lower': convert all HeaderItem mnemonics to lowercase
+            ignore_data (bool): if True, do not read in any of the actual data,
+                just the header metadata. False by default.
+            engine (str): "normal": parse data section with normal Python reader
+                (quite slow); "numpy": parse data section with `numpy.genfromtxt`
+                (fast). By default the engine is "numpy".
+            use_normal_engine_for_wrapped (bool): if header metadata indicates
+                that the file is wrapped, always use the 'normal' engine. Default
+                is True. The only reason to use False is if speed is a very high
+                priority and you have files whose metadata incorrectly indicates
+                they are wrapped.
+            read_policy (): TODO
+            null_policy (str or list): see
+                http://lasio.readthedocs.io/en/latest/data-section.html#handling-invalid-data-indicators-automatically
             index_unit (str): Optionally force-set the index curve's unit to "m" or "ft"
             dtypes ("auto", dict or list): specify the data types for each curve in the
                 ~ASCII data section. If "auto", each curve will be converted to floats if
@@ -151,7 +162,6 @@ def read(
         regexp_subs, value_null_subs, version_NULL = reader.get_substitutions(
             read_policy, null_policy
         )
-
         provisional_version = 2.0
         provisional_wrapped = "YES"
         provisional_null = None
@@ -257,6 +267,22 @@
                 las3_data_section_indices.append(i)

         if not ignore_data:
+
+            # Override the default "numpy" parser with the 'normal' parser
+            # under these conditions:
+            #   - the file is wrapped
+            #   - null_policy is not "strict"
+            #   - dtypes is not "auto" (numpy can handle specified dtypes, but
+            #     performance decays to the 'normal' engine's level)
+            if provisional_wrapped == "YES" or null_policy != "strict" or dtypes != "auto":
+                if engine != "normal":
+                    logger.warning("Only engine='normal' can read wrapped files")
+                if use_normal_engine_for_wrapped:
+                    engine = "normal"
+
+            # Check the number of columns in each data section.
             for k, first_line, last_line, section_title in [
                 section_positions[i] for i in data_section_indices
             ]:
@@ -283,23 +309,55 @@
                     dtypes = [dtypes.get(c.mnemonic, float) for c in self.curves]

                 # Notes: see 2d9e43c3 and e960998f for 'try' background
-                try:
-                    curves_data_gen = reader.read_data_section_iterative(
-                        file_obj,
-                        (first_line, last_line),
-                        regexp_subs,
-                        value_null_subs,
-                        ignore_comments=ignore_data_comments,
-                        n_columns=reader_n_columns,
-                        dtypes=dtypes,
-                    )
-                except KeyboardInterrupt:
-                    raise
-                except:
-                    raise exceptions.LASDataError(
-                        traceback.format_exc()[:-1]
-                        + " in data section beginning line {}".format(i + 1)
-                    )
+
+                # Attempt to read the data section.
+                if engine == "numpy":
+                    try:
+                        curves_data_gen = reader.read_data_section_iterative_numpy_engine(
+                            file_obj, (first_line, last_line)
+                        )
+                    except KeyboardInterrupt:
+                        raise
+                    except:
+                        try:
+                            file_obj.seek(k)
+                            curves_data_gen = reader.read_data_section_iterative_normal_engine(
+                                file_obj,
+                                (first_line, last_line),
+                                regexp_subs,
+                                value_null_subs,
+                                ignore_comments=ignore_data_comments,
+                                n_columns=reader_n_columns,
+                                dtypes=dtypes,
+                            )
+                        except KeyboardInterrupt:
+                            raise
+                        except:
+                            raise exceptions.LASDataError(
+                                traceback.format_exc()[:-1]
+                                + " in data section beginning line {}".format(i + 1)
+                            )
+
+                if engine == "normal":
+                    try:
+                        curves_data_gen = reader.read_data_section_iterative_normal_engine(
+                            file_obj,
+                            (first_line, last_line),
+                            regexp_subs,
+                            value_null_subs,
+                            ignore_comments=ignore_data_comments,
+                            n_columns=reader_n_columns,
+                            dtypes=dtypes,
+                        )
+                    except KeyboardInterrupt:
+                        raise
+                    except:
+                        raise exceptions.LASDataError(
+                            traceback.format_exc()[:-1]
+                            + " in data section beginning line {}".format(i + 1)
+                        )

                 # Assign data to curves.
                 curve_idx = 0
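
The new keyword arguments surface at the top level roughly as follows. This is a minimal usage sketch rather than part of the diff; the file names are hypothetical, and engine="numpy" is simply the default written out explicitly.

import lasio

# Default: the fast numpy.genfromtxt-based parser for the data section.
las = lasio.read("example.las", engine="numpy")

# Force the slower pure-Python parser.
las_normal = lasio.read("example.las", engine="normal")

# Files whose header says they are wrapped are routed to the 'normal'
# engine automatically; disable that only if the wrap flag is known to be
# wrong and speed matters more.
las_wrapped = lasio.read("wrapped.las", use_normal_engine_for_wrapped=True)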
38 changes: 37 additions & 1 deletion lasio/reader.py
@@ -369,7 +369,7 @@ def inspect_data_section(file_obj, line_nos, regexp_subs, ignore_comments="#"):
     return item_counts[0]


-def read_data_section_iterative(
+def read_data_section_iterative_normal_engine(
     file_obj, line_nos, regexp_subs, value_null_subs, ignore_comments, n_columns, dtypes
 ):
     """Read data section into memory.
@@ -510,6 +510,42 @@ def identify_dtypes_from_data(row):
     return dtypes_list


+def read_data_section_iterative_numpy_engine(file_obj, line_nos):
+    """Read data section into memory.
+
+    Arguments:
+        file_obj: file-like object open for reading at the beginning of the section
+        line_nos (tuple): the first and last line numbers of the section to read
+
+    Returns:
+        A numpy ndarray.
+    """
+    first_line = line_nos[0] + 1
+    last_line = line_nos[1]
+    max_rows = last_line - first_line
+
+    file_obj.seek(0)
+
+    # unpack=True transforms the data from an array of rows to an array of
+    # columns. loose=False will throw an error on non-numerical data, which
+    # then sends the parsing to the 'normal' parser.
+    array = np.genfromtxt(
+        file_obj,
+        skip_header=first_line,
+        max_rows=max_rows,
+        names=None,
+        unpack=True,
+        loose=False,
+    )
+
+    # If there is only one data row, np.genfromtxt returns a flat array of
+    # individual values, but lasio needs an array of arrays. Reshape the
+    # single-row result into an array of single-value columns.
+    if len(array.shape) == 1:
+        arr_len = array.shape[0]
+        array = array.reshape(arr_len, 1)
+
+    return array
+
+
 def get_substitutions(read_policy, null_policy):
     """Parse read and null policy definitions into a list of regexp and value
     substitutions.
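
As a standalone illustration of what the numpy engine relies on, the sketch below exercises numpy.genfromtxt directly on an in-memory two-curve section; it mirrors, but is not, the lasio code above.

import io

import numpy as np

section = "1670.0 123.4\n1669.875 123.1\n"  # two rows, two curves

# unpack=True returns one array per column; loose=False raises on
# non-numeric tokens instead of silently inserting nan.
columns = np.genfromtxt(io.StringIO(section), unpack=True, loose=False)
print(columns.shape)  # (2, 2): one array per curve

# A single data row comes back as a flat 1-D array...
single = np.genfromtxt(io.StringIO("1670.0 123.4\n"), unpack=True, loose=False)
print(single.shape)  # (2,)

# ...so it has to be reshaped into one single-value array per curve, which
# is exactly the special case handled at the end of the new function.
print(single.reshape(single.shape[0], 1).shape)  # (2, 1)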
1 change: 1 addition & 0 deletions tests/test_null_policy.py
@@ -78,6 +78,7 @@ def test_null_policy_custom_2():


 def test_null_policy_ERR_strict():
+    # Verify we can read in text in a mostly numerical column
     las = read(egfn("null_policy_ERR.las"), null_policy="strict")
     assert las["RHOB"][2] == "ERR"

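
The test above passes because loose=False makes the numpy engine raise on the text value "ERR", triggering the fallback to the normal engine, which can retain the text. A standalone sketch of that trigger (assuming ValueError is what numpy's strict converters raise):

import io

import numpy as np

rows = "2.0 1.5\n2.1 ERR\n"  # a text value inside a numeric column

try:
    np.genfromtxt(io.StringIO(rows), unpack=True, loose=False)
except ValueError:
    # This is the condition under which LASFile.read falls back to
    # read_data_section_iterative_normal_engine.
    print("non-numeric data: falling back to the 'normal' engine")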
2 changes: 1 addition & 1 deletion tests/test_read.py
@@ -483,4 +483,4 @@ def test_sample_dtypes_specified_as_false():
 def test_index_null_issue227():
     las = lasio.examples.open("index_null.las")
     assert las['DEPT'].data[1] == 999.25
-    assert numpy.isnan(las['DT'].data[0])
\ No newline at end of file
+    assert numpy.isnan(las['DT'].data[0])
1 change: 1 addition & 0 deletions tests/test_speed.py
@@ -22,5 +22,6 @@
 def read_file():
     las = lasio.read(stegfn("1.2", "sample_big.las"))

+
 def test_read_v12_sample_big(benchmark):
     benchmark(read_file)
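
The benchmark above reads the big sample file with the default engine. A hedged sketch for timing both engines side by side with pytest-benchmark, assuming the same stegfn helper and lasio import already present at the top of this test module:

def read_numpy_engine():
    lasio.read(stegfn("1.2", "sample_big.las"), engine="numpy")

def read_normal_engine():
    lasio.read(stegfn("1.2", "sample_big.las"), engine="normal")

def test_speed_numpy_engine(benchmark):
    benchmark(read_numpy_engine)

def test_speed_normal_engine(benchmark):
    benchmark(read_normal_engine)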