Skip to content

Commit

Permalink
[QOLDEV-1015] extract Frictionless rows from file path instead of buffer
Browse files Browse the repository at this point in the history
- Frictionless is not very good at working with strings in memory; it expects file paths
  • Loading branch information
ThrawnCA committed Jan 10, 2025
1 parent e986b06 commit a0530fc
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions ckanext/qa/sniff_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ def sniff_file_format(filepath):
if is_json(buf):
format_ = {'format': 'JSON'}
# is it CSV?
elif is_psv(buf):
elif is_psv(buf, filepath=filepath):
format_ = {'format': 'PSV'}
elif is_csv(buf):
elif is_csv(buf, filepath=filepath):
format_ = {'format': 'CSV'}
# XML files without the "<?xml ... ?>" tag end up here
elif is_xml_but_without_declaration(buf):
Expand Down Expand Up @@ -213,12 +213,12 @@ def is_json(buf):
return True


def is_csv(buf):
return _is_spreadsheet(buf, 'CSV')
def is_csv(buf, **kwargs):
return _is_spreadsheet(buf, 'CSV', **kwargs)


def is_psv(buf):
return _is_spreadsheet(buf, 'PSV', '|')
def is_psv(buf, **kwargs):
return _is_spreadsheet(buf, 'PSV', '|', **kwargs)


def _messytables_extract_row_lengths(buf, format_, delimiter=None):
Expand All @@ -245,7 +245,7 @@ def _messytables_extract_row_lengths(buf, format_, delimiter=None):
return None


def _frictionless_extract_row_lengths(buf, format_, delimiter=None):
def _frictionless_extract_row_lengths(filepath, format_, delimiter=None):
# Return a list containing the count of cells in each row,
# using frictionless.Resource
import frictionless
Expand All @@ -255,7 +255,7 @@ def _frictionless_extract_row_lengths(buf, format_, delimiter=None):
dialect = frictionless.Dialect(descriptor={"delimiter": delimiter})
resource_kwargs['dialect'] = dialect
try:
table = frictionless.Resource(six.ensure_binary(buf), **resource_kwargs)
table = frictionless.Resource(filepath, **resource_kwargs)
for row in table.sample or table.read_rows():
row_lengths.append(len(row))
return row_lengths
Expand All @@ -264,9 +264,9 @@ def _frictionless_extract_row_lengths(buf, format_, delimiter=None):
return None


def _is_spreadsheet(buf, format_, delimiter=None):
def _is_spreadsheet(buf, format_, delimiter=None, **kwargs):
if toolkit.check_ckan_version('2.10'):
row_lengths = _frictionless_extract_row_lengths(buf, format_, delimiter)
row_lengths = _frictionless_extract_row_lengths(kwargs['filepath'], format_, delimiter)
else:
row_lengths = _messytables_extract_row_lengths(buf, format_, delimiter)
if not row_lengths:
Expand Down

0 comments on commit a0530fc

Please sign in to comment.