Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[QOLDEV-1015] extract Frictionless rows from file path instead of buffer #73

Merged
merged 1 commit into from
Jan 10, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions ckanext/qa/sniff_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ def sniff_file_format(filepath):
if is_json(buf):
format_ = {'format': 'JSON'}
# is it CSV?
- elif is_psv(buf):
+ elif is_psv(buf, filepath=filepath):
format_ = {'format': 'PSV'}
- elif is_csv(buf):
+ elif is_csv(buf, filepath=filepath):
format_ = {'format': 'CSV'}
# XML files without the "<?xml ... ?>" tag end up here
elif is_xml_but_without_declaration(buf):
Expand Down Expand Up @@ -213,12 +213,12 @@ def is_json(buf):
return True


- def is_csv(buf):
- return _is_spreadsheet(buf, 'CSV')
+ def is_csv(buf, **kwargs):
+ return _is_spreadsheet(buf, 'CSV', **kwargs)


- def is_psv(buf):
- return _is_spreadsheet(buf, 'PSV', '|')
+ def is_psv(buf, **kwargs):
+ return _is_spreadsheet(buf, 'PSV', '|', **kwargs)


def _messytables_extract_row_lengths(buf, format_, delimiter=None):
Expand All @@ -245,7 +245,7 @@ def _messytables_extract_row_lengths(buf, format_, delimiter=None):
return None


- def _frictionless_extract_row_lengths(buf, format_, delimiter=None):
+ def _frictionless_extract_row_lengths(filepath, format_, delimiter=None):
# Return a list containing the count of cells in each row,
# using frictionless.Resource
import frictionless
Expand All @@ -255,7 +255,7 @@ def _frictionless_extract_row_lengths(buf, format_, delimiter=None):
dialect = frictionless.Dialect(descriptor={"delimiter": delimiter})
resource_kwargs['dialect'] = dialect
try:
- table = frictionless.Resource(six.ensure_binary(buf), **resource_kwargs)
+ table = frictionless.Resource(filepath, **resource_kwargs)
for row in table.sample or table.read_rows():
row_lengths.append(len(row))
return row_lengths
Expand All @@ -264,9 +264,9 @@ def _frictionless_extract_row_lengths(buf, format_, delimiter=None):
return None


- def _is_spreadsheet(buf, format_, delimiter=None):
+ def _is_spreadsheet(buf, format_, delimiter=None, **kwargs):
if toolkit.check_ckan_version('2.10'):
- row_lengths = _frictionless_extract_row_lengths(buf, format_, delimiter)
+ row_lengths = _frictionless_extract_row_lengths(kwargs['filepath'], format_, delimiter)
else:
row_lengths = _messytables_extract_row_lengths(buf, format_, delimiter)
if not row_lengths:
Expand Down
Loading