Skip to content

Commit

Permalink
stop point (#619)
Browse files: browse the repository at this point in the history
  • Loading branch information
martindurant authored Jun 8, 2021
1 parent c9ff13f commit cdbb821
Show file tree
Hide file tree
Showing 4 changed files with 17,696 additions and 12,860 deletions.
11 changes: 6 additions & 5 deletions fastparquet/api.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -133,10 +133,9 @@ def __init__(self, fn, verify=False, open_with=default_open,
f.endswith(".parquet") or f.endswith(".parq")]
if not allfiles:
raise ValueError("No files in dir")
# TODO: we could fetch all of these at once, if we know roughly
# the footer size from just one.
basepath, fmd = metadata_from_many(allfiles, verify_schema=verify,
open_with=open_with, root=root)
open_with=open_with, root=root,
fs=fs)
if basepath:
self.fn = join_path(basepath, '_metadata') # effective file
else:
Expand All @@ -162,6 +161,7 @@ def _parse_header(self, f, verify=True):
head_size = struct.unpack('<i', f.read(4))[0]
if verify:
assert f.read() == b'PAR1'
self._head_size = head_size
f.seek(-(head_size + 8), 2)
data = f.read(head_size)
except (AssertionError, struct.error):
Expand All @@ -171,8 +171,7 @@ def _parse_header(self, f, verify=True):
try:
fmd = read_thrift(f, parquet_thrift.FileMetaData)
except Exception:
raise ParquetException('Metadata parse failed: %s' %
self.fn)
raise ParquetException('Metadata parse failed: %s' % self.fn)
self.fmd = fmd
self._set_attrs()

Expand Down Expand Up @@ -237,6 +236,8 @@ def __getitem__(self, item):
new_pf = copy.copy(self)
new_pf.fmd.row_groups = new_rgs
new_pf._set_attrs()
# would otherwise be "simple" when selecting one rg
new_pf.file_scheme = self.file_scheme
return new_pf

def row_group_filename(self, rg):
Expand Down
Loading

0 comments on commit cdbb821

Please sign in to comment.