From 5ae838304f6c6599350fbf867fa1622985c935ad Mon Sep 17 00:00:00 2001 From: MarkusZehner Date: Tue, 11 Jan 2022 13:52:46 +0100 Subject: [PATCH 01/12] added geometry as a column in the data table and extended the select and insert functions to enable more precise spatial selection. As the column is currently added unconditionally, this might lead to errors with pre-existing data tables. (A usage sketch for the new selection follows at the end of this excerpt.) --- pyroSAR/drivers.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py index 539c8496..f8cd845e 100644 --- a/pyroSAR/drivers.py +++ b/pyroSAR/drivers.py @@ -1972,7 +1972,8 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', Column('vv', Integer), Column('hv', Integer), Column('vh', Integer), - Column('bbox', Geometry(geometry_type='POLYGON', management=True, srid=4326))) + Column('bbox', Geometry(geometry_type='POLYGON', management=True, srid=4326)), + Column('geometry', Geometry(geometry_type='POLYGON', management=True, srid=4326))) # add custom fields if self.custom_fields is not None: @@ -2089,13 +2090,13 @@ def __prepare_insertion(self, scene): insertion = self.Data() colnames = self.get_colnames() for attribute in colnames: - if attribute == 'bbox': - geom = id.bbox() + if attribute in ['bbox', 'geometry']: + geom = getattr(scene, 'attribute') geom.reproject(4326) geom = geom.convert2wkt(set3D=False)[0] geom = 'SRID=4326;' + str(geom) # set attributes of the Data object according to input - setattr(insertion, 'bbox', geom) + setattr(insertion, attribute, geom) elif attribute in ['hh', 'vv', 'hv', 'vh']: setattr(insertion, attribute, int(attribute in pols)) else: @@ -2509,7 +2510,7 @@ def move(self, scenelist, directory, pbar=False): log.info('The following scenes already exist at the target location:\n{}'.format('\n'.join(double))) def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None, - recursive=False, polarizations=None, **args): + recursive=False, polarizations=None, use_geometry=False, **args): """ select scenes from the database @@ -2528,6 +2529,8 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None, (only if `processdir` is not None) should also the subdirectories of the `processdir` be scanned? polarizations: list a list of polarization strings, e.g. ['HH', 'VV'] + use_geometry: bool + use the precise map_overlay footprint instead of the bounding box **args: any further arguments (columns), which are registered in the database.
See :meth:`~Archive.get_colnames()` @@ -2578,14 +2581,20 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None, if isinstance(vectorobject, Vector): vectorobject.reproject(4326) site_geom = vectorobject.convert2wkt(set3D=False)[0] + + if not use_geometry: + vector_id = 'bbox' + else: + log.info('Using precise footprints for selection.') + vector_id = 'geometry' + # postgres has a different way to store geometries if self.driver == 'postgresql': - arg_format.append("st_intersects(bbox, 'SRID=4326; {}')".format( - site_geom - )) + arg_format.append(F"st_intersects({vector_id}, 'SRID=4326; {site_geom}')") else: - arg_format.append('st_intersects(GeomFromText(?, 4326), bbox) = 1') + arg_format.append(F"st_intersects(GeomFromText(?, 4326), {vector_id}) = 1") vals.append(site_geom) + else: log.info('WARNING: argument vectorobject is ignored, must be of type spatialist.vector.Vector') From ad13f6a0ab06ed0cec2d49768a373d550ae6a02b Mon Sep 17 00:00:00 2001 From: MarkusZehner Date: Tue, 11 Jan 2022 14:19:16 +0100 Subject: [PATCH 02/12] fix in prepare insertion --- pyroSAR/drivers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py index f8cd845e..62032899 100644 --- a/pyroSAR/drivers.py +++ b/pyroSAR/drivers.py @@ -2091,7 +2091,7 @@ def __prepare_insertion(self, scene): colnames = self.get_colnames() for attribute in colnames: if attribute in ['bbox', 'geometry']: - geom = getattr(scene, 'attribute') + geom = getattr(scene, attribute) geom.reproject(4326) geom = geom.convert2wkt(set3D=False)[0] geom = 'SRID=4326;' + str(geom) From 833a404eff848e575174783e0a3e0c6021e74271 Mon Sep 17 00:00:00 2001 From: MarkusZehner Date: Tue, 11 Jan 2022 14:41:53 +0100 Subject: [PATCH 03/12] fix in prepare insertion no 2 --- pyroSAR/drivers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py index 62032899..4a5a2f29 100644 --- a/pyroSAR/drivers.py +++ b/pyroSAR/drivers.py @@ -2091,7 +2091,7 @@ def __prepare_insertion(self, scene): colnames = self.get_colnames() for attribute in colnames: if attribute in ['bbox', 'geometry']: - geom = getattr(scene, attribute) + geom = getattr(scene, attribute)() geom.reproject(4326) geom = geom.convert2wkt(set3D=False)[0] geom = 'SRID=4326;' + str(geom) From ac20ad584a39c5d9da0d787b3e27c7cfb7dbe017 Mon Sep 17 00:00:00 2001 From: MarkusZehner Date: Wed, 26 Jan 2022 14:05:11 +0100 Subject: [PATCH 04/12] fix in prepare drop_tables, update metadata --- pyroSAR/drivers.py | 34 ++++++++++++++++++++++++++++++---- tests/test_drivers.py | 2 ++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py index 4a5a2f29..84312f82 100644 --- a/pyroSAR/drivers.py +++ b/pyroSAR/drivers.py @@ -60,6 +60,7 @@ from sqlalchemy.engine.url import URL from sqlalchemy.ext.automap import automap_base from sqlalchemy_utils import database_exists, create_database, drop_database + from geoalchemy2 import Geometry import socket import time @@ -1891,7 +1892,7 @@ class Archive(object): """ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', - password='1234', host='localhost', port=5432, cleanup=True): + password='1234', host='localhost', port=5432, cleanup=True, add_geometry=False): # check for driver, if postgres then check if server is reachable if not postgres: self.driver = 'sqlite' @@ -1972,8 +1973,10 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', Column('vv', 
Integer), Column('hv', Integer), Column('vh', Integer), - Column('bbox', Geometry(geometry_type='POLYGON', management=True, srid=4326)), - Column('geometry', Geometry(geometry_type='POLYGON', management=True, srid=4326))) + Column('bbox', Geometry(geometry_type='POLYGON', management=True, srid=4326))) + + if add_geometry: + self.data_schema.append_column(Column('geometry', Geometry(geometry_type='POLYGON', management=True, srid=4326))) # add custom fields if self.custom_fields is not None: @@ -2009,6 +2012,28 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', log.info('checking for missing scenes') self.cleanup() sys.stdout.flush() + + def update_geometry_field(self): + """ + Add the geometry as column to an existing database, fill missing values + """ + + # note: alter table to make geometry column, then sth like this: (NOT YET CHECKED CODE!!) + if 'geometry' not in self.get_colnames(): + '''ALTER TABLE data ADD COLUMN geometry ;''' + # Column('geometry', Geometry(geometry_type='POLYGON', management=True, srid=4326)) + + session = self.Session() + to_update = session.query(self.Data).filter(self.Data.__table__.c['geometry'].is_(None)).all() + for entry in to_update: + geom = getattr(identify(entry.outname_base), 'geometry')() + geom.reproject(4326) + geom = geom.convert2wkt(set3D=False)[0] + geom = 'SRID=4326;' + str(geom) + setattr(entry, 'geometry', geom) + session.commit() + session.close() + pass def add_tables(self, tables): """ @@ -2063,7 +2088,7 @@ def __load_spatialite(dbapi_conn, connection_record): except sqlite3.OperationalError: continue elif platform.system() == 'Darwin': - for option in ['mod_spatialite.so']: # , 'mod_spatialite.dylib']: + for option in ['mod_spatialite.so', 'mod_spatialite.7.dylib']: # , 'mod_spatialite.dylib']: try: dbapi_conn.load_extension(option) except sqlite3.OperationalError: @@ -2786,6 +2811,7 @@ def drop_table(self, table): log.info('table {} dropped from database.'.format(table)) else: raise ValueError("table {} is not registered in the database!".format(table)) + self.meta = MetaData(self.engine) self.Base = automap_base(metadata=self.meta) self.Base.prepare(self.engine, reflect=True) diff --git a/tests/test_drivers.py b/tests/test_drivers.py index 5e328997..21a63354 100644 --- a/tests/test_drivers.py +++ b/tests/test_drivers.py @@ -182,6 +182,8 @@ def test_archive(tmpdir, testdata): db.add_tables(mytable) assert 'mytable' in db.get_tablenames() + db.drop_table('mytable') + assert 'mytable' not in db.get_tablenames() with pytest.raises(TypeError): db.filter_scenelist([1]) db.close() From a1e515b90b3c063bea8f89e325996bc7c69ffe46 Mon Sep 17 00:00:00 2001 From: MarkusZehner Date: Fri, 28 Jan 2022 16:51:05 +0100 Subject: [PATCH 05/12] added method to include geometry column, alter data table in place, update missing geometries by removing and adding scenes again from data table. 
fixed get_colnames --- pyroSAR/drivers.py | 60 ++++++++++++++++++++++++++----------------- tests/test_drivers.py | 19 ++++++++++++++ 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py index 84312f82..92cfb01f 100644 --- a/pyroSAR/drivers.py +++ b/pyroSAR/drivers.py @@ -52,7 +52,7 @@ from spatialist import crsConvert, sqlite3, Vector, bbox from spatialist.ancillary import parse_literal, finder -from sqlalchemy import create_engine, Table, MetaData, Column, Integer, String, exc +from sqlalchemy import create_engine, Table, MetaData, Column, Integer, String, exc, update from sqlalchemy import inspect as sql_inspect from sqlalchemy.event import listen from sqlalchemy.orm import sessionmaker @@ -1934,7 +1934,9 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', conn.close() except exc.OperationalError: raise RuntimeError('could not load spatialite extension') - + + #print(sql_inspect(self.engine).has_table('data')) + # if database is new, (create postgres-db and) enable spatial extension if not database_exists(self.engine.url): if self.driver == 'postgresql': @@ -1952,7 +1954,7 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', self.Session = sessionmaker(bind=self.engine) self.meta = MetaData(self.engine) self.custom_fields = custom_fields - + # create tables as schema self.data_schema = Table('data', self.meta, Column('sensor', String), @@ -1975,9 +1977,11 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', Column('vh', Integer), Column('bbox', Geometry(geometry_type='POLYGON', management=True, srid=4326))) - if add_geometry: - self.data_schema.append_column(Column('geometry', Geometry(geometry_type='POLYGON', management=True, srid=4326))) - + if add_geometry and not sql_inspect(self.engine).has_table('data') or 'geometry' in self.get_colnames(): + # add geometry to schema if new database is created, or database had it enabled once + self.data_schema.append_column(Column('geometry', + Geometry(geometry_type='POLYGON', management=True, srid=4326))) + # add custom fields if self.custom_fields is not None: for key, val in self.custom_fields.items(): @@ -1997,6 +2001,17 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', if not sql_inspect(self.engine).has_table('data'): log.debug("creating DB table 'data'") self.data_schema.create(self.engine) + + columns_data = sql_inspect(self.engine).get_columns('data') + # for col in columns_data: + # print(col['name']) + + # elif sql_inspect(self.engine).has_table('data') and add_geometry and 'geometry' not in self.get_colnames('data'): + # print(self.get_colnames('data')) + # # log.info + # print("add_geometry has been enabled after database is already created, " + # "if you want to update table 'data' to include this column, " + # "run Archive.update_geometry_field()!") if not sql_inspect(self.engine).has_table('duplicates'): log.debug("creating DB table 'duplicates'") self.duplicates_schema.create(self.engine) @@ -2017,23 +2032,22 @@ def update_geometry_field(self): """ Add the geometry as column to an existing database, fill missing values """ - - # note: alter table to make geometry column, then sth like this: (NOT YET CHECKED CODE!!) 
if 'geometry' not in self.get_colnames(): - '''ALTER TABLE data ADD COLUMN geometry ;''' - # Column('geometry', Geometry(geometry_type='POLYGON', management=True, srid=4326)) + self.conn.execute(f"SELECT AddGeometryColumn('data', 'geometry', 4326, 'POLYGON')") + self.Session = sessionmaker(bind=self.engine) + self.meta = MetaData(self.engine) + self.Base = automap_base(metadata=self.meta) + self.Base.prepare(self.engine, reflect=True) + self.Data = self.Base.classes.data + + temp_data = self.Session().query(self.Data.scene, self.Data.outname_base).filter(self.Data.geometry.is_(None)) + to_insert = [] + for entry in temp_data: + to_insert.append(entry[0]) # keep the scene path so it can be re-inserted below + delete_statement = self.data_schema.delete().where(self.data_schema.c.scene == entry[0]) + self.conn.execute(delete_statement) + self.insert(to_insert) + - session = self.Session() - to_update = session.query(self.Data).filter(self.Data.__table__.c['geometry'].is_(None)).all() - for entry in to_update: - geom = getattr(identify(entry.outname_base), 'geometry')() - geom.reproject(4326) - geom = geom.convert2wkt(set3D=False)[0] - geom = 'SRID=4326;' + str(geom) - setattr(entry, 'geometry', geom) - session.commit() - session.close() - pass def add_tables(self, tables): """ @@ -2394,8 +2408,8 @@ def get_colnames(self, table='data'): the column names of the chosen table """ # get all columns of one table, but shows geometry columns not correctly - table_info = Table(table, self.meta, autoload=True, autoload_with=self.engine) - col_names = table_info.c.keys() + dicts = sql_inspect(self.engine).get_columns(table) + col_names = [i['name'] for i in dicts] return sorted([self.encode(x) for x in col_names]) diff --git a/tests/test_drivers.py b/tests/test_drivers.py index 21a63354..11079957 100644 --- a/tests/test_drivers.py +++ b/tests/test_drivers.py @@ -189,6 +189,25 @@ def test_archive(tmpdir, testdata): db.close() +def test_archive_geometry(tmpdir, testdata): + dbfile = os.path.join(str(tmpdir), 'scenes.db') + db = pyroSAR.Archive(dbfile) + db.insert(testdata['s1']) + db.close() + db = pyroSAR.Archive(dbfile, add_geometry=True) + db.update_geometry_field() + db.close() + os.remove(dbfile) + + dbfile = os.path.join(str(tmpdir), 'scenes_geo.db') + db_geo = pyroSAR.Archive(dbfile, add_geometry=True) + db_geo.insert(testdata['s1']) + db_geo.close() + db_geo = pyroSAR.Archive(dbfile) + db_geo.insert(testdata['s1_2']) + db_geo.close() + os.remove(dbfile) + def test_archive2(tmpdir, testdata): dbfile = os.path.join(str(tmpdir), 'scenes.db') with pyroSAR.Archive(dbfile) as db: From 22f40ded25b8c626fc382413d261c08da78ffa2e Mon Sep 17 00:00:00 2001 From: MarkusZehner Date: Fri, 28 Jan 2022 16:57:17 +0100 Subject: [PATCH 06/12] added method to include geometry column, alter data table in place, update missing geometries by removing and adding scenes again from data table.
fixed get_colnames --- pyroSAR/drivers.py | 575 ++++++++++++++++++++++----------------------- 1 file changed, 283 insertions(+), 292 deletions(-) diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py index 92cfb01f..1ae63268 100644 --- a/pyroSAR/drivers.py +++ b/pyroSAR/drivers.py @@ -52,7 +52,7 @@ from spatialist import crsConvert, sqlite3, Vector, bbox from spatialist.ancillary import parse_literal, finder -from sqlalchemy import create_engine, Table, MetaData, Column, Integer, String, exc, update +from sqlalchemy import create_engine, Table, MetaData, Column, Integer, String, exc from sqlalchemy import inspect as sql_inspect from sqlalchemy.event import listen from sqlalchemy.orm import sessionmaker @@ -115,7 +115,7 @@ def identify(scene): """ if not os.path.exists(scene): raise OSError("No such file or directory: '{}'".format(scene)) - + for handler in ID.__subclasses__(): try: return handler(scene) @@ -200,7 +200,7 @@ class ID(object): """ Abstract class for SAR meta data handlers """ - + def __init__(self, metadict): """ to be called by the __init__methods of the format drivers @@ -212,10 +212,10 @@ def __init__(self, metadict): self.locals = __LOCAL__ for item in self.locals: setattr(self, item, metadict[item]) - + def __getattr__(self, item): raise AttributeError("object has no attribute '{}'".format(item)) - + def __str__(self): lines = ['pyroSAR ID object of type {}'.format(self.__class__.__name__)] for item in sorted(self.locals): @@ -225,7 +225,7 @@ def __str__(self): line = '{0}: {1}'.format(item, value) lines.append(line) return '\n'.join(lines) - + def bbox(self, outname=None, driver=None, overwrite=True): """ get the bounding box of a scene either as a vector object or written to a file @@ -250,7 +250,7 @@ def bbox(self, outname=None, driver=None, overwrite=True): else: bbox(self.getCorners(), self.projection, outname=outname, driver=driver, overwrite=overwrite) - + def geometry(self, outname=None, driver=None, overwrite=True): """ get the footprint geometry of a scene either as a vector object or written to a file @@ -275,12 +275,12 @@ def geometry(self, outname=None, driver=None, overwrite=True): for coordinate in self.meta['coordinates']: ring.AddPoint(*coordinate) ring.CloseRings() - + geom = ogr.Geometry(ogr.wkbPolygon) geom.AddGeometry(ring) - + geom.FlattenTo2D() - + bbox = Vector(driver='Memory') bbox.addlayer('geometry', srs, geom.GetGeometryType()) bbox.addfield('area', ogr.OFTReal) @@ -290,7 +290,7 @@ def geometry(self, outname=None, driver=None, overwrite=True): return bbox else: bbox.write(outfile=outname, driver=driver, overwrite=overwrite) - + @property def compression(self): """ @@ -309,7 +309,7 @@ def compression(self): return 'tar' else: return None - + def export2dict(self): """ Return the uuid and the metadata that is defined in self.locals as a dictionary @@ -319,7 +319,7 @@ def export2dict(self): title = os.path.splitext(sq_file)[0] metadata['uuid'] = title return metadata - + def export2sqlite(self, dbfile): """ Export relevant metadata to an SQLite database @@ -332,7 +332,7 @@ def export2sqlite(self, dbfile): """ with Archive(dbfile) as archive: archive.insert(self) - + def examine(self, include_folders=False): """ check whether any items in the SAR scene structure (i.e. 
files/folders) match the regular expression pattern @@ -357,7 +357,7 @@ def examine(self, include_folders=False): raise RuntimeError('scene does not match {} naming convention'.format(type(self).__name__)) else: raise RuntimeError('file ambiguity detected:\n{}'.format('\n'.join(files))) - + def findfiles(self, pattern, include_folders=False): """ find files in the scene archive, which match a pattern. @@ -378,17 +378,17 @@ def findfiles(self, pattern, include_folders=False): :func:`spatialist.ancillary.finder` """ foldermode = 1 if include_folders else 0 - + files = finder(target=self.scene, matchlist=[pattern], foldermode=foldermode, regex=True) - + if os.path.isdir(self.scene) \ and re.search(pattern, os.path.basename(self.scene)) \ and include_folders: files.append(self.scene) - + return files - + def gdalinfo(self): """ read metadata directly from the GDAL SAR image drivers @@ -399,7 +399,7 @@ def gdalinfo(self): the metadata attributes """ files = self.findfiles(r'(?:\.[NE][12]$|DAT_01\.001$|product\.xml|manifest\.safe$)') - + if len(files) == 1: prefix = {'zip': '/vsizip/', 'tar': '/vsitar/', None: ''}[self.compression] header = files[0] @@ -407,34 +407,34 @@ def gdalinfo(self): raise RuntimeError('file ambiguity detected') else: raise RuntimeError('file type not supported') - + meta = {} - + ext_lookup = {'.N1': 'ASAR', '.E1': 'ERS1', '.E2': 'ERS2'} extension = os.path.splitext(header)[1] if extension in ext_lookup: meta['sensor'] = ext_lookup[extension] - + img = gdal.Open(prefix + header, GA_ReadOnly) gdalmeta = img.GetMetadata() meta['samples'], meta['lines'], meta['bands'] = img.RasterXSize, img.RasterYSize, img.RasterCount meta['projection'] = img.GetGCPProjection() meta['gcps'] = [((x.GCPPixel, x.GCPLine), (x.GCPX, x.GCPY, x.GCPZ)) for x in img.GetGCPs()] img = None - + for item in gdalmeta: entry = [item, parse_literal(gdalmeta[item].strip())] - + try: entry[1] = self.parse_date(str(entry[1])) except ValueError: pass - + if re.search('(?:LAT|LONG)', entry[0]): entry[1] /= 1000000. meta[entry[0]] = entry[1] return meta - + @abc.abstractmethod def getCorners(self): """ @@ -446,7 +446,7 @@ def getCorners(self): dictionary with keys `xmin`, `xmax`, `ymin` and `ymax` """ raise NotImplementedError - + def getFileObj(self, filename): """ Load a file into a readable file object. 
@@ -462,7 +462,7 @@ def getFileObj(self, filename): a file pointer object """ return getFileObj(self.scene, filename) - + def getGammaImages(self, directory=None): """ list all files processed by GAMMA @@ -489,7 +489,7 @@ def getGammaImages(self, directory=None): 'directory missing; please provide directory to function or define object attribute "gammadir"') return [x for x in finder(directory, [self.outname_base()], regex=True) if not re.search(r'\.(?:par|hdr|aux\.xml|swp|sh)$', x)] - + def getHGT(self): """ get the names of all SRTM HGT tiles overlapping with the SAR scene @@ -499,23 +499,23 @@ def getHGT(self): list names of the SRTM HGT tiles """ - + corners = self.getCorners() - + # generate sequence of integer coordinates marking the tie points of the overlapping hgt tiles lat = range(int(float(corners['ymin']) // 1), int(float(corners['ymax']) // 1) + 1) lon = range(int(float(corners['xmin']) // 1), int(float(corners['xmax']) // 1) + 1) - + # convert coordinates to string with leading zeros and hemisphere identification letter lat = [str(x).zfill(2 + len(str(x)) - len(str(x).strip('-'))) for x in lat] lat = [x.replace('-', 'S') if '-' in x else 'N' + x for x in lat] - + lon = [str(x).zfill(3 + len(str(x)) - len(str(x).strip('-'))) for x in lon] lon = [x.replace('-', 'W') if '-' in x else 'E' + x for x in lon] - + # concatenate all formatted latitudes and longitudes with each other as final product return [x + y + '.hgt' for x in lat for y in lon] - + def is_processed(self, outdir, recursive=False): """ check whether a scene has already been processed and stored in the defined output directory @@ -536,7 +536,7 @@ def is_processed(self, outdir, recursive=False): return len(finder(outdir, [self.outname_base()], regex=True, recursive=recursive)) != 0 else: return False - + def outname_base(self, extensions=None): """ parse a string containing basic information about the scene in standardized format. @@ -553,7 +553,7 @@ def outname_base(self, extensions=None): a standardized name unique to the scene """ - + fields = ('{:_<4}'.format(self.sensor), '{:_<4}'.format(self.acquisition_mode), self.orbit, @@ -563,7 +563,7 @@ def outname_base(self, extensions=None): ext = '_'.join([str(getattr(self, key)) for key in extensions]) out += '_' + ext return out - + @staticmethod def parse_date(x): """ @@ -581,7 +581,7 @@ def parse_date(x): the converted time stamp in format YYYYmmddTHHMMSS """ return parse_date(x) - + @abc.abstractmethod def quicklook(self, outname, format='kmz'): """ @@ -606,7 +606,7 @@ def quicklook(self, outname, format='kmz'): >>> scene.quicklook('S1A__IW___A_20180101T170648.kmz') """ raise NotImplementedError - + def summary(self): """ print the set of standardized scene metadata attributes @@ -616,7 +616,7 @@ def summary(self): """ print(self.__str__()) - + @abc.abstractmethod def scanMetadata(self): """ @@ -632,7 +632,7 @@ def scanMetadata(self): """ raise NotImplementedError - + @abc.abstractmethod def unpack(self, directory, overwrite=False, exist_ok=False): """ @@ -652,7 +652,7 @@ def unpack(self, directory, overwrite=False, exist_ok=False): """ raise NotImplementedError - + def _unpack(self, directory, offset=None, overwrite=False, exist_ok=False): """ general function for unpacking scene archives; to be called by implementations of ID.unpack. 
@@ -683,7 +683,7 @@ def _unpack(self, directory, offset=None, overwrite=False, exist_ok=False): else: raise RuntimeError('target scene directory already exists: {}'.format(directory)) os.makedirs(directory, exist_ok=True) - + if do_unpack: if tf.is_tarfile(self.scene): archive = tf.open(self.scene, 'r') @@ -691,7 +691,7 @@ def _unpack(self, directory, offset=None, overwrite=False, exist_ok=False): if offset is not None: names = [x for x in names if x.startswith(offset)] header = os.path.commonprefix(names) - + if header in names: if archive.getmember(header).isdir(): for item in sorted(names): @@ -704,7 +704,7 @@ def _unpack(self, directory, offset=None, overwrite=False, exist_ok=False): else: archive.extractall(directory) archive.close() - + elif zf.is_zipfile(self.scene): archive = zf.ZipFile(self.scene, 'r') names = archive.namelist() @@ -731,7 +731,7 @@ def _unpack(self, directory, offset=None, overwrite=False, exist_ok=False): else: log.info('unpacking is only supported for TAR and ZIP archives') return - + self.scene = directory main = os.path.join(self.scene, os.path.basename(self.file)) self.file = main if os.path.isfile(main) else self.scene @@ -749,7 +749,7 @@ class CEOS_ERS(ID): ER-IS-EPO-GS-5902-3: Annex C. ERS SAR.SLC/SLC-I. CCT and EXABYTE (`ESA 1998 `_) """ - + def __init__(self, scene): self.pattern = r'(?P(?:SAR|ASA)_(?:IM(?:S|P|G|M|_)|AP(?:S|P|G|M|_)|WV(?:I|S|W|_)|WS(?:M|S|_))_[012B][CP])' \ r'(?P[A-Z])' \ @@ -764,23 +764,23 @@ def __init__(self, scene): r'(?P[0-9]{4,})\.' \ r'(?P[EN][12])' \ r'(?P(?:\.zip|\.tar\.gz|\.PS|))$' - + self.pattern_pid = r'(?P(?:SAR|ASA))_' \ r'(?P(?:IM(?:S|P|G|M|_)|AP(?:S|P|G|M|_)|WV(?:I|S|W|_)|WS(?:M|S|_)))_' \ r'(?P[012B][CP])' - + self.scene = os.path.realpath(scene) - + self.examine() - + match = re.match(re.compile(self.pattern), os.path.basename(self.file)) match2 = re.match(re.compile(self.pattern_pid), match.group('product_id')) - + if re.search('IM__0', match.group('product_id')): raise RuntimeError('product level 0 not supported (yet)') - + self.meta = self.gdalinfo() - + self.meta['acquisition_mode'] = match2.group('image_mode') self.meta['polarizations'] = ['VV'] self.meta['product'] = 'SLC' if self.meta['acquisition_mode'] in ['IMS', 'APS', 'WSS'] else 'PRI' @@ -789,29 +789,29 @@ def __init__(self, scene): self.meta['incidence_angle'] = self.meta['CEOS_INC_ANGLE'] self.meta['k_db'] = -10 * math.log(float(self.meta['CEOS_CALIBRATION_CONSTANT_K']), 10) self.meta['sc_db'] = {'ERS1': 59.61, 'ERS2': 60}[self.meta['sensor']] - + # acquire additional metadata from the file LEA_01.001 self.meta.update(self.scanMetadata()) - + # register the standardized meta attributes as object attributes super(CEOS_ERS, self).__init__(self.meta) - + def getCorners(self): lat = [x[1][1] for x in self.meta['gcps']] lon = [x[1][0] for x in self.meta['gcps']] return {'xmin': min(lon), 'xmax': max(lon), 'ymin': min(lat), 'ymax': max(lat)} - + def unpack(self, directory, overwrite=False, exist_ok=False): if self.sensor in ['ERS1', 'ERS2']: base_file = re.sub(r'\.PS$', '', os.path.basename(self.file)) base_dir = os.path.basename(directory.strip('/')) - + outdir = directory if base_file == base_dir else os.path.join(directory, base_file) - + self._unpack(outdir, overwrite=overwrite, exist_ok=exist_ok) else: raise NotImplementedError('sensor {} not implemented yet'.format(self.sensor)) - + def scanMetadata(self): lea_obj = self.getFileObj(self.findfiles('LEA_01.001')[0]) lea = lea_obj.read() @@ -821,11 +821,11 @@ def scanMetadata(self): meta['sensor'] = 
lea[(offset + 396):(offset + 412)].strip() meta['start'] = self.parse_date(str(lea[(offset + 1814):(offset + 1838)].decode('utf-8'))) meta['stop'] = self.parse_date(str(lea[(offset + 1862):(offset + 1886)].decode('utf-8'))) - + looks_range = float(lea[(offset + 1174):(offset + 1190)]) looks_azimuth = float(lea[(offset + 1190):(offset + 1206)]) meta['looks'] = (looks_range, looks_azimuth) - + meta['heading'] = float(lea[(offset + 468):(offset + 476)]) meta['orbit'] = 'D' if meta['heading'] > 180 else 'A' orbitNumber, frameNumber = map(int, re.findall('[0-9]+', lea[(offset + 36):(offset + 68)].decode('utf-8'))) @@ -846,7 +846,7 @@ def scanMetadata(self): # meta['k_db'] = -10*math.log(float(text_subset[663:679].strip()), 10) # meta['antenna_flag'] = int(text_subset[659:663].strip()) return meta - + # def correctAntennaPattern(self): # the following section is only relevant for PRI products and can be considered future work # select antenna gain correction lookup file from extracted meta information @@ -925,11 +925,11 @@ class CEOS_PSR(ID): * VBS: Scan SAR wide mode Single polarization * VBD: Scan SAR wide mode Dual polarization """ - + def __init__(self, scene): - + self.scene = os.path.realpath(scene) - + patterns = [r'^LED-ALPSR' r'(?PP|S)' r'(?P[0-9]{5})' @@ -949,7 +949,7 @@ def __init__(self, scene): r'(?P[GR_])' r'(?P[UPML_])' r'(?PA|D)$'] - + for i, pattern in enumerate(patterns): self.pattern = pattern try: @@ -958,18 +958,18 @@ def __init__(self, scene): except RuntimeError as e: if i + 1 == len(patterns): raise e - + self.meta = self.scanMetadata() - + # register the standardized meta attributes as object attributes super(CEOS_PSR, self).__init__(self.meta) - + def _getLeaderfileContent(self): led_obj = self.getFileObj(self.led_filename) led = led_obj.read() led_obj.close() return led - + def _parseSummary(self): try: summary_file = self.getFileObj(self.findfiles('summary|workreport')[0]) @@ -981,24 +981,24 @@ def _parseSummary(self): for x, y in summary.items(): summary[x] = parse_literal(y) return summary - + @property def led_filename(self): return self.findfiles(self.pattern)[0] - + def scanMetadata(self): ################################################################################################################ # read leader (LED) file led = self._getLeaderfileContent() - + # read summary text file meta = self._parseSummary() - + # read polarizations from image file names meta['polarizations'] = [re.search('[HV]{2}', os.path.basename(x)).group(0) for x in self.findfiles('^IMG-')] ################################################################################################################ # read start and stop time - + try: meta['start'] = self.parse_date(meta['Img_SceneStartDateTime']) meta['stop'] = self.parse_date(meta['Img_SceneEndDateTime']) @@ -1037,9 +1037,9 @@ def scanMetadata(self): meta['sensor'] = {'AL1': 'PSR1', 'AL2': 'PSR2'}[fileDescriptor[48:51].decode('utf-8')] ################################################################################################################ # read leader file name information - + match = re.match(re.compile(self.pattern), os.path.basename(self.led_filename)) - + if meta['sensor'] == 'PSR1': meta['acquisition_mode'] = match.group('sub') + match.group('mode') else: @@ -1050,30 +1050,30 @@ def scanMetadata(self): p0 = p1 p1 += dss_l * dss_n dataSetSummary = led[p0:p1] - + if mpd_n > 0: p0 = p1 p1 += mpd_l * mpd_n mapProjectionData = led[p0:p1] else: mapProjectionData = None - + p0 = p1 p1 += ppd_l * ppd_n platformPositionData = 
led[p0:p1] - + p0 = p1 p1 += adr_l * adr_n attitudeData = led[p0:p1] - + p0 = p1 p1 += rdr_l * rdr_n radiometricData = led[p0:p1] - + p0 = p1 p1 += dqs_l * dqs_n dataQualitySummary = led[p0:p1] - + facilityRelatedData = [] while p1 < len(led): p0 = p1 @@ -1082,7 +1082,7 @@ def scanMetadata(self): facilityRelatedData.append(led[p0:p1]) ################################################################################################################ # read map projection data record - + if mapProjectionData is not None: lat = list(map(float, [mapProjectionData[1072:1088], mapProjectionData[1104:1120], @@ -1093,9 +1093,9 @@ def scanMetadata(self): mapProjectionData[1152:1168], mapProjectionData[1184:1200]])) meta['corners'] = {'xmin': min(lon), 'xmax': max(lon), 'ymin': min(lat), 'ymax': max(lat)} - + # https://github.com/datalyze-solutions/LandsatProcessingPlugin/blob/master/src/metageta/formats/alos.py - + src_srs = osr.SpatialReference() # src_srs.SetGeogCS('GRS 1980','GRS 1980','GRS 1980',6378137.00000,298.2572220972) src_srs.SetWellKnownGeogCS('WGS84') @@ -1129,14 +1129,14 @@ def scanMetadata(self): dfStdP2 = float(mapProjectionData[784, 800]) src_srs.SetLCC(dfStdP1, dfStdP2, dfCenterLat, dfCenterLon, 0, 0) meta['projection'] = src_srs.ExportToWkt() - + else: meta['projection'] = crsConvert(4326, 'wkt') ################################################################################################################ # read data set summary record - + scene_id = dataSetSummary[20:52].decode('ascii') - + if meta['sensor'] == 'PSR1': pattern = r'(?P[A-Z]{2})' \ r'(?P[A-Z]{3})' \ @@ -1150,16 +1150,16 @@ def scanMetadata(self): r'(?P[0-9]{6})[ ]{11}' else: raise ValueError('sensor must be either PSR1 or PSR2; is: {}'.format(meta['sensor'])) - + match = re.match(re.compile(pattern), scene_id) - + orbitsPerCycle = {'PSR1': 671, 'PSR2': 207}[meta['sensor']] - + meta['orbitNumber_abs'] = int(match.group('orbitNumber')) meta['orbitNumber_rel'] = meta['orbitNumber_abs'] % orbitsPerCycle meta['cycleNumber'] = meta['orbitNumber_abs'] // orbitsPerCycle + 1 meta['frameNumber'] = int(match.group('frameNumber')) - + try: meta['lines'] = int(dataSetSummary[324:332]) * 2 except ValueError: @@ -1173,16 +1173,16 @@ def scanMetadata(self): meta['proc_facility'] = dataSetSummary[1046:1062].strip() meta['proc_system'] = dataSetSummary[1062:1070].strip() meta['proc_version'] = dataSetSummary[1070:1078].strip() - + try: azlks = float(dataSetSummary[1174:1190]) rlks = float(dataSetSummary[1190:1206]) meta['looks'] = (rlks, azlks) except ValueError: meta['looks'] = (None, None) - + meta['orbit'] = dataSetSummary[1534:1542].decode('utf-8').strip()[0] - + try: spacing_azimuth = float(dataSetSummary[1686:1702]) spacing_range = float(dataSetSummary[1702:1718]) @@ -1197,7 +1197,7 @@ def scanMetadata(self): meta['k_dB'] = None ################################################################################################################ # additional notes - + # the following can be used to read platform position time from the led file # this covers a larger time frame than the actual scene sensing time # y, m, d, nd, s = platformPositionData[144:182].split() @@ -1207,13 +1207,13 @@ def scanMetadata(self): # stop = start + timedelta(seconds=(npoints - 1) * interval) # parse_date(start) # parse_date(stop) - + return meta - + def unpack(self, directory, overwrite=False, exist_ok=False): outdir = os.path.join(directory, os.path.basename(self.file).replace('LED-', '')) self._unpack(outdir, overwrite=overwrite, exist_ok=exist_ok) 
- + def getCorners(self): if 'corners' not in self.meta.keys(): lat = [y for x, y in self.meta.items() if 'Latitude' in x] @@ -1222,27 +1222,27 @@ def getCorners(self): img_filename = self.findfiles('IMG')[0] img_obj = self.getFileObj(img_filename) imageFileDescriptor = img_obj.read(720) - + lineRecordLength = int(imageFileDescriptor[186:192]) # bytes per line + 412 numberOfRecords = int(imageFileDescriptor[180:186]) - + signalDataDescriptor1 = img_obj.read(412) img_obj.seek(720 + lineRecordLength * (numberOfRecords - 1)) signalDataDescriptor2 = img_obj.read() - + img_obj.close() - + lat = [signalDataDescriptor1[192:196], signalDataDescriptor1[200:204], signalDataDescriptor2[192:196], signalDataDescriptor2[200:204]] - + lon = [signalDataDescriptor1[204:208], signalDataDescriptor1[212:216], signalDataDescriptor2[204:208], signalDataDescriptor2[212:216]] - + lat = [struct.unpack('>i', x)[0] / 1000000. for x in lat] lon = [struct.unpack('>i', x)[0] / 1000000. for x in lon] - + self.meta['corners'] = {'xmin': min(lon), 'xmax': max(lon), 'ymin': min(lat), 'ymax': max(lat)} - + return self.meta['corners'] @@ -1262,11 +1262,11 @@ class EORC_PSR(ID): * FBD: Fine mode Dual polarization * WBD: Scan SAR nominal [14MHz] mode Dual polarization """ - + def __init__(self, scene): - + self.scene = os.path.realpath(scene) - + self.pattern = r'^PSR2-' \ r'(?PSLTR)_' \ r'(?PRSP[0-9]{3})_' \ @@ -1279,21 +1279,21 @@ def __init__(self, scene): r'(?P[0-9A-Z]{5})_' \ r'(?P[0-9]{3})_' \ r'HDR$' - + self.examine() - + self.meta = self.scanMetadata() - + # register the standardized meta attributes as object attributes super(EORC_PSR, self).__init__(self.meta) - + def _getHeaderfileContent(self): head_obj = self.getFileObj(self.header_filename) head = head_obj.read().decode('utf-8') head = list(head.split('\n')) head_obj.close() return head - + def _parseFacter_m(self): try: facter_file = self.findfiles('facter_m.dat')[0] @@ -1304,35 +1304,35 @@ def _parseFacter_m(self): facter_m = list(facter_m.split('\n')) facter_obj.close() return facter_m - + @property def header_filename(self): return self.findfiles(self.pattern)[0] - + def scanMetadata(self): ################################################################################################################ # read header (HDR) file header = self._getHeaderfileContent() header = [head.replace(" ", "") for head in header] - + # read summary text file facter_m = self._parseFacter_m() facter_m = [fact.replace(" ", "") for fact in facter_m] - + meta = {} - + # read polarizations from image file names meta['polarizations'] = [re.search('[HV]{2}', os.path.basename(x)).group(0) for x in self.findfiles('^sar.')] meta['product'] = header[3] ################################################################################################################ # read start and stop time --> TODO: in what format is the start and stop time? 
- + try: start_time = facter_m[168].split('.')[0].zfill(2) + facter_m[168].split('.')[1][:4] stop_time = facter_m[170].split('.')[0].zfill(2) + facter_m[170].split('.')[1][:4] except (AttributeError): raise IndexError('start and stop time stamps cannot be extracted; see file facter_m.dat') - + meta['start'] = str(header[6]) # +'T'+start_time meta['stop'] = str(header[6]) # +'T'+stop_time ################################################################################################################ @@ -1343,42 +1343,42 @@ def scanMetadata(self): meta['acquisition_mode'] = header[12] # ############################################################################################################### # read map projection data - + lat = list(map(float, [header[33], header[35], header[37], header[39]])) lon = list(map(float, [header[34], header[36], header[38], header[40]])) - + meta['corners'] = {'xmin': min(lon), 'xmax': max(lon), 'ymin': min(lat), 'ymax': max(lat)} - + meta['projection'] = crsConvert(4918, 'wkt') # EPSG: 4918: ITRF97, GRS80 ################################################################################################################ # read data set summary record - + orbitsPerCycle = int(207) - + meta['orbitNumber_rel'] = int(header[7]) meta['cycleNumber'] = header[5] meta['frameNumber'] = '' meta['orbitNumber_abs'] = int(orbitsPerCycle * (meta['cycleNumber'] - 1) + meta['orbitNumber_rel']) - + meta['lines'] = int(float(facter_m[51])) meta['samples'] = int(float(facter_m[50])) meta['incidence'] = float(facter_m[119]) meta['proc_facility'] = header[73] - + meta['spacing'] = (float(header[51]), float(header[52])) - + meta['orbit'] = header[9] ################################################################################################################ # read radiometric data record - + meta['k_dB'] = float(header[64]) - + return meta - + def unpack(self, directory, overwrite=False, exist_ok=False): outdir = os.path.join(directory, os.path.basename(self.file).replace('LED-', '')) self._unpack(outdir, overwrite=overwrite, exist_ok=exist_ok) - + def getCorners(self): if 'corners' not in self.meta.keys(): lat = [y for x, y in self.meta.items() if 'Latitude' in x] @@ -1387,27 +1387,27 @@ def getCorners(self): img_filename = self.findfiles('IMG')[0] img_obj = self.getFileObj(img_filename) imageFileDescriptor = img_obj.read(720) - + lineRecordLength = int(imageFileDescriptor[186:192]) # bytes per line + 412 numberOfRecords = int(imageFileDescriptor[180:186]) - + signalDataDescriptor1 = img_obj.read(412) img_obj.seek(720 + lineRecordLength * (numberOfRecords - 1)) signalDataDescriptor2 = img_obj.read() - + img_obj.close() - + lat = [signalDataDescriptor1[192:196], signalDataDescriptor1[200:204], signalDataDescriptor2[192:196], signalDataDescriptor2[200:204]] - + lon = [signalDataDescriptor1[204:208], signalDataDescriptor1[212:216], signalDataDescriptor2[204:208], signalDataDescriptor2[212:216]] - + lat = [struct.unpack('>i', x)[0] / 1000000. for x in lat] lon = [struct.unpack('>i', x)[0] / 1000000. for x in lon] - + self.meta['corners'] = {'xmin': min(lon), 'xmax': max(lon), 'ymin': min(lat), 'ymax': max(lat)} - + return self.meta['corners'] @@ -1420,9 +1420,9 @@ class ESA(ID): * ERS1 * ERS2 """ - + def __init__(self, scene): - + self.pattern = r'(?P(?:SAR|ASA)_(?:IM(?:S|P|G|M|_)|AP(?:S|P|G|M|_)|WV(?:I|S|W|_)|WS(?:M|S|_))_[012B][CP])' \ r'(?P[A-Z])' \ r'(?P[A-Z\-]{3})' \ @@ -1436,60 +1436,60 @@ def __init__(self, scene): r'(?P[0-9]{4,})\.' 
\ r'(?P[EN][12])' \ r'(?P(?:\.zip|\.tar\.gz|))$' - + self.pattern_pid = r'(?P(?:SAR|ASA))_' \ r'(?P(?:IM(?:S|P|G|M|_)|AP(?:S|P|G|M|_)|WV(?:I|S|W|_)|WS(?:M|S|_)))_' \ r'(?P[012B][CP])' - + self.scene = os.path.realpath(scene) - + self.examine() - + match = re.match(re.compile(self.pattern), os.path.basename(self.file)) match2 = re.match(re.compile(self.pattern_pid), match.group('product_id')) - + if re.search('IM__0', match.group('product_id')): raise RuntimeError('product level 0 not supported (yet)') - + self.meta = self.scanMetadata() self.meta['acquisition_mode'] = match2.group('image_mode') self.meta['product'] = 'SLC' if self.meta['acquisition_mode'] in ['IMS', 'APS', 'WSS'] else 'PRI' self.meta['frameNumber'] = int(match.group('counter')) - + # register the standardized meta attributes as object attributes super(ESA, self).__init__(self.meta) - + def getCorners(self): lon = [self.meta[x] for x in self.meta if re.search('LONG', x)] lat = [self.meta[x] for x in self.meta if re.search('LAT', x)] return {'xmin': min(lon), 'xmax': max(lon), 'ymin': min(lat), 'ymax': max(lat)} - + def scanMetadata(self): meta = self.gdalinfo() - + if meta['sensor'] == 'ASAR': meta['polarizations'] = sorted([y.replace('/', '') for x, y in meta.items() if 'TX_RX_POLAR' in x and len(y) == 3]) elif meta['sensor'] in ['ERS1', 'ERS2']: meta['polarizations'] = ['VV'] - + meta['orbit'] = meta['SPH_PASS'][0] meta['start'] = meta['MPH_SENSING_START'] meta['stop'] = meta['MPH_SENSING_STOP'] meta['spacing'] = (meta['SPH_RANGE_SPACING'], meta['SPH_AZIMUTH_SPACING']) meta['looks'] = (meta['SPH_RANGE_LOOKS'], meta['SPH_AZIMUTH_LOOKS']) - + meta['orbitNumber_abs'] = meta['MPH_ABS_ORBIT'] meta['orbitNumber_rel'] = meta['MPH_REL_ORBIT'] meta['cycleNumber'] = meta['MPH_CYCLE'] return meta - + def unpack(self, directory, overwrite=False, exist_ok=False): base_file = os.path.basename(self.file).strip(r'\.zip|\.tar(?:\.gz|)') base_dir = os.path.basename(directory.strip('/')) - + outdir = directory if base_file == base_dir else os.path.join(directory, base_file) - + self._unpack(outdir, overwrite=overwrite, exist_ok=exist_ok) @@ -1505,11 +1505,11 @@ class SAFE(ID): * S1-RS-MDA-52-7443 Sentinel-1 IPF Auxiliary Product Specification * MPC-0243 Masking "No-value" Pixels on GRD Products generated by the Sentinel-1 ESA IPF """ - + def __init__(self, scene): - + self.scene = os.path.realpath(scene) - + self.pattern = r'^(?PS1[AB])_' \ r'(?PS1|S2|S3|S4|S5|S6|IW|EW|WV|EN|N1|N2|N3|N4|N5|N6|IM)_' \ r'(?PSLC|GRD|OCN)' \ @@ -1523,7 +1523,7 @@ def __init__(self, scene): r'(?P[0-9A-F]{6})_' \ r'(?P[0-9A-F]{4})' \ r'\.SAFE$' - + self.pattern_ds = r'^s1[ab]-' \ r'(?Ps[1-6]|iw[1-3]?|ew[1-5]?|wv[1-2]|n[1-6])-' \ r'(?Pslc|grd|ocn)-' \ @@ -1533,21 +1533,21 @@ def __init__(self, scene): r'(?:[0-9]{6})-(?:[0-9a-f]{6})-' \ r'(?P[0-9]{3})' \ r'\.xml$' - + self.examine(include_folders=True) - + if not re.match(re.compile(self.pattern), os.path.basename(self.file)): raise RuntimeError('folder does not match S1 scene naming convention') - + # scan the metadata XML files file and add selected attributes to a meta dictionary self.meta = self.scanMetadata() self.meta['projection'] = crsConvert(4326, 'wkt') - + # register the standardized meta attributes as object attributes super(SAFE, self).__init__(self.meta) - + self.gammafiles = {'slc': [], 'pri': [], 'grd': []} - + def removeGRDBorderNoise(self, method='pyroSAR'): """ mask out Sentinel-1 image border noise. 
@@ -1568,13 +1568,13 @@ def removeGRDBorderNoise(self, method='pyroSAR'): :func:`~pyroSAR.S1.removeGRDBorderNoise` """ S1.removeGRDBorderNoise(self, method=method) - + def getCorners(self): coordinates = self.meta['coordinates'] lat = [x[1] for x in coordinates] lon = [x[0] for x in coordinates] return {'xmin': min(lon), 'xmax': max(lon), 'ymin': min(lat), 'ymax': max(lat)} - + def getOSV(self, osvdir=None, osvType='POE', returnMatch=False, useLocal=True, timeout=20, url_option=1): """ download Orbit State Vector files for the scene @@ -1609,22 +1609,22 @@ def getOSV(self, osvdir=None, osvType='POE', returnMatch=False, useLocal=True, t :class:`pyroSAR.S1.OSV` """ date = datetime.strptime(self.start, '%Y%m%dT%H%M%S') - + # create a time span with one day before and one after the acquisition before = (date - timedelta(days=1)).strftime('%Y%m%dT%H%M%S') after = (date + timedelta(days=1)).strftime('%Y%m%dT%H%M%S') - + if useLocal: with S1.OSV(osvdir, timeout=timeout) as osv: match = osv.match(sensor=self.sensor, timestamp=self.start, osvtype=osvType) if match is not None: return match if returnMatch else None - + if osvType in ['POE', 'RES']: with S1.OSV(osvdir, timeout=timeout) as osv: files = osv.catch(sensor=self.sensor, osvtype=osvType, start=before, stop=after, url_option=url_option) - + elif sorted(osvType) == ['POE', 'RES']: with S1.OSV(osvdir, timeout=timeout) as osv: files = osv.catch(sensor=self.sensor, osvtype='POE', start=before, stop=after, @@ -1634,14 +1634,14 @@ def getOSV(self, osvdir=None, osvType='POE', returnMatch=False, useLocal=True, t url_option=url_option) else: raise TypeError("osvType must either be 'POE', 'RES' or a list of both") - + osv.retrieve(files) - + if returnMatch: with S1.OSV(osvdir, timeout=timeout) as osv: match = osv.match(sensor=self.sensor, timestamp=self.start, osvtype=osvType) return match - + def quicklook(self, outname, format='kmz'): if format != 'kmz': raise RuntimeError('currently only kmz is supported as format') @@ -1654,13 +1654,13 @@ def quicklook(self, outname, format='kmz'): out.writestr('doc.kml', data=kml) with self.getFileObj(png_name) as png_in: out.writestr('quick-look.png', data=png_in.getvalue()) - + def scanMetadata(self): with self.getFileObj(self.findfiles('manifest.safe')[0]) as input: manifest = input.getvalue() namespaces = getNamespaces(manifest) tree = ET.fromstring(manifest) - + meta = dict() meta['acquisition_mode'] = tree.find('.//s1sarl1:mode', namespaces).text meta['acquisition_time'] = dict( @@ -1669,12 +1669,12 @@ def scanMetadata(self): meta['coordinates'] = [tuple([float(y) for y in x.split(',')][::-1]) for x in tree.find('.//gml:coordinates', namespaces).text.split()] meta['orbit'] = tree.find('.//s1:pass', namespaces).text[0] - + meta['orbitNumber_abs'] = int(tree.find('.//safe:orbitNumber[@type="start"]', namespaces).text) meta['orbitNumber_rel'] = int(tree.find('.//safe:relativeOrbitNumber[@type="start"]', namespaces).text) meta['cycleNumber'] = int(tree.find('.//safe:cycleNumber', namespaces).text) meta['frameNumber'] = int(tree.find('.//s1sarl1:missionDataTakeID', namespaces).text) - + meta['orbitNumbers_abs'] = dict( [(x, int(tree.find('.//safe:orbitNumber[@type="{0}"]'.format(x), namespaces).text)) for x in ['start', 'stop']]) @@ -1689,11 +1689,11 @@ def scanMetadata(self): meta['IPF_version'] = float(tree.find('.//safe:software', namespaces).attrib['version']) meta['sliceNumber'] = int(tree.find('.//s1sarl1:sliceNumber', namespaces).text) meta['totalSlices'] = int(tree.find('.//s1sarl1:totalSlices', 
namespaces).text) - + annotations = self.findfiles(self.pattern_ds) with self.getFileObj(annotations[0]) as ann_xml: ann_tree = ET.fromstring(ann_xml.read()) - + meta['spacing'] = tuple([float(ann_tree.find('.//{}PixelSpacing'.format(dim)).text) for dim in ['range', 'azimuth']]) meta['samples'] = int(ann_tree.find('.//imageAnnotation/imageInformation/numberOfSamples').text) @@ -1702,9 +1702,9 @@ def scanMetadata(self): meta['heading'] = heading if heading > 0 else heading + 360 meta['incidence'] = float(ann_tree.find('.//incidenceAngleMidSwath').text) meta['image_geometry'] = ann_tree.find('.//projection').text.replace(' ', '_').upper() - + return meta - + def unpack(self, directory, overwrite=False, exist_ok=False): outdir = os.path.join(directory, os.path.basename(self.file)) self._unpack(outdir, overwrite=overwrite, exist_ok=exist_ok) @@ -1745,10 +1745,10 @@ class TSX(ID): * GEC: Geocoded Ellipsoid Corrected * EEC: Enhanced Ellipsoid Corrected """ - + def __init__(self, scene): self.scene = os.path.realpath(scene) - + self.pattern = r'^(?PT[DS]X1)_SAR__' \ r'(?PSSC|MGD|GEC|EEC)_' \ r'(?P____|SE__|RE__|MON1|MON2|BTX1|BRX2)_' \ @@ -1757,18 +1757,18 @@ def __init__(self, scene): r'(?:SRA|DRA)_' \ r'(?P[0-9]{8}T[0-9]{6})_' \ r'(?P[0-9]{8}T[0-9]{6})(?:\.xml|)$' - + self.pattern_ds = r'^IMAGE_(?PHH|HV|VH|VV)_(?:SRA|FWD|AFT)_(?P[^\.]+)\.(cos|tif)$' self.examine(include_folders=False) - + if not re.match(re.compile(self.pattern), os.path.basename(self.file)): raise RuntimeError('folder does not match TSX scene naming convention') - + self.meta = self.scanMetadata() self.meta['projection'] = crsConvert(4326, 'wkt') - + super(TSX, self).__init__(self.meta) - + def getCorners(self): geocs = self.getFileObj(self.findfiles('GEOREF.xml')[0]).getvalue() tree = ET.fromstring(geocs) @@ -1776,7 +1776,7 @@ def getCorners(self): lat = [float(x.find('lat').text) for x in pts] lon = [float(x.find('lon').text) for x in pts] return {'xmin': min(lon), 'xmax': max(lon), 'ymin': min(lat), 'ymax': max(lat)} - + def scanMetadata(self): annotation = self.getFileObj(self.file).getvalue() namespaces = getNamespaces(annotation) @@ -1787,12 +1787,12 @@ def scanMetadata(self): meta['orbit'] = tree.find('.//missionInfo/orbitDirection', namespaces).text[0] meta['polarizations'] = [x.text for x in tree.findall('.//acquisitionInfo/polarisationList/polLayer', namespaces)] - + meta['orbitNumber_abs'] = int(tree.find('.//missionInfo/absOrbit', namespaces).text) meta['orbitNumber_rel'] = int(tree.find('.//missionInfo/relOrbit', namespaces).text) meta['cycleNumber'] = int(tree.find('.//missionInfo/orbitCycle', namespaces).text) meta['frameNumber'] = int(tree.find('.//inputData/uniqueDataTakeID', namespaces).text) - + meta['acquisition_mode'] = tree.find('.//acquisitionInfo/imagingMode', namespaces).text meta['start'] = self.parse_date(tree.find('.//sceneInfo/start/timeUTC', namespaces).text) meta['stop'] = self.parse_date(tree.find('.//sceneInfo/stop/timeUTC', namespaces).text) @@ -1806,7 +1806,7 @@ def scanMetadata(self): meta['looks'] = (rlks, azlks) meta['incidence'] = float(tree.find('.//sceneInfo/sceneCenterCoord/incidenceAngle', namespaces).text) return meta - + def unpack(self, directory, overwrite=False, exist_ok=False): match = self.findfiles(self.pattern, True) header = [x for x in match if not x.endswith('xml') and 'iif' not in x][0].replace(self.scene, '').strip('/') @@ -1890,7 +1890,7 @@ class Archive(object): >>> with Archive(dbfile, driver='postgres', user='user', password='password', host='host', port=5432) as 
archive: >>> archive.insert(scenes_s1) """ - + def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', password='1234', host='localhost', port=5432, cleanup=True, add_geometry=False): # check for driver, if postgres then check if server is reachable @@ -1904,7 +1904,7 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', self.driver = 'postgresql' if not self.__check_host(host, port): sys.exit('Server not found!') - + # create dict, with which a URL to the db is created if self.driver == 'sqlite': self.url_dict = {'drivername': self.driver, @@ -1917,12 +1917,12 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', 'host': host, 'port': port, 'database': dbfile} - + # create engine, containing URL and driver log.debug('starting DB engine for {}'.format(URL(**self.url_dict))) self.url = URL(**self.url_dict) self.engine = create_engine(self.url, echo=False) - + # call to ____load_spatialite() for sqlite, to load mod_spatialite via event handler listen() if self.driver == 'sqlite': log.debug('loading spatialite extension') @@ -1935,7 +1935,7 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', except exc.OperationalError: raise RuntimeError('could not load spatialite extension') - #print(sql_inspect(self.engine).has_table('data')) + # print(sql_inspect(self.engine).has_table('data')) # if database is new, (create postgres-db and) enable spatial extension if not database_exists(self.engine.url): @@ -1977,7 +1977,7 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', Column('vh', Integer), Column('bbox', Geometry(geometry_type='POLYGON', management=True, srid=4326))) - if add_geometry and not sql_inspect(self.engine).has_table('data') or 'geometry' in self.get_colnames(): + if (add_geometry and not sql_inspect(self.engine).has_table('data')) or 'geometry' in self.get_colnames(): # add geometry to schema if new database is created, or database had it enabled once self.data_schema.append_column(Column('geometry', Geometry(geometry_type='POLYGON', management=True, srid=4326))) @@ -1991,38 +1991,31 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', self.data_schema.append_column(Column(key, String)) else: log.info('Value in dict custom_fields must be "integer" or "string"!') - + # schema for duplicates self.duplicates_schema = Table('duplicates', self.meta, Column('outname_base', String, primary_key=True), Column('scene', String, primary_key=True)) - + # create tables if not existing if not sql_inspect(self.engine).has_table('data'): log.debug("creating DB table 'data'") self.data_schema.create(self.engine) - - columns_data = sql_inspect(self.engine).get_columns('data') - # for col in columns_data: - # print(col['name']) - - # elif sql_inspect(self.engine).has_table('data') and add_geometry and 'geometry' not in self.get_colnames('data'): - # print(self.get_colnames('data')) - # # log.info - # print("add_geometry has been enabled after database is already created, " - # "if you want to update table 'data' to include this column, " - # "run Archive.update_geometry_field()!") + elif add_geometry and 'geometry' not in self.get_colnames('data'): + log.info("add_geometry has been enabled after database is already created, " + "if you want to update table 'data' to include this column, " + "run Archive.update_geometry_field() once!") if not sql_inspect(self.engine).has_table('duplicates'): log.debug("creating DB table 'duplicates'") 
self.duplicates_schema.create(self.engine) - + # reflect tables from (by now) existing db, make some variables available within self self.Base = automap_base(metadata=self.meta) self.Base.prepare(self.engine, reflect=True) self.Data = self.Base.classes.data self.Duplicates = self.Base.classes.duplicates self.dbfile = dbfile - + if cleanup: log.info('checking for missing scenes') self.cleanup() @@ -2030,7 +2023,7 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres', def update_geometry_field(self): """ - Add the geometry as column to an existing database, fill missing values + Add the geometry as column to an existing database, then re-ingests all scenes without geometry """ if 'geometry' not in self.get_colnames(): self.conn.execute(f"SELECT AddGeometryColumn('data', 'geometry', 4326, 'POLYGON')") @@ -2047,8 +2040,6 @@ def update_geometry_field(self): self.conn.execute(delete_statement) self.insert(to_insert) - - def add_tables(self, tables): """ Add tables to the database per :class:`sqlalchemy.schema.Table` @@ -2080,7 +2071,7 @@ def add_tables(self, tables): log.info('created table(s) {}.'.format(', '.join(created))) self.Base = automap_base(metadata=self.meta) self.Base.prepare(self.engine, reflect=True) - + @staticmethod def __load_spatialite(dbapi_conn, connection_record): """ @@ -2109,7 +2100,7 @@ def __load_spatialite(dbapi_conn, connection_record): continue else: dbapi_conn.load_extension('mod_spatialite') - + def __prepare_insertion(self, scene): """ read scene metadata and parse a string for inserting it into the database @@ -2147,9 +2138,9 @@ def __prepare_insertion(self, scene): raise AttributeError('could not find attribute {}'.format(attribute)) value = attr() if inspect.ismethod(attr) else attr setattr(insertion, str(attribute), value) - + return insertion # return the Data object - + def __select_missing(self, table): """ @@ -2167,7 +2158,7 @@ def __select_missing(self, table): raise ValueError("parameter 'table' must either be 'data' or 'duplicates'") files = [self.encode(x[0]) for x in scenes] return [x for x in files if not os.path.isfile(x)] - + def insert(self, scene_in, pbar=False, test=False): """ Insert one or many scenes into the database @@ -2182,13 +2173,13 @@ def insert(self, scene_in, pbar=False, test=False): should the insertion only be tested or directly be committed to the database? 
""" length = len(scene_in) if isinstance(scene_in, list) else 1 - + if isinstance(scene_in, (ID, str)): scene_in = [scene_in] if not isinstance(scene_in, list): raise RuntimeError('scene_in must either be a string pointing to a file, a pyroSAR.ID object ' 'or a list containing several of either') - + log.info('filtering scenes by name') scenes = self.filter_scenelist(scene_in) if len(scenes) == 0: @@ -2196,15 +2187,15 @@ def insert(self, scene_in, pbar=False, test=False): return log.info('identifying scenes and extracting metadata') scenes = identify_many(scenes, pbar=pbar) - + if len(scenes) == 0: log.info('all scenes are already registered') return - + counter_regulars = 0 counter_duplicates = 0 list_duplicates = [] - + message = 'inserting {0} scene{1} into database' log.info(message.format(len(scenes), '' if len(scenes) == 1 else 's')) log.debug('testing changes in temporary database') @@ -2229,16 +2220,16 @@ def insert(self, scene_in, pbar=False, test=False): log.debug('duplicate: {}'.format(id.scene)) else: list_duplicates.append(id.outname_base()) - + if progress is not None: progress.update(i + 1) basenames.append(basename) - + if progress is not None: progress.finish() - + session.add_all(insertions) - + if not test: log.debug('committing transactions to permanent database') # commit changes of the session @@ -2247,12 +2238,12 @@ def insert(self, scene_in, pbar=False, test=False): log.info('rolling back temporary database changes') # roll back changes of the session session.rollback() - + message = '{0} scene{1} registered regularly' log.info(message.format(counter_regulars, '' if counter_regulars == 1 else 's')) message = '{0} duplicate{1} registered' log.info(message.format(counter_duplicates, '' if counter_duplicates == 1 else 's')) - + def is_registered(self, scene): """ Simple check if a scene is already registered in the database. @@ -2280,7 +2271,7 @@ def is_registered(self, scene): if exists_duplicates: in_dup = len(exists_duplicates) != 0 return in_data or in_dup - + def __is_registered_in_duplicates(self, scene): """ Simple check if a scene is already registered in the database. 
@@ -2303,7 +2294,7 @@ def __is_registered_in_duplicates(self, scene): if exists_duplicates: in_dup = len(exists_duplicates) != 0 return in_dup - + def cleanup(self): """ Remove all scenes from the database, which are no longer stored in their registered location @@ -2316,14 +2307,14 @@ def cleanup(self): for scene in missing: log.info('Removing missing scene from database tables: {}'.format(scene)) self.drop_element(scene, with_duplicates=True) - + @staticmethod def encode(string, encoding='utf-8'): if not isinstance(string, str): return string.encode(encoding) else: return string - + def export2shp(self, path, table='data'): """ export the database to a shapefile @@ -2344,21 +2335,21 @@ def export2shp(self, path, table='data'): if table not in ['data', 'duplicates']: log.warning('Only data and duplicates can be exported!') return - + # add the .shp extension if missing if not path.endswith('.shp'): path += '.shp' - + # creates folder if not present, adds .shp if not within the path dirname = os.path.dirname(path) os.makedirs(dirname, exist_ok=True) - + # uses spatialist.ogr2ogr to write shps with given path (or db connection) if self.driver == 'sqlite': # ogr2ogr(self.dbfile, path, options={'format': 'ESRI Shapefile'}) subprocess.call(['ogr2ogr', '-f', 'ESRI Shapefile', path, self.dbfile, table]) - + if self.driver == 'postgresql': db_connection = """PG:host={0} port={1} user={2} dbname={3} password={4} active_schema=public""".format(self.url_dict['host'], @@ -2369,7 +2360,7 @@ def export2shp(self, path, table='data'): # ogr2ogr(db_connection, path, options={'format': 'ESRI Shapefile'}) subprocess.call(['ogr2ogr', '-f', 'ESRI Shapefile', path, db_connection, table]) - + def filter_scenelist(self, scenelist): """ Filter a list of scenes by file names already registered in the database. @@ -2388,7 +2379,7 @@ def filter_scenelist(self, scenelist): for item in scenelist: if not isinstance(item, (ID, str)): raise TypeError("items in scenelist must be of type 'str' or 'pyroSAR.ID'") - + # ORM query, get all scenes locations scenes_data = self.Session().query(self.Data.scene) registered = [os.path.basename(self.encode(x[0])) for x in scenes_data] @@ -2397,7 +2388,7 @@ def filter_scenelist(self, scenelist): names = [item.scene if isinstance(item, ID) else item for item in scenelist] filtered = [x for x, y in zip(scenelist, names) if os.path.basename(y) not in registered + duplicates] return filtered - + def get_colnames(self, table='data'): """ Return the names of all columns of a table. 
@@ -2410,9 +2401,9 @@ def get_colnames(self, table='data'): # get all columns of one table, but shows geometry columns not correctly dicts = sql_inspect(self.engine).get_columns(table) col_names = [i['name'] for i in dicts] - + return sorted([self.encode(x) for x in col_names]) - + def get_tablenames(self, return_all=False): """ Return the names of all tables in the database @@ -2447,7 +2438,7 @@ def get_tablenames(self, return_all=False): if i not in all_tables and 'idx_' not in i: ret.append(i) return ret - + def get_unique_directories(self): """ Get a list of directories containing registered scenes @@ -2461,7 +2452,7 @@ def get_unique_directories(self): scenes = self.Session().query(self.Data.scene) registered = [os.path.dirname(self.encode(x[0])) for x in scenes] return list(set(registered)) - + def import_outdated(self, dbfile): """ import an older data base in csv format @@ -2484,7 +2475,7 @@ def import_outdated(self, dbfile): for row in reader: scenes.append(row['scene']) self.insert(scenes) - + def move(self, scenelist, directory, pbar=False): """ Move a list of files while keeping the database entries up to date. @@ -2513,7 +2504,7 @@ def move(self, scenelist, directory, pbar=False): progress = pb.ProgressBar(max_value=len(scenelist)).start() else: progress = None - + for i, scene in enumerate(scenelist): new = os.path.join(directory, os.path.basename(scene)) if os.path.isfile(new): @@ -2542,12 +2533,12 @@ def move(self, scenelist, directory, pbar=False): self.conn.execute('''UPDATE {0} SET scene= '{1}' WHERE scene='{2}' '''.format(table, new, scene)) if progress is not None: progress.finish() - + if len(failed) > 0: log.info('The following scenes could not be moved:\n{}'.format('\n'.join(failed))) if len(double) > 0: log.info('The following scenes already exist at the target location:\n{}'.format('\n'.join(double))) - + def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None, recursive=False, polarizations=None, use_geometry=False, **args): """ @@ -2610,12 +2601,12 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None, vals.append(maxdate) else: log.info('WARNING: argument maxdate is ignored, must be in format YYYYmmddTHHMMSS') - + if polarizations: for pol in polarizations: if pol in ['HH', 'VV', 'HV', 'VH']: arg_format.append('{}=1'.format(pol.lower())) - + if vectorobject: if isinstance(vectorobject, Vector): vectorobject.reproject(4326) @@ -2636,16 +2627,16 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None, else: log.info('WARNING: argument vectorobject is ignored, must be of type spatialist.vector.Vector') - + query = '''SELECT scene, outname_base FROM data WHERE {}'''.format(' AND '.join(arg_format)) # the query gets assembled stepwise here for val in vals: query = query.replace('?', ''' '{0}' ''', 1).format(val) log.debug(query) - + # core SQL execution query_rs = self.conn.execute(query) - + if processdir and os.path.isdir(processdir): scenes = [x for x in query_rs if len(finder(processdir, [x[1]], regex=True, recursive=recursive)) == 0] @@ -2654,9 +2645,9 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None, ret = [] for x in scenes: ret.append(self.encode(x[0])) - + return ret - + def select_duplicates(self, outname_base=None, scene=None, value='id'): """ Select scenes from the duplicates table. 
In case both `outname_base` and `scene` are set to None all scenes in @@ -2682,7 +2673,7 @@ def select_duplicates(self, outname_base=None, scene=None, value='id'): key = 1 else: raise ValueError("argument 'value' must be either 0 or 1") - + if not outname_base and not scene: # core SQL execution scenes = self.conn.execute('SELECT * from duplicates') @@ -2700,13 +2691,13 @@ def select_duplicates(self, outname_base=None, scene=None, value='id'): query = query.replace('?', ''' '{0}' ''', 1).format(a) # core SQL execution scenes = self.conn.execute(query) - + ret = [] for x in scenes: ret.append(self.encode(x[key])) - + return ret - + @property def size(self): """ @@ -2723,10 +2714,10 @@ def size(self): r2 = session.query(self.Duplicates.outname_base).count() session.close() return r1, r2 - + def __enter__(self): return self - + def close(self): """ close the database connection @@ -2735,10 +2726,10 @@ def close(self): self.conn.close() self.engine.dispose() gc.collect(generation=2) # this was added as a fix for win PermissionError when deleting sqlite.db files. - + def __exit__(self, exc_type, exc_val, exc_tb): self.close() - + def drop_element(self, scene, with_duplicates=False): """ Drop a scene from the data table. @@ -2761,45 +2752,45 @@ def drop_element(self, scene, with_duplicates=False): for rowproxy in self.conn.execute(search): entry_data_outname_base.append((rowproxy[12])) # log.info(entry_data_outname_base) - + # delete entry in data table delete_statement = self.data_schema.delete().where(self.data_schema.c.scene == scene) self.conn.execute(delete_statement) - + return_sentence = 'Entry with scene-id: \n{} \nwas dropped from data'.format(scene) - + # with_duplicates == True, delete entry from duplicates if with_duplicates: delete_statement_dup = self.duplicates_schema.delete().where( self.duplicates_schema.c.outname_base == entry_data_outname_base[0]) self.conn.execute(delete_statement_dup) - + log.info(return_sentence + ' and duplicates!'.format(scene)) return - + # else select scene info matching outname_base from duplicates select_in_duplicates_statement = self.duplicates_schema.select().where( self.duplicates_schema.c.outname_base == entry_data_outname_base[0]) entry_duplicates_scene = [] for rowproxy in self.conn.execute(select_in_duplicates_statement): entry_duplicates_scene.append((rowproxy[1])) - + # check if there is a duplicate if len(entry_duplicates_scene) == 1: # remove entry from duplicates delete_statement_dup = self.duplicates_schema.delete().where( self.duplicates_schema.c.outname_base == entry_data_outname_base[0]) self.conn.execute(delete_statement_dup) - + # insert scene from duplicates into data self.insert(entry_duplicates_scene[0]) - + return_sentence += ' and entry with outname_base \n{} \nand scene \n{} \n' \ 'was moved from duplicates into data table'.format( entry_data_outname_base[0], entry_duplicates_scene[0]) - + log.info(return_sentence + '!') - + def drop_table(self, table): """ Drop a table from the database. 
@@ -2828,7 +2819,7 @@ def drop_table(self, table):
         self.meta = MetaData(self.engine)
         self.Base = automap_base(metadata=self.meta)
         self.Base.prepare(self.engine, reflect=True)
-    
+
     @staticmethod
     def __is_open(ip, port):
         """
@@ -2857,7 +2848,7 @@
             return False
         finally:
             s.close()
-    
+
     def __check_host(self, ip, port):
         """
         Calls __is_open() on ip and port, from Ben Curtis (github: Fmstrat)
@@ -2934,22 +2925,22 @@ def getFileObj(scene, filename):
         a file object
     """
     membername = filename.replace(scene, '').strip(r'\/')
-    
+
     if not os.path.exists(scene):
         raise RuntimeError('scene does not exist')
-    
+
     if os.path.isdir(scene):
         obj = BytesIO()
         with open(filename, 'rb') as infile:
             obj.write(infile.read())
         obj.seek(0)
-    
+
     elif zf.is_zipfile(scene):
         obj = BytesIO()
         with zf.ZipFile(scene, 'r') as zip:
             obj.write(zip.open(membername).read())
         obj.seek(0)
-    
+
     elif tf.is_tarfile(scene):
         obj = BytesIO()
         tar = tf.open(scene, 'r:gz')

From 819685d47351dd674b7b83e5784c8e953d991a00 Mon Sep 17 00:00:00 2001
From: MarkusZehner
Date: Fri, 28 Jan 2022 17:13:40 +0100
Subject: [PATCH 07/12] postgres was complaining about me asking for colnames
 of a non-existing table

---
 pyroSAR/drivers.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py
index 1ae63268..a97c31b8 100644
--- a/pyroSAR/drivers.py
+++ b/pyroSAR/drivers.py
@@ -1977,7 +1977,9 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres',
                                   Column('vh', Integer),
                                   Column('bbox', Geometry(geometry_type='POLYGON', management=True, srid=4326)))
 
-        if (add_geometry and not sql_inspect(self.engine).has_table('data')) or 'geometry' in self.get_colnames():
+        colnames = self.get_colnames() if sql_inspect(self.engine).has_table('data') else []
+
+        if (add_geometry and not sql_inspect(self.engine).has_table('data')) or 'geometry' in colnames:
             # add geometry to schema if new database is created, or database had it enabled once
             self.data_schema.append_column(Column('geometry', Geometry(geometry_type='POLYGON', management=True, srid=4326)))

From 74af25d4d3a0227bbfbf622e0e179d0b4ded58cb Mon Sep 17 00:00:00 2001
From: MarkusZehner
Date: Mon, 14 Feb 2022 16:07:05 +0100
Subject: [PATCH 08/12] fixed update geometry method, added parameters that
 enable some methods to target tables other than 'data'

---
 pyroSAR/drivers.py    | 112 ++++++++++++++++++++++++++++++------------
 tests/test_drivers.py |   3 +-
 2 files changed, 83 insertions(+), 32 deletions(-)

diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py
index a97c31b8..68b8f417 100644
--- a/pyroSAR/drivers.py
+++ b/pyroSAR/drivers.py
@@ -1935,8 +1935,6 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres',
         except exc.OperationalError:
             raise RuntimeError('could not load spatialite extension')
 
-        # print(sql_inspect(self.engine).has_table('data'))
-
         # if database is new, (create postgres-db and) enable spatial extension
         if not database_exists(self.engine.url):
@@ -2025,22 +2023,64 @@ def __init__(self, dbfile, custom_fields=None, postgres=False, user='postgres',
 
     def update_geometry_field(self):
         """
-        Add the geometry as column to an existing database, then re-ingests all scenes without geometry
+        Add the geometry column to an existing database, then re-ingest all scenes so that the column is populated
         """
         if 'geometry' not in self.get_colnames():
-            self.conn.execute(f"SELECT AddGeometryColumn('data', 'geometry', 4326, 'POLYGON')")
-            self.Session = 
sessionmaker(bind=self.engine) - self.meta = MetaData(self.engine) - self.Base = automap_base(metadata=self.meta) - self.Base.prepare(self.engine, reflect=True) - self.Data = self.Base.classes.data - - temp_data = self.Session().query(self.Data.scene, self.Data.outname_base).filter(self.Data.geometry.is_(None)) - to_insert = [] - for entry in temp_data: - delete_statement = self.data_schema.delete().where(self.data_schema.c.scene == entry[0]) - self.conn.execute(delete_statement) - self.insert(to_insert) + # get all scenes from data table + temp_data = self.Session().query(self.Data.scene) + to_insert = [] + # save in new list + for entry in temp_data: + print(entry) + to_insert.append(entry[0]) + # remove old table + self.drop_table('data') + + # define new table + temp_table_schema = Table('data', self.meta, + Column('sensor', String), + Column('orbit', String), + Column('orbitNumber_abs', Integer), + Column('orbitNumber_rel', Integer), + Column('cycleNumber', Integer), + Column('frameNumber', Integer), + Column('acquisition_mode', String), + Column('start', String), + Column('stop', String), + Column('product', String), + Column('samples', Integer), + Column('lines', Integer), + Column('outname_base', String, primary_key=True), + Column('scene', String), + Column('hh', Integer), + Column('vv', Integer), + Column('hv', Integer), + Column('vh', Integer), + Column('bbox', Geometry(geometry_type='POLYGON', management=True, srid=4326)), + Column('geometry', Geometry(geometry_type='POLYGON', management=True, srid=4326))) + + # create table + log.debug("creating DB table 'data' with geometry field") + temp_table_schema.create(self.engine) + + # update base + self.Base = automap_base(metadata=self.meta) + self.Base.prepare(self.engine, reflect=True) + self.Data = self.Base.classes.data + # insert previous data in new table + self.insert(to_insert) + + + def get_class_by_tablename(self, tablename): + """Return class reference mapped to table. + adapted from https://stackoverflow.com/questions/11668355/sqlalchemy-get-model-from-table-name-this-may-imply-appending-some-function-to + + :param tablename: String with name of table. + :return: Class reference or None. 
+ """ + for c in self.Base.classes: + if hasattr(c, '__table__') and str(c.__table__) == tablename: + return c def add_tables(self, tables): """ @@ -2103,7 +2143,7 @@ def __load_spatialite(dbapi_conn, connection_record): else: dbapi_conn.load_extension('mod_spatialite') - def __prepare_insertion(self, scene): + def __prepare_insertion(self, scene, table='data'): """ read scene metadata and parse a string for inserting it into the database @@ -2119,14 +2159,15 @@ def __prepare_insertion(self, scene): id = scene if isinstance(scene, ID) else identify(scene) pols = [x.lower() for x in id.polarizations] # insertion as an object of Class Data (reflected in the init()) - insertion = self.Data() - colnames = self.get_colnames() + # insertion = self.Data() + insertion = self.get_class_by_tablename(table)() + colnames = self.get_colnames(table) for attribute in colnames: if attribute in ['bbox', 'geometry']: geom = getattr(scene, attribute)() geom.reproject(4326) geom = geom.convert2wkt(set3D=False)[0] - geom = 'SRID=4326;' + str(geom) + geom = 'SRID=4326; ' + str(geom) # set attributes of the Data object according to input setattr(insertion, attribute, geom) elif attribute in ['hh', 'vv', 'hv', 'vh']: @@ -2161,7 +2202,7 @@ def __select_missing(self, table): files = [self.encode(x[0]) for x in scenes] return [x for x in files if not os.path.isfile(x)] - def insert(self, scene_in, pbar=False, test=False): + def insert(self, scene_in, table='data', pbar=False, test=False): """ Insert one or many scenes into the database @@ -2169,6 +2210,8 @@ def insert(self, scene_in, pbar=False, test=False): ---------- scene_in: str or ID or list a SAR scene or a list of scenes to be inserted + table: str + which table to insert to pbar: bool show a progress bar? test: bool @@ -2183,7 +2226,7 @@ def insert(self, scene_in, pbar=False, test=False): 'or a list containing several of either') log.info('filtering scenes by name') - scenes = self.filter_scenelist(scene_in) + scenes = self.filter_scenelist(scene_in, table) if len(scenes) == 0: log.info('...nothing to be done') return @@ -2210,8 +2253,8 @@ def insert(self, scene_in, pbar=False, test=False): session = self.Session() for i, id in enumerate(scenes): basename = id.outname_base() - if not self.is_registered(id) and basename not in basenames: - insertion = self.__prepare_insertion(id) + if not self.is_registered(id, table) and basename not in basenames: + insertion = self.__prepare_insertion(id, table) insertions.append(insertion) counter_regulars += 1 log.debug('regular: {}'.format(id.scene)) @@ -2246,7 +2289,7 @@ def insert(self, scene_in, pbar=False, test=False): message = '{0} duplicate{1} registered' log.info(message.format(counter_duplicates, '' if counter_duplicates == 1 else 's')) - def is_registered(self, scene): + def is_registered(self, scene, table='data'): """ Simple check if a scene is already registered in the database. 
@@ -2254,7 +2297,8 @@ def is_registered(self, scene): ---------- scene: str or ID the SAR scene - + table: + which table to search in (must contain outname_base column with scene id) Returns ------- bool @@ -2262,8 +2306,9 @@ def is_registered(self, scene): """ id = scene if isinstance(scene, ID) else identify(scene) # ORM query, where scene equals id.scene, return first - exists_data = self.Session().query(self.Data.outname_base).filter( - self.Data.outname_base == id.outname_base()).first() + table = self.get_class_by_tablename(table) + exists_data = self.Session().query(table.outname_base).filter( + table.outname_base == id.outname_base()).first() exists_duplicates = self.Session().query(self.Duplicates.outname_base).filter( self.Duplicates.outname_base == id.outname_base()).first() in_data = False @@ -2363,7 +2408,7 @@ def export2shp(self, path, table='data'): subprocess.call(['ogr2ogr', '-f', 'ESRI Shapefile', path, db_connection, table]) - def filter_scenelist(self, scenelist): + def filter_scenelist(self, scenelist, table='data'): """ Filter a list of scenes by file names already registered in the database. @@ -2371,6 +2416,8 @@ def filter_scenelist(self, scenelist): ---------- scenelist: :obj:`list` of :obj:`str` or :obj:`pyroSAR.drivers.ID` the scenes to be filtered + table: str + which table to search in Returns ------- @@ -2383,7 +2430,8 @@ def filter_scenelist(self, scenelist): raise TypeError("items in scenelist must be of type 'str' or 'pyroSAR.ID'") # ORM query, get all scenes locations - scenes_data = self.Session().query(self.Data.scene) + table = self.get_class_by_tablename(table) + scenes_data = self.Session().query(table.scene) registered = [os.path.basename(self.encode(x[0])) for x in scenes_data] scenes_duplicates = self.Session().query(self.Duplicates.scene) duplicates = [os.path.basename(self.encode(x[0])) for x in scenes_duplicates] @@ -2431,7 +2479,9 @@ def get_tablenames(self, return_all=False): 'virts_geometry_columns', 'virts_geometry_columns_auth', 'virts_geometry_columns_field_infos', 'virts_geometry_columns_statistics', 'data_licenses', 'KNN'] # get tablenames from metadata - tables = sorted([self.encode(x) for x in self.meta.tables.keys()]) + + insp = sql_inspect(self.engine) + tables = sorted([self.encode(x) for x in insp.get_table_names()]) if return_all: return tables else: diff --git a/tests/test_drivers.py b/tests/test_drivers.py index 11079957..dd9ea94c 100644 --- a/tests/test_drivers.py +++ b/tests/test_drivers.py @@ -196,9 +196,9 @@ def test_archive_geometry(tmpdir, testdata): db.close() db = pyroSAR.Archive(dbfile, add_geometry=True) db.update_geometry_field() + db.insert(testdata['s1_2']) db.close() os.remove(dbfile) - dbfile = os.path.join(str(tmpdir), 'scenes_geo.db') db_geo = pyroSAR.Archive(dbfile, add_geometry=True) db_geo.insert(testdata['s1']) @@ -208,6 +208,7 @@ def test_archive_geometry(tmpdir, testdata): db_geo.close() os.remove(dbfile) + def test_archive2(tmpdir, testdata): dbfile = os.path.join(str(tmpdir), 'scenes.db') with pyroSAR.Archive(dbfile) as db: From c59e2f3d20df2c373c073f80f5ca761f5633a472 Mon Sep 17 00:00:00 2001 From: MarkusZehner Date: Mon, 14 Feb 2022 16:21:10 +0100 Subject: [PATCH 09/12] matched changes --- pyroSAR/drivers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py index 68b8f417..dbea5476 100644 --- a/pyroSAR/drivers.py +++ b/pyroSAR/drivers.py @@ -430,7 +430,7 @@ def gdalinfo(self): except ValueError: pass - if re.search('(?:LAT|LONG)', entry[0]): + if 
re.search('LAT|LONG', entry[0]):
                 entry[1] /= 1000000.
             meta[entry[0]] = entry[1]
         return meta
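A note on the pattern change above: '(?:LAT|LONG)' and 'LAT|LONG' behave identically under re.search, since a non-capturing group only affects group bookkeeping, not whether a match is found. A minimal standalone check; the metadata keys are illustrative, and the 1e6 scaling mirrors the division in gdalinfo(), which presumably converts integer micro-degree values to degrees:

import re

# both patterns yield the same truth value for any key
for key in ['FIRST_MID_LAT', 'LAST_LINE_LONG', 'ORBIT_NUMBER']:
    assert bool(re.search('(?:LAT|LONG)', key)) == bool(re.search('LAT|LONG', key))

# scaling as applied in gdalinfo() above
lat_raw = 49422654  # hypothetical raw metadata value
print(lat_raw / 1000000.)  # -> 49.422654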
""" - length = len(scene_in) if isinstance(scene_in, list) else 1 - if isinstance(scene_in, (ID, str)): scene_in = [scene_in] if not isinstance(scene_in, list): @@ -2341,15 +2347,19 @@ def __is_registered_in_duplicates(self, scene): in_dup = len(exists_duplicates) != 0 return in_dup - def cleanup(self): + def cleanup(self, table='data'): """ Remove all scenes from the database, which are no longer stored in their registered location + Parameters + ---------- + table: str + tablename, table must contain scene column Returns ------- """ - missing = self.__select_missing('data') + missing = self.__select_missing(table) for scene in missing: log.info('Removing missing scene from database tables: {}'.format(scene)) self.drop_element(scene, with_duplicates=True) @@ -2442,12 +2452,16 @@ def get_colnames(self, table='data'): """ Return the names of all columns of a table. + Parameters + ---------- + table: str + tablename + Returns ------- list the column names of the chosen table """ - # get all columns of one table, but shows geometry columns not correctly dicts = sql_inspect(self.engine).get_columns(table) col_names = [i['name'] for i in dicts] @@ -2478,7 +2492,6 @@ def get_tablenames(self, return_all=False): 'virts_geometry_columns', 'virts_geometry_columns_auth', 'virts_geometry_columns_field_infos', 'virts_geometry_columns_statistics', 'data_licenses', 'KNN'] # get tablenames from metadata - insp = sql_inspect(self.engine) tables = sorted([self.encode(x) for x in insp.get_table_names()]) if return_all: @@ -2490,17 +2503,22 @@ def get_tablenames(self, return_all=False): ret.append(i) return ret - def get_unique_directories(self): + def get_unique_directories(self, table='data'): """ Get a list of directories containing registered scenes + Parameters + ---------- + table: str + tablename, table must contain scene column Returns ------- list the directory names """ # ORM query, get all directories - scenes = self.Session().query(self.Data.scene) + table = self.get_class_by_tablename(table) + scenes = self.Session().query(table.scene) registered = [os.path.dirname(self.encode(x[0])) for x in scenes] return list(set(registered)) From 3fe1883a4d0433b5918e78c62126e810fe3ea2a5 Mon Sep 17 00:00:00 2001 From: MarkusZehner Date: Mon, 14 Feb 2022 16:57:00 +0100 Subject: [PATCH 11/12] removed print statement --- pyroSAR/drivers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py index 2213384d..29199e71 100644 --- a/pyroSAR/drivers.py +++ b/pyroSAR/drivers.py @@ -2030,7 +2030,6 @@ def update_geometry_field(self): to_insert = [] # save in new list for entry in temp_data: - print(entry) to_insert.append(entry[0]) # remove old table self.drop_table('data') From 2b9c33fc67149360f44906cc3b42f7fbf878be80 Mon Sep 17 00:00:00 2001 From: MarkusZehner Date: Tue, 15 Feb 2022 16:41:15 +0100 Subject: [PATCH 12/12] added table argument to select --- pyroSAR/drivers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py index 29199e71..3c882640 100644 --- a/pyroSAR/drivers.py +++ b/pyroSAR/drivers.py @@ -2608,7 +2608,7 @@ def move(self, scenelist, directory, pbar=False): log.info('The following scenes already exist at the target location:\n{}'.format('\n'.join(double))) def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None, - recursive=False, polarizations=None, use_geometry=False, **args): + recursive=False, polarizations=None, use_geometry=False, table='data', **args): """ select scenes from 
From 3fe1883a4d0433b5918e78c62126e810fe3ea2a5 Mon Sep 17 00:00:00 2001
From: MarkusZehner
Date: Mon, 14 Feb 2022 16:57:00 +0100
Subject: [PATCH 11/12] removed print statement

---
 pyroSAR/drivers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py
index 2213384d..29199e71 100644
--- a/pyroSAR/drivers.py
+++ b/pyroSAR/drivers.py
@@ -2030,7 +2030,6 @@ def update_geometry_field(self):
         to_insert = []
         # save in new list
         for entry in temp_data:
-            print(entry)
             to_insert.append(entry[0])
         # remove old table
         self.drop_table('data')

From 2b9c33fc67149360f44906cc3b42f7fbf878be80 Mon Sep 17 00:00:00 2001
From: MarkusZehner
Date: Tue, 15 Feb 2022 16:41:15 +0100
Subject: [PATCH 12/12] added table argument to select

---
 pyroSAR/drivers.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyroSAR/drivers.py b/pyroSAR/drivers.py
index 29199e71..3c882640 100644
--- a/pyroSAR/drivers.py
+++ b/pyroSAR/drivers.py
@@ -2608,7 +2608,7 @@ def move(self, scenelist, directory, pbar=False):
             log.info('The following scenes already exist at the target location:\n{}'.format('\n'.join(double)))
 
     def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None,
-               recursive=False, polarizations=None, use_geometry=False, **args):
+               recursive=False, polarizations=None, use_geometry=False, table='data', **args):
         """
         select scenes from the database
 
@@ -2638,8 +2638,8 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None,
             the file names pointing to the selected scenes
         """
-        arg_valid = [x for x in args.keys() if x in self.get_colnames()]
-        arg_invalid = [x for x in args.keys() if x not in self.get_colnames()]
+        arg_valid = [x for x in args.keys() if x in self.get_colnames(table)]
+        arg_invalid = [x for x in args.keys() if x not in self.get_colnames(table)]
         if len(arg_invalid) > 0:
             log.info('the following arguments will be ignored as they are not registered in the data base: {}'.format(
                 ', '.join(arg_invalid)))
@@ -2696,7 +2696,7 @@ def select(self, vectorobject=None, mindate=None, maxdate=None, processdir=None,
         else:
             log.info('WARNING: argument vectorobject is ignored, must be of type spatialist.vector.Vector')
 
-        query = '''SELECT scene, outname_base FROM data WHERE {}'''.format(' AND '.join(arg_format))
+        query = '''SELECT scene, outname_base FROM {0} WHERE {1}'''.format(table, ' AND '.join(arg_format))
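Taken together, the series enables footprint-precise selection end to end. A sketch of the intended workflow, modeled on test_archive_geometry; all file paths are placeholders:

from pyroSAR import Archive
from spatialist import Vector

# new database with the additional 'geometry' column enabled
with Archive('scenes.db', add_geometry=True) as archive:
    archive.insert('/path/to/S1A_scene.zip')  # stores bbox and precise footprint
    with Vector('/path/to/aoi.shp') as aoi:
        # intersect the AOI with the exact footprint instead of the bounding box
        selection = archive.select(vectorobject=aoi, use_geometry=True)

# existing database created without add_geometry: rebuild table 'data'
with Archive('scenes_old.db', add_geometry=True) as archive:
    archive.update_geometry_field()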