-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Migrate from setup.py to pyproject.toml
- Loading branch information
1 parent
ef7be73
commit 7de0de0
Showing
36 changed files
with
7,388 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
include README.md | ||
include VERSION | ||
recursive-include lbsntransform/ *.py | ||
include LICENSE.md | ||
include CHANGELOG.md | ||
recursive-include src/lbsntransform/ *.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
[build-system] | ||
requires = ["setuptools", "setuptools-scm"] | ||
build-backend = "setuptools.build_meta" | ||
|
||
[tool.setuptools_scm] | ||
|
||
[tool.semantic_release] | ||
version_source = "tag" | ||
branch = "master" | ||
|
||
[tool.black] | ||
line-length = 88 | ||
include = '\.pyi?$' | ||
exclude = ''' | ||
/( | ||
\.eggs | ||
| \.git | ||
| \.hg | ||
| \.mypy_cache | ||
| \.tox | ||
| \.venv | ||
| \.vscode | ||
| _build | ||
| buck-out | ||
| build | ||
| dist | ||
| scripts/ | ||
| mkdocs_env/ | ||
| lbsntransform_env/ | ||
| dev/ | ||
)/ | ||
''' | ||
|
||
[tool.isort] | ||
profile = "black" | ||
known_first_party = ["lbsntransform"] | ||
line_length = 88 | ||
|
||
[tool.pytest.ini_options] | ||
addopts = "-ra" | ||
|
||
[tool.mypy] | ||
# Error output | ||
show_column_numbers = true | ||
show_error_codes = true | ||
show_error_context = true | ||
show_traceback = true | ||
pretty = true | ||
check_untyped_defs = false | ||
# Warnings | ||
warn_no_return = true | ||
warn_redundant_casts = true | ||
warn_unreachable = true | ||
files = ["src", "tests"] | ||
|
||
[tool.pylint.format] | ||
max-line-length = 88 | ||
|
||
[tool.pylint.message_control] | ||
enable = ["c-extension-no-member", "no-else-return"] | ||
|
||
[tool.pylint.variables] | ||
dummy-variables-rgx = "_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_" | ||
ignored-argument-names = "_.*|^ignored_|^unused_|args|kwargs" | ||
|
||
[tool.codespell] | ||
ignore-words-list = " " |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,45 @@ | ||
[semantic_release] | ||
version_variable: lbsntransform/version.py:__version__ | ||
|
||
[metadata] | ||
name = lbsntransform | ||
author = Alexander Dunkel | ||
author_email = [email protected] | ||
license = GNU GPLv3 or any higher | ||
description = Location based social network (LBSN) data structure format & transfer tool | ||
url = https://gitlab.vgiscience.de/lbsn/lbsntransform | ||
long_description = file: README.md | ||
long_description_content_type = text/markdown | ||
|
||
[options] | ||
python_requires = >=3.8 | ||
packages = find: | ||
package_dir = | ||
=src | ||
install_requires = | ||
lbsnstructure>=1.0.3 | ||
protobuf>=4.21.9,<5 | ||
psycopg2-binary | ||
ppygis3 | ||
shapely<2.0.0 | ||
emoji>=2.0.0 | ||
requests | ||
geos | ||
numpy | ||
requests | ||
regex | ||
include_package_data = True | ||
|
||
[options.package_data] | ||
* = README.md | ||
|
||
[options.packages.find] | ||
where = src | ||
include = | ||
lbsntransform | ||
lbsntransform.* | ||
|
||
[options.entry_points] | ||
console_scripts = | ||
lbsntransform = lbsntransform.__main__:main | ||
|
||
[options.extras_require] | ||
nltk_stopwords = nltk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
"""LBSNtransform package import specifications""" | ||
|
||
import importlib.metadata

try:
    # The version string is read from the metadata of the installed
    # "lbsntransform" distribution.
    __version__ = importlib.metadata.version("lbsntransform")
except importlib.metadata.PackageNotFoundError:
    # The package is being imported without being installed (for example
    # directly from a source checkout); use a placeholder rather than
    # failing the import of the whole package.
    __version__ = "unknown"
|
||
from lbsntransform.lbsntransform_ import LBSNTransform | ||
from lbsntransform.config.config import BaseConfig | ||
from lbsntransform.input.load_data import LoadData | ||
from lbsntransform.output.hll.shared_structure_proto_hlldb import ProtoHLLMapping | ||
from lbsntransform.output.lbsn.shared_structure_proto_lbsndb import ProtoLBSNMapping | ||
from lbsntransform.output.shared_structure import ( | ||
GeocodeLocations, LBSNRecordDicts, TimeMonitor) | ||
from lbsntransform.tools.helper_functions import HelperFunctions as HF | ||
|
||
# pdoc documentation exclude/include format:
# maps a dotted name to True to include it in the generated docs
__pdoc__ = {"lbsntransform.__main__": True}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
""" | ||
lbsntransform package script to load, format and store data | ||
from and to common lbsn structure | ||
Import options: | ||
- Postgres Database or | ||
- local CSV/json/stacked json import | ||
Output options: | ||
- Postgres Database or | ||
- local ProtoBuf and CSV Import (prepared for Postgres /Copy) | ||
""" | ||
|
||
|
||
__author__ = "Alexander Dunkel" | ||
__license__ = "GNU GPLv3" | ||
|
||
|
||
# version: see version.py | ||
|
||
import sys | ||
|
||
from lbsntransform.tools.helper_functions import HelperFunctions as HF | ||
from lbsntransform.output.shared_structure import TimeMonitor | ||
from lbsntransform.input.load_data import LoadData | ||
from lbsntransform.config.config import BaseConfig | ||
from lbsntransform.lbsntransform_ import LBSNTransform | ||
|
||
|
||
def main():
    """Command line entry point: load, transform and store LBSN data.

    Reads records from a postgres db or from local files and writes
    them to a postgres db or to local files, then logs a summary
    of what was processed.
    """
    # start from the default configuration; command line arguments
    # replace the defaults where given
    cfg = BaseConfig()
    cfg.parse_args()

    # pick the mapping module that matches the lbsn origin
    # (e.g. 1 = Instagram, 2 = Flickr, 2.1 = Flickr YFCC100m, 3 = Twitter)
    importer = HF.load_importer_mapping_module(
        cfg.origin, cfg.mappings_path
    )

    # set up the transformer (output side, logging, db connections)
    transformer = LBSNTransform(
        importer=importer,
        logging_level=cfg.logging_level,
        is_local_input=cfg.is_local_input,
        transfer_count=cfg.transfer_count,
        csv_output=cfg.csv_output,
        csv_suppress_linebreaks=cfg.csv_suppress_linebreaks,
        dbuser_output=cfg.dbuser_output,
        dbserveraddress_output=cfg.dbserveraddress_output,
        dbname_output=cfg.dbname_output,
        dbpassword_output=cfg.dbpassword_output,
        dbserverport_output=cfg.dbserverport_output,
        dbformat_output=cfg.dbformat_output,
        dbuser_input=cfg.dbuser_input,
        dbserveraddress_input=cfg.dbserveraddress_input,
        dbname_input=cfg.dbname_input,
        dbpassword_input=cfg.dbpassword_input,
        dbserverport_input=cfg.dbserverport_input,
        dbuser_hllworker=cfg.dbuser_hllworker,
        dbserveraddress_hllworker=cfg.dbserveraddress_hllworker,
        dbname_hllworker=cfg.dbname_hllworker,
        dbpassword_hllworker=cfg.dbpassword_hllworker,
        dbserverport_hllworker=cfg.dbserverport_hllworker,
        include_lbsn_bases=cfg.include_lbsn_bases,
        dry_run=cfg.dry_run,
        hmac_key=cfg.hmac_key,
        commit_volume=cfg.commit_volume,
    )

    # set up the input reader; it shares the transformer's input cursor
    loader = LoadData(
        importer=importer,
        is_local_input=cfg.is_local_input,
        startwith_db_rownumber=cfg.startwith_db_rownumber,
        skip_until_file=cfg.skip_until_file,
        cursor_input=transformer.cursor_input,
        input_path=cfg.input_path,
        recursive_load=cfg.recursive_load,
        local_file_type=cfg.local_file_type,
        endwith_db_rownumber=cfg.endwith_db_rownumber,
        is_stacked_json=cfg.is_stacked_json,
        is_line_separated_json=cfg.is_line_separated_json,
        csv_delim=cfg.csv_delim,
        use_csv_dictreader=cfg.use_csv_dictreader,
        input_lbsn_type=cfg.input_lbsn_type,
        dbformat_input=cfg.dbformat_input,
        geocode_locations=cfg.geocode_locations,
        ignore_input_source_list=cfg.ignore_input_source_list,
        disable_reactionpost_ref=cfg.disable_reactionpost_ref,
        map_relations=cfg.map_relations,
        transfer_reactions=cfg.transfer_reactions,
        ignore_non_geotagged=cfg.ignore_non_geotagged,
        min_geoaccuracy=cfg.min_geoaccuracy,
        source_web=cfg.source_web,
        skip_until_record=cfg.skip_until_record,
        zip_records=cfg.zip_records,
        include_lbsn_objects=cfg.include_lbsn_objects,
        override_lbsn_query_schema=cfg.override_lbsn_query_schema,
    )

    # Manually add entries that need submission prior to parsing data
    # add_bundestag_group_example(import_mapper)

    # start the clock for the final "Done" message
    timer = TimeMonitor()

    # stream the unfiltered input records through the transformer
    with loader as records:
        for entry in records:
            transformer.add_processed_records(entry)
            # progress line, rewritten in place every 1000 processed records
            if transformer.processed_total % 1000 == 0:
                progress = HF.report_stats(
                    loader.count_glob,
                    loader.continue_number,
                    transformer.lbsn_records,
                )
                print(progress, end='\r', flush=True)
            # stop early once the optional transfer limit is reached
            if cfg.transferlimit and (
                transformer.processed_total >= cfg.transferlimit
            ):
                break

    # finalize output (close db connection, submit remaining)
    # NOTE(review): the message ends with a literal ")" - presumably
    # HF.null_notice supplies the matching "(" - confirm against the helper
    transformer.log.info(
        f'\nTransferring remaining '
        f'{transformer.lbsn_records.count_glob} to db.. '
        f'{HF.null_notice(loader.import_mapper.null_island)})')
    transformer.finalize_output()

    # final report, prefixed with a marker line when this was a dry run
    dry_run_note = "(Dry Run)\n" if cfg.dry_run else ""
    transformer.log.info(
        f'\n\n{dry_run_note}'
        f'Processed {loader.count_glob} input records '
        f'(Input {loader.start_number} to '
        f'{loader.continue_number}). '
        f'\n\nIdentified {transformer.processed_total} LBSN records, '
        f'with {transformer.lbsn_records.count_glob_total} '
        f'distinct LBSN records overall. '
        f'{HF.get_skipped_report(loader.import_mapper)}. '
        f'Merged {transformer.lbsn_records.count_dup_merge_total} '
        f'duplicate records.')
    transformer.log.info(
        f'\n{HF.get_count_stats(transformer.lbsn_records)}')

    transformer.log.info(f'Done. {timer.stop_time()}')

    transformer.close_log()
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
"""lbsntransform config submodule""" |
Oops, something went wrong.