Skip to content

Commit

Permalink
feat: Migrate from setup.py to pyproject.toml
Browse files Browse the repository at this point in the history
  • Loading branch information
Sieboldianus committed May 4, 2023
1 parent ef7be73 commit 7de0de0
Show file tree
Hide file tree
Showing 36 changed files with 7,388 additions and 65 deletions.
5 changes: 3 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
include README.md
include VERSION
recursive-include lbsntransform/ *.py
include LICENSE.md
include CHANGELOG.md
recursive-include src/lbsntransform/ *.py
5 changes: 2 additions & 3 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
FROM python:slim

COPY lbsntransform/ ./lbsntransform/
COPY src/ ./src/
COPY resources/ ./resources/
COPY setup.py README.md ./
COPY setup.cfg pyproject.toml README.md ./

RUN set -ex; \
\
apt-get update; \
apt-get install -y --no-install-recommends \
libpq-dev \
build-essential \
; \
pip install --upgrade pip; \
Expand Down
67 changes: 67 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
[build-system]
# setuptools-scm derives the package version from the git tag at build time
# (replaces the old VERSION file / setup.py versioning).
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[tool.setuptools_scm]

# FIX: this table was previously the top-level `[semantic_release]`.
# PEP 518 reserves top-level pyproject.toml tables for future use — tool
# configuration must live under `[tool.*]` — and python-semantic-release
# only reads `[tool.semantic_release]` from pyproject.toml, so the bare
# table was silently ignored.
[tool.semantic_release]
version_source = "tag"
branch = "master"

[tool.black]
line-length = 88
include = '\.pyi?$'
exclude = '''
/(
    \.eggs
  | \.git
  | \.hg
  | \.mypy_cache
  | \.tox
  | \.venv
  | \.vscode
  | _build
  | buck-out
  | build
  | dist
  | scripts/
  | mkdocs_env/
  | lbsntransform_env/
  | dev/
)/
'''

[tool.isort]
# Keep import sorting compatible with black's formatting.
profile = "black"
known_first_party = ["lbsntransform"]
line_length = 88

[tool.pytest.ini_options]
# -ra: show a short summary of all non-passing test outcomes.
addopts = "-ra"

[tool.mypy]
# Error output
show_column_numbers = true
show_error_codes = true
show_error_context = true
show_traceback = true
pretty = true
check_untyped_defs = false
# Warnings
warn_no_return = true
warn_redundant_casts = true
warn_unreachable = true
files = ["src", "tests"]

[tool.pylint.format]
# Match black's line length so the two tools don't disagree.
max-line-length = 88

[tool.pylint.message_control]
enable = ["c-extension-no-member", "no-else-return"]

[tool.pylint.variables]
dummy-variables-rgx = "_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_"
ignored-argument-names = "_.*|^ignored_|^unused_|args|kwargs"

[tool.codespell]
# NOTE(review): a single-space ignore list is a placeholder (codespell
# rejects an empty value) — confirm whether real ignore words are intended.
ignore-words-list = " "
47 changes: 45 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,45 @@
[semantic_release]
# NOTE(review): version.py moved to src/lbsntransform/ in this commit and
# versioning is now produced by setuptools-scm from the git tag — this path
# looks stale; confirm whether this section is still read and, if so,
# whether it should point at src/lbsntransform/version.py:__version__.
version_variable: lbsntransform/version.py:__version__

[metadata]
name = lbsntransform
author = Alexander Dunkel
author_email = [email protected]
license = GNU GPLv3 or any higher
description = Location based social network (LBSN) data structure format & transfer tool
url = https://gitlab.vgiscience.de/lbsn/lbsntransform
long_description = file: README.md
long_description_content_type = text/markdown

[options]
python_requires = >=3.8
packages = find:
package_dir =
    =src
# FIX: `requests` was listed twice in install_requires; deduplicated.
install_requires =
    lbsnstructure>=1.0.3
    protobuf>=4.21.9,<5
    psycopg2-binary
    ppygis3
    shapely<2.0.0
    emoji>=2.0.0
    requests
    geos
    numpy
    regex
include_package_data = True

[options.package_data]
* = README.md

[options.packages.find]
# src-layout: discover packages under src/, limited to lbsntransform.
where = src
include =
    lbsntransform
    lbsntransform.*

[options.entry_points]
# Installs the `lbsntransform` console command.
console_scripts =
    lbsntransform = lbsntransform.__main__:main

[options.extras_require]
# Optional extra: `pip install lbsntransform[nltk_stopwords]`
nltk_stopwords = nltk
58 changes: 0 additions & 58 deletions setup.py

This file was deleted.

21 changes: 21 additions & 0 deletions src/lbsntransform/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-

"""LBSNtransform package import specifications"""

# Resolve the package version from the installed distribution metadata;
# the version itself is produced by setuptools-scm from the git tag, so
# there is no hard-coded version string in the source tree.
import importlib.metadata
__version__ = importlib.metadata.version("lbsntransform")

# Re-export the public API at package level so users can write e.g.
# `from lbsntransform import LBSNTransform`.
from lbsntransform.lbsntransform_ import LBSNTransform
from lbsntransform.config.config import BaseConfig
from lbsntransform.input.load_data import LoadData
from lbsntransform.output.hll.shared_structure_proto_hlldb import ProtoHLLMapping
from lbsntransform.output.lbsn.shared_structure_proto_lbsndb import ProtoLBSNMapping
from lbsntransform.output.shared_structure import (
    GeocodeLocations, LBSNRecordDicts, TimeMonitor)
from lbsntransform.tools.helper_functions import HelperFunctions as HF

# pdoc documentation exclude/include format
__pdoc__ = {}

# pdoc documentation include format: explicitly include the CLI entry
# module (__main__) in generated documentation.
__pdoc__["lbsntransform.__main__"] = True
160 changes: 160 additions & 0 deletions src/lbsntransform/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
lbsntransform package script to load, format and store data
from and to common lbsn structure
Import options:
- Postgres Database or
- local CSV/json/stacked json import
Output options:
- Postgres Database or
- local ProtoBuf and CSV Import (prepared for Postgres /Copy)
"""


__author__ = "Alexander Dunkel"
__license__ = "GNU GPLv3"


# version: see version.py

import sys

from lbsntransform.tools.helper_functions import HelperFunctions as HF
from lbsntransform.output.shared_structure import TimeMonitor
from lbsntransform.input.load_data import LoadData
from lbsntransform.config.config import BaseConfig
from lbsntransform.lbsntransform_ import LBSNTransform


def main():
    """Command-line entry point: read input records, map them to the common
    LBSN structure, and write them to the configured output.

    Input may be a Postgres database or local CSV/JSON files; output may be
    a Postgres database (lbsn or hll format) or local CSV/ProtoBuf files.
    All behavior is driven by `BaseConfig` (defaults overridden by CLI args).
    Takes no parameters and returns nothing; results are emitted via the
    configured output and the logger.
    """

    # Load config defaults; CLI arguments (parsed next) override them.
    config = BaseConfig()
    # Parse args
    config.parse_args()

    # Resolve the origin-specific mapping module, selected by
    # config.origin, e.g. 1 = Instagram, 2 = Flickr,
    # 2.1 = Flickr YFCC100m, 3 = Twitter.
    importer = HF.load_importer_mapping_module(
        config.origin, config.mappings_path)

    # Initialize the transform/output pipeline. All connection and output
    # options are forwarded verbatim from the parsed config.
    lbsntransform = LBSNTransform(
        importer=importer,
        logging_level=config.logging_level,
        is_local_input=config.is_local_input,
        transfer_count=config.transfer_count,
        csv_output=config.csv_output,
        csv_suppress_linebreaks=config.csv_suppress_linebreaks,
        dbuser_output=config.dbuser_output,
        dbserveraddress_output=config.dbserveraddress_output,
        dbname_output=config.dbname_output,
        dbpassword_output=config.dbpassword_output,
        dbserverport_output=config.dbserverport_output,
        dbformat_output=config.dbformat_output,
        dbuser_input=config.dbuser_input,
        dbserveraddress_input=config.dbserveraddress_input,
        dbname_input=config.dbname_input,
        dbpassword_input=config.dbpassword_input,
        dbserverport_input=config.dbserverport_input,
        dbuser_hllworker=config.dbuser_hllworker,
        dbserveraddress_hllworker=config.dbserveraddress_hllworker,
        dbname_hllworker=config.dbname_hllworker,
        dbpassword_hllworker=config.dbpassword_hllworker,
        dbserverport_hllworker=config.dbserverport_hllworker,
        include_lbsn_bases=config.include_lbsn_bases,
        dry_run=config.dry_run,
        hmac_key=config.hmac_key,
        commit_volume=config.commit_volume)

    # Initialize the input reader; shares the importer (mapping module)
    # and, for db input, the cursor opened by LBSNTransform above.
    input_data = LoadData(
        importer=importer,
        is_local_input=config.is_local_input,
        startwith_db_rownumber=config.startwith_db_rownumber,
        skip_until_file=config.skip_until_file,
        cursor_input=lbsntransform.cursor_input,
        input_path=config.input_path,
        recursive_load=config.recursive_load,
        local_file_type=config.local_file_type,
        endwith_db_rownumber=config.endwith_db_rownumber,
        is_stacked_json=config.is_stacked_json,
        is_line_separated_json=config.is_line_separated_json,
        csv_delim=config.csv_delim,
        use_csv_dictreader=config.use_csv_dictreader,
        input_lbsn_type=config.input_lbsn_type,
        dbformat_input=config.dbformat_input,
        geocode_locations=config.geocode_locations,
        ignore_input_source_list=config.ignore_input_source_list,
        disable_reactionpost_ref=config.disable_reactionpost_ref,
        map_relations=config.map_relations,
        transfer_reactions=config.transfer_reactions,
        ignore_non_geotagged=config.ignore_non_geotagged,
        min_geoaccuracy=config.min_geoaccuracy,
        source_web=config.source_web,
        skip_until_record=config.skip_until_record,
        zip_records=config.zip_records,
        include_lbsn_objects=config.include_lbsn_objects,
        override_lbsn_query_schema=config.override_lbsn_query_schema)

    # Manually add entries that need submission prior to parsing data
    # add_bundestag_group_example(import_mapper)

    # Start wall-clock timing for the final "Done" report.
    how_long = TimeMonitor()

    # Stream raw input records through the mapper. LoadData is a context
    # manager that yields records one by one (from db cursor or files).
    with input_data as records:
        for record in records:
            lbsntransform.add_processed_records(record)
            # Report progress every 1000 processed records, overwriting
            # the same console line via carriage return.
            if lbsntransform.processed_total % 1000 == 0:
                stats_str = HF.report_stats(
                    input_data.count_glob,
                    input_data.continue_number,
                    lbsntransform.lbsn_records)
                print(stats_str, end='\r')
                sys.stdout.flush()
            # Stop early once the (presumably CLI-set) transfer limit is
            # reached; falsy limit (None/0) means unlimited.
            if (config.transferlimit and
                    lbsntransform.processed_total >= config.transferlimit):
                break

    # Finalize output: submit remaining buffered records and close any
    # db connection. NOTE(review): the trailing ')' in this log message
    # looks like a typo in the original — left unchanged here.
    lbsntransform.log.info(
        f'\nTransferring remaining '
        f'{lbsntransform.lbsn_records.count_glob} to db.. '
        f'{HF.null_notice(input_data.import_mapper.null_island)})')
    lbsntransform.finalize_output()

    # Final summary: input record range processed, identified/distinct
    # LBSN records, skipped records and merged duplicates.
    lbsntransform.log.info(
        f'\n\n{"".join([f"(Dry Run){chr(10)}" if config.dry_run else ""])}'
        f'Processed {input_data.count_glob} input records '
        f'(Input {input_data.start_number} to '
        f'{input_data.continue_number}). '
        f'\n\nIdentified {lbsntransform.processed_total} LBSN records, '
        f'with {lbsntransform.lbsn_records.count_glob_total} '
        f'distinct LBSN records overall. '
        f'{HF.get_skipped_report(input_data.import_mapper)}. '
        f'Merged {lbsntransform.lbsn_records.count_dup_merge_total} '
        f'duplicate records.')
    lbsntransform.log.info(
        f'\n{HF.get_count_stats(lbsntransform.lbsn_records)}')

    lbsntransform.log.info(f'Done. {how_long.stop_time()}')

    lbsntransform.close_log()


# Script entry point: run the CLI when executed directly
# (also the target of the `lbsntransform` console_scripts entry).
if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions src/lbsntransform/config/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""lbsntransform config submodule"""
Loading

0 comments on commit 7de0de0

Please sign in to comment.