Skip to content

Commit

Permalink
feat: add topical._hashtag_latlng and social.community base
Browse files Browse the repository at this point in the history
  • Loading branch information
Sieboldianus committed May 11, 2020
1 parent 0635767 commit d20e766
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 10 deletions.
1 change: 1 addition & 0 deletions lbsntransform/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ def parse_args(self):
'* hashtag '
'* emoji '
'* term '
'* _hashtag_latlng '
'* _term_latlng '
'* _emoji_latlng '
'* monthofyear '
Expand Down
11 changes: 7 additions & 4 deletions lbsntransform/output/hll/base/social.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""Hll bases for social facet
"""

from typing import Tuple
from typing import Tuple, Union

from lbsnstructure import lbsnstructure_pb2 as lbsn
from lbsntransform.output.hll import hll_bases as hll
Expand Down Expand Up @@ -61,15 +61,18 @@ class CommunityBase(SocialBase):
"""Extends Social Base Class"""
NAME = hll.HllBaseRef(facet=FACET, base='community')

def __init__(self, record: lbsn.Origin = None):
def __init__(self, record: Union[lbsn.Origin, lbsn.Post] = None):
super().__init__()
self.attrs['name'] = None
self.key['id'] = None
if record is None:
# init empty
return
self.attrs['name'] = record.DESCRIPTOR.name
self.key['id'] = record.origin_id
if hasattr(record, "origin_id"):
self.key['id'] = record.origin_id
self.attrs['name'] = record.DESCRIPTOR.name
else:
self.key['id'] = record.pkey.origin.origin_id


class CultureBase(SocialBase):
Expand Down
39 changes: 39 additions & 0 deletions lbsntransform/output/hll/base/topical.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,45 @@ def __init__(self, record: lbsn.Post = None, term: str = None):
"from lbsn.Post")


class HashtagLatLngBase(hll.HllBase):
    """Composite Base (c-base) that extends from HLL base Class

    Note: To distinguish c-bases which are composite bases combining
    aspects from multiple facets, they're termed with a leading underscore

    The key consists of the lower-cased hashtag together with the
    latitude and longitude read from the post's ``post_latlng``.
    """
    NAME = hll.HllBaseRef(facet=FACET, base='_hashtag_latlng')

    def __init__(self, record: lbsn.Post = None, hashtag: str = None):
        """Initialize the base, optionally from a post and a hashtag.

        Args:
            record: The lbsn.Post whose ``post_latlng`` supplies the
                coordinates. If None, only the hashtag key is set.
            hashtag: A single hashtag; stored lower-cased in the key.
                If None, an empty base is created and ``record`` is
                not inspected at all.

        Raises:
            ValueError: If a hashtag is given and ``record`` is neither
                None nor an lbsn.Post.
        """
        super().__init__()
        # declare all keys/attributes/metrics up front, so that an
        # empty base exposes the same structure as a populated one
        self.key['latitude'] = None
        self.key['longitude'] = None
        self.key['hashtag'] = None
        self.attrs['latlng_geom'] = None
        self.metrics['date_hll'] = set()
        if hashtag is None:
            # init empty
            return
        self.key['hashtag'] = hashtag.lower()
        if record is None:
            # init with hashtag only, no coordinates
            return
        if not isinstance(record, lbsn.Post):
            raise ValueError(
                "Parsing of HashtagLatLngBase only supported "
                "from lbsn.Post")
        coordinates_geom = record.post_latlng
        coordinates = HF.get_coordinates_from_ewkt(
            coordinates_geom
        )
        self.key['latitude'] = coordinates.lat
        self.key['longitude'] = coordinates.lng
        # additional (optional) attributes
        # formatted ready for sql upsert
        self.attrs['latlng_geom'] = HF.return_ewkb_from_geotext(
            coordinates_geom)


class EmojiLatLngBase(hll.HllBase):
"""Composite Base (c-base) that extends from HLL base Class
Expand Down
6 changes: 6 additions & 0 deletions lbsntransform/output/hll/hll_bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,12 @@ def base_factory(facet=None, base=None, record: lbsn.Post = None):
# create base for each term
records.append(
base_structure(record=record, term=term))
elif base == '_hashtag_latlng':
# any hashtag explicitly used
tag_terms = HF.filter_terms(record.hashtags)
for tag in tag_terms:
records.append(
base_structure(record=record, hashtag=tag))
elif base == '_emoji_latlng':
# any term mentioned in title,
# body or hashtag
Expand Down
19 changes: 14 additions & 5 deletions lbsntransform/output/hll/shared_structure_proto_hlldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
temporal.DateBase, temporal.MonthBase,
temporal.YearBase, topical.TermBase,
topical.EmojiBase, topical.TermLatLngBase,
topical.EmojiLatLngBase]
topical.HashtagLatLngBase, topical.EmojiLatLngBase,
social.CommunityBase]


class ProtoHLLMapping():
Expand Down Expand Up @@ -144,7 +145,8 @@ def extract_hll_bases(
base_list.extend(base_records)
# Topical Facet
topical_bases = [
'hashtag', 'emoji', 'term', '_term_latlng', '_emoji_latlng']
'hashtag', 'emoji', 'term', '_term_latlng', '_emoji_latlng',
'_hashtag_latlng']
topical_bases = self.filter_bases(
topical_bases, include_lbsn_bases)
base_records = self.make_bases(
Expand All @@ -153,6 +155,15 @@ def extract_hll_bases(
record=record)
if base_records:
base_list.extend(base_records)
social_bases = ['community']
social_bases = self.filter_bases(
social_bases, include_lbsn_bases)
base_records = self.make_bases(
facet='social',
bases=social_bases,
record=record)
if base_records:
base_list.extend(base_records)

# Places
if record_type == lbsn.Place.DESCRIPTOR.name:
Expand Down Expand Up @@ -238,9 +249,7 @@ def get_hll_metrics(cls, record) -> hll.HllMetrics:
@staticmethod
def get_origin_metrics(record) -> hll.HllMetrics:
"""Get hll metrics from lbsn.Origin record"""
post_hll = HLF.hll_concat_origin_guid(record)
hll_metrics = hll.HllMetrics(post_hll=post_hll)
return hll_metrics
return

@staticmethod
def get_country_metrics(record) -> hll.HllMetrics:
Expand Down
2 changes: 2 additions & 0 deletions lbsntransform/output/submit_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ def __init__(self, db_cursor=None,
topical.HashtagBase.NAME: dict(),
topical.EmojiBase.NAME: dict(),
topical.TermLatLngBase.NAME: dict(),
topical.HashtagLatLngBase.NAME: dict(),
topical.EmojiLatLngBase.NAME: dict(),
social.CommunityBase.NAME: dict()
}
self.count_round = 0
if dbformat_output == "lbsn":
Expand Down
2 changes: 1 addition & 1 deletion lbsntransform/tools/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def sanitize_string(str_text: str):
def format_base_repr(base):
"""Return formatted string of base"""
return (
f'{base.__name__}\nFacet: {base.facet}, '
f'{base.NAME.base}\nFacet: {base.NAME.facet}, '
f'Key: {base.get_key_value()}, '
f'Metrics: \n'
f'{[":".join([k, str(len(v))]) for k, v in base.metrics.items()]}')
Expand Down

0 comments on commit d20e766

Please sign in to comment.