Skip to content

Commit

Permalink
feat: add method for hashtag extraction from string
Browse files Browse the repository at this point in the history
  • Loading branch information
Sieboldianus committed Jan 9, 2021
1 parent f8c6ed4 commit 134119c
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions lbsntransform/tools/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,21 @@ def get_coordinates_from_ewkt(
lng=shply_geom.x, lat=shply_geom.y) # pylint: disable=maybe-no-member
return coordinates

@staticmethod
def extract_hashtags_from_string(
        text_str: str) -> List[str]:
    """Extract the unique hashtags (words prefixed with #) from a string.

    - removes the leading # from each hashtag
    - removes duplicates
    - removes special chars (emoji etc.) from hashtags, e.g.:
        - input: "#germany🇩🇪"
        - output: {"germany"}

    Args:
        text_str: Text to scan for hashtags. Empty or None input yields
            an empty result.

    Returns:
        A set with the hashtag terms found (no order is guaranteed).

    NOTE(review): the return annotation says List[str], but a set has
    always been returned here; the runtime type is kept so existing
    callers are unaffected - consider correcting the annotation.
    """
    if not text_str:
        # nothing to scan; also avoids a TypeError from re on None
        return set()
    # Raw string: '\w' in a normal literal is an invalid string escape
    # (warns on modern Python). The lookbehind requires a preceding '#'
    # without capturing it; \w+ stops at the first non-word character,
    # which is what strips trailing emoji and punctuation.
    hashtag_terms = re.findall(r'(?<=#)\w+', text_str)
    return set(hashtag_terms)

@staticmethod
def json_read_wrapper(gen):
"""Wraps json iterator and catches any error"""
Expand Down

0 comments on commit 134119c

Please sign in to comment.