Skip to content

Commit

Permalink
fix: remove wrong place id column, update photo id
Browse files: browse the repository at this point in the history
  • Loading branch information
Sieboldianus committed Jun 10, 2019
1 parent f88084e commit fbca35d
Showing 1 changed file with 10 additions and 11 deletions.
21 changes: 10 additions & 11 deletions lbsntransform/classes/field_mapping_yfcc100m.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def extract_flickr_post(self, record):
# therefore, return list of processed records
lbsn_records = []
# start mapping input to lbsn_records
post_guid = record[2]
post_guid = record[1]
if not HF.check_notice_empty_post_guid(post_guid):
return None
post_record = HF.new_lbsn_record_with_id(lbsnPost(),
Expand All @@ -139,7 +139,7 @@ def extract_flickr_post(self, record):
user_record = HF.new_lbsn_record_with_id(lbsnUser(),
record[3],
self.origin)
user_record.user_name = record[4]
user_record.user_name = unquote(record[4])
user_record.url = f'http://www.flickr.com/photos/{user_record.pkey.id}/'
if user_record:
post_record.user_pkey.CopyFrom(user_record.pkey)
Expand All @@ -149,17 +149,16 @@ def extract_flickr_post(self, record):
record[14])
if geoaccuracy:
post_record.post_geoaccuracy = geoaccuracy
# place record not completely provided in YFCCM directly
# only place_guid available
if record[1]:
# place record available in separate yfcc100m dataset
# if record[1]:
# we need some information from postRecord to create placeRecord
# (e.g. user language, geoaccuracy, post_latlng)
# some of the information from place will also modify postRecord
place_record = HF.new_lbsn_record_with_id(lbsnPlace(),
record[1],
self.origin)
lbsn_records.append(place_record)
post_record.place_pkey.CopyFrom(place_record.pkey)
# place_record = HF.new_lbsn_record_with_id(lbsnPlace(),
# record[1],
# self.origin)
# lbsn_records.append(place_record)
# post_record.place_pkey.CopyFrom(place_record.pkey)
post_record.post_publish_date.CopyFrom(
HF.parse_timestamp_string_to_protobuf(record[6]))
post_created_date = HF.parse_csv_datestring_to_protobuf(
Expand Down Expand Up @@ -192,7 +191,7 @@ def extract_flickr_post(self, record):
tag = FieldMappingYFCC100M.clean_tags_from_flickr(tag)
post_record.hashtags.append(tag)
record_machine_tags = list(
set(filter(None, re.split("[,+]+", record[11]))))
set(filter(None, [unquote(mtag) for mtag in re.split("[,+]+", record[11])])))
if 'video' in record_machine_tags:
# all videos appear to have 'video' in machine tags
post_record.post_type = lbsnPost.VIDEO
Expand Down

0 comments on commit fbca35d

Please sign in to comment.