-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathamazon_comments_scraper.py
executable file
·43 lines (32 loc) · 1.47 KB
/
amazon_comments_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import argparse
from core_extract_comments import *
from core_utils import *
# Configure the root logger when this module is executed/imported: DEBUG and
# above, each record rendered as "<timestamp> - <level> - <message>".
# NOTE(review): `logging` is not imported directly in this file; presumably it
# arrives through one of the star imports above - confirm.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
def run(search, input_product_ids_filename):
    """Fetch the reviews of every product id listed in a text file.

    Args:
        search: Not read by this function; kept so existing callers that
            pass it keep working.
        input_product_ids_filename: Path of a text file with one product id
            per line, or ``None``, in which case nothing is done.

    Returns:
        None. Progress is reported through ``logging``.
    """
    if input_product_ids_filename is None:
        return

    # Read every id up front so the file handle is released before the
    # fetch loop starts. Blank / whitespace-only lines are dropped: they
    # would otherwise be handed to the helpers below as an empty product id.
    with open(input_product_ids_filename, 'r') as ids_file:
        stripped_lines = [line.strip() for line in ids_file]
    product_ids = [product_id for product_id in stripped_lines if product_id]
    logging.info('{} product ids were found.'.format(len(product_ids)))

    reviews_counter = 0
    for product_id in product_ids:
        # get_reviews_filename returns a pair; only its second item is used
        # here, as an "already fetched" flag.
        _, exist = get_reviews_filename(product_id)
        if exist:
            logging.info('product id [{}] was already fetched. Skipping.'.format(product_id))
            continue
        reviews = get_comments_with_product_id(product_id)
        reviews_counter += len(reviews)
        logging.info('{} reviews found so far.'.format(reviews_counter))
        # NOTE(review): the explicit persistence call below was already
        # disabled in the original; it is kept for reference - confirm
        # whether it should be restored or removed.
        # persist_comment_to_disk(reviews)
def get_script_arguments(argv=None):
    """Parse the scraper's command-line options.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what
            the function did before this parameter existed.

    Returns:
        A ``(search, input_product_ids_filename)`` tuple holding the values
        of ``-s/--search`` and ``-i/--input``; an item is ``None`` when its
        option was not given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--search')
    parser.add_argument('-i', '--input')
    args = parser.parse_args(argv)
    return args.search, args.input
def scrapper(input_product_ids_filename, search='-i'):
    """Programmatic entry point: forward both arguments to ``run``.

    The argument order is swapped relative to ``run``: the product-id file
    comes first here and the search value second.

    NOTE(review): the default ``'-i'`` looks like a command-line flag rather
    than a search value - confirm it is intended.
    """
    # Values are supplied by the caller; command-line parsing through
    # get_script_arguments() is not used on this path.
    run(search=search, input_product_ids_filename=input_product_ids_filename)
def main():
    """Command-line entry point: parse the options, then run the scraper.

    The script guard below calls ``main()``, but no such function is defined
    in this file; this restores the parse-then-run flow.
    """
    search, input_product_ids_filename = get_script_arguments()
    run(search, input_product_ids_filename)


if __name__ == '__main__':
    main()