-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_summary.py
47 lines (37 loc) · 1.34 KB
/
generate_summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd
from os import path
from operations import (
groupby_country,
summarize,
groupby_location,
start_pipeline,
cluster_locations,
)
import click
@click.command()
@click.argument("orders")
@click.argument("output_dir")
def run(orders, output_dir):
    """
    Will read a CSV file with parsed ORDERS and generate some summary statistics by location, country and total. The
    results will be written to different files in OUTPUT_DIR, which is created first if it does not exist yet.
    :param orders: path to CSV file with parsed orders
    :param output_dir: directory to write summary statistics to
    """
    # Imported locally because the module-level imports only bring in `os.path`.
    from os import makedirs

    # Read input
    processed_data = pd.read_csv(orders)

    # Transform: each aggregation starts from the same parsed-orders frame.
    by_location = processed_data.pipe(groupby_location)
    by_country = processed_data.pipe(groupby_country)
    totals = processed_data.pipe(summarize)

    # Geo-clustering (group nearby locations together for map)
    location_clusters = by_location.pipe(start_pipeline).pipe(cluster_locations)

    # Write output. Create the target directory up front so that a missing
    # OUTPUT_DIR does not fail the run after all the work above has been done.
    makedirs(output_dir, exist_ok=True)

    by_location.to_csv(path.join(output_dir, "orders_by_location.csv"), index=False)
    by_country.to_csv(path.join(output_dir, "orders_by_country.csv"), index=False)
    # The summary is the only output written together with its index.
    # NOTE(review): presumably the index holds the statistic labels - confirm
    # against operations.summarize.
    totals.to_csv(path.join(output_dir, "orders_summary.csv"))
    location_clusters.to_csv(
        path.join(output_dir, "orders_by_location_cluster.csv"), index=False
    )
# Script entry point: invoke the click command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()