-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathread_data.py
47 lines (35 loc) · 844 Bytes
/
read_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd
import numpy as np
import sys
def cluster(df, k):
    """Return (x, y) points for the rows that carry the first ``k`` labels.

    The first column of ``df`` is treated as a cluster label. Rows are
    gathered label by label, in the order in which each label first appears,
    so the returned points are grouped by label rather than kept in the
    frame's original row order.

    Args:
        df: DataFrame whose column 0 holds the labels and whose columns 1 and
            2 hold the two coordinates (at least three columns are needed
            whenever any label is selected).
        k: Number of distinct labels to keep. A value larger than the number
            of distinct labels keeps every label; a value <= 0 keeps none.

    Returns:
        A list of ``(column 1, column 2)`` tuples, empty when no label is
        selected.
    """
    label_values = df.iloc[:, 0]
    # Slicing clamps k to the labels that actually exist, instead of indexing
    # past the end of the unique-label array; max() keeps a negative k from
    # being read as "all but the last |k| labels".
    wanted = label_values.unique()[:max(k, 0)]
    if len(wanted) == 0:
        # pd.concat refuses an empty list, so answer directly.
        return []

    groups = [df.loc[label_values == label] for label in wanted]
    selected = pd.concat(groups)
    return list(zip(selected.iloc[:, 1].tolist(), selected.iloc[:, 2].tolist()))
def main():
    """Load points from the CSV file named on the command line.

    Command line: ``<script> FILEPATH [K]``. ``K`` defaults to 3 and is only
    read when exactly two arguments follow the script name.

    Rows containing missing values are dropped. When the file has more than
    two columns, the first column is treated as a label: the points come from
    ``cluster(df, k)`` and that column is left out of the axis labels.
    Otherwise every remaining row becomes one point.

    Returns:
        A ``(points, axis_labels, k)`` tuple: the list of points, the list of
        column names used as axis labels, and the cluster count.

    Raises:
        SystemExit: with an explanatory message when the file path is
            missing, ``K`` is not an integer, the file cannot be read or
            parsed, or the requested labels cannot be selected.
    """
    if len(sys.argv) < 2:
        sys.exit("Please input file path!")
    filepath = sys.argv[1]

    k = 3
    if len(sys.argv) == 3:
        try:
            k = int(sys.argv[2])
        except ValueError:
            sys.exit(f"Cluster count must be an integer, got {sys.argv[2]!r}")

    try:
        # OSError covers a missing/unreadable file; pandas' empty-data and
        # parser errors, and decoding errors, are all ValueError subclasses.
        df = pd.read_csv(filepath).dropna()
    except (OSError, ValueError) as err:
        sys.exit(f"Could not read {filepath!r}: {err}")

    if len(df.columns) > 2:
        try:
            points = cluster(df, k)
        except (IndexError, ValueError) as err:
            sys.exit(f"Could not select {k} clusters from {filepath!r}: {err}")
        # The label column is not an axis, so leave it out of the labels.
        df = df.drop(columns=df.columns[0])
    else:
        points = df.values.tolist()
    axis_labels = list(df)
    return points, axis_labels, k
# Run the loader only when this file is executed as a script, so that
# importing the module does not parse the importer's sys.argv or exit.
if __name__ == "__main__":
    main()