-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtask1data.py
52 lines (41 loc) · 1.31 KB
/
task1data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import pandas as pd
import re
# Load the experiment table at import time; the relative path is resolved
# against the current working directory.
data = pd.read_csv('exp_data.csv')
# Disabled debugging aid: print one snippet from the 'code' column and run the
# line splitter on it.
# NUMBER = 9
# print(data['code'][NUMBER])
# split_code_line(data['code'][NUMBER])
def filter_try_catch(source):
    """Tell whether *source* is anything other than a lone, catch-less ``try``.

    The input is converted with ``str()`` first.  A keyword is only counted
    when it has a non-word character on BOTH sides, so a ``try`` or ``catch``
    sitting at the very start or very end of the text is not counted.

    Returns:
        ``False`` only when exactly one ``try`` is counted and no ``catch``
        is counted; ``True`` in every other case.
    """
    text = str(source)
    catch_hits = re.findall(r'\Wcatch\W', text)
    try_hits = re.findall(r'\Wtry\W', text)
    # De Morgan of "no catch and exactly one try" -> reject.
    return bool(catch_hits) or len(try_hits) != 1
# Sentinel lines marking where a try body opens and where it closes.
_TRY_START_MARK = '###start###'
_TRY_END_MARK = '###end###'

# The `try` keyword (not the tail of a longer identifier such as `retry`)
# followed by optional whitespace and its opening brace.  A lookbehind is used
# so the character in front of `try` is NOT consumed by the substitution.
_TRY_OPEN_RE = re.compile(r'(?<!\w)try\s*\{')
# The brace closing a try body plus the catch clause that follows it, up to
# the first `}` found after the catch's opening brace.
_CATCH_CLAUSE_RE = re.compile(r'\}\s*catch\W[\s\S]*?\{[\s\S]*?\}')


def split_code_line(source):
    """Split a code snippet into stripped lines and flag those inside a try body.

    The ``try {`` opener and the ``} catch (...) { ... }`` clause are replaced
    by sentinel markers, the text is split on newlines, and every remaining
    non-blank line is labelled 1 if it lies between a start and an end marker
    and 0 otherwise.  The markers and the catch clause itself are not part of
    the output.

    The markers are always emitted on a line of their own, so a ``try`` that
    directly follows a newline or other code (``foo();try {``), a ``try{``
    without a space, or code sharing a line with the braces is still
    recognised and never leaks marker text into the returned lines.

    Limitations (pattern based, not a parser): a catch body is considered to
    end at its first ``}``, so nested braces inside a catch block leave the
    remainder of that block in the output with label 0; ``finally`` blocks
    and try-with-resources headers are not treated specially.

    Args:
        source: The snippet; converted with ``str()`` before processing.

    Returns:
        A ``(lines, labels)`` tuple of two equally long lists: the stripped
        non-blank lines and their 0/1 labels.
    """
    text = str(source)
    text = _TRY_OPEN_RE.sub('\n' + _TRY_START_MARK + '\n', text)
    text = _CATCH_CLAUSE_RE.sub('\n' + _TRY_END_MARK + '\n', text)

    results = []
    labels = []
    inside_try = False
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if line == _TRY_START_MARK:
            inside_try = True
        elif line == _TRY_END_MARK:
            inside_try = False
        else:
            # Appending to both lists together keeps them the same length.
            results.append(line)
            labels.append(1 if inside_try else 0)
    return results, labels
# Run the line splitter over every entry of the 'code' column; each result is
# a (lines, labels) pair.
format_data = data['code'].map(split_code_line)
# One row per snippet, with the pair unpacked into two columns.
task1_data = pd.DataFrame(list(format_data), columns=['lines', 'labels'])
# Persist the labelled dataset as a pickle file.
task1_data.to_pickle('data/task1_data.pkl')