-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprepare_dataset.py
executable file
·54 lines (45 loc) · 1.46 KB
/
prepare_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import re
import shutil
import os
from collections import Counter
from glob import glob
import random
# Directory holding the source images. File names are expected to contain an
# "<age>_<gender>_" group (e.g. "25_0_..."), which is where the labels come from.
dataset = "crop_part1"
test_size = 100

# Compiled once: the same pattern is applied to every file name.
NAME_PATTERN = re.compile(r"(\d+)_(\d)_")


def parse_age_gender(path):
    """Return ``(age, gender)`` parsed from the file name of *path*.

    Only the base name is searched, so digits in a directory name can never
    be mistaken for labels. Returns ``None`` when the name contains no
    ``<age>_<gender>_`` group instead of raising ``IndexError``.
    """
    match = NAME_PATTERN.search(os.path.basename(path))
    if match is None:
        return None
    return int(match.group(1)), int(match.group(2))


# --- Pass 1: collect the age/gender labels and report the class sizes. ---
imgs = glob(f"{dataset}/*")
age_ls = []
gender_ls = []
for img_path in imgs:
    parsed = parse_age_gender(img_path)
    if parsed is None:
        # Stray file without labels in its name: ignore it.
        continue
    age, gender = parsed
    age_ls.append(age)
    gender_ls.append(gender)

# "Old" is age >= 50, "young" is 15 <= age <= 30; other ages are unused.
old = [age for age in age_ls if age >= 50]
young = [age for age in age_ls if 15 <= age <= 30]
print("Number of Young : ", len(young), "Number of old", len(old))

# --- Pass 2: copy young faces to train/A and old faces to train/B. ---
os.makedirs("train/A", exist_ok=True)
os.makedirs("train/B", exist_ok=True)
os.makedirs('test/A', exist_ok=True)
os.makedirs('test/B', exist_ok=True)
count_a = count_b = 0
for fn in imgs:
    parsed = parse_age_gender(fn)
    if parsed is None:
        continue
    age, _ = parsed
    if 15 <= age <= 30:
        target_dir = "train/A"
    elif age >= 50:
        target_dir = "train/B"
    else:
        # Ages outside both brackets belong to neither class.
        continue
    # Build the destination from the base name rather than rewriting the
    # source path with str.replace, which would also touch the file name.
    shutil.copy(fn, os.path.join(target_dir, os.path.basename(fn)))
    if target_dir == "train/A":
        count_a += 1
    else:
        count_b += 1
print("No of images in young is {} and old is {}".format(count_a, count_b))
## Setting up the test dataset for evaluating the performance
# Number of images moved out of each training class into the test set.
test_size = 100
os.makedirs('test/A', exist_ok=True)
os.makedirs('test/B', exist_ok=True)
for split in ['train/A', 'train/B']:
    pattern = f'{split}/*'
    files = glob(pattern)
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the request at what is actually available.
    sample_size = min(test_size, len(files))
    if sample_size < test_size:
        print(
            "Warning: {} holds only {} images; moving {} instead of {}".format(
                split, len(files), sample_size, test_size
            )
        )
    paths = random.sample(files, k=sample_size)
    # Swap only the leading directory component; a blanket str.replace on the
    # whole path would also rewrite "train" inside a file name.
    test_dir = split.replace("train", "test", 1)
    for path in paths:
        shutil.move(path, os.path.join(test_dir, os.path.basename(path)))