-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathbench-unique.py
111 lines (79 loc) · 2.68 KB
/
bench-unique.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import platform
import timeit
import tracemalloc
from argparse import ArgumentParser
from itertools import product
import numpy
import pandas
import vigra
from rich.progress import Progress
# Setup snippet passed to ``timeit`` for every measurement. ``{shape}`` is
# substituted via ``str.format`` before use, so each run gets a freshly
# generated random uint32 array of that shape with values in [0, 256).
# The helper functions are repeated here because timeit executes the setup
# in its own namespace; the bodies must stay indented or the snippet will
# not compile.
SETUP = """
import numpy
import vigra
import pandas
data = numpy.random.randint(0, 256, {shape}, dtype="uint32")
def bincount_unique(a):
    return numpy.bincount(a.reshape(-1)).nonzero()[0]
def pandas_unique(a):
    a = numpy.ravel(a, order="K")
    u = pandas.unique(a)
    u.sort()
    return u
"""
def bincount_unique(a):
    """Return the sorted distinct values of *a* using ``numpy.bincount``.

    The array is flattened, every value is counted, and the indices whose
    count is non-zero are returned. Because ``numpy.bincount`` indexes by
    value, *a* must hold non-negative integers.
    """
    return numpy.bincount(a.reshape(-1)).nonzero()[0]
def pandas_unique(a):
    """Return the sorted distinct values of *a* using ``pandas.unique``.

    The input is flattened with ``order="K"`` before being handed to
    ``pandas.unique``; the result is then sorted in place so it can be
    compared element-wise with the other implementations.
    """
    a = numpy.ravel(a, order="K")
    u = pandas.unique(a)
    u.sort()
    return u
def check():
    """Verify that all benchmarked implementations agree with ``numpy.unique``.

    A random uint32 array of shape (1000, 500, 250) with values in [0, 256)
    is generated and passed to every implementation; ``numpy.unique`` serves
    as the reference result.

    Raises:
        AssertionError: if any implementation returns different values.
            Raised explicitly (instead of via ``assert``) so the check is
            not skipped when Python runs with ``-O``.
    """
    data = numpy.random.randint(0, 256, (1000, 500, 250), dtype="uint32")

    # Sanity check: compare each candidate against the numpy reference.
    expected = numpy.unique(data).tolist()
    candidates = {
        "vigra.analysis.unique": vigra.analysis.unique(data),
        "bincount_unique": bincount_unique(data),
        "pandas_unique": pandas_unique(data),
    }
    for name, values in candidates.items():
        if values.tolist() != expected:
            raise AssertionError(f"{name} disagrees with numpy.unique")
def parse_args(argv=None):
    """Parse the command-line options of the benchmark.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) the arguments are taken from ``sys.argv``, which is
            what the script itself relies on.

    Returns:
        The parsed namespace; ``hostname`` defaults to ``platform.node()``.
    """
    parser = ArgumentParser()
    parser.add_argument("--hostname", default=platform.node(), help="A way to identify your machine.")
    return parser.parse_args(argv)
def main():
    """Benchmark every method/shape combination and append the results to disk.

    Steps:
      1. Parse the command line and run the sanity check.
      2. Time each statement with ``timeit`` (mean of ``number`` runs).
      3. Re-run each statement under ``tracemalloc`` and record the current
         and peak traced memory.
      4. Append one tab-separated row per combination to
         ``results-unique.csv``.

    Row columns: statement, shape, mean seconds, Python version, numpy
    version, pandas version, vigra version, host, platform, current traced
    bytes, peak traced bytes.
    """
    args = parse_args()
    check()
    print("Check OK")

    py_version = platform.python_version()
    npy_version = numpy.__version__
    pd_version = pandas.__version__
    vigra_version = vigra.version
    host = args.hostname
    pf = platform.platform()

    results = {}  # {(method, shape): "tab\tseparated\tvalues"}
    number = 3
    shapes = [(512, 128, 1), (1024, 512, 1), (2048, 1024, 1), (512, 512, 32), (1024, 1024, 256)]
    methods = [
        "vigra.analysis.unique(data)",
        "numpy.unique(data)",
        "bincount_unique(data)",
        "pandas_unique(data)",
    ]
    combos = list(product(methods, shapes))

    with Progress() as progress:
        for method, shape in progress.track(combos, description="Measuring runtime"):
            t = timeit.timeit(method, setup=SETUP.format(shape=shape), number=number) / number
            results[(method, shape)] = (
                f"{method}\t{shape!s}\t{t}\t{py_version}\t{npy_version}\t{pd_version}\t{vigra_version}\t{host}\t{pf}"
            )

        for method, shape in progress.track(combos, description="Measuring memory footprint"):
            # Tracing starts before timeit executes the setup snippet, so the
            # figures also cover whatever the setup itself allocates.
            tracemalloc.start()
            try:
                timeit.timeit(method, setup=SETUP.format(shape=shape), number=number)
                # get_traced_memory() yields (current, peak) sizes in bytes.
                current, peak = tracemalloc.get_traced_memory()
            finally:
                # Never leave tracing switched on if the statement raises.
                tracemalloc.stop()
            results[(method, shape)] += f"\t{current}\t{peak}"

    with open("results-unique.csv", "a") as f:
        f.write("\n".join(results.values()))
        f.write("\n")
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()