-
Notifications
You must be signed in to change notification settings - Fork 93
/
Copy pathsetops.py
executable file
·158 lines (134 loc) · 4.87 KB
/
setops.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python3
import argparse
import time
import numpy as np
import arkouda as ak
OPS = ("intersect1d", "union1d", "setxor1d", "setdiff1d")
TYPES = (
"int64",
"uint64",
)
def time_ak_setops(N_per_locale, trials, dtype, seed):
print(">>> arkouda {} setops".format(dtype))
cfg = ak.get_config()
N = N_per_locale * cfg["numLocales"]
print("numLocales = {}, N = {:,}".format(cfg["numLocales"], N))
if dtype == "int64":
a = ak.randint(0, 2**32, N, seed=seed)
b = ak.randint(0, 2**32, N, seed=seed)
elif dtype == "uint64":
a = ak.randint(0, 2**32, N, seed=seed, dtype=ak.uint64)
b = ak.randint(0, 2**32, N, seed=seed, dtype=ak.uint64)
timings = {op: [] for op in OPS}
results = {}
for i in range(trials):
for op in timings.keys():
fxn = getattr(ak, op)
start = time.time()
r = fxn(a, b)
end = time.time()
timings[op].append(end - start)
results[op] = r
tavg = {op: sum(t) / trials for op, t in timings.items()}
for op, t in tavg.items():
print(" {} Average time = {:.4f} sec".format(op, t))
bytes_per_sec = (a.size * a.itemsize * 2) / t
print(" {} Average rate = {:.2f} GiB/sec".format(op, bytes_per_sec / 2**30))
def time_np_setops(N, trials, dtype, seed):
print(">>> numpy {} setops".format(dtype))
print("N = {:,}".format(N))
if seed is not None:
np.random.seed(seed)
if dtype == "int64":
a = np.random.randint(0, 2**32, N)
b = np.random.randint(0, 2**32, N)
elif dtype == "uint64":
a = np.random.randint(0, 2**32, N, "uint64")
b = np.random.randint(0, 2**32, N, "uint64")
timings = {op: [] for op in OPS}
results = {}
for i in range(trials):
for op in timings.keys():
fxn = getattr(np, op)
start = time.time()
r = fxn(a, b)
end = time.time()
timings[op].append(end - start)
results[op] = r
tavg = {op: sum(t) / trials for op, t in timings.items()}
for op, t in tavg.items():
print(" {} Average time = {:.4f} sec".format(op, t))
bytes_per_sec = (a.size * a.itemsize * 2) / t
print(" {} Average rate = {:.2f} GiB/sec".format(op, bytes_per_sec / 2**30))
def check_correctness(dtype, seed):
N = 10**4
if seed is not None:
np.random.seed(seed)
if dtype == "int64":
a = np.random.randint(0, 2**32, N)
b = np.random.randint(0, 2**32, N)
if dtype == "uint64":
a = np.random.randint(0, 2**32, N, dtype=ak.uint64)
b = np.random.randint(0, 2**32, N, dtype=ak.uint64)
for op in OPS:
npa = a
npb = b
aka = ak.array(a)
akb = ak.array(b)
fxn = getattr(np, op)
npr = fxn(npa, npb)
fxn = getattr(ak, op)
akr = fxn(aka, akb)
np.isclose(npr, akr.to_ndarray())
def create_parser():
parser = argparse.ArgumentParser(
description="Run the setops benchmarks: intersect1d, union1d, setdiff1d, setxor1d"
)
parser.add_argument("hostname", help="Hostname of arkouda server")
parser.add_argument("port", type=int, help="Port of arkouda server")
parser.add_argument(
"-n", "--size", type=int, default=10**8, help="Problem size: length of arrays A and B"
)
parser.add_argument(
"-t", "--trials", type=int, default=1, help="Number of times to run the benchmark"
)
parser.add_argument(
"-d", "--dtype", default="int64", help="Dtype of array ({})".format(", ".join(TYPES))
)
parser.add_argument(
"--numpy",
default=False,
action="store_true",
help="Run the same operation in NumPy to compare performance.",
)
parser.add_argument(
"--correctness-only",
default=False,
action="store_true",
help="Only check correctness, not performance.",
)
parser.add_argument(
"-s", "--seed", default=None, type=int, help="Value to initialize random number generator"
)
return parser
if __name__ == "__main__":
import sys
parser = create_parser()
args = parser.parse_args()
if args.dtype not in TYPES:
raise ValueError("Dtype must be {}, not {}".format("/".join(TYPES), args.dtype))
ak.verbose = False
ak.connect(args.hostname, args.port)
if args.correctness_only:
for dtype in TYPES:
check_correctness(dtype, args.seed)
sys.exit(0)
print("array size = {:,}".format(args.size))
print("number of trials = ", args.trials)
time_ak_setops(args.size, args.trials, args.dtype, args.seed)
if args.numpy:
time_np_setops(args.size, args.trials, args.dtype, args.seed)
print("Verifying agreement between arkouda and NumPy on small problem... ", end="")
check_correctness(args.dtype, args.seed)
print("CORRECT")
sys.exit(0)