Skip to content

Commit

Permalink
Add benchmarks for reductions, and plots for different archs
Browse files Browse the repository at this point in the history
  • Loading branch information
FrancescAlted committed Feb 5, 2025
1 parent 8d33de5 commit 7d9008a
Show file tree
Hide file tree
Showing 3 changed files with 314 additions and 0 deletions.
169 changes: 169 additions & 0 deletions bench/ndarray/jit-reduc-float64-plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Plots for the jit vs. numpy benchmarks on different array sizes and platforms.

import matplotlib.pyplot as plt
import plotly.graph_objects as go

plotly = True

sizes = [1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115, 120]
sizes_GB = [n * 1000 * n * 1000 * 8 * 2 / 2**30 for n in sizes]

intel = False
amd = True
m2linux = False

# Load the data
if amd:
title_ = "AMD Ryzen 9 9800X3D (64 GB RAM)"
title_ = "np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1)"

create_clevel0 = [ 0.0325, 0.2709, 1.0339, 4.0489, 9.0849, 12.4154, 16.7818, 25.5946, 47.5691, 35.9919, 45.4295, 93.3075, 66.6529 ]
compute_clevel0 = [ 0.0017, 0.0243, 0.0869, 0.3370, 0.7665, 1.0375, 1.3727, 1.7377, 2.1472, 2.6205, 3.0435, 18.5878, 28.0816 ]

create_clevel0_ooc = [ 0.0305, 0.3371, 1.3249, 5.0602, 11.0410, 16.3685, 22.2012, 27.1348, 31.7409, 38.0690, 47.4424, 56.9335, 62.6965 ]
compute_clevel0_ooc = [ 0.0019, 0.0243, 0.0885, 0.3434, 0.7761, 1.0724, 1.4082, 1.7373, 2.1827, 2.6124, 7.0940, 9.0734, 10.1089 ]

create_LZ4_l1 = [ 0.0304, 0.2582, 1.0298, 3.9502, 8.8945, 11.9267, 16.3965, 20.2368, 24.6837, 29.3425, 36.2631, 42.1709, 48.0605, 52.3962, 61.5175, 68.6328, 80.1160, 85.4322, 97.1122, 106.9973, 114.8584 ]
compute_LZ4_l1 = [ 0.0018, 0.0210, 0.0756, 0.3003, 0.6609, 0.8886, 1.1285, 1.4453, 1.7959, 2.1889, 2.6978, 3.1586, 3.4286, 3.9929, 4.4590, 5.3601, 5.6702, 6.4690, 6.9764, 7.8714, 8.6404 ]

create_LZ4_l1_ooc = [ 1.7980, 0.2617, 1.0480, 4.0809, 9.0720, 13.8294, 16.7269, 20.5108, 24.9465, 30.0428, 37.1903, 42.8075, 48.7775, 52.9890, 63.4071, 70.1766, 81.9747, 88.1830, 97.7921, 111.0611, 119.7673 ]
compute_LZ4_l1_ooc = [ 0.0019, 0.0214, 0.0795, 0.3060, 0.6985, 0.9195, 1.1766, 1.5213, 1.8845, 2.2972, 2.8044, 3.2587, 3.5898, 4.1524, 4.6293, 5.5485, 5.8715, 6.7386, 7.3019, 8.2307, 9.0145 ]

create_ZSTD_l1 = [ 0.0302, 0.2704, 1.0703, 4.1243, 9.2185, 12.5026, 17.0585, 20.8708, 25.5844, 31.0571, 37.7114, 42.8297, 50.2696, 54.5773, 63.6311, 73.0370, 84.0092, 89.0686, 100.3300, 108.8173, 119.1154 ]
compute_ZSTD_l1 = [ 0.0021, 0.0296, 0.1045, 0.3979, 0.8787, 1.3064, 1.7404, 2.1938, 2.6780, 3.3929, 3.8601, 4.3665, 5.0127, 5.7346, 6.1056, 7.9448, 8.2872, 9.4659, 9.2376, 10.4273, 11.6572 ]

create_ZSTD_l1_ooc = [ 0.6564, 0.2825, 1.0826, 4.1968, 9.5022, 13.4840, 17.5387, 21.5807, 26.0052, 31.3524, 38.5889, 44.1105, 49.8849, 55.5297, 64.6479, 72.7471, 84.6595, 90.4970, 99.9710, 111.6817, 120.8941 ]
compute_ZSTD_l1_ooc = [ 0.0022, 0.0300, 0.1066, 0.4099, 0.8974, 1.3218, 1.7679, 2.2154, 2.7007, 3.4267, 3.9255, 4.4597, 5.1155, 5.8251, 6.2064, 8.0141, 8.4316, 9.3195, 9.4570, 10.7034, 11.9192 ]

create_numpy = [ 0.0020, 0.0527, 0.2292, 0.9412, 2.1043, 2.8286, 3.7046, 4.7217, 5.8308, 7.0491 ]
compute_numpy = [ 0.0179, 0.2495, 0.9840, 3.9263, 8.8450, 12.0259, 16.3507, 40.1672, 155.1292, 302.5115 ]

create_numpy_jit = [ 0.0019, 0.0529, 0.2261, 0.9219, 2.0589, 2.8350, 3.7131, 18.4375, 26.5959, 34.5221, 33.7157, 49.6762, 63.1401 ]
compute_numpy_jit = [ 0.0035, 0.0180, 0.0622, 0.2307, 0.5196, 0.7095, 0.9251, 1.1981, 1.4729, 2.2007, 2.0953, 12.6746, 26.6424 ]

elif intel:
title_ = "Intel Core i9-13900K (32 GB RAM)"
create_clevel0 = [ 0.1810, 0.3511, 1.1511, 4.4575, 10.3164, 17.4344, 24.4274, 37.7116, 36.6179, 53.7264 ]
compute_clevel0 = [ 0.0045, 0.0133, 0.0506, 0.2086, 0.4603, 0.8689, 1.1458, 1.4150, 1.7656, 1.9475 ]

create_LZ4_l1 = [ 0.1834, 0.3457, 1.1234, 4.3301, 10.0406, 16.9509, 22.1617, 26.3818, 32.4472, 39.3830, 41.9484, 52.6316 ]
compute_LZ4_l1 = [ 0.0014, 0.0128, 0.0494, 0.1958, 0.4387, 0.8207, 1.0208, 1.2739, 1.5062, 1.7446, 2.1553, 2.4458 ]

create_ZSTD_l1 = [ 0.0362, 0.3734, 1.2009, 4.5362, 10.3706, 18.7104, 23.1148, 27.6572, 33.7207, 41.0326, 44.2322, 54.9467 ]
compute_ZSTD_l1 = [ 0.0028, 0.0193, 0.0799, 0.2226, 0.4983, 0.9072, 1.1624, 1.4375, 1.8162, 2.0918, 2.5067, 2.7760 ]

create_numpy = [ 0.0046, 0.1160, 0.4327, 1.7166, 3.8661, 7.5005, 14.1090, 18.6720, 64.2425, 108.4532, 529.1393, 962.1662 ]
compute_numpy = [ 0.0240, 0.1920, 0.7217, 2.9316, 6.5893, 14.0353, 47.4275, 99.0893, 187.8040, 202.3973, 460.5915, 551.2776 ]

elif m2linux:
title_ = "MacBook Air M2 (24 GB RAM)"
create_LZ4_l1 = [0.021555185317993164, 0.2862977981567383, 0.8696625232696533, 3.4979920387268066, 8.235799789428711, 13.708781242370605, 20.74394702911377, 33.23137378692627]
compute_LZ4_l1 = [0.0033464431762695312, 0.03627762794494629, 0.14009513854980468, 0.5438736915588379, 1.2493964672088622, 2.194223642349243, 3.5851136207580567, 5.067658472061157]

create_numpy = [0.0016903877258300781, 0.04910874366760254, 0.18264532089233398, 0.7124006748199463, 1.8350563049316406, 8.877023935317993, 101.2457287311554, 196.21723294258118]
compute_numpy = [0.003887462615966797, 0.026979732513427734, 0.11047358512878418, 0.4213367462158203, 0.9288184165954589, 1.6470709323883057, 5.5601390361785885, 9.401740503311157]

else:
title_ = "Mac Mini M4 Pro (24 GB RAM)"

create_numpy = [ 0.0024, 0.0686, 0.2857, 1.1800, 3.5006, 13.7092, 21.9491, 30.5237, 101.3553, 363.5005, 446.5876, 1509.1826 ]
compute_numpy = [ 0.0046, 0.1173, 0.5066, 2.0908, 5.6268, 13.0679, 16.7926, 20.9192, 25.5899, 34.5382, 46.0664, 1083.7046 ]

create_numpy_jit = [ 0.0024, 0.0686, 0.2857, 1.1800, 3.5006, 13.7092, 21.9491, 30.5237, 101.3553, 363.5005, 446.5876, 1509.1826 ]
compute_numpy_jit = [ ]

create_clevel0 = [ 0.0321, 0.6742, 1.6750, 5.1412, 12.4538, 20.6695, 28.0185, 34.7422, 41.3935, 50.4275, 59.5572, 71.3740 ]
compute_clevel0 = [ 0.0017, 0.0224, 0.0761, 0.2439, 0.5827, 0.9816, 1.2265, 6.8712, 9.6391, 11.2875, 13.1953, 15.4047 ]

create_LZ4_l1 = [ 0.0316, 0.6974, 1.6957, 5.2077, 11.8975, 20.2346, 26.6419, 32.7686, 38.2088, 47.5221, 55.7224, 66.6138 ]
compute_LZ4_l1 = [ 0.0019, 0.0234, 0.0763, 0.2421, 0.5533, 0.9384, 1.2476, 1.5299, 1.8564, 2.1836, 2.5633, 2.9245 ]

create_ZSTD_l1 = [ 0.0354, 0.7105, 1.7217, 5.2663, 12.7385, 20.3693, 28.1353, 33.0310, 40.3843, 50.2020, 58.0643, 69.5190 ]
compute_ZSTD_l1 = [ 0.0021, 0.0225, 0.0773, 0.2630, 0.8735, 1.0087, 1.9553, 1.5670, 3.1986, 3.3499, 4.0728, 4.6287 ]


# Plot the data. There will be 2 plots: one for create times and another for compute times
labels = ['lvl=0', 'LZ4 lvl=1', 'ZSTD lvl=1', 'NumPy', "numpy_jit"]
if plotly:
# Create the create times plot
fig_create = go.Figure()
fig_create.add_trace(
go.Scatter(x=sizes_GB, y=create_clevel0, mode='lines+markers', name=labels[0]))
fig_create.add_trace(
go.Scatter(x=sizes_GB, y=create_clevel0_ooc, mode='lines+markers', name=labels[0] + "(ooc)"))
fig_create.add_trace(
go.Scatter(x=sizes_GB, y=create_LZ4_l1, mode='lines+markers', name=labels[1]))
fig_create.add_trace(
go.Scatter(x=sizes_GB, y=create_LZ4_l1_ooc, mode='lines+markers', name=labels[1] + "(ooc)"))
fig_create.add_trace(
go.Scatter(x=sizes_GB, y=create_ZSTD_l1, mode='lines+markers', name=labels[2]))
fig_create.add_trace(
go.Scatter(x=sizes_GB, y=create_ZSTD_l1_ooc, mode='lines+markers', name=labels[2] + "(ooc)"))
fig_create.add_trace(
go.Scatter(x=sizes_GB, y=create_numpy_jit, mode='lines+markers', name=labels[3]))
fig_create.update_layout(title=f'Create operands times ({title_})', xaxis_title='Size (GB)', yaxis_title='Time (s)')

# Create the compute times plot
# Calculate the maximum y1 value
y1_max = max(max(compute_clevel0), max(compute_clevel0_ooc), max(compute_LZ4_l1), max(compute_LZ4_l1_ooc),
max(compute_ZSTD_l1), max(compute_ZSTD_l1_ooc), max(compute_numpy), max(compute_numpy_jit))

fig_compute = go.Figure()
fig_compute.add_trace(
go.Scatter(x=sizes_GB, y=compute_clevel0, mode='lines+markers', name=labels[0]))
fig_compute.add_trace(
go.Scatter(x=sizes_GB, y=compute_clevel0_ooc, mode='lines+markers', name=labels[0] + "(ooc)"))
fig_compute.add_trace(
go.Scatter(x=sizes_GB, y=compute_LZ4_l1, mode='lines+markers', name=labels[1]))
fig_compute.add_trace(
go.Scatter(x=sizes_GB, y=compute_LZ4_l1_ooc, mode='lines+markers', name=labels[1] + "(ooc)"))
fig_compute.add_trace(
go.Scatter(x=sizes_GB, y=compute_ZSTD_l1, mode='lines+markers', name=labels[2]))
fig_compute.add_trace(
go.Scatter(x=sizes_GB, y=compute_ZSTD_l1_ooc, mode='lines+markers', name=labels[2] + "(ooc)"))
fig_compute.add_trace(go.Scatter(x=sizes_GB, y=compute_numpy, mode='lines+markers', name=labels[3]))
fig_compute.add_trace(go.Scatter(x=sizes_GB, y=compute_numpy_jit, mode='lines+markers', name=labels[4]))
fig_compute.update_layout(title=f'Compute times ({title_})', xaxis_title='Size (GB)', yaxis_title='Time (s)')

# Add a vertical line at 64 GB
y1_max = 35
fig_compute.add_shape(
type="line", x0=64, y0=0, x1=64, y1=y1_max,
line=dict(color="Gray", width=2, dash="dot"),
)
if amd:
fig_compute.add_annotation(x=64, y=y1_max * .9, text="64 GB", showarrow=True, arrowhead=2, ax=40, ay=0)

# Show the plots
fig_create.show()
fig_compute.show()
else:
plt.figure()
plt.plot(sizes_GB, create_clevel0, "o-", label=labels[0])
plt.plot(sizes_GB, create_LZ4_l1, "o-", label=labels[1])
plt.plot(sizes_GB, create_ZSTD_l1, "o-", label=labels[2])
plt.plot(sizes_GB, create_numpy_jit, "o-", label=labels[3])
plt.xlabel("Size (GB)")
plt.ylabel("Time (s)")
plt.title(f"Create times ({title_})")
plt.legend()
# Now, the compute times
plt.figure()
plt.plot(sizes_GB, compute_clevel0, "o-", label=labels[0])
plt.plot(sizes_GB, compute_LZ4_l1, "o-", label=labels[1])
plt.plot(sizes_GB, compute_ZSTD_l1, "o-", label=labels[2])
plt.plot(sizes_GB, compute_numpy, "o-", label=labels[3])
plt.plot(sizes_GB, compute_numpy_jit, "o-", label=labels[4])
plt.xlabel("Size (GB)")
plt.ylabel("Time (s)")
plt.title(f"Compute times ({title_})")
plt.legend()
plt.show()
140 changes: 140 additions & 0 deletions bench/ndarray/jit-reduc-sizes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <[email protected]>
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################

# Compute reductions for different array sizes, using the jit decorator
# and different operands (NumPy and NDArray). Different compression
# levels and codecs can be selected. For in-memory testing, set ooc to False.

from time import time
import blosc2
import numpy as np
import sys

niter = 5
#dtype = np.dtype("float32")
dtype = np.dtype("float64")
clevel = 1
numpy = False
numpy_jit = False
ooc = True # for in-memory testing, set to False

cparams = cparams_out = None

# For 64 GB RAM
# sizes_numpy = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55)
# sizes_numpy_jit = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70)
# sizes_clevel0 = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70)
# size_list = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110) # limit clevel>=1 float64

# For 24 GB RAM
sizes_numpy = (1, 5, 10, 20, 30, 35, 40) # limit numpy float64
sizes_numpy_jit = (1, 5, 10, 20, 30, 35, 40, 45) # limit numpy float64
sizes_clevel0 = (1, 5, 10, 20, 30, 35, 40, 45) # limit clevel==0 float64
size_list = (1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90) # limit clevel>=1 float64

codec = "LZ4" # default codec
if len(sys.argv) > 2:
codec = sys.argv[2]
if len(sys.argv) > 1:
try:
clevel = int(sys.argv[1])
except ValueError:
if sys.argv[1] == "numpy":
numpy = True
elif sys.argv[1] == "numpy_jit":
numpy = True
numpy_jit = True
else:
raise ValueError("Invalid argument")

apath = bpath = None
if numpy:
print("Using NumPy arrays as operands")
else:
print("Using NDArray arrays as operands")
cparams = cparams_out = blosc2.CParams(clevel=clevel, codec=blosc2.Codec[codec])
# cparams_out = blosc2.CParams(clevel=clevel, codec=blosc2.Codec.LZ4)
print("Using cparams: ", cparams)
if ooc:
apath = "a.b2nd"
bpath = "b.b2nd"


# The reductions to compute
def compute_reduction_numpy(a, b, c):
return np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1)

@blosc2.jit
def compute_reduction(a, b, c):
return np.sum(((a ** 3 + np.sin(a * 2)) < c) & (b > 0), axis=1)


create_times = []
compute_times = []
for n in size_list:
if clevel == 0 and n not in sizes_clevel0:
break
if numpy_jit and n not in sizes_numpy_jit:
break
if numpy and not numpy_jit and n not in sizes_numpy:
break
N = n * 1000
print(f"\nN = {n}000, {dtype=}, size={N ** 2 * 2 * dtype.itemsize / 2**30 }")
chunks = (100, N)
blocks = (1, N)
chunks, blocks = None, None # automatic chunk and block sizes
# Lossy compression
#filters = [blosc2.Filter.TRUNC_PREC, blosc2.Filter.SHUFFLE]
#filters_meta = [8, 0] # keep 8 bits of precision in mantissa
#cparams = blosc2.CParams(clevel=1, codec=blosc2.Codec.LZ4, filters=filters, filters_meta=filters_meta)

# Create some data operands
t0 = time()
if numpy:
a = np.linspace(0, 1, N * N, dtype=dtype).reshape(N, N)
b = np.linspace(1, 2, N * N, dtype=dtype).reshape(N, N)
#b = a + 1
c = np.linspace(-10, 10, N, dtype=dtype)
else:
a = blosc2.linspace(0, 1, N * N, dtype=dtype, shape=(N, N), cparams=cparams, urlpath=apath, mode="w")
print("a.chunks, a.blocks, a.schunk.cratio: ", a.chunks, a.blocks, a.schunk.cratio)
b = blosc2.linspace(1, 2, N * N, dtype=dtype, shape=(N, N), cparams=cparams, urlpath=bpath, mode="w")
#b = (a + 1).compute(cparams=cparams, chunks=chunks, blocks=blocks)
#print(b.chunks, b.blocks, b.schunk.cratio, b.cparams)
c = blosc2.linspace(-10, 10, N, dtype=dtype, cparams=cparams) # broadcasting is supported
#c = blosc2.linspace(-10, 10, N * N, dtype=dtype, shape=(N, N), cparams=cparams)
t1 = time() - t0
print(f"Time to create data: {t1:.4f}")
create_times.append(t1)

if numpy:
if numpy_jit:
out = compute_reduction(a, b, c)
t0 = time()
for i in range(niter):
out = compute_reduction(a, b, c)
t1 = (time() - t0) / niter
print(f"Time to compute with numpy_jit and NumPy operands: {t1:.4f}")
else:
t0 = time()
nout = compute_reduction_numpy(a, b, c)
t1 = time() - t0
print(f"Time to compute with NumPy engine: {t1:.4f}")
else:
out = compute_reduction(a, b, c)
t0 = time()
for i in range(niter):
out = compute_reduction(a, b, c)
t1 = (time() - t0) / niter
print(f"Time to compute with numpy_jit and {clevel=}: {t1:.4f}")
compute_times.append(t1)
del a, b, c

print("\nCreate times: [", ", ".join([f"{t:.4f}" for t in create_times]), "]")
print("Compute times: [", ", ".join([f"{t:.4f}" for t in compute_times]), "]")
print("End of run!\n\n")
5 changes: 5 additions & 0 deletions bench/ndarray/run-jit-reduc-sizes.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py numpy
/usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py numpy_jit
/usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py 0
/usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py 1 LZ4
/usr/bin/time -v python bench/ndarray/jit-reduc-sizes.py 1 ZSTD

0 comments on commit 7d9008a

Please sign in to comment.