Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[KUNLUNXIN] FlagGems operation test: update dockerfile, config for ops #717

Merged
merged 1 commit into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions operation/benchmarks/abs/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/abs/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
2 changes: 2 additions & 0 deletions operation/benchmarks/add/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/add/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
3 changes: 3 additions & 0 deletions operation/benchmarks/all/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Shape: [4096, 256]
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/all/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
14 changes: 11 additions & 3 deletions operation/benchmarks/all/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import yaml
import sys
import subprocess
import math

sys.path.append("..")
from drivers.utils import *
Expand Down Expand Up @@ -67,14 +68,21 @@ def main(config, case_config):


m = case_config.Melements
# default shape: M * 1024 * 1024
shape = m * 1024 * 1024

if config.vendor == 'kunlunxin':
# if `Shape` is specified in `case_config.yaml`, use it instead of the default
if case_config.__contains__('Shape') and case_config.Shape is not None:
shape = case_config.Shape

a = torch.arange(0, m * 1024 * 1024).to(0)
a = torch.arange(0, math.prod(shape)).to(0)
print(f'Shape for performance_test: {a.shape}')

latency_nowarm, latency_warm, cputime, kerneltime = do_test(
torch.all, (a, ), host_device_sync, config, case_config)

op2flops = lambda x: x * m * 1024 * 1024
op2flops = lambda x: x * math.prod(shape)

perf_result = cal_perf(cputime, kerneltime, op2flops,
config.spectflops)
Expand All @@ -99,4 +107,4 @@ def main(config, case_config):
print("Using flaggems")
else:
print("Using nativetorch")
main(config, case_config)
main(config, case_config)
3 changes: 3 additions & 0 deletions operation/benchmarks/amax/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Shape: [4096, 1]
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/amax/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
14 changes: 11 additions & 3 deletions operation/benchmarks/amax/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import yaml
import sys
import subprocess
import math

sys.path.append("..")
from drivers.utils import *
Expand Down Expand Up @@ -68,14 +69,21 @@ def main(config, case_config):

m = case_config.M
n = case_config.N
# default shape: (M*80, N*80)
shape = (m * 80, n * 80)

if config.vendor == 'kunlunxin':
# if `Shape` is specified in `case_config.yaml`, use it instead of the default
if case_config.__contains__('Shape') and case_config.Shape is not None:
shape = case_config.Shape

a = torch.randn(m * 80 , n * 80 , dtype=dtype[config.dataformat]).to(0)
a = torch.randn(shape, dtype=dtype[config.dataformat]).to(0)
print(f'Shape for performance_test: {a.shape}')

latency_nowarm, latency_warm, cputime, kerneltime = do_test(
torch.amax, (a, 1), host_device_sync, config, case_config)

op2flops = lambda x: x * m * 80 * n * 80
op2flops = lambda x: x * math.prod(shape)

perf_result = cal_perf(cputime, kerneltime, op2flops,
config.spectflops)
Expand All @@ -100,4 +108,4 @@ def main(config, case_config):
print("Using flaggems")
else:
print("Using nativetorch")
main(config, case_config)
main(config, case_config)
3 changes: 3 additions & 0 deletions operation/benchmarks/argmax/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Shape: [4096, 1]
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/argmax/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
14 changes: 11 additions & 3 deletions operation/benchmarks/argmax/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import yaml
import sys
import subprocess
import math

sys.path.append("..")
from drivers.utils import *
Expand Down Expand Up @@ -68,14 +69,21 @@ def main(config, case_config):

m = case_config.M
n = case_config.N
# default shape: (M * 80, N * 80)
shape = (m * 80, n * 80)

if config.vendor == 'kunlunxin':
# if `Shape` is specified in `case_config.yaml`, use it instead of the default
if case_config.__contains__('Shape') and case_config.Shape is not None:
shape = case_config.Shape

a = torch.randn(m * 80, n * 80, dtype=dtype[config.dataformat]).to(0)
a = torch.randn(shape, dtype=dtype[config.dataformat]).to(0)
print(f'Shape for performance_test: {a.shape}')

latency_nowarm, latency_warm, cputime, kerneltime = do_test(
torch.argmax, (a, 1), host_device_sync, config, case_config)

op2flops = lambda x: x * m * n * 80 * 80
op2flops = lambda x: x * math.prod(shape)

perf_result = cal_perf(cputime, kerneltime, op2flops,
config.spectflops)
Expand All @@ -100,4 +108,4 @@ def main(config, case_config):
print("Using flaggems")
else:
print("Using nativetorch")
main(config, case_config)
main(config, case_config)
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/bitwise_and/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/bitwise_not/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/bitwise_or/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
2 changes: 2 additions & 0 deletions operation/benchmarks/cos/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
6 changes: 6 additions & 0 deletions operation/benchmarks/cos/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda
export XPU_enable_reorder=1

echo "KUNLUNXIN ENV.SH end"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
bs: 4096
elements: 256
ITERS: 50
SPECTFLOPS: 9999
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda
export XPU_enable_reorder=1

echo "KUNLUNXIN ENV.SH end"
2 changes: 2 additions & 0 deletions operation/benchmarks/div/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/div/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
3 changes: 3 additions & 0 deletions operation/benchmarks/dropout/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Melements: 1
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/dropout/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
2 changes: 2 additions & 0 deletions operation/benchmarks/eq/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/eq/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
2 changes: 0 additions & 2 deletions operation/benchmarks/exp/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda
export TRITON_XPU_ARCH=3
export CUDART_DUMMY_REGISTER=1

echo "KUNLUNXIN ENV.SH end"
2 changes: 2 additions & 0 deletions operation/benchmarks/ge/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/ge/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
3 changes: 3 additions & 0 deletions operation/benchmarks/gelu/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Shape: [1024, 1024]
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/gelu/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
15 changes: 12 additions & 3 deletions operation/benchmarks/gelu/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import yaml
import sys
import subprocess
import math

sys.path.append("..")
from drivers.utils import *
Expand Down Expand Up @@ -66,14 +67,22 @@ def main(config, case_config):
set_ieee_float32(config.vendor)

m = case_config.Melements
f = torch.nn.GELU()
# default shape: (M, 1024, 1024)
shape = (m, 1024, 1024)

if config.vendor == 'kunlunxin':
# if `Shape` is specified in `case_config.yaml`, use it instead of the default
if case_config.__contains__('Shape') and case_config.Shape is not None:
shape = case_config.Shape

a = torch.randn(m, 1024, 1024, dtype=dtype[config.dataformat]).to(0)
a = torch.randn(shape, dtype=dtype[config.dataformat]).to(0)
print(f'Shape for performance_test: {a.shape}')

f = torch.nn.GELU()
latency_nowarm, latency_warm, cputime, kerneltime = do_test(
f, (a, ), host_device_sync, config, case_config) # op under test is torch.nn.GELU (the old note here referred to torch.sub)

op2flops = lambda x: x * 9 * m * 1024 * 1024
op2flops = lambda x: x * 9 * math.prod(shape)

perf_result = cal_perf(cputime, kerneltime, op2flops,
config.spectflops)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
bs: 1
channel: 3
hiddensize: 4
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/group_norm/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
2 changes: 2 additions & 0 deletions operation/benchmarks/gt/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/gt/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
2 changes: 2 additions & 0 deletions operation/benchmarks/isinf/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/isinf/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
2 changes: 2 additions & 0 deletions operation/benchmarks/isnan/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/isnan/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/layer_norm/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
2 changes: 2 additions & 0 deletions operation/benchmarks/le/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/le/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
2 changes: 0 additions & 2 deletions operation/benchmarks/log_softmax/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda
export TRITON_XPU_ARCH=3
export CUDART_DUMMY_REGISTER=1

echo "KUNLUNXIN ENV.SH end"
2 changes: 2 additions & 0 deletions operation/benchmarks/lt/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ITERS: 50
SPECTFLOPS: 9999
5 changes: 5 additions & 0 deletions operation/benchmarks/lt/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda

echo "KUNLUNXIN ENV.SH end"
3 changes: 3 additions & 0 deletions operation/benchmarks/max/kunlunxin/R300p/case_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Shape: [4096, 1]
ITERS: 50
SPECTFLOPS: 9999
6 changes: 6 additions & 0 deletions operation/benchmarks/max/kunlunxin/R300p/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
echo "KUNLUNXIN ENV.SH start"

source /root/miniconda/etc/profile.d/conda.sh && conda activate python38_torch201_cuda
export Triton_big_instcombine=1000

echo "KUNLUNXIN ENV.SH end"
Loading
Loading