CRISP Integration #338

Draft · wants to merge 37 commits into base: dev
37 commits
6b05a3a
combined adjustments
JRPan Dec 10, 2021
31dc13c
Create main.yml
JRPan Dec 10, 2021
15e1d75
tmp fix for f.seek error
JRPan Dec 10, 2021
1dfc835
save plots to new folder each time
JRPan Dec 10, 2021
2fe3f00
update jenkins
JRPan Dec 10, 2021
f4ac3b7
Merge branch 'mydev' of github.com:JRPan/accel-sim-framework into mydev
JRPan Dec 10, 2021
0f173e3
Apply Github CI to all branches
JRPan Dec 10, 2021
2260456
Merge pull request #114 from accel-sim/dev
JRPan Apr 5, 2022
ffd67ef
Add Orin config
JRPan Aug 4, 2022
12f2cad
Support Orin
JRPan Aug 5, 2022
b840308
Support TEX
JRPan Nov 7, 2022
16852d8
Graphics - perf memcpy
JRPan Nov 8, 2022
760af88
Merge remote-tracking branch 'upstream/dev' into mydev
JRPan Nov 9, 2022
6dcfe84
Merge branch 'accel-sim:release' into mydev
JRPan Jan 11, 2023
7b6e11a
multi-kernel stats
JRPan Jan 16, 2023
6946a7a
update stats expansion function name
JRPan Jan 20, 2023
bc25189
limited concurrent + multi-kernel stats update
JRPan Feb 6, 2023
6dbdb52
Fix L2_BW regex
JRPan Feb 7, 2023
27039c9
Merge branch 'dev' into dev-vulkan
JRPan Feb 12, 2023
62d5b72
Run compute until all grpahics ends
JRPan Mar 22, 2023
72cbfb5
move TEX to L1D, run both G/C to finish, new app, new config
JRPan Apr 4, 2023
96a5193
naive dynamic FG
JRPan Apr 12, 2023
5b5ded9
dynamic concurrent
JRPan Apr 26, 2023
528e3e4
dumb config
JRPan May 10, 2023
ac58b3c
Setup git submodule
JRPan May 20, 2023
beaa35e
adding the best scheduler
JRPan Oct 25, 2023
7087768
Lets make some good plots
JRPan Nov 7, 2023
3dd88dc
Update plot notebook
JRPan Nov 22, 2023
133cc23
update
JRPan Dec 15, 2023
8dc1241
Added: allow TB to exceed partition
JRPan Mar 7, 2024
f0c01e7
artifacts
JRPan Jul 23, 2024
5429471
Merge branch 'dev-vulkan' into crisp
JRPan Sep 26, 2024
d9d508b
merge clean
JRPan Sep 30, 2024
528f849
clang-format
JRPan Oct 7, 2024
d37946a
Merge remote-tracking branch 'upstream/dev' into crisp
JRPan Oct 9, 2024
a5c3e49
sync: compute runs
JRPan Oct 21, 2024
a23f581
remove is_graphics
JRPan Oct 28, 2024
15 changes: 15 additions & 0 deletions collect.sh
@@ -0,0 +1,15 @@
#! /bin/bash

# configs="-C ORIN-SASS,ORIN-SASS-concurrent,ORIN-SASS-concurrent-7GR,ORIN-SASS-concurrent-7GR-MIG,ORIN-SASS-concurrent-7GR-finegrain,ORIN-SASS-concurrent-8GR,ORIN-SASS-concurrent-8GR-MIG,ORIN-SASS-concurrent-8GR-finegrain"
name="-N run-20230403-1728"

trap "trap - SIGTERM && kill -- -$$" SIGINT SIGTERM EXIT

./util/job_launching/get_stats.py -k -R -B vulkan:pbrtexture_2k $configs $name-pbrtexture_2k > pbrtexture_2k.csv &
./util/job_launching/get_stats.py -k -R -B vulkan:pbrtexture_4k $configs $name-pbrtexture_4k > pbrtexture_4k.csv &
./util/job_launching/get_stats.py -k -R -B vulkan:instancing_2k $configs $name-instancing_2k > instancing_2k.csv &
./util/job_launching/get_stats.py -k -R -B vulkan:instancing_4k $configs $name-instancing_4k > instancing_4k.csv &
./util/job_launching/get_stats.py -k -R -B vulkan:render_passes_2k $configs $name-render_passes_2k > render_passes_2k.csv &
./util/job_launching/get_stats.py -k -R -B vulkan:render_passes_4k $configs $name-render_passes_4k > render_passes_4k.csv &

wait < <(jobs -p)
3 changes: 1 addition & 2 deletions gpu-simulator/.gitignore
@@ -1,3 +1,2 @@
bin/
build/
gpgpu-sim/
build/
13 changes: 6 additions & 7 deletions gpu-simulator/ISA_Def/ampere_opcode.h
@@ -173,13 +173,12 @@ static const std::unordered_map<std::string, OpcodeChar> Ampere_OpcodeMap = {
{"VOTEU", OpcodeChar(OP_VOTEU, SPECIALIZED_UNIT_4_OP)},

// Texture Instructions
// For now, we ignore texture loads, consider it as ALU_OP
{"TEX", OpcodeChar(OP_TEX, SPECIALIZED_UNIT_2_OP)},
{"TLD", OpcodeChar(OP_TLD, SPECIALIZED_UNIT_2_OP)},
{"TLD4", OpcodeChar(OP_TLD4, SPECIALIZED_UNIT_2_OP)},
{"TMML", OpcodeChar(OP_TMML, SPECIALIZED_UNIT_2_OP)},
{"TXD", OpcodeChar(OP_TXD, SPECIALIZED_UNIT_2_OP)},
{"TXQ", OpcodeChar(OP_TXQ, SPECIALIZED_UNIT_2_OP)},
{"TEX", OpcodeChar(OP_TEX, LOAD_OP)},
{"TLD", OpcodeChar(OP_TLD, LOAD_OP)},
{"TLD4", OpcodeChar(OP_TLD4, LOAD_OP)},
{"TMML", OpcodeChar(OP_TMML, LOAD_OP)},
{"TXD", OpcodeChar(OP_TXD, LOAD_OP)},
{"TXQ", OpcodeChar(OP_TXQ, LOAD_OP)},

// Surface Instructions //
{"SUATOM", OpcodeChar(OP_SUATOM, ALU_OP)},
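The hunk above reclassifies the texture opcodes (TEX, TLD, TLD4, TMML, TXD, TXQ) from SPECIALIZED_UNIT_2_OP to LOAD_OP, which lines up with the later "move TEX to L1D" commit: texture fetches are treated as ordinary loads that flow through the memory pipeline rather than a dedicated specialized unit. The sketch below illustrates the kind of issue-routing decision such a category change affects; the enum values, struct layout, and issue_target() function are assumptions for illustration only, not the simulator's actual dispatch code.

enum op_type { ALU_OP, LOAD_OP, SPECIALIZED_UNIT_2_OP };

struct OpcodeChar {
  int opcode;
  op_type category;
  OpcodeChar(int op, op_type cat) : opcode(op), category(cat) {}
};

// Hypothetical issue routing keyed on the category assigned in the opcode map.
const char *issue_target(const OpcodeChar &op) {
  switch (op.category) {
    case LOAD_OP:
      return "LDST unit: texture fetch goes through the L1D path";
    case SPECIALIZED_UNIT_2_OP:
      return "specialized unit 2: separate texture pipeline";
    default:
      return "SP/ALU pipeline";
  }
}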
202 changes: 184 additions & 18 deletions gpu-simulator/accel-sim.cc
Expand Up @@ -2,7 +2,7 @@
#include "accelsim_version.h"

accel_sim_framework::accel_sim_framework(std::string config_file,
std::string trace_file) {
std::string trace_file) {
std::cout << "Accel-Sim [build " << g_accelsim_version << "]";
m_gpgpu_context = new gpgpu_context();

Expand Down Expand Up @@ -45,22 +45,69 @@ void accel_sim_framework::simulation_loop() {
// while loop till the end of the end kernel execution
// prints stats

if (finished_graphics == tracer.graphics_count) {
printf("No graphics kernel parsed\n");
printf("STEP1 - rendering done at %llu\n", m_gpgpu_sim->gpu_tot_sim_cycle);
graphics_done = true;
}
if (finished_computes == tracer.compute_count) {
printf("No compute kernel parsed\n");
printf("STEP1 - computes done at %llu\n", m_gpgpu_sim->gpu_tot_sim_cycle);
m_gpgpu_sim->gpu_compute_end_cycle = m_gpgpu_sim->gpu_tot_sim_cycle;
computes_done = true;
}

if (m_gpgpu_sim->getShaderCoreConfig()->gpgpu_concurrent_kernel_sm) {
if (m_gpgpu_sim->getShaderCoreConfig()->gpgpu_concurrent_finegrain) {
m_gpgpu_sim->concurrent_mode = m_gpgpu_sim->FINEGRAIN;
m_gpgpu_sim->concurrent_granularity = 6;
m_gpgpu_sim->dynamic_sm_count =
m_gpgpu_sim->get_config().dynamic_sm_count;
printf("defualt dynamic ratio %d\n", m_gpgpu_sim->dynamic_sm_count);
} else {
m_gpgpu_sim->concurrent_mode = m_gpgpu_sim->MPS;
m_gpgpu_sim->concurrent_granularity =
m_gpgpu_sim->get_config().num_shader();
m_gpgpu_sim->dynamic_sm_count = m_gpgpu_sim->get_config().mps_sm_count;
}
}

if (m_gpgpu_sim->get_config().gpgpu_slicer) {
m_gpgpu_sim->concurrent_granularity =
m_gpgpu_sim->get_config().num_shader();
m_gpgpu_sim->dynamic_sm_count = m_gpgpu_sim->get_config().num_shader() / 2;
}

while (commandlist_index < commandlist.size() || !kernels_info.empty()) {
parse_commandlist();

// Launch all kernels within window that are on a stream that isn't already
// running
for (auto k : kernels_info) {
bool is_graphics = m_gpgpu_sim->is_graphics(k->get_streamID());
bool stream_busy = false;
for (auto s : busy_streams) {
if (s == k->get_cuda_stream_id()) stream_busy = true;
}
if (!stream_busy && m_gpgpu_sim->can_start_kernel() &&
!k->was_launched()) {
if (launched_mesa ==
(m_gpgpu_sim->get_config().get_max_concurrent_kernel() * 3 /
4) &&
is_graphics) {
continue;
}
std::cout << "launching kernel name: " << k->get_name()
<< " uid: " << k->get_uid()
<< " cuda_stream_id: " << k->get_cuda_stream_id()
<< std::endl;
if (is_graphics) {
// graphics
m_gpgpu_sim->cipc = 0;
launched_mesa++;
} else {
m_gpgpu_sim->gipc = 0;
}
m_gpgpu_sim->launch(k);
k->set_launched();
busy_streams.push_back(k->get_cuda_stream_id());
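In the setup above, fine-grained mode uses a granularity of 6 slices with the config's dynamic_sm_count, MPS mode uses one slice per SM (concurrent_granularity = num_shader()) with mps_sm_count, and the slicer path starts from an even split. A rough sketch of how such a (granularity, count) pair could translate into an SM partition between graphics and compute; the formula and names below are assumptions for illustration and are not taken from this diff.

struct sm_partition {
  unsigned graphics_sms;
  unsigned compute_sms;
};

// Hypothetical: treat dynamic_sm_count as "slices out of concurrent_granularity
// owned by one workload" and convert it to whole SM counts.
sm_partition split_sms(unsigned num_shader, unsigned concurrent_granularity,
                       unsigned dynamic_sm_count) {
  unsigned graphics = num_shader * dynamic_sm_count / concurrent_granularity;
  return {graphics, num_shader - graphics};
}

// Example: 16 SMs, granularity 6, dynamic_sm_count 2 -> 5 graphics SMs, 11 compute SMs.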
@@ -86,28 +133,115 @@ void accel_sim_framework::simulation_loop() {
fflush(stdout);
break;
}

if (finished_graphics == tracer.graphics_count) {
printf("All graphics kernels finished one iteration\n");
printf("STEP1 - rendering done at %llu\n",
m_gpgpu_sim->gpu_tot_sim_cycle);
graphics_done = true;
}
if (finished_computes == tracer.compute_count && !computes_done) {
printf("All compute kernels finished one iteration\n");
printf("STEP1 - computes done at %llu\n", m_gpgpu_sim->gpu_tot_sim_cycle);
m_gpgpu_sim->gpu_compute_end_cycle = m_gpgpu_sim->gpu_tot_sim_cycle;
computes_done = true;
}
if (graphics_done && computes_done) {
printf(
"GPGPU-Sim: ** break due to finishing all kernels one iteration "
"**\n");
break;
}

if (finished_graphics == tracer.graphics_count &&
tracer.graphics_count > 0 && tracer.compute_count > 0 &&
m_gpgpu_sim->getShaderCoreConfig()->gpgpu_concurrent_kernel_sm &&
!computes_done) {
for (auto cmd : graphics_commands) {
commandlist.push_back(cmd);
}
finished_graphics = 0;
graphics_commands.clear();

printf("relaunching graphics kernels\n");
}
if (finished_computes == tracer.compute_count &&
tracer.graphics_count > 0 && tracer.compute_count > 0 &&
m_gpgpu_sim->getShaderCoreConfig()->gpgpu_concurrent_kernel_sm &&
!graphics_done) {
for (auto cmd : compute_commands) {
commandlist.push_back(cmd);
}
finished_computes = 0;
compute_commands.clear();
printf("relaunching compute kernels\n");
}
}
}

void accel_sim_framework::parse_commandlist() {
// gulp up as many commands as possible - either cpu_gpu_mem_copy
// or kernel_launch - until the vector "kernels_info" has reached
// the window_size or we have read every command from commandlist
while (kernels_info.size() < window_size && commandlist_index < commandlist.size()) {
while (kernels_info.size() < window_size &&
commandlist_index < commandlist.size()) {
trace_kernel_info_t *kernel_info = NULL;
if (commandlist[commandlist_index].m_type == command_type::cpu_gpu_mem_copy) {
if (commandlist[commandlist_index].m_type ==
command_type::cpu_gpu_mem_copy) {
size_t addre, Bcount;
tracer.parse_memcpy_info(commandlist[commandlist_index].command_string, addre, Bcount);
std::cout << "launching memcpy command : "
<< commandlist[commandlist_index].command_string << std::endl;
m_gpgpu_sim->perf_memcpy_to_gpu(addre, Bcount);
size_t per_CTA = -1;
tracer.parse_memcpy_info(commandlist[commandlist_index].command_string,
addre, Bcount, per_CTA);
if (commandlist[commandlist_index].command_string.find("MemcpyVulkan") ==
std::string::npos) {
// normal memcpy
std::cout << "launching memcpy command : "
<< commandlist[commandlist_index].command_string << std::endl;
m_gpgpu_sim->perf_memcpy_to_gpu(addre, Bcount, (uint64_t)-1);
// -1: does not belong to any stream
} else {
assert(per_CTA != (unsigned)-1);
kernel_vb_addr.push_back(addre);
kernel_vb_size.push_back(Bcount);
kernel_per_CTA.push_back(per_CTA);
graphics_commands.push_back(commandlist[commandlist_index]);
}
commandlist_index++;
} else if (commandlist[commandlist_index].m_type == command_type::kernel_launch) {
} else if (commandlist[commandlist_index].m_type ==
command_type::kernel_launch) {
// Read trace header info for window_size number of kernels
kernel_trace_t *kernel_trace_info =
tracer.parse_kernel_info(commandlist[commandlist_index].command_string);
kernel_trace_t *kernel_trace_info = tracer.parse_kernel_info(
commandlist[commandlist_index].command_string);
if (kernel_trace_info->kernel_name.find("VERTEX") != std::string::npos) {
m_gpgpu_sim->set_graphics(graphics_stream_id);
kernel_trace_info->cuda_stream_id = graphics_stream_id;
last_grpahics_stream_id = graphics_stream_id;
graphics_stream_id++;
} else if (kernel_trace_info->kernel_name.find("FRAG") !=
std::string::npos) {
kernel_trace_info->cuda_stream_id = last_grpahics_stream_id;
}
kernel_info = create_kernel_info(kernel_trace_info, m_gpgpu_context,
&tconfig, &tracer);

if (m_gpgpu_sim->is_graphics(kernel_info->get_streamID())) {
graphics_commands.push_back(commandlist[commandlist_index]);
unsigned kernel_id = kernel_info->get_uid();

// save kernel info
m_gpgpu_sim->vb_addr[kernel_id] = kernel_vb_addr;
m_gpgpu_sim->vb_size[kernel_id] = kernel_vb_size;
m_gpgpu_sim->vb_size_per_cta[kernel_id] = kernel_per_CTA;
// clear buffers for next kernel
kernel_vb_addr.clear();
kernel_vb_size.clear();
kernel_per_CTA.clear();
} else {
assert(kernel_trace_info->cuda_stream_id < 0xDEADBEEF ||
kernel_trace_info->cuda_stream_id > 0XDEAFBEEF + 1024);
compute_commands.push_back(commandlist[commandlist_index]);
}

kernels_info.push_back(kernel_info);
std::cout << "Header info loaded for kernel command : "
<< commandlist[commandlist_index].command_string << std::endl;
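The VERTEX/FRAG handling above encodes a stream-ID convention: each VERTEX kernel is assigned a fresh graphics stream ID counted up from 0xDEADBEEF (and remembered so the following FRAG kernel reuses it), while the assert on the compute path requires compute stream IDs to stay outside that reserved range. Below is a minimal restatement of that convention, assuming a window of 1024 IDs starting at 0xDEADBEEF; the actual is_graphics() implementation is not part of this diff.

#include <cstdint>

constexpr uint64_t kGraphicsStreamBase = 0xDEADBEEF;   // first graphics stream ID
constexpr uint64_t kGraphicsStreamLimit = kGraphicsStreamBase + 1024;

// Hypothetical check mirroring the reserved-range assert used for compute kernels.
bool stream_is_graphics(uint64_t stream_id) {
  return stream_id >= kGraphicsStreamBase && stream_id < kGraphicsStreamLimit;
}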
@@ -121,28 +255,61 @@ void accel_sim_framework::parse_commandlist() {

void accel_sim_framework::cleanup(unsigned finished_kernel) {
trace_kernel_info_t *k = NULL;
unsigned long long finished_kernel_cuda_stream_id = -1;
uint64_t finished_kernel_cuda_stream_id = -1;
unsigned finishd_kernel_uid = 0;
for (unsigned j = 0; j < kernels_info.size(); j++) {
k = kernels_info.at(j);
if (k->get_uid() == finished_kernel ||
m_gpgpu_sim->cycle_insn_cta_max_hit() || !m_gpgpu_sim->active()) {
for (unsigned int l = 0; l < busy_streams.size(); l++) {
if (busy_streams.at(l) == k->get_cuda_stream_id()) {
finished_kernel_cuda_stream_id = k->get_cuda_stream_id();
finishd_kernel_uid = k->get_uid();
busy_streams.erase(busy_streams.begin() + l);
break;
}
}
tracer.kernel_finalizer(k->get_trace_info());
delete k->entry();
delete k;
// delete k->entry(); // erased somewhere else
// delete k;
if (m_gpgpu_sim->getShaderCoreConfig()->gpgpu_concurrent_kernel_sm) {
if (m_gpgpu_sim->concurrent_mode == m_gpgpu_sim->FINEGRAIN) {
m_gpgpu_sim->dynamic_sm_count =
m_gpgpu_sim->get_config().dynamic_sm_count;
} else {
m_gpgpu_sim->dynamic_sm_count =
m_gpgpu_sim->get_config().mps_sm_count;
}
}
if (m_gpgpu_sim->is_graphics(k->get_streamID())) {
finished_graphics++;
launched_mesa--;
} else {
finished_computes++;

if (m_gpgpu_sim->get_config().gpgpu_slicer) {
m_gpgpu_sim->slicer_sampled = false;
for (unsigned cluster = 0;
cluster < m_gpgpu_sim->getShaderCoreConfig()->n_simt_clusters;
cluster++) {
assert(
m_gpgpu_sim->getShaderCoreConfig()->n_simt_cores_per_cluster ==
1);
m_gpgpu_sim->getSIMTCluster(cluster)->get_core(0)->shader_inst = 0;
}
m_gpgpu_sim->dynamic_sm_count =
m_gpgpu_sim->get_config().dynamic_sm_count;
} else {
}
}

kernels_info.erase(kernels_info.begin() + j);
if (!m_gpgpu_sim->cycle_insn_cta_max_hit() && m_gpgpu_sim->active())
break;
}
}
assert(k);
m_gpgpu_sim->print_stats(finished_kernel_cuda_stream_id);
m_gpgpu_sim->print_stats(finished_kernel_cuda_stream_id, finishd_kernel_uid);
}

unsigned accel_sim_framework::simulate() {
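Taken together with the relaunch logic in simulation_loop(), the finished_graphics/finished_computes bookkeeping in cleanup() implements a run-until-both-sides-finish-once policy: whichever workload drains first is re-queued so the other keeps running under realistic contention, and simulation stops only when both have completed at least one full iteration. A compact sketch of that control flow, with names simplified and the guards on concurrent mode and non-empty workloads omitted; this illustrates the policy, not the actual loop.

// Hypothetical condensation of the relaunch policy used by simulation_loop().
struct workload {
  unsigned total;     // kernels in one full iteration
  unsigned finished;  // kernels finished in the current iteration
  bool done;          // has completed at least one full iteration
};

// Returns true when simulation should stop.
bool step_relaunch_policy(workload &graphics, workload &compute) {
  if (graphics.finished == graphics.total) graphics.done = true;
  if (compute.finished == compute.total) compute.done = true;
  if (graphics.done && compute.done) return true;  // both finished once
  // Re-queue whichever side drained first so the slower side never runs alone.
  if (graphics.finished == graphics.total && !compute.done) graphics.finished = 0;
  if (compute.finished == compute.total && !graphics.done) compute.finished = 0;
  return false;
}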
@@ -169,10 +336,9 @@ unsigned accel_sim_framework::simulate() {
return finished_kernel_uid;
}

trace_kernel_info_t *accel_sim_framework::create_kernel_info(kernel_trace_t *kernel_trace_info,
gpgpu_context *m_gpgpu_context,
trace_config *config,
trace_parser *parser) {
trace_kernel_info_t *accel_sim_framework::create_kernel_info(
kernel_trace_t *kernel_trace_info, gpgpu_context *m_gpgpu_context,
trace_config *config, trace_parser *parser) {
gpgpu_ptx_sim_info info;
info.smem = kernel_trace_info->shmem;
info.regs = kernel_trace_info->nregs;
27 changes: 23 additions & 4 deletions gpu-simulator/accel-sim.h
@@ -41,6 +41,13 @@ class accel_sim_framework {
commandlist = tracer.parse_commandlist_file();

kernels_info.reserve(window_size);
graphics_stream_id = 0xDEADBEEF;
last_grpahics_stream_id = -1;
launched_mesa = 0;
computes_done = false;
graphics_done = false;
finished_computes = 0;
finished_graphics = 0;
}
void simulation_loop();
void parse_commandlist();
@@ -51,9 +58,8 @@
trace_config *config,
trace_parser *parser);
gpgpu_sim *gpgpu_trace_sim_init_perf_model(int argc, const char *argv[],
gpgpu_context *m_gpgpu_context,
trace_config *m_config);

gpgpu_context *m_gpgpu_context,
trace_config *m_config);

private:
gpgpu_context *m_gpgpu_context;
@@ -66,9 +72,22 @@
bool sim_cycles;
unsigned window_size;
unsigned commandlist_index;
unsigned long graphics_stream_id;
unsigned last_grpahics_stream_id;
unsigned launched_mesa;
unsigned finished_computes;
unsigned finished_graphics;
bool computes_done;
bool graphics_done;

std::vector<unsigned long long> busy_streams;
std::vector<uint64_t> busy_streams;
std::vector<trace_kernel_info_t *> kernels_info;
std::vector<trace_command> commandlist;

std::vector<trace_command> compute_commands;
std::vector<trace_command> graphics_commands;

std::vector<unsigned long> kernel_vb_addr;
std::vector<unsigned long> kernel_vb_size;
std::vector<unsigned long> kernel_per_CTA;
};
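The three vectors added at the end (kernel_vb_addr, kernel_vb_size, kernel_per_CTA) stage Vulkan vertex-buffer memcpys until the next graphics kernel header is parsed, at which point parse_commandlist() copies them into the simulator's per-kernel containers (vb_addr, vb_size, vb_size_per_cta) indexed by kernel uid and clears the staging buffers. Below is a simplified, self-contained sketch of that hand-off pattern; the types and member names are condensed and do not match the simulator's actual interface.

#include <cstdint>
#include <map>
#include <vector>

// Hypothetical condensed version of the staging-to-kernel hand-off.
struct vb_staging {
  std::vector<uint64_t> addr, size, per_cta;

  // Called once per MemcpyVulkan command.
  void stage(uint64_t a, uint64_t bytes, uint64_t bytes_per_cta) {
    addr.push_back(a);
    size.push_back(bytes);
    per_cta.push_back(bytes_per_cta);
  }

  // Called when the next graphics kernel is parsed: attach buffers and reset.
  void attach(unsigned kernel_uid,
              std::map<unsigned, std::vector<uint64_t>> &vb_addr,
              std::map<unsigned, std::vector<uint64_t>> &vb_size,
              std::map<unsigned, std::vector<uint64_t>> &vb_per_cta) {
    vb_addr[kernel_uid] = addr;
    vb_size[kernel_uid] = size;
    vb_per_cta[kernel_uid] = per_cta;
    addr.clear();
    size.clear();
    per_cta.clear();
  }
};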