Skip to content

Commit

Permalink
L1D and L2 sizes updated
Browse files Browse the repository at this point in the history
  • Loading branch information
christindbose committed Oct 24, 2024
1 parent f2d08bb commit d075d47
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions configs/tested-cfgs/SM90_H100/gpgpusim.config
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
# high level architecture configuration
-gpgpu_n_clusters 132
-gpgpu_n_cores_per_cluster 1
-gpgpu_n_mem 40
-gpgpu_n_mem 80
-gpgpu_n_sub_partition_per_mchannel 2
-gpgpu_clock_gated_lanes 1

Expand Down Expand Up @@ -137,16 +137,16 @@
## L1/shared memory configuration
# <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
# ** Optional parameter - Required when mshr_type==Texture Fifo
# Default config is 32KB DL1 and 96KB shared memory
# Default config is 160KB DL1 and 96KB shared memory
# In Volta, we assign the remaining shared memory to L1 cache
# if the assigned shd mem = 0, then L1 cache = 128KB
# if the assigned shd mem = 0, then L1 cache = 256KB
# For more info, see https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory-7-x
# disable this mode in case of multi kernels/apps execution
-gpgpu_adaptive_cache_config 1
-gpgpu_shmem_option 0,8,16,32,64,96
-gpgpu_unified_l1d_size 128
# L1 cache configuration
-gpgpu_l1_banks 4
-gpgpu_l1_banks 8
-gpgpu_cache:dl1 S:4:128:64,L:T:m:L:L,A:512:8,16:0,32
-gpgpu_l1_cache_write_ratio 25
-gpgpu_l1_latency 20
Expand All @@ -159,8 +159,8 @@
-gpgpu_shmem_per_block 65536
-gpgpu_smem_latency 20

# 32 sets, each 128 bytes 24-way for each memory sub partition (96 KB per memory sub partition). This gives us 6MB L2 cache
-gpgpu_cache:dl2 S:32:128:24,L:B:m:L:P,A:192:4,32:0,32
# 64 sets of 128-byte lines, 40-way, for each memory sub partition (64 x 128 B x 40 = 320 KB per memory sub partition). With 80 memory channels x 2 sub partitions each, this gives us 50MB L2 cache
-gpgpu_cache:dl2 S:64:128:40,L:B:m:L:P,A:192:4,32:0,32
-gpgpu_cache:dl2_texture_only 0
-gpgpu_dram_partition_queues 64:64:64:64
-gpgpu_perf_sim_memcpy 1
Expand Down Expand Up @@ -196,7 +196,7 @@
-gpgpu_frfcfs_dram_sched_queue_size 64
-gpgpu_dram_return_queue_size 192

# for HBM3, 5 stacks, 40 channels, each (64 bits) 8 bytes width
# for HBM3, 5 stacks, 80 channels, each (64 bits) 8 bytes width
-gpgpu_n_mem_per_ctrlr 1
-gpgpu_dram_buswidth 8
-gpgpu_dram_burst_length 2
Expand Down

0 comments on commit d075d47

Please sign in to comment.