L1D and L2 sizes updated

accel-sim · Oct 24, 2024 · d075d47 · d075d47
1 parent f2d08bb
commit d075d47
Showing 1 changed file with 7 additions and 7 deletions.
diff --git a/configs/tested-cfgs/SM90_H100/gpgpusim.config b/configs/tested-cfgs/SM90_H100/gpgpusim.config
@@ -61,7 +61,7 @@
 # high level architecture configuration
 -gpgpu_n_clusters 132	 
 -gpgpu_n_cores_per_cluster 1
--gpgpu_n_mem 40
+-gpgpu_n_mem 80
 -gpgpu_n_sub_partition_per_mchannel 2 
 -gpgpu_clock_gated_lanes 1
 
@@ -137,16 +137,16 @@
 ## L1/shared memory configuration
 # <sector?>:<nsets>:<bsize>:<assoc>,<rep>:<wr>:<alloc>:<wr_alloc>:<set_index_fn>,<mshr>:<N>:<merge>,<mq>:**<fifo_entry>
 # ** Optional parameter - Required when mshr_type==Texture Fifo
-# Defualt config is 32KB DL1 and 96KB shared memory
+# Default config is 160KB DL1 and 96KB shared memory
 # In Volta, we assign the remaining shared memory to L1 cache 
-# if the assigned shd mem = 0, then L1 cache = 128KB
+# if the assigned shd mem = 0, then L1 cache = 256KB
 # For more info, see https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory-7-x 
 # disable this mode in case of multi kernels/apps execution
 -gpgpu_adaptive_cache_config 1
 -gpgpu_shmem_option 0,8,16,32,64,96
 -gpgpu_unified_l1d_size 128
 # L1 cache configuration
--gpgpu_l1_banks 4
+-gpgpu_l1_banks 8
 -gpgpu_cache:dl1  S:4:128:64,L:T:m:L:L,A:512:8,16:0,32
 -gpgpu_l1_cache_write_ratio 25
 -gpgpu_l1_latency 20
@@ -159,8 +159,8 @@
 -gpgpu_shmem_per_block 65536
 -gpgpu_smem_latency 20
 
-# 32 sets, each 128 bytes 24-way for each memory sub partition (96 KB per memory sub partition). This gives us 6MB L2 cache
--gpgpu_cache:dl2 S:32:128:24,L:B:m:L:P,A:192:4,32:0,32
+# 64 sets, each 128 bytes 40-way for each memory sub partition (320 KB per memory sub partition). This gives us 50MB L2 cache
+-gpgpu_cache:dl2 S:64:128:40,L:B:m:L:P,A:192:4,32:0,32
 -gpgpu_cache:dl2_texture_only 0
 -gpgpu_dram_partition_queues 64:64:64:64
 -gpgpu_perf_sim_memcpy 1
@@ -196,7 +196,7 @@
 -gpgpu_frfcfs_dram_sched_queue_size 64
 -gpgpu_dram_return_queue_size 192
 
-# for HBM3, 5 stacks, 40 channels, each (64 bits) 8 bytes width
+# for HBM3, 5 stacks, 80 channels, each (64 bits) 8 bytes width
 -gpgpu_n_mem_per_ctrlr 1
 -gpgpu_dram_buswidth 8
 -gpgpu_dram_burst_length 2