./bin/perplexity --hellaswag -f ../hellaswag_text_data/hellaswag_val_full.txt -m ../models/L2_7B/ggml-model-f16.gguf -t 1 -ngl 100 --hellaswag-tasks 400
main: build = 1065 (154725c)
main: seed  = 1692977821
ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 4080, compute capability 8.9
llama_model_loader: loaded meta data with 14 key-value pairs and 291 tensors from ../models/L2_7B/ggml-model-f16.gguf (version GGUF V1 (latest))
llama_model_loader: - tensor   0: token_embd.weight        f16  [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor   1: output_norm.weight       f32  [  4096,     1,     1,     1 ]
llama_model_loader: - tensor   2: output.weight            f16  [  4096, 32000,     1,     1 ]
llama_model_loader: - tensor   3: blk.0.attn_q.weight      f16  [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor   4: blk.0.attn_k.weight      f16  [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor   5: blk.0.attn_v.weight      f16  [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor   6: blk.0.attn_output.weight f16  [  4096,  4096,     1,     1 ]
llama_model_loader: - tensor   7: blk.0.ffn_gate.weight    f16  [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor   8: blk.0.ffn_down.weight    f16  [ 11008,  4096,     1,     1 ]
llama_model_loader: - tensor   9: blk.0.ffn_up.weight      f16  [  4096, 11008,     1,     1 ]
llama_model_loader: - tensor  10: blk.0.attn_norm.weight   f32  [  4096,     1,     1,     1 ]
llama_model_loader: - tensor  11: blk.0.ffn_norm.weight    f32  [  4096,     1,     1,     1 ]
[tensors 12-290: blk.1 through blk.31 repeat the same nine per-block tensors as blk.0, with identical shapes and types]
llama_model_loader: - kv   0: general.architecture                   str
llama_model_loader: - kv   1: general.name                           str
llama_model_loader: - kv   2: llama.context_length                   u32
llama_model_loader: - kv   3: llama.embedding_length                 u32
llama_model_loader: - kv   4: llama.block_count                      u32
llama_model_loader: - kv   5: llama.feed_forward_length              u32
llama_model_loader: - kv   6: llama.rope.dimension_count             u32
llama_model_loader: - kv   7: llama.attention.head_count             u32
llama_model_loader: - kv   8: llama.attention.head_count_kv          u32
llama_model_loader: - kv   9: llama.attention.layer_norm_rms_epsilon f32
llama_model_loader: - kv  10: tokenizer.ggml.model                   str
llama_model_loader: - kv  11: tokenizer.ggml.tokens                  arr
llama_model_loader: - kv  12: tokenizer.ggml.scores                  arr
llama_model_loader: - kv  13: tokenizer.ggml.token_type              arr
llama_model_loader: - type  f32:   65 tensors
llama_model_loader: - type  f16:  226 tensors
llm_load_print_meta: format         = GGUF V1 (latest)
llm_load_print_meta: arch           = llama
llm_load_print_meta: vocab type     = SPM
llm_load_print_meta: n_vocab        = 32000
llm_load_print_meta: n_merges       = 0
llm_load_print_meta: n_ctx_train    = 4096
llm_load_print_meta: n_ctx          = 512
llm_load_print_meta: n_embd         = 4096
llm_load_print_meta: n_head         = 32
llm_load_print_meta: n_head_kv      = 32
llm_load_print_meta: n_layer        = 32
llm_load_print_meta: n_rot          = 128
llm_load_print_meta: n_gqa          = 1
llm_load_print_meta: f_norm_eps     = 1.0e-05
llm_load_print_meta: f_norm_rms_eps = 1.0e-05
llm_load_print_meta: n_ff           = 11008
llm_load_print_meta: freq_base      = 10000.0
llm_load_print_meta: freq_scale     = 1
llm_load_print_meta: model type     = 7B
llm_load_print_meta: model ftype    = mostly F16 (guessed)
llm_load_print_meta: model size     = 6.74 B
llm_load_print_meta: general.name   = LLaMA
llm_load_print_meta: BOS token = 1 '<s>'
llm_load_print_meta: EOS token = 2 '</s>'
llm_load_print_meta: UNK token = 0 '<unk>'
llm_load_print_meta: LF token  = 13 '<0x0A>'
llm_load_tensors: ggml ctx size = 0.09 MB
llm_load_tensors: using CUDA for GPU acceleration
llm_load_tensors: mem required  = 250.09 MB (+ 256.00 MB per state)
llm_load_tensors: offloading 32 repeating layers to GPU
llm_load_tensors: offloading non-repeating layers to GPU
llm_load_tensors: offloading v cache to GPU
llm_load_tensors: offloading k cache to GPU
llm_load_tensors: offloaded 35/35 layers to GPU
llm_load_tensors: VRAM used: 12860 MB
...................................................................................................
llama_new_context_with_model: kv self size  = 256.00 MB
llama_new_context_with_model: compute buffer total size = 71.91 MB
llama_new_context_with_model: VRAM scratch buffer: 70.50 MB
system_info: n_threads = 1 / 32 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | VSX = 0 |
hellaswag_score : loaded 10042 tasks from prompt.
hellaswag_score : selecting 400 randomized tasks.
hellaswag_score : calculating hellaswag score over selected tasks.
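For reference, the acc_norm column in the trace below is simply a running accuracy over the tasks evaluated so far. The sketch that follows is not llama.cpp's implementation; it is a minimal Python illustration assuming each candidate ending is scored by its mean per-token log-probability (the exact normalization used by the tool may differ) and that the per-token log-probabilities come from some model backend (here they are made-up numbers).

```python
# Minimal sketch of the acc_norm idea behind the running column printed below.
# NOT llama.cpp's code: scores each candidate ending by the mean of its token
# log-probabilities, picks the highest, and keeps a running accuracy in percent.

def pick_ending(ending_token_logprobs):
    """ending_token_logprobs: one list of per-token log-probs per candidate ending
    (these would come from the model; here they are plain Python lists)."""
    normalized = [sum(lp) / len(lp) for lp in ending_token_logprobs]
    return max(range(len(normalized)), key=lambda i: normalized[i])

def running_acc_norm(predictions, gold_labels):
    """Yield (task_index, running accuracy in percent) after each task."""
    correct = 0
    for n, (pred, gold) in enumerate(zip(predictions, gold_labels), start=1):
        correct += int(pred == gold)
        yield n, 100.0 * correct / n

if __name__ == "__main__":
    # Toy example: two tasks, four candidate endings each, made-up log-probs.
    tasks = [
        [[-2.0, -1.5], [-0.5, -0.7, -0.6], [-3.0], [-1.0, -2.5]],
        [[-0.2, -0.3], [-1.5, -1.0], [-2.2, -2.0, -1.8], [-0.9]],
    ]
    gold = [1, 0]
    preds = [pick_ending(t) for t in tasks]
    for n, acc in running_acc_norm(preds, gold):
        print(f"{n}\t{acc:.8f}")
```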
task    acc_norm
1       100.00000000
2       100.00000000
3       100.00000000
4       100.00000000
5       100.00000000
6       83.33333333
7       71.42857143
8       62.50000000
9       55.55555556
10      60.00000000
[per-task running acc_norm for tasks 11-399 trimmed; selected checkpoints below]
50      72.00000000
100     71.00000000
150     69.33333333
200     70.50000000
250     69.60000000
300     70.66666667
350     71.42857143
400     71.00000000

llama_print_timings:        load time =   1073.18 ms
llama_print_timings:      sample time =      0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings: prompt eval time = 189769.47 ms / 75755 tokens (    2.51 ms per token,   399.19 tokens per second)
llama_print_timings:        eval time =      0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time = 196732.03 ms
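As a quick sanity check on the timing summary above (an illustrative snippet, not part of the tool), the reported per-token latency and throughput follow directly from the prompt-eval totals:

```python
# Recompute the prompt-eval figures reported above from the raw totals.
prompt_eval_ms = 189769.47
prompt_tokens = 75755

ms_per_token = prompt_eval_ms / prompt_tokens                   # ~2.51 ms per token
tokens_per_second = prompt_tokens / (prompt_eval_ms / 1000.0)   # ~399.19 tokens per second

print(f"{ms_per_token:.2f} ms per token, {tokens_per_second:.2f} tokens per second")
```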