/*
 * Preprocessor output for ggml.c.
 *
 * Lines of the form `# <line> "<file>" [flags]` are linemarkers written by the
 * preprocessor; they record which header each declaration came from and must
 * each stay on a line of their own.
 */
# 1 "ggml.c"
# 1 "" 1
# 1 "" 3
# 394 "" 3
# 1 "" 1
# 1 "" 2
# 1 "ggml.c" 2
# 1 "./ggml.h" 1
# 198 "./ggml.h"
# 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stdint.h" 1 3
# 52 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stdint.h" 3
# 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdint.h" 1 3 4
# 32 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdint.h" 3 4
# 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/wchar_limits.h" 1 3 4
# 36 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/wchar_limits.h" 3 4
# 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/cdefs.h" 1 3 4
# 380 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/cdefs.h" 3 4
# 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/android/versioning.h" 1 3 4
# 381 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/cdefs.h" 2 3 4
# 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/android/api-level.h" 1 3 4
# 199 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/android/api-level.h" 3 4
# 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/get_device_api_level_inlines.h" 1 3 4
# 38 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/get_device_api_level_inlines.h" 3 4
/* External helpers used by the inline below; their definitions are not in this file. */
int __system_property_get(const char* __name, char* __value);
int atoi(const char* __s) __attribute__((__pure__));

/*
 * Returns the value of the "ro.build.version.sdk" system property as an int.
 *
 * Returns -1 when __system_property_get() returns less than 1, or when the
 * parsed value is not strictly positive.
 *
 * The parameter list is spelled (void) so this definition is a real prototype:
 * an empty `()` in C11 leaves the arguments unspecified and lets bad calls
 * compile silently. Existing zero-argument callers are unaffected.
 */
static __inline int android_get_device_api_level(void) {
  char value[92] = { 0 }; /* zero-filled so the buffer is always NUL-terminated text */
  if (__system_property_get("ro.build.version.sdk", value) < 1) {
    return -1;
  }
  /* atoi reports no errors; text without leading digits parses as 0 and is mapped to -1. */
  int api_level = atoi(value);
  return (api_level > 0) ? api_level : -1;
}
# 200 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/android/api-level.h" 2 3 4
# 382 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/cdefs.h" 2 3 4
# 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/wchar_limits.h" 2 3 4
# 33 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdint.h" 2 3 4
# 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 4
# 35 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 3 4
/* <stddef.h> basics for this target: pointer difference and sizes are int-width. */
typedef int ptrdiff_t;
# 46 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 3 4
typedef unsigned int size_t;
# 74 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 3 4
typedef unsigned int wchar_t;
# 109 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 3 4
# 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/__stddef_max_align_t.h" 1 3 4
# 19 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/__stddef_max_align_t.h" 3 4
/* max_align_t: a struct whose members carry the alignment of long long and long double. */
typedef struct {
  long long __clang_max_align_nonce1 __attribute__((__aligned__(__alignof__(long long))));
  long double __clang_max_align_nonce2 __attribute__((__aligned__(__alignof__(long double))));
} max_align_t;
# 110 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 2 3 4
# 34 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdint.h" 2 3 4
/* Base integer typedefs that the public <stdint.h> names are built from. */
typedef signed char __int8_t;
typedef unsigned char __uint8_t;
typedef short __int16_t;
typedef unsigned short __uint16_t;
typedef int __int32_t;
typedef unsigned int __uint32_t;
typedef long long __int64_t;
typedef unsigned long long __uint64_t;
typedef int __intptr_t;
typedef unsigned int __uintptr_t;
/* Public exact-width aliases (the remaining ones follow this span). */
typedef __int8_t int8_t;
typedef __uint8_t uint8_t;
typedef __int16_t int16_t; typedef __uint16_t uint16_t; typedef __int32_t int32_t; typedef __uint32_t uint32_t; typedef __int64_t int64_t; typedef __uint64_t uint64_t; typedef __intptr_t intptr_t; typedef __uintptr_t uintptr_t; typedef int8_t int_least8_t; typedef uint8_t uint_least8_t; typedef int16_t int_least16_t; typedef uint16_t uint_least16_t; typedef int32_t int_least32_t; typedef uint32_t uint_least32_t; typedef int64_t int_least64_t; typedef uint64_t uint_least64_t; typedef int8_t int_fast8_t; typedef uint8_t uint_fast8_t; typedef int64_t int_fast64_t; typedef uint64_t uint_fast64_t; typedef int32_t int_fast16_t; typedef uint32_t uint_fast16_t; typedef int32_t int_fast32_t; typedef uint32_t uint_fast32_t; typedef uint64_t uintmax_t; typedef int64_t intmax_t; # 53 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stdint.h" 2 3 # 199 "./ggml.h" 2 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 # 200 "./ggml.h" 2 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stdbool.h" 1 3 # 201 "./ggml.h" 2 # 274 "./ggml.h" typedef __fp16 ggml_fp16_t; float ggml_fp16_to_fp32(ggml_fp16_t x); ggml_fp16_t ggml_fp32_to_fp16(float x); void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n); void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n); struct ggml_object; struct ggml_context; enum ggml_type { GGML_TYPE_F32 = 0, GGML_TYPE_F16 = 1, GGML_TYPE_Q4_0 = 2, GGML_TYPE_Q4_1 = 3, GGML_TYPE_Q5_0 = 6, GGML_TYPE_Q5_1 = 7, GGML_TYPE_Q8_0 = 8, GGML_TYPE_Q8_1 = 9, GGML_TYPE_Q2_K = 10, GGML_TYPE_Q3_K = 11, GGML_TYPE_Q4_K = 12, GGML_TYPE_Q5_K = 13, GGML_TYPE_Q6_K = 14, GGML_TYPE_Q8_K = 15, GGML_TYPE_I8, GGML_TYPE_I16, GGML_TYPE_I32, GGML_TYPE_COUNT, }; enum ggml_backend { GGML_BACKEND_CPU = 0, GGML_BACKEND_GPU = 10, GGML_BACKEND_GPU_SPLIT = 20, }; enum ggml_ftype { GGML_FTYPE_UNKNOWN = -1, GGML_FTYPE_ALL_F32 = 0, GGML_FTYPE_MOSTLY_F16 = 1, GGML_FTYPE_MOSTLY_Q4_0 
= 2, GGML_FTYPE_MOSTLY_Q4_1 = 3, GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, GGML_FTYPE_MOSTLY_Q8_0 = 7, GGML_FTYPE_MOSTLY_Q5_0 = 8, GGML_FTYPE_MOSTLY_Q5_1 = 9, GGML_FTYPE_MOSTLY_Q2_K = 10, GGML_FTYPE_MOSTLY_Q3_K = 11, GGML_FTYPE_MOSTLY_Q4_K = 12, GGML_FTYPE_MOSTLY_Q5_K = 13, GGML_FTYPE_MOSTLY_Q6_K = 14, }; enum ggml_op { GGML_OP_NONE = 0, GGML_OP_DUP, GGML_OP_ADD, GGML_OP_ADD1, GGML_OP_ACC, GGML_OP_SUB, GGML_OP_MUL, GGML_OP_DIV, GGML_OP_SQR, GGML_OP_SQRT, GGML_OP_LOG, GGML_OP_SUM, GGML_OP_SUM_ROWS, GGML_OP_MEAN, GGML_OP_ARGMAX, GGML_OP_REPEAT, GGML_OP_REPEAT_BACK, GGML_OP_CONCAT, GGML_OP_SILU_BACK, GGML_OP_NORM, GGML_OP_RMS_NORM, GGML_OP_RMS_NORM_BACK, GGML_OP_GROUP_NORM, GGML_OP_MUL_MAT, GGML_OP_OUT_PROD, GGML_OP_SCALE, GGML_OP_SET, GGML_OP_CPY, GGML_OP_CONT, GGML_OP_RESHAPE, GGML_OP_VIEW, GGML_OP_PERMUTE, GGML_OP_TRANSPOSE, GGML_OP_GET_ROWS, GGML_OP_GET_ROWS_BACK, GGML_OP_DIAG, GGML_OP_DIAG_MASK_INF, GGML_OP_DIAG_MASK_ZERO, GGML_OP_SOFT_MAX, GGML_OP_SOFT_MAX_BACK, GGML_OP_ROPE, GGML_OP_ROPE_BACK, GGML_OP_ALIBI, GGML_OP_CLAMP, GGML_OP_CONV_1D, GGML_OP_CONV_2D, GGML_OP_CONV_TRANSPOSE_2D, GGML_OP_POOL_1D, GGML_OP_POOL_2D, GGML_OP_UPSCALE, GGML_OP_FLASH_ATTN, GGML_OP_FLASH_FF, GGML_OP_FLASH_ATTN_BACK, GGML_OP_WIN_PART, GGML_OP_WIN_UNPART, GGML_OP_GET_REL_POS, GGML_OP_ADD_REL_POS, GGML_OP_UNARY, GGML_OP_MAP_UNARY, GGML_OP_MAP_BINARY, GGML_OP_MAP_CUSTOM1_F32, GGML_OP_MAP_CUSTOM2_F32, GGML_OP_MAP_CUSTOM3_F32, GGML_OP_MAP_CUSTOM1, GGML_OP_MAP_CUSTOM2, GGML_OP_MAP_CUSTOM3, GGML_OP_CROSS_ENTROPY_LOSS, GGML_OP_CROSS_ENTROPY_LOSS_BACK, GGML_OP_COUNT, }; enum ggml_unary_op { GGML_UNARY_OP_ABS, GGML_UNARY_OP_SGN, GGML_UNARY_OP_NEG, GGML_UNARY_OP_STEP, GGML_UNARY_OP_TANH, GGML_UNARY_OP_ELU, GGML_UNARY_OP_RELU, GGML_UNARY_OP_GELU, GGML_UNARY_OP_GELU_QUICK, GGML_UNARY_OP_SILU, }; enum ggml_object_type { GGML_OBJECT_TENSOR, GGML_OBJECT_GRAPH, GGML_OBJECT_WORK_BUFFER }; struct ggml_object { size_t offs; size_t size; struct ggml_object * next; enum ggml_object_type type; char padding[4]; 
}; static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object); struct ggml_tensor { enum ggml_type type; enum ggml_backend backend; int n_dims; int64_t ne[4]; size_t nb[4]; enum ggml_op op; int32_t op_params[32 / sizeof(int32_t)]; _Bool is_param; struct ggml_tensor * grad; struct ggml_tensor * src[6]; int perf_runs; int64_t perf_cycles; int64_t perf_time_us; struct ggml_tensor * view_src; size_t view_offs; void * data; char name[64]; void * extra; char padding[4]; }; static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor); struct ggml_cplan { size_t work_size; uint8_t * work_data; int n_threads; int n_tasks[4096]; _Bool (*abort_callback)(void * data); void * abort_callback_data; }; struct ggml_cgraph { int n_nodes; int n_leafs; struct ggml_tensor * nodes[4096]; struct ggml_tensor * grads[4096]; struct ggml_tensor * leafs[4096]; void * visited_hash_table[8273]; int perf_runs; int64_t perf_cycles; int64_t perf_time_us; }; static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph); struct ggml_scratch { size_t offs; size_t size; void * data; }; struct ggml_init_params { size_t mem_size; void * mem_buffer; _Bool no_alloc; }; enum ggml_task_type { GGML_TASK_INIT = 0, GGML_TASK_COMPUTE, GGML_TASK_FINALIZE, }; struct ggml_compute_params { enum ggml_task_type type; int ith, nth; size_t wsize; void * wdata; }; void ggml_time_init(void); int64_t ggml_time_ms(void); int64_t ggml_time_us(void); int64_t ggml_cycles(void); int64_t ggml_cycles_per_ms(void); void ggml_numa_init(void); _Bool ggml_is_numa(void); void ggml_print_object (const struct ggml_object * obj); void ggml_print_objects(const struct ggml_context * ctx); int64_t ggml_nelements (const struct ggml_tensor * tensor); int64_t ggml_nrows (const struct ggml_tensor * tensor); size_t ggml_nbytes (const struct ggml_tensor * tensor); size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split); int ggml_blck_size (enum 
ggml_type type); size_t ggml_type_size (enum ggml_type type); float ggml_type_sizef(enum ggml_type type); const char * ggml_type_name(enum ggml_type type); const char * ggml_op_name (enum ggml_op op); const char * ggml_op_symbol(enum ggml_op op); size_t ggml_element_size(const struct ggml_tensor * tensor); _Bool ggml_is_quantized(enum ggml_type type); enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype); _Bool ggml_is_transposed(const struct ggml_tensor * tensor); _Bool ggml_is_contiguous(const struct ggml_tensor * tensor); _Bool ggml_is_permuted (const struct ggml_tensor * tensor); _Bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1); size_t ggml_tensor_overhead(void); struct ggml_context * ggml_init(struct ggml_init_params params); void ggml_free(struct ggml_context * ctx); size_t ggml_used_mem(const struct ggml_context * ctx); size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch); _Bool ggml_get_no_alloc(struct ggml_context * ctx); void ggml_set_no_alloc(struct ggml_context * ctx, _Bool no_alloc); void * ggml_get_mem_buffer (const struct ggml_context * ctx); size_t ggml_get_mem_size (const struct ggml_context * ctx); size_t ggml_get_max_tensor_size(const struct ggml_context * ctx); struct ggml_tensor * ggml_new_tensor( struct ggml_context * ctx, enum ggml_type type, int n_dims, const int64_t *ne); struct ggml_tensor * ggml_new_tensor_1d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0); struct ggml_tensor * ggml_new_tensor_2d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1); struct ggml_tensor * ggml_new_tensor_3d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2); struct ggml_tensor * ggml_new_tensor_4d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3); struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value); struct ggml_tensor * 
ggml_new_f32(struct ggml_context * ctx, float value); struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src); struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src); struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name); struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor); struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value); struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value); int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i); void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value); float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i); void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value); void * ggml_get_data (const struct ggml_tensor * tensor); float * ggml_get_data_f32(const struct ggml_tensor * tensor); enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor); const char * ggml_get_name (const struct ggml_tensor * tensor); struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name); struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...); struct ggml_tensor * ggml_dup( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_dup_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_add( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_add_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_add1( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_add1_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_acc( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, 
size_t nb1, size_t nb2, size_t nb3, size_t offset); struct ggml_tensor * ggml_acc_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset); struct ggml_tensor * ggml_sub( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_sub_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_mul( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_mul_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_div( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_div_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_sqr( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_sqr_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_sqrt( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_sqrt_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_log( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_log_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_sum( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_sum_rows( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_mean( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_argmax( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_repeat( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_repeat_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * 
ggml_concat( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_abs( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_abs_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_sgn( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_sgn_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_neg( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_neg_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_step( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_step_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_tanh( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_tanh_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_elu( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_elu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_relu( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_relu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_gelu( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_gelu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_gelu_quick( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_gelu_quick_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_silu( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_silu_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_silu_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_norm( 
struct ggml_context * ctx, struct ggml_tensor * a, float eps); struct ggml_tensor * ggml_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, float eps); struct ggml_tensor * ggml_rms_norm( struct ggml_context * ctx, struct ggml_tensor * a, float eps); struct ggml_tensor * ggml_rms_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, float eps); struct ggml_tensor * ggml_group_norm( struct ggml_context * ctx, struct ggml_tensor * a, int n_groups); struct ggml_tensor * ggml_group_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_groups); struct ggml_tensor * ggml_rms_norm_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, float eps); struct ggml_tensor * ggml_mul_mat( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_out_prod( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_scale( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_scale_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_set( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset); struct ggml_tensor * ggml_set_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset); struct ggml_tensor * ggml_set_1d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t offset); struct ggml_tensor * ggml_set_1d_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t offset); struct ggml_tensor * ggml_set_2d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t offset); struct ggml_tensor * ggml_set_2d_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * 
b, size_t nb1, size_t offset); struct ggml_tensor * ggml_cpy( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_cpy_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_cont( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_cont_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_reshape( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_reshape_1d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0); struct ggml_tensor * ggml_reshape_2d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1); struct ggml_tensor * ggml_reshape_3d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2); struct ggml_tensor * ggml_reshape_4d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3); struct ggml_tensor * ggml_view_1d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, size_t offset); struct ggml_tensor * ggml_view_2d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, size_t nb1, size_t offset); struct ggml_tensor * ggml_view_3d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, size_t nb1, size_t nb2, size_t offset); struct ggml_tensor * ggml_view_4d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3, size_t nb1, size_t nb2, size_t nb3, size_t offset); struct ggml_tensor * ggml_permute( struct ggml_context * ctx, struct ggml_tensor * a, int axis0, int axis1, int axis2, int axis3); struct ggml_tensor * ggml_transpose( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_get_rows( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * 
ggml_get_rows_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c); struct ggml_tensor * ggml_diag( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_diag_mask_inf( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); struct ggml_tensor * ggml_diag_mask_inf_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); struct ggml_tensor * ggml_diag_mask_zero( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); struct ggml_tensor * ggml_diag_mask_zero_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past); struct ggml_tensor * ggml_soft_max( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_soft_max_inplace( struct ggml_context * ctx, struct ggml_tensor * a); struct ggml_tensor * ggml_soft_max_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_soft_max_back_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_rope( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx); struct ggml_tensor * ggml_rope_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx); struct ggml_tensor * ggml_rope_custom( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx, float freq_base, float freq_scale); struct ggml_tensor * ggml_rope_custom_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx, float freq_base, float freq_scale); struct ggml_tensor * ggml_rope_xpos_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, float base, _Bool down); struct ggml_tensor * ggml_rope_back( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx, float freq_base, float freq_scale, 
float xpos_base, _Bool xpos_down); struct ggml_tensor * ggml_alibi( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_head, float bias_max); struct ggml_tensor * ggml_clamp( struct ggml_context * ctx, struct ggml_tensor * a, float min, float max); struct ggml_tensor * ggml_conv_1d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int s0, int p0, int d0); struct ggml_tensor* ggml_conv_1d_ph( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int s, int d); struct ggml_tensor * ggml_conv_2d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int s0, int s1, int p0, int p1, int d0, int d1); # 1333 "./ggml.h" struct ggml_tensor * ggml_conv_2d_sk_p0( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); # 1346 "./ggml.h" struct ggml_tensor * ggml_conv_2d_s1_ph( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_conv_transpose_2d_p0( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, int stride); enum ggml_op_pool { GGML_OP_POOL_MAX, GGML_OP_POOL_AVG, GGML_OP_POOL_COUNT, }; struct ggml_tensor * ggml_pool_1d( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_op_pool op, int k0, int s0, int p0); struct ggml_tensor * ggml_pool_2d( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_op_pool op, int k0, int k1, int s0, int s1, int p0, int p1); struct ggml_tensor * ggml_upscale( struct ggml_context * ctx, struct ggml_tensor * a, int scale_factor); struct ggml_tensor * ggml_flash_attn( struct ggml_context * ctx, struct ggml_tensor * q, struct ggml_tensor * k, struct ggml_tensor * v, _Bool masked); struct ggml_tensor * ggml_flash_attn_back( struct ggml_context * ctx, struct ggml_tensor * q, struct ggml_tensor * k, struct ggml_tensor * v, struct ggml_tensor * d, _Bool masked); struct ggml_tensor * ggml_flash_ff( struct ggml_context * ctx, struct ggml_tensor * a, struct 
ggml_tensor * b0, struct ggml_tensor * b1, struct ggml_tensor * c0, struct ggml_tensor * c1); struct ggml_tensor * ggml_win_part( struct ggml_context * ctx, struct ggml_tensor * a, int w); struct ggml_tensor * ggml_win_unpart( struct ggml_context * ctx, struct ggml_tensor * a, int w0, int h0, int w); struct ggml_tensor * ggml_unary( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_unary_op op); struct ggml_tensor * ggml_unary_inplace( struct ggml_context * ctx, struct ggml_tensor * a, enum ggml_unary_op op); struct ggml_tensor * ggml_get_rel_pos( struct ggml_context * ctx, struct ggml_tensor * a, int qh, int kh); struct ggml_tensor * ggml_add_rel_pos( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * pw, struct ggml_tensor * ph); struct ggml_tensor * ggml_add_rel_pos_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * pw, struct ggml_tensor * ph); typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *); typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *); typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *); typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); struct ggml_tensor * ggml_map_unary_f32( struct ggml_context * ctx, struct ggml_tensor * a, ggml_unary_op_f32_t fun) __attribute__((deprecated("use ggml_map_custom1 instead"))); struct ggml_tensor * ggml_map_unary_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, ggml_unary_op_f32_t fun) __attribute__((deprecated("use ggml_map_custom1_inplace instead"))); struct ggml_tensor * ggml_map_binary_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_binary_op_f32_t fun) __attribute__((deprecated("use ggml_map_custom2 
instead"))); struct ggml_tensor * ggml_map_binary_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_binary_op_f32_t fun) __attribute__((deprecated("use ggml_map_custom2_inplace instead"))); struct ggml_tensor * ggml_map_custom1_f32( struct ggml_context * ctx, struct ggml_tensor * a, ggml_custom1_op_f32_t fun) __attribute__((deprecated("use ggml_map_custom1 instead"))); struct ggml_tensor * ggml_map_custom1_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, ggml_custom1_op_f32_t fun) __attribute__((deprecated("use ggml_map_custom1_inplace instead"))); struct ggml_tensor * ggml_map_custom2_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_custom2_op_f32_t fun) __attribute__((deprecated("use ggml_map_custom2 instead"))); struct ggml_tensor * ggml_map_custom2_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_custom2_op_f32_t fun) __attribute__((deprecated("use ggml_map_custom2_inplace instead"))); struct ggml_tensor * ggml_map_custom3_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, ggml_custom3_op_f32_t fun) __attribute__((deprecated("use ggml_map_custom3 instead"))); struct ggml_tensor * ggml_map_custom3_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, ggml_custom3_op_f32_t fun) __attribute__((deprecated("use ggml_map_custom3_inplace instead"))); # 1542 "./ggml.h" typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata); typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata); typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * 
userdata); struct ggml_tensor * ggml_map_custom1( struct ggml_context * ctx, struct ggml_tensor * a, ggml_custom1_op_t fun, int n_tasks, void * userdata); struct ggml_tensor * ggml_map_custom1_inplace( struct ggml_context * ctx, struct ggml_tensor * a, ggml_custom1_op_t fun, int n_tasks, void * userdata); struct ggml_tensor * ggml_map_custom2( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_custom2_op_t fun, int n_tasks, void * userdata); struct ggml_tensor * ggml_map_custom2_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, ggml_custom2_op_t fun, int n_tasks, void * userdata); struct ggml_tensor * ggml_map_custom3( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, ggml_custom3_op_t fun, int n_tasks, void * userdata); struct ggml_tensor * ggml_map_custom3_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, ggml_custom3_op_t fun, int n_tasks, void * userdata); struct ggml_tensor * ggml_cross_entropy_loss( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b); struct ggml_tensor * ggml_cross_entropy_loss_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c); void ggml_set_param( struct ggml_context * ctx, struct ggml_tensor * tensor); void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor); void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, _Bool keep); struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor); struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, _Bool keep); struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor); size_t ggml_graph_overhead(void); struct ggml_cplan 
ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads ); int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan); void ggml_graph_reset (struct ggml_cgraph * cgraph); void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads); struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name); void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname); struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval); void ggml_graph_print(const struct ggml_cgraph * cgraph); void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename); enum ggml_opt_type { GGML_OPT_ADAM, GGML_OPT_LBFGS, }; enum ggml_linesearch { GGML_LINESEARCH_DEFAULT = 1, GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0, GGML_LINESEARCH_BACKTRACKING_WOLFE = 1, GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2, }; enum ggml_opt_result { GGML_OPT_OK = 0, GGML_OPT_DID_NOT_CONVERGE, GGML_OPT_NO_CONTEXT, GGML_OPT_INVALID_WOLFE, GGML_OPT_FAIL, GGML_LINESEARCH_FAIL = -128, GGML_LINESEARCH_MINIMUM_STEP, GGML_LINESEARCH_MAXIMUM_STEP, GGML_LINESEARCH_MAXIMUM_ITERATIONS, GGML_LINESEARCH_INVALID_PARAMETERS, }; typedef void (*ggml_opt_callback)(void * data, float * sched); struct ggml_opt_params { enum ggml_opt_type type; int n_threads; int past; float delta; int max_no_improvement; _Bool print_forward_graph; _Bool print_backward_graph; struct { int n_iter; float sched; float decay; int decay_min_ndim; float alpha; float beta1; float beta2; float eps; float eps_f; float eps_g; float gclip; } adam; struct { int m; int n_iter; int max_linesearch; float eps; float ftol; float wolfe; float min_step; float max_step; enum ggml_linesearch linesearch; } lbfgs; }; struct ggml_opt_context { struct ggml_context * ctx; struct ggml_opt_params params; int iter; int64_t nx; _Bool just_initialized; float loss_before; 
    float loss_after;
    /* ^ still inside struct ggml_opt_context, which opens on the previous physical line. */
    /* Adam state: three tensors plus best/previous objective values and a no-improvement counter. */
    struct {
        struct ggml_tensor * m;
        struct ggml_tensor * v;
        struct ggml_tensor * pf;
        float fx_best;
        float fx_prev;
        int n_no_improvement;
    } adam;
    /*
     * L-BFGS state: ten tensors plus scalar bookkeeping.
     * NOTE(review): x/xp and g/gp look like current/previous point and gradient,
     * d the search direction and lm* the limited-memory history - confirm
     * against the optimizer implementation.
     */
    struct {
        struct ggml_tensor * x;
        struct ggml_tensor * xp;
        struct ggml_tensor * g;
        struct ggml_tensor * gp;
        struct ggml_tensor * d;
        struct ggml_tensor * pf;
        struct ggml_tensor * lmal;
        struct ggml_tensor * lmys;
        struct ggml_tensor * lms;
        struct ggml_tensor * lmy;
        float fx_best;
        float step;
        int j;
        int k;
        int end;
        int n_no_improvement;
    } lbfgs;
};

/*
 * Optimizer entry points. ggml_opt takes the parameters by value; the
 * `_resume` forms take a caller-owned ggml_opt_context instead, and
 * ggml_opt_resume_g additionally takes the forward/backward graphs and a
 * ggml_opt_callback with its data pointer.
 */
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
enum ggml_opt_result ggml_opt( struct ggml_context * ctx, struct ggml_opt_params params, struct ggml_tensor * f);
void ggml_opt_init( struct ggml_context * ctx, struct ggml_opt_context * opt, struct ggml_opt_params params, int64_t nx);
enum ggml_opt_result ggml_opt_resume( struct ggml_context * ctx, struct ggml_opt_context * opt, struct ggml_tensor * f);
enum ggml_opt_result ggml_opt_resume_g( struct ggml_context * ctx, struct ggml_opt_context * opt, struct ggml_tensor * f, struct ggml_cgraph * gf, struct ggml_cgraph * gb, ggml_opt_callback callback, void * callback_data);

/*
 * Quantization prototypes: float source, untyped destination, two int
 * arguments and an int64_t histogram pointer; each returns size_t.
 * NOTE(review): presumably the return value is the number of bytes written
 * and `k` the row length - confirm against the implementation.
 */
size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
/* Variant that selects the format through an enum ggml_type and takes a start index. */
size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);

/*
 * GGUF value-type tags with explicit numeric values. The enumerator list
 * continues on the following physical line.
 */
enum gguf_type {
    GGUF_TYPE_UINT8 = 0,
    GGUF_TYPE_INT8 = 1,
    GGUF_TYPE_UINT16 = 2,
    GGUF_TYPE_INT16 = 3,
    GGUF_TYPE_UINT32 = 4,
    GGUF_TYPE_INT32 = 5,
    GGUF_TYPE_FLOAT32 = 6,
    GGUF_TYPE_BOOL = 7,
    GGUF_TYPE_STRING = 8,
    GGUF_TYPE_ARRAY = 9,
    GGUF_TYPE_UINT64 = 10,
    GGUF_TYPE_INT64 = 11,
    GGUF_TYPE_FLOAT64 = 12,
    /* Implicit value 13: one past the last real type tag. */
    GGUF_TYPE_COUNT,
};
/* ^ closes enum gguf_type, which opens on the previous physical line. */

/* Opaque handle: only forward-declared here. */
struct gguf_context;

/*
 * Options for gguf_init_from_file.
 * NOTE(review): presumably `no_alloc` skips tensor-data allocation and `ctx`
 * is an out-parameter receiving a ggml context - confirm against the loader.
 */
struct gguf_init_params {
    _Bool no_alloc;
    struct ggml_context ** ctx;
};

/* Construction and destruction of a gguf context. */
struct gguf_context * gguf_init_empty(void);
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
void gguf_free(struct gguf_context * ctx);

const char * gguf_type_name(enum gguf_type type);

/* Whole-file properties. */
int gguf_get_version (struct gguf_context * ctx);
size_t gguf_get_alignment (struct gguf_context * ctx);
size_t gguf_get_data_offset(struct gguf_context * ctx);
void * gguf_get_data (struct gguf_context * ctx);

/*
 * Key/value access. Lookup is by key string; every other accessor takes an
 * int index `i`.
 * NOTE(review): presumably `i` is the index returned by gguf_find_key - confirm.
 */
int gguf_get_n_kv(struct gguf_context * ctx);
int gguf_find_key(struct gguf_context * ctx, const char * key);
const char * gguf_get_key (struct gguf_context * ctx, int i);
enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);

/* One typed getter per scalar or string value type; the return type matches the suffix. */
uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
float gguf_get_val_f32 (struct gguf_context * ctx, int i);
uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
double gguf_get_val_f64 (struct gguf_context * ctx, int i);
_Bool gguf_get_val_bool(struct gguf_context * ctx, int i);
const char * gguf_get_val_str (struct gguf_context * ctx, int i);

/* Array-valued entries: element count, raw data pointer, and per-element string access (key_id, then element i). */
int gguf_get_arr_n (struct gguf_context * ctx, int i);
const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);

/* Tensor directory access. */
int gguf_get_n_tensors (struct gguf_context * ctx);
int gguf_find_tensor (struct gguf_context * ctx, const char * name);
/* The parameter list of this prototype continues on the following physical line. */
size_t gguf_get_tensor_offset(struct gguf_context * ctx,
int i);
/* ^ final parameter of a prototype that begins on the previous physical line. */

/* Returns a non-const char pointer, unlike the other string getters in this header. */
char * gguf_get_tensor_name (struct gguf_context * ctx, int i);

/* One typed setter per scalar or string value type; entries are addressed by key string. */
void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, _Bool val);
void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);

/* Array setters: raw data with an element type and count n, or an array of n C strings. */
void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);

/* NOTE(review): presumably copies all key/value pairs from `src` into `ctx` - confirm. */
void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);

/* Tensor registration and per-tensor updates; existing tensors are addressed by name. */
void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
# 1946 "./ggml.h"
/* Serialisation: write to a file (optionally metadata only), or query the metadata size and copy it into a caller buffer. */
void gguf_write_to_file(struct gguf_context * ctx, const char * fname, _Bool only_meta);
size_t gguf_get_meta_size(struct gguf_context * ctx);
void gguf_get_meta_data(struct gguf_context * ctx, void * data);

/* Feature-query prototypes: no arguments, int result. */
int ggml_cpu_has_avx (void);
int ggml_cpu_has_avx2 (void);
int ggml_cpu_has_avx512 (void);
int ggml_cpu_has_avx512_vbmi(void);
/* Return type of the next prototype; its declarator continues on the following physical line. */
int
ggml_cpu_has_avx512_vnni(void);
/* ^ declarator of a prototype whose return type (int) sits at the end of the previous physical line. */

/* Remaining feature-query prototypes: no arguments, int result. */
int ggml_cpu_has_fma (void);
int ggml_cpu_has_neon (void);
int ggml_cpu_has_arm_fma (void);
int ggml_cpu_has_f16c (void);
int ggml_cpu_has_fp16_va (void);
int ggml_cpu_has_wasm_simd (void);
int ggml_cpu_has_blas (void);
int ggml_cpu_has_cublas (void);
int ggml_cpu_has_clblast (void);
int ggml_cpu_has_gpublas (void);
int ggml_cpu_has_sse3 (void);
int ggml_cpu_has_ssse3 (void);
int ggml_cpu_has_vsx (void);
# 1985 "./ggml.h"
/*
 * Conversion and dot-product callback types. All pointer parameters are
 * `restrict`-qualified. to_float reads untyped data and writes floats,
 * from_float does the reverse, and vec_dot writes its result through `s`.
 */
typedef void (*ggml_to_float_t) (const void * restrict x, float * restrict y, int k);
typedef void (*ggml_from_float_t)(const float * restrict x, void * restrict y, int k);
typedef void (*ggml_vec_dot_t) (const int n, float * restrict s, const void * restrict x, const void * restrict y);

/*
 * Per-type record returned by value from ggml_internal_get_type_traits: a
 * name, block and type sizes, a quantized flag and the callbacks above.
 * NOTE(review): presumably vec_dot_type is the type the second vec_dot
 * operand must be in - confirm against the traits table.
 */
typedef struct {
    const char * type_name;
    int blck_size;
    size_t type_size;
    _Bool is_quantized;
    ggml_to_float_t to_float;
    ggml_from_float_t from_float;
    ggml_from_float_t from_float_reference;
    ggml_vec_dot_t vec_dot;
    enum ggml_type vec_dot_type;
} ggml_type_traits_t;
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
/* Linemarkers below: return to ggml.c, then enter k_quants.h, which pulls in assert.h and stddef.h. */
# 4 "ggml.c" 2
# 1 "./k_quants.h" 1
# 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/assert.h" 1 3 4
# 78 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/assert.h" 3 4
/* Assertion-failure reporters from assert.h; both are declared noreturn. */
void __assert(const char* _Nonnull __file, int __line, const char* _Nonnull __msg) __attribute__((__noreturn__));
void __assert2(const char* _Nonnull __file, int __line, const char* _Nonnull __function, const char* _Nonnull __msg) __attribute__((__noreturn__));
# 7 "./k_quants.h" 2
# 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3
# 8 "./k_quants.h" 2
# 34 "./k_quants.h"
/*
 * q2_K block: 16 bytes of scales (256/16), 64 bytes of quants (256/4) and two
 * ggml_fp16_t values. The static assertion requires sizeof to equal the sum
 * of the member sizes, so the build fails if the compiler inserts padding.
 * NOTE(review): 256 is presumably the number of values per block (256/4 bytes
 * would then be 2 bits per value) - confirm against k_quants.h.
 */
typedef struct {
    uint8_t scales[256/16];
    uint8_t qs[256/4];
    ggml_fp16_t d;
    ggml_fp16_t dmin;
} block_q2_K;
_Static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + 256/16 + 256/4, "wrong q2_K block size/padding");
# 55 "./k_quants.h"
/* Start of block_q3_K; the member list continues on the following physical line. */
typedef struct {
    uint8_t
    hmask[256/8];
    /* ^ first member of block_q3_K; its `uint8_t` type and the struct opener sit at the end of the previous physical line. */
    uint8_t qs[256/4];
    uint8_t scales[12];
    ggml_fp16_t d;
} block_q3_K;
/* Each static assertion in this run requires sizeof to equal the sum of the member sizes, i.e. no padding. */
_Static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + 256 / 4 + 256 / 8 + 12, "wrong q3_K block size/padding");
# 76 "./k_quants.h"
/* q4_K block: two ggml_fp16_t values, 12 bytes of scales and 128 bytes of quants (256/2). */
typedef struct {
    ggml_fp16_t d;
    ggml_fp16_t dmin;
    uint8_t scales[12];
    uint8_t qs[256/2];
} block_q4_K;
_Static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 12 + 256/2, "wrong q4_K block size/padding");
# 98 "./k_quants.h"
/* q5_K block: the q4_K members plus a 32-byte `qh` array (256/8). */
typedef struct {
    ggml_fp16_t d;
    ggml_fp16_t dmin;
    uint8_t scales[12];
    uint8_t qh[256/8];
    uint8_t qs[256/2];
} block_q5_K;
_Static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + 12 + 256/2 + 256/8, "wrong q5_K block size/padding");
/*
 * q6_K block: 128-byte `ql`, 64-byte `qh`, 16 signed scales and one
 * ggml_fp16_t. The assertion's 3*256/4 term equals 256/2 + 256/4.
 */
typedef struct {
    uint8_t ql[256/2];
    uint8_t qh[256/4];
    int8_t scales[256/16];
    ggml_fp16_t d;
} block_q6_K;
_Static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 256 / 16 + 3*256/4, "wrong q6_K block size/padding");
/*
 * q8_K block: a float `d`, 256 signed 8-bit values and 16 int16_t `bsums`.
 * NOTE(review): presumably each bsums entry is the sum of 16 consecutive qs
 * values - confirm against the quantizer.
 */
typedef struct {
    float d;
    int8_t qs[256];
    int16_t bsums[256/16];
} block_q8_K;
_Static_assert(sizeof(block_q8_K) == sizeof(float) + 256 + 256/16*sizeof(int16_t), "wrong q8_K block size/padding");

/* Reference quantizers: float input, typed block output, int k. */
void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);
void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);
void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);
void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);
void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);

/* Quantizers with an untyped output pointer; the parameter lists match ggml_from_float_t. */
void quantize_row_q2_K(const float * restrict x, void * restrict y, int k);
void quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
void quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
void quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
/* The parameter list of this prototype continues on the following physical line. */
void quantize_row_q6_K(const float *
restrict x, void * restrict y, int k);
/* ^ remainder of a prototype that begins on the previous physical line. */
void quantize_row_q8_K(const float * restrict x, void * restrict y, int k);

/* Dequantizers: typed block input, float output, int k. */
void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k);
void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);
void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);
void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);
void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);
void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);

/*
 * Dot-product prototypes named <qN_K>_q8_K. The result is written through
 * `s`; both operands are untyped pointers.
 * NOTE(review): presumably vx points at blocks of the named type and vy at
 * block_q8_K data - confirm against the implementation.
 */
void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);

/* k-quant quantizers with the same parameter list as the ggml_quantize_q* prototypes in ggml.h. */
size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist);
size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist);
size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);
size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);
/* Linemarkers below: back in ggml.c, which next includes alloca.h and assert.h. */
# 7 "ggml.c" 2
# 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/alloca.h" 1 3 4
# 13 "ggml.c" 2
# 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/assert.h" 1 3 4
# 78 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/assert.h" 3 4
/* Repeated declaration from the second inclusion of assert.h; the parameter list continues on the following physical line. */
void __assert(const char* _Nonnull __file, int __line, const char*
_Nonnull __msg) __attribute__((__noreturn__)); void __assert2(const char* _Nonnull __file, int __line, const char* _Nonnull __function, const char* _Nonnull __msg) __attribute__((__noreturn__)); # 16 "ggml.c" 2 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/errno.h" 1 3 4 # 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/errno.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/errno.h" 1 3 4 # 19 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/errno.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/errno.h" 1 3 4 # 19 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/errno.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/errno.h" 1 3 4 # 21 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/errno.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/errno-base.h" 1 3 4 # 22 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/errno.h" 2 3 4 # 20 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/errno.h" 2 3 4 # 20 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/errno.h" 2 3 4 # 38 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/errno.h" 2 3 4 # 52 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/errno.h" 3 4 int* _Nonnull __errno(void) __attribute__((__const__)); # 17 "ggml.c" 2 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/time.h" 1 3 4 # 33 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/time.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/time.h" 1 3 4 # 33 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/time.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/types.h" 1 3 4 # 32 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/types.h" 3 4 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 4 # 33 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/types.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/types.h" 1 3 4 # 21 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/types.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/types.h" 1 3 4 # 21 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/types.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/int-ll64.h" 1 3 4 # 21 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/int-ll64.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/bitsperlong.h" 1 3 4 # 19 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/bitsperlong.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/bitsperlong.h" 1 3 4 # 20 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/bitsperlong.h" 2 3 4 # 22 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/int-ll64.h" 2 3 4 typedef __signed__ char __s8; typedef unsigned char __u8; typedef __signed__ short __s16; typedef unsigned short __u16; typedef __signed__ int __s32; typedef unsigned int __u32; __extension__ typedef __signed__ long long __s64; __extension__ typedef unsigned long long __u64; # 22 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/types.h" 2 3 4 # 22 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/types.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/posix_types.h" 1 3 4 # 21 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/posix_types.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/stddef.h" 1 3 4 # 21 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/stddef.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/compiler_types.h" 1 3 4 # 11 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/compiler_types.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/compiler.h" 1 3 4 # 12 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/compiler_types.h" 2 3 4 # 22 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/stddef.h" 2 3 4 # 22 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/posix_types.h" 2 3 4 typedef struct { unsigned long fds_bits[1024 / (8 * sizeof(long))]; } __kernel_fd_set; typedef void(* __kernel_sighandler_t) (int); typedef int __kernel_key_t; typedef int __kernel_mqd_t; # 1 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/posix_types.h" 1 3 4 # 21 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/posix_types.h" 3 4 typedef unsigned short __kernel_mode_t; typedef unsigned short __kernel_ipc_pid_t; typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; typedef unsigned short __kernel_old_dev_t; # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/posix_types.h" 1 3 4 # 21 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/posix_types.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/bitsperlong.h" 1 3 4 # 22 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/posix_types.h" 2 3 4 typedef long __kernel_long_t; typedef unsigned long __kernel_ulong_t; typedef __kernel_ulong_t __kernel_ino_t; typedef int __kernel_pid_t; # 43 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/posix_types.h" 3 4 typedef __kernel_long_t __kernel_suseconds_t; typedef int __kernel_daddr_t; typedef unsigned int __kernel_uid32_t; typedef unsigned int __kernel_gid32_t; typedef __kernel_uid_t __kernel_old_uid_t; typedef __kernel_gid_t __kernel_old_gid_t; typedef unsigned int __kernel_size_t; typedef int __kernel_ssize_t; typedef int __kernel_ptrdiff_t; typedef struct { int val[2]; } __kernel_fsid_t; typedef __kernel_long_t __kernel_off_t; typedef long long __kernel_loff_t; typedef __kernel_long_t __kernel_old_time_t; typedef __kernel_long_t __kernel_time_t; typedef long long __kernel_time64_t; typedef __kernel_long_t __kernel_clock_t; typedef int __kernel_timer_t; typedef int __kernel_clockid_t; typedef char * __kernel_caddr_t; typedef unsigned short __kernel_uid16_t; typedef unsigned short 
__kernel_gid16_t; # 31 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/posix_types.h" 2 3 4 # 31 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/posix_types.h" 2 3 4 # 24 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/types.h" 2 3 4 typedef __u16 __le16; typedef __u16 __be16; typedef __u32 __le32; typedef __u32 __be32; typedef __u64 __le64; typedef __u64 __be64; typedef __u16 __sum16; typedef __u32 __wsum; typedef unsigned __poll_t; # 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/types.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/pthread_types.h" 1 3 4 # 32 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/pthread_types.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/types.h" 1 3 4 # 33 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/pthread_types.h" 2 3 4 typedef struct { uint32_t flags; void* stack_base; size_t stack_size; size_t guard_size; int32_t sched_policy; int32_t sched_priority; } pthread_attr_t; # 60 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/pthread_types.h" 3 4 typedef struct { int32_t __private[1]; } pthread_cond_t; typedef long pthread_condattr_t; typedef int pthread_key_t; typedef struct { int32_t __private[1]; } pthread_mutex_t; typedef long pthread_mutexattr_t; typedef int pthread_once_t; typedef struct { int32_t __private[10]; } pthread_rwlock_t; typedef long pthread_rwlockattr_t; # 104 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/pthread_types.h" 3 4 typedef long pthread_t; # 40 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/types.h" 2 3 4 typedef __kernel_gid32_t __gid_t; 
typedef __gid_t gid_t; typedef __kernel_uid32_t __uid_t; typedef __uid_t uid_t; typedef __kernel_pid_t __pid_t; typedef __pid_t pid_t; typedef uint32_t __id_t; typedef __id_t id_t; typedef unsigned long blkcnt_t; typedef unsigned long blksize_t; typedef __kernel_caddr_t caddr_t; typedef __kernel_clock_t clock_t; typedef __kernel_clockid_t __clockid_t; typedef __clockid_t clockid_t; typedef __kernel_daddr_t daddr_t; typedef unsigned long fsblkcnt_t; typedef unsigned long fsfilcnt_t; typedef __kernel_mode_t __mode_t; typedef __mode_t mode_t; typedef __kernel_key_t __key_t; typedef __key_t key_t; typedef __kernel_ino_t __ino_t; typedef __ino_t ino_t; typedef uint64_t ino64_t; typedef uint32_t __nlink_t; typedef __nlink_t nlink_t; typedef void* __timer_t; typedef __timer_t timer_t; typedef __kernel_suseconds_t __suseconds_t; typedef __suseconds_t suseconds_t; typedef uint32_t __useconds_t; typedef __useconds_t useconds_t; typedef uint32_t dev_t; typedef __kernel_time_t __time_t; typedef __time_t time_t; # 105 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/types.h" 3 4 typedef __kernel_off_t off_t; typedef __kernel_loff_t loff_t; typedef loff_t off64_t; typedef int32_t __socklen_t; typedef __socklen_t socklen_t; typedef __builtin_va_list __va_list; # 128 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/types.h" 3 4 typedef __kernel_ssize_t ssize_t; typedef unsigned int uint_t; typedef unsigned int uint; typedef unsigned char u_char; typedef unsigned short u_short; typedef unsigned int u_int; typedef unsigned long u_long; typedef uint32_t u_int32_t; typedef uint16_t u_int16_t; typedef uint8_t u_int8_t; typedef uint64_t u_int64_t; # 34 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/time.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/time.h" 1 3 4 # 21 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/time.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/timespec.h" 1 3 4 # 46 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/timespec.h" 3 4 struct timespec { time_t tv_sec; long tv_nsec; }; # 22 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/time.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/time_types.h" 1 3 4 # 22 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/time_types.h" 3 4 struct __kernel_timespec { __kernel_time64_t tv_sec; long long tv_nsec; }; struct __kernel_itimerspec { struct __kernel_timespec it_interval; struct __kernel_timespec it_value; }; struct __kernel_old_timespec { __kernel_old_time_t tv_sec; long tv_nsec; }; struct __kernel_sock_timeval { __s64 tv_sec; __s64 tv_usec; }; # 24 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/time.h" 2 3 4 struct timeval { __kernel_old_time_t tv_sec; __kernel_suseconds_t tv_usec; }; struct itimerspec { struct timespec it_interval; struct timespec it_value; }; struct itimerval { struct timeval it_interval; struct timeval it_value; }; struct timezone { int tz_minuteswest; int tz_dsttime; }; # 35 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/time.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/select.h" 1 3 4 # 40 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/select.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 1 3 4 # 35 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 # 1 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/sigcontext.h" 1 3 4 # 21 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/sigcontext.h" 3 4 struct sigcontext { unsigned long trap_no; unsigned long error_code; unsigned long oldmask; unsigned long arm_r0; unsigned long arm_r1; unsigned long arm_r2; unsigned long arm_r3; unsigned long arm_r4; unsigned long arm_r5; unsigned long arm_r6; unsigned long arm_r7; unsigned long arm_r8; unsigned long arm_r9; unsigned long arm_r10; unsigned long arm_fp; unsigned long arm_ip; unsigned long arm_sp; unsigned long arm_lr; unsigned long arm_pc; unsigned long arm_cpsr; unsigned long fault_address; }; # 36 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/signal_types.h" 1 3 4 # 31 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/signal_types.h" 3 4 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/limits.h" 1 3 4 # 21 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/limits.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/limits.h" 1 3 4 # 41 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/limits.h" 3 4 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/float.h" 1 3 4 # 42 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/limits.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/limits.h" 1 3 4 # 44 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/limits.h" 2 3 4 # 140 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/limits.h" 3 4 # 1 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/posix_limits.h" 1 3 4 # 141 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/limits.h" 2 3 4 # 22 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/limits.h" 2 3 4 # 32 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/signal_types.h" 2 3 4 # 41 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/signal_types.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/signal.h" 1 3 4 # 21 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/signal.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/signal.h" 1 3 4 # 22 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/signal.h" 3 4 struct siginfo; typedef unsigned long sigset_t; # 66 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/signal.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/signal-defs.h" 1 3 4 # 57 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/signal-defs.h" 3 4 typedef void __signalfn_t(int); typedef __signalfn_t * __sighandler_t; typedef void __restorefn_t(void); typedef __restorefn_t * __sigrestore_t; # 67 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/signal.h" 2 3 4 struct __kernel_sigaction { union { __sighandler_t _sa_handler; void(* _sa_sigaction) (int, struct siginfo *, void *); } _u; sigset_t sa_mask; unsigned long sa_flags; void(* sa_restorer) (void); }; typedef struct sigaltstack { void * ss_sp; int ss_flags; __kernel_size_t ss_size; } stack_t; # 22 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/signal.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/siginfo.h" 1 3 4 # 19 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/siginfo.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/siginfo.h" 1 3 4 # 23 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/siginfo.h" 3 4 typedef union sigval { int sival_int; void * sival_ptr; } sigval_t; # 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/siginfo.h" 3 4 union __sifields { struct { __kernel_pid_t _pid; __kernel_uid32_t _uid; } _kill; struct { __kernel_timer_t _tid; int _overrun; sigval_t _sigval; int _sys_private; } _timer; struct { __kernel_pid_t _pid; __kernel_uid32_t _uid; sigval_t _sigval; } _rt; struct { __kernel_pid_t _pid; __kernel_uid32_t _uid; int _status; __kernel_clock_t _utime; __kernel_clock_t _stime; } _sigchld; struct { void * _addr; union { int _trapno; short _addr_lsb; struct { char _dummy_bnd[(__alignof__(void *) < sizeof(short) ? sizeof(short) : __alignof__(void *))]; void * _lower; void * _upper; } _addr_bnd; struct { char _dummy_pkey[(__alignof__(void *) < sizeof(short) ? 
sizeof(short) : __alignof__(void *))]; __u32 _pkey; } _addr_pkey; struct { unsigned long _data; __u32 _type; __u32 _flags; } _perf; }; } _sigfault; struct { long _band; int _fd; } _sigpoll; struct { void * _call_addr; int _syscall; unsigned int _arch; } _sigsys; }; typedef struct siginfo { union { struct { int si_signo; int si_errno; int si_code; union __sifields _sifields; }; int _si_pad[128 / sizeof(int)]; }; } siginfo_t; # 231 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/asm-generic/siginfo.h" 3 4 typedef struct sigevent { sigval_t sigev_value; int sigev_signo; int sigev_notify; union { int _pad[((64 - (sizeof(int) * 2 + sizeof(sigval_t))) / sizeof(int))]; int _tid; struct { void(* _function) (sigval_t); void * _attribute; } _sigev_thread; } _sigev_un; } sigevent_t; # 20 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/arm-linux-androideabi/asm/siginfo.h" 2 3 4 # 23 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/signal.h" 2 3 4 # 42 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/signal_types.h" 2 3 4 # 53 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/signal_types.h" 3 4 typedef int sig_atomic_t; typedef __sighandler_t sig_t; typedef __sighandler_t sighandler_t; typedef struct { unsigned long __bits[64/(8*sizeof(long))]; } sigset64_t; # 88 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/signal_types.h" 3 4 struct sigaction { union { sighandler_t sa_handler; void (*sa_sigaction)(int, struct siginfo*, void*); }; sigset_t sa_mask; int sa_flags; void (*sa_restorer)(void); }; struct sigaction64 { union { sighandler_t sa_handler; void (*sa_sigaction)(int, struct siginfo*, void*); }; int sa_flags; void (*sa_restorer)(void); sigset64_t sa_mask; }; # 38 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 2 3 4 # 
1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/ucontext.h" 1 3 4 # 33 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/ucontext.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 1 3 4 # 34 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/ucontext.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/user.h" 1 3 4 # 32 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/user.h" 3 4 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 4 # 33 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/user.h" 2 3 4 # 176 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/user.h" 3 4 struct user_fpregs { struct fp_reg { unsigned int sign1:1; unsigned int unused:15; unsigned int sign2:1; unsigned int exponent:14; unsigned int j:1; unsigned int mantissa1:31; unsigned int mantissa0:32; } fpregs[8]; unsigned int fpsr:32; unsigned int fpcr:32; unsigned char ftype[8]; unsigned int init_flag; }; struct user_regs { unsigned long uregs[18]; }; struct user_vfp { unsigned long long fpregs[32]; unsigned long fpscr; }; struct user_vfp_exc { unsigned long fpexc; unsigned long fpinst; unsigned long fpinst2; }; struct user { struct user_regs regs; int u_fpvalid; unsigned long int u_tsize; unsigned long int u_dsize; unsigned long int u_ssize; unsigned long start_code; unsigned long start_stack; long int signal; int reserved; struct user_regs* u_ar0; unsigned long magic; char u_comm[32]; int u_debugreg[8]; struct user_fpregs u_fp; struct user_fpregs* u_fp0; }; # 35 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/ucontext.h" 2 3 4 enum { REG_R0 = 0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7, REG_R8, 
REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, }; typedef int greg_t; typedef greg_t gregset_t[18]; typedef struct user_fpregs fpregset_t; typedef struct sigcontext mcontext_t; typedef struct ucontext { unsigned long uc_flags; struct ucontext* uc_link; stack_t uc_stack; mcontext_t uc_mcontext; union { struct { sigset_t uc_sigmask; uint32_t __padding_rt_sigset; }; sigset64_t uc_sigmask64; }; char __padding[120]; unsigned long uc_regspace[128] __attribute__((__aligned__(8))); } ucontext_t; # 42 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 2 3 4 # 56 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 int __libc_current_sigrtmin(void) __attribute__((__availability__(android,strict,introduced=21))); int __libc_current_sigrtmax(void) __attribute__((__availability__(android,strict,introduced=21))); extern const char* _Nonnull const sys_siglist[(64 + 1)]; extern const char* _Nonnull const sys_signame[(64 + 1)]; int sigaction(int __signal, const struct sigaction* _Nullable __new_action, struct sigaction* _Nullable __old_action); int siginterrupt(int __signal, int __flag); sighandler_t _Nonnull signal(int __signal, sighandler_t _Nullable __handler) __attribute__((__availability__(android,strict,introduced=21))); int sigaddset(sigset_t* _Nonnull __set, int __signal) __attribute__((__availability__(android,strict,introduced=21))); # 88 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 int sigdelset(sigset_t* _Nonnull __set, int __signal) __attribute__((__availability__(android,strict,introduced=21))); # 98 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 int sigemptyset(sigset_t* _Nonnull __set) __attribute__((__availability__(android,strict,introduced=21))); # 108 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 int sigfillset(sigset_t* 
_Nonnull __set) __attribute__((__availability__(android,strict,introduced=21))); # 118 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 int sigismember(const sigset_t* _Nonnull __set, int __signal) __attribute__((__availability__(android,strict,introduced=21))); # 127 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 int sigpending(sigset_t* _Nonnull __set); int sigprocmask(int __how, const sigset_t* _Nullable __new_set, sigset_t* _Nullable __old_set); int sigsuspend(const sigset_t* _Nonnull __mask); int sigwait(const sigset_t* _Nonnull __set, int* _Nonnull __signal); # 169 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 int raise(int __signal); int kill(pid_t __pid, int __signal); int killpg(int __pgrp, int __signal); int tgkill(int __tgid, int __tid, int __signal); int sigaltstack(const stack_t* _Nullable __new_signal_stack, stack_t* _Nullable __old_signal_stack); void psiginfo(const siginfo_t* _Nonnull __info, const char* _Nullable __msg) __attribute__((__availability__(android,strict,introduced=17))); void psignal(int __signal, const char* _Nullable __msg) __attribute__((__availability__(android,strict,introduced=17))); int pthread_kill(pthread_t __pthread, int __signal); # 192 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 int pthread_sigmask(int __how, const sigset_t* _Nullable __new_set, sigset_t* _Nullable __old_set); # 201 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 int sigqueue(pid_t __pid, int __signal, const union sigval __value) __attribute__((__availability__(android,strict,introduced=23))); int sigtimedwait(const sigset_t* _Nonnull __set, siginfo_t* _Nullable __info, const struct timespec* _Nullable __timeout) __attribute__((__availability__(android,strict,introduced=23))); # 212 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/signal.h" 3 4 int sigwaitinfo(const sigset_t* _Nonnull __set, siginfo_t* _Nullable __info) __attribute__((__availability__(android,strict,introduced=23))); # 41 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/select.h" 2 3 4 typedef unsigned long fd_mask; # 57 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/select.h" 3 4 typedef struct { fd_mask fds_bits[1024/(8 * sizeof(fd_mask))]; } fd_set; # 76 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/select.h" 3 4 void __FD_CLR_chk(int, fd_set*, size_t) __attribute__((__availability__(android,strict,introduced=21))); void __FD_SET_chk(int, fd_set*, size_t) __attribute__((__availability__(android,strict,introduced=21))); int __FD_ISSET_chk(int, const fd_set*, size_t) __attribute__((__availability__(android,strict,introduced=21))); # 102 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/select.h" 3 4 int select(int __max_fd_plus_one, fd_set* __read_fds, fd_set* __write_fds, fd_set* __exception_fds, struct timeval* __timeout); # 113 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/select.h" 3 4 int pselect(int __max_fd_plus_one, fd_set* __read_fds, fd_set* __write_fds, fd_set* __exception_fds, const struct timespec* __timeout, const sigset_t* __mask); # 38 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/time.h" 2 3 4 int gettimeofday(struct timeval* __tv, struct timezone* __tz); int settimeofday(const struct timeval* __tv, const struct timezone* __tz); int getitimer(int __which, struct itimerval* __current_value); int setitimer(int __which, const struct itimerval* __new_value, struct itimerval* __old_value); int utimes(const char* __path, const struct timeval __times[2]); # 34 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/time.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/xlocale.h" 1 3 4 # 44 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/xlocale.h" 3 4 struct __locale_t; typedef struct __locale_t* locale_t; # 35 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/time.h" 2 3 4 extern char* _Nonnull tzname[]; extern int daylight; extern long int timezone; struct sigevent; struct tm { int tm_sec; int tm_min; int tm_hour; int tm_mday; int tm_mon; int tm_year; int tm_wday; int tm_yday; int tm_isdst; long int tm_gmtoff; const char* _Nullable tm_zone; }; time_t time(time_t* _Nullable __t); int nanosleep(const struct timespec* _Nonnull __request, struct timespec* _Nullable __remainder); char* _Nullable asctime(const struct tm* _Nonnull __tm); char* _Nullable asctime_r(const struct tm* _Nonnull __tm, char* _Nonnull __buf); double difftime(time_t __lhs, time_t __rhs); time_t mktime(struct tm* _Nonnull __tm); struct tm* _Nullable localtime(const time_t* _Nonnull __t); struct tm* _Nullable localtime_r(const time_t* _Nonnull __t, struct tm* _Nonnull __tm); struct tm* _Nullable gmtime(const time_t* _Nonnull __t); struct tm* _Nullable gmtime_r(const time_t* _Nonnull __t, struct tm* _Nonnull __tm); char* _Nullable strptime(const char* _Nonnull __s, const char* _Nonnull __fmt, struct tm* _Nonnull __tm) __attribute__((__format__(strftime, 2, 0))); size_t strftime(char* _Nonnull __buf, size_t __n, const char* _Nonnull __fmt, const struct tm* _Nullable __tm) __attribute__((__format__(strftime, 3, 0))); size_t strftime_l(char* _Nonnull __buf, size_t __n, const char* _Nonnull __fmt, const struct tm* _Nullable __tm, locale_t _Nonnull __l) __attribute__((__format__(strftime, 3, 0))) __attribute__((__availability__(android,strict,introduced=21))); char* _Nullable ctime(const time_t* _Nonnull __t); char* _Nullable 
ctime_r(const time_t* _Nonnull __t, char* _Nonnull __buf); void tzset(void); clock_t clock(void); int clock_getcpuclockid(pid_t __pid, clockid_t* _Nonnull __clock) __attribute__((__availability__(android,strict,introduced=23))); int clock_getres(clockid_t __clock, struct timespec* _Nullable __resolution); int clock_gettime(clockid_t __clock, struct timespec* _Nonnull __ts); int clock_nanosleep(clockid_t __clock, int __flags, const struct timespec* _Nonnull __request, struct timespec* _Nullable __remainder); int clock_settime(clockid_t __clock, const struct timespec* _Nonnull __ts); int timer_create(clockid_t __clock, struct sigevent* _Nullable __event, timer_t _Nonnull * _Nonnull __timer_ptr); int timer_delete(timer_t _Nonnull __timer); int timer_settime(timer_t _Nonnull __timer, int __flags, const struct itimerspec* _Nonnull __new_value, struct itimerspec* _Nullable __old_value); int timer_gettime(timer_t _Nonnull _timer, struct itimerspec* _Nonnull __ts); int timer_getoverrun(timer_t _Nonnull __timer); time_t timelocal(struct tm* _Nonnull __tm); time_t timegm(struct tm* _Nonnull __tm); # 18 "ggml.c" 2 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/math.h" 1 3 4 # 25 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/math.h" 3 4 typedef double __double_t; typedef __double_t double_t; typedef float __float_t; typedef __float_t float_t; # 76 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/math.h" 3 4 double acos(double __x); float acosf(float __x); long double acosl(long double __x) __asm__("acos") __attribute__((__availability__(android,strict,introduced=3))); double asin(double __x); float asinf(float __x); long double asinl(long double __x) __asm__("asin") __attribute__((__availability__(android,strict,introduced=3))); double atan(double __x); float atanf(float __x); long double atanl(long double __x) __asm__("atan") 
__attribute__((__availability__(android,strict,introduced=3))); double atan2(double __y, double __x); float atan2f(float __y, float __x); long double atan2l(long double __y, long double __x) __asm__("atan2") __attribute__((__availability__(android,strict,introduced=3))); double cos(double __x); float cosf(float __x); long double cosl(long double __x) __asm__("cos") __attribute__((__availability__(android,strict,introduced=3))); double sin(double __x); float sinf(float __x); long double sinl(long double __x) __asm__("sin") __attribute__((__availability__(android,strict,introduced=3))); double tan(double __x); float tanf(float __x); long double tanl(long double __x) __asm__("tan") __attribute__((__availability__(android,strict,introduced=3))); double acosh(double __x); float acoshf(float __x); long double acoshl(long double __x) __asm__("acosh") __attribute__((__availability__(android,strict,introduced=3))); double asinh(double __x); float asinhf(float __x); long double asinhl(long double __x) __asm__("asinh") __attribute__((__availability__(android,strict,introduced=3))); double atanh(double __x); float atanhf(float __x); long double atanhl(long double __x) __asm__("atanh") __attribute__((__availability__(android,strict,introduced=3))); double cosh(double __x); float coshf(float __x); long double coshl(long double __x) __asm__("cosh") __attribute__((__availability__(android,strict,introduced=3))); double sinh(double __x); float sinhf(float __x); long double sinhl(long double __x) __asm__("sinh") __attribute__((__availability__(android,strict,introduced=3))); double tanh(double __x); float tanhf(float __x); long double tanhl(long double __x) __asm__("tanh") __attribute__((__availability__(android,strict,introduced=3))); double exp(double __x); float expf(float __x); long double expl(long double __x) __asm__("exp") __attribute__((__availability__(android,strict,introduced=3))); double exp2(double __x); float exp2f(float __x); long double exp2l(long double __x) 
__asm__("exp2") __attribute__((__availability__(android,strict,introduced=3))); double expm1(double __x); float expm1f(float __x); long double expm1l(long double __x) __asm__("expm1") __attribute__((__availability__(android,strict,introduced=3))); double frexp(double __x, int* _Nonnull __exponent); float frexpf(float __x, int* _Nonnull __exponent); long double frexpl(long double __x, int* _Nonnull __exponent) __asm__("frexp") __attribute__((__availability__(android,strict,introduced=3))); int ilogb(double __x) __attribute__((__const__)); int ilogbf(float __x) __attribute__((__const__)); int ilogbl(long double __x) __asm__("ilogb") __attribute__((__availability__(android,strict,introduced=3))) __attribute__((__const__)); double ldexp(double __x, int __exponent); float ldexpf(float __x, int __exponent); long double ldexpl(long double __x, int __exponent) __asm__("ldexp") __attribute__((__availability__(android,strict,introduced=3))); double log(double __x); float logf(float __x); long double logl(long double __x) __asm__("log") __attribute__((__availability__(android,strict,introduced=3))); double log10(double __x); float log10f(float __x); long double log10l(long double __x) __asm__("log10") __attribute__((__availability__(android,strict,introduced=3))); double log1p(double __x); float log1pf(float __x); long double log1pl(long double __x) __asm__("log1p") __attribute__((__availability__(android,strict,introduced=3))); double log2(double __x) __attribute__((__availability__(android,strict,introduced=18))); float log2f(float __x) __attribute__((__availability__(android,strict,introduced=18))); long double log2l(long double __x) __asm__("log2") __attribute__((__availability__(android,strict,introduced=18))); double logb(double __x); float logbf(float __x); long double logbl(long double __x) __asm__("logb") __attribute__((__availability__(android,strict,introduced=3))); double modf(double __x, double* _Nonnull __integral_part); float modff(float __x, float* _Nonnull 
__integral_part); long double modfl(long double __x, long double* _Nonnull __integral_part) __asm__("modf") __attribute__((__availability__(android,strict,introduced=3))); double scalbn(double __x, int __exponent); float scalbnf(float __x, int __exponent); long double scalbnl(long double __x, int __exponent) __asm__("scalbn") __attribute__((__availability__(android,strict,introduced=3))); double scalbln(double __x, long __exponent) ; float scalblnf(float __x, long __exponent) ; long double scalblnl(long double __x, long __exponent) ; double cbrt(double __x); float cbrtf(float __x); long double cbrtl(long double __x) __asm__("cbrt") __attribute__((__availability__(android,strict,introduced=3))); double fabs(double __x) __attribute__((__const__)); float fabsf(float __x) __attribute__((__const__)); long double fabsl(long double __x) __asm__("fabs") __attribute__((__availability__(android,strict,introduced=3))) __attribute__((__const__)); double hypot(double __x, double __y); float hypotf(float __x, float __y); long double hypotl(long double __x, long double __y) __asm__("hypot") __attribute__((__availability__(android,strict,introduced=3))); double pow(double __x, double __y); float powf(float __x, float __y); long double powl(long double __x, long double __y) __asm__("pow") __attribute__((__availability__(android,strict,introduced=3))); double sqrt(double __x); float sqrtf(float __x); long double sqrtl(long double __x) __asm__("sqrt") __attribute__((__availability__(android,strict,introduced=3))); double erf(double __x); float erff(float __x); long double erfl(long double __x) __asm__("erf") __attribute__((__availability__(android,strict,introduced=3))); double erfc(double __x); float erfcf(float __x); long double erfcl(long double __x) __asm__("erfc") __attribute__((__availability__(android,strict,introduced=3))); double lgamma(double __x); float lgammaf(float __x); long double lgammal(long double __x) __asm__("lgamma") 
__attribute__((__availability__(android,strict,introduced=3))); double tgamma(double __x); float tgammaf(float __x); long double tgammal(long double __x) __asm__("tgamma") __attribute__((__availability__(android,strict,introduced=3))); double ceil(double __x); float ceilf(float __x); long double ceill(long double __x) __asm__("ceil") __attribute__((__availability__(android,strict,introduced=3))); double floor(double __x); float floorf(float __x); long double floorl(long double __x) __asm__("floor") __attribute__((__availability__(android,strict,introduced=3))); double nearbyint(double __x); float nearbyintf(float __x); long double nearbyintl(long double __x) __asm__("nearbyint") __attribute__((__availability__(android,strict,introduced=3))); double rint(double __x); float rintf(float __x); long double rintl(long double __x) __asm__("rint") __attribute__((__availability__(android,strict,introduced=3))); long lrint(double __x); long lrintf(float __x); long lrintl(long double __x) __asm__("lrint") __attribute__((__availability__(android,strict,introduced=3))); long long llrint(double __x); long long llrintf(float __x); long long llrintl(long double __x) __asm__("llrint") __attribute__((__availability__(android,strict,introduced=3))); double round(double __x); float roundf(float __x); long double roundl(long double __x) __asm__("roundl") __attribute__((__availability__(android,strict,introduced=3))); long lround(double __x); long lroundf(float __x); long lroundl(long double __x) __asm__("lround") __attribute__((__availability__(android,strict,introduced=3))); long long llround(double __x); long long llroundf(float __x); long long llroundl(long double __x) __asm__("llround") __attribute__((__availability__(android,strict,introduced=3))); double trunc(double __x); float truncf(float __x); long double truncl(long double __x) __asm__("trunc") __attribute__((__availability__(android,strict,introduced=3))); double fmod(double __x, double __y); float fmodf(float __x, float 
__y); long double fmodl(long double __x, long double __y) __asm__("fmod") __attribute__((__availability__(android,strict,introduced=3))); double remainder(double __x, double __y); float remainderf(float __x, float __y); long double remainderl(long double __x, long double __y) __asm__("remainder") __attribute__((__availability__(android,strict,introduced=3))); double remquo(double __x, double __y, int* _Nonnull __quotient_bits); float remquof(float __x, float __y, int* _Nonnull __quotient_bits); long double remquol(long double __x, long double __y, int* _Nonnull __quotient_bits) __asm__("remquo") __attribute__((__availability__(android,strict,introduced=3))); double copysign(double __value, double __sign) __attribute__((__const__)); float copysignf(float __value, float __sign) __attribute__((__const__)); long double copysignl(long double __value, long double __sign) __asm__("copysign") __attribute__((__availability__(android,strict,introduced=3))) __attribute__((__const__)); double nan(const char* _Nonnull __kind) __attribute__((__const__)); float nanf(const char* _Nonnull __kind) __attribute__((__const__)); long double nanl(const char* _Nonnull __kind) __asm__("nan") __attribute__((__availability__(android,strict,introduced=13))) __attribute__((__const__)); double nextafter(double __x, double __y); float nextafterf(float __x, float __y); long double nextafterl(long double __x, long double __y) __asm__("nextafter") ; double nexttoward(double __x, long double __y) ; float nexttowardf(float __x, long double __y); long double nexttowardl(long double __x, long double __y) __asm__("nexttoward") ; double fdim(double __x, double __y); float fdimf(float __x, float __y); long double fdiml(long double __x, long double __y) __asm__("fdim") __attribute__((__availability__(android,strict,introduced=3))); double fmax(double __x, double __y) __attribute__((__const__)); float fmaxf(float __x, float __y) __attribute__((__const__)); long double fmaxl(long double __x, long double __y) 
__asm__("fmax") __attribute__((__availability__(android,strict,introduced=3))) __attribute__((__const__)); double fmin(double __x, double __y) __attribute__((__const__)); float fminf(float __x, float __y) __attribute__((__const__)); long double fminl(long double __x, long double __y) __asm__("fmin") __attribute__((__availability__(android,strict,introduced=3))) __attribute__((__const__)); double fma(double __x, double __y, double __z); float fmaf(float __x, float __y, float __z); long double fmal(long double __x, long double __y, long double __z) __asm__("fma") ; # 329 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/math.h" 3 4 int (isinf)(double __x) __attribute__((__const__)) __attribute__((__availability__(android,strict,introduced=21))); int (isnan)(double __x) __attribute__((__const__)); extern int signgam; double j0(double __x); double j1(double __x); double jn(int __n, double __x); double y0(double __x); double y1(double __x); double yn(int __n, double __x); # 19 "ggml.c" 2 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 1 3 4 # 33 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/wait.h" 1 3 4 # 38 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/wait.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/wait.h" 1 3 4 # 39 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/wait.h" 2 3 4 # 34 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 1 3 4 # 29 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 3 4 # 1 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 4 # 30 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 1 3 4 # 44 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 3 4 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stdarg.h" 1 3 4 # 14 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stdarg.h" 3 4 typedef __builtin_va_list __gnuc_va_list; typedef __builtin_va_list va_list; # 45 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 2 3 4 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 4 # 46 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/seek_constants.h" 1 3 4 # 48 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/struct_file.h" 1 3 4 # 36 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/struct_file.h" 3 4 struct __sFILE { char __private[84]; } __attribute__((aligned(sizeof(void*)))); # 51 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 2 3 4 typedef off_t fpos_t; typedef off64_t fpos64_t; struct __sFILE; typedef struct __sFILE FILE; extern FILE* _Nonnull stdin __attribute__((__availability__(android,strict,introduced=23))); extern FILE* _Nonnull stdout __attribute__((__availability__(android,strict,introduced=23))); extern FILE* _Nonnull stderr __attribute__((__availability__(android,strict,introduced=23))); # 106 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 3 4 void clearerr(FILE* _Nonnull __fp); int fclose(FILE* _Nonnull __fp); int feof(FILE* _Nonnull __fp); int ferror(FILE* _Nonnull __fp); int fflush(FILE* _Nullable __fp); int fgetc(FILE* _Nonnull __fp); char* _Nullable fgets(char* _Nonnull __buf, int __size, FILE* _Nonnull __fp); int fprintf(FILE* _Nonnull __fp , const char* _Nonnull __fmt, ...) __attribute__((__format__(printf, 2, 3))); int fputc(int __ch, FILE* _Nonnull __fp); int fputs(const char* _Nonnull __s, FILE* _Nonnull __fp); size_t fread(void* _Nonnull __buf, size_t __size, size_t __count, FILE* _Nonnull __fp); int fscanf(FILE* _Nonnull __fp, const char* _Nonnull __fmt, ...) __attribute__((__format__(scanf, 2, 3))); size_t fwrite(const void* _Nonnull __buf, size_t __size, size_t __count, FILE* _Nonnull __fp); int getc(FILE* _Nonnull __fp); int getchar(void); ssize_t getdelim(char* _Nullable * _Nonnull __line_ptr, size_t* _Nonnull __line_length_ptr, int __delimiter, FILE* _Nonnull __fp) __attribute__((__availability__(android,strict,introduced=18))); ssize_t getline(char* _Nullable * _Nonnull __line_ptr, size_t* _Nonnull __line_length_ptr, FILE* _Nonnull __fp) __attribute__((__availability__(android,strict,introduced=18))); void perror(const char* _Nullable __msg); int printf(const char* _Nonnull __fmt, ...) __attribute__((__format__(printf, 1, 2))); int putc(int __ch, FILE* _Nonnull __fp); int putchar(int __ch); int puts(const char* _Nonnull __s); int remove(const char* _Nonnull __path); void rewind(FILE* _Nonnull __fp); int scanf(const char* _Nonnull __fmt, ...) __attribute__((__format__(scanf, 1, 2))); void setbuf(FILE* _Nonnull __fp, char* _Nullable __buf); int setvbuf(FILE* _Nonnull __fp, char* _Nullable __buf, int __mode, size_t __size); int sscanf(const char* _Nonnull __s, const char* _Nonnull __fmt, ...) 
__attribute__((__format__(scanf, 2, 3))); int ungetc(int __ch, FILE* _Nonnull __fp); int vfprintf(FILE* _Nonnull __fp, const char* _Nonnull __fmt, va_list __args) __attribute__((__format__(printf, 2, 0))); int vprintf(const char* _Nonnull __fp, va_list __args) __attribute__((__format__(printf, 1, 0))); int dprintf(int __fd, const char* _Nonnull __fmt, ...) __attribute__((__format__(printf, 2, 3))) __attribute__((__availability__(android,strict,introduced=21))); int vdprintf(int __fd, const char* _Nonnull __fmt, va_list __args) __attribute__((__format__(printf, 2, 0))) __attribute__((__availability__(android,strict,introduced=21))); int sprintf(char* _Nonnull __s, const char* _Nonnull __fmt, ...) __attribute__((__format__(printf, 2, 3))) ; int vsprintf(char* _Nonnull __s, const char* _Nonnull __fmt, va_list __args) __attribute__((__format__(printf, 2, 0))) ; char* _Nullable tmpnam(char* _Nullable __s) __attribute__((deprecated("tmpnam is unsafe, use mkstemp or tmpfile instead"))); char* _Nullable tempnam(const char* _Nullable __dir, const char* _Nullable __prefix) __attribute__((deprecated("tempnam is unsafe, use mkstemp or tmpfile instead"))); int rename(const char* _Nonnull __old_path, const char* _Nonnull __new_path); int renameat(int __old_dir_fd, const char* _Nonnull __old_path, int __new_dir_fd, const char* _Nonnull __new_path); # 215 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 3 4 int fseek(FILE* _Nonnull __fp, long __offset, int __whence); long ftell(FILE* _Nonnull __fp); # 241 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 3 4 int fgetpos(FILE* _Nonnull __fp, fpos_t* _Nonnull __pos); int fsetpos(FILE* _Nonnull __fp, const fpos_t* _Nonnull __pos); int fseeko(FILE* _Nonnull __fp, off_t __offset, int __whence); off_t ftello(FILE* _Nonnull __fp); # 275 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 3 4 FILE* _Nullable fopen(const 
char* _Nonnull __path, const char* _Nonnull __mode); FILE* _Nullable freopen(const char* _Nullable __path, const char* _Nonnull __mode, FILE* _Nonnull __fp); FILE* _Nullable tmpfile(void); int snprintf(char* _Nullable __buf, size_t __size, const char* _Nonnull __fmt, ...) __attribute__((__format__(printf, 3, 4))); int vfscanf(FILE* _Nonnull __fp, const char* _Nonnull __fmt, va_list __args) __attribute__((__format__(scanf, 2, 0))); int vscanf(const char* _Nonnull __fmt , va_list __args) __attribute__((__format__(scanf, 1, 0))); int vsnprintf(char* _Nullable __buf, size_t __size, const char* _Nonnull __fmt, va_list __args) __attribute__((__format__(printf, 3, 0))); int vsscanf(const char* _Nonnull __s, const char* _Nonnull __fmt, va_list __args) __attribute__((__format__(scanf, 2, 0))); # 307 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 3 4 FILE* _Nullable fdopen(int __fd, const char* _Nonnull __mode); int fileno(FILE* _Nonnull __fp); int pclose(FILE* _Nonnull __fp); FILE* _Nullable popen(const char* _Nonnull __command, const char* _Nonnull __mode); void flockfile(FILE* _Nonnull __fp); int ftrylockfile(FILE* _Nonnull __fp); void funlockfile(FILE* _Nonnull __fp); int getc_unlocked(FILE* _Nonnull __fp); int getchar_unlocked(void); int putc_unlocked(int __ch, FILE* _Nonnull __fp); int putchar_unlocked(int __ch); FILE* _Nullable fmemopen(void* _Nullable __buf, size_t __size, const char* _Nonnull __mode) __attribute__((__availability__(android,strict,introduced=23))); FILE* _Nullable open_memstream(char* _Nonnull * _Nonnull __ptr, size_t* _Nonnull __size_ptr) __attribute__((__availability__(android,strict,introduced=23))); int asprintf(char* _Nullable * _Nonnull __s_ptr, const char* _Nonnull __fmt, ...) 
__attribute__((__format__(printf, 2, 3))); char* _Nullable fgetln(FILE* _Nonnull __fp, size_t* _Nonnull __length_ptr); int fpurge(FILE* _Nonnull __fp); void setbuffer(FILE* _Nonnull __fp, char* _Nullable __buf, int __size); int setlinebuf(FILE* _Nonnull __fp); int vasprintf(char* _Nullable * _Nonnull __s_ptr, const char* _Nonnull __fmt, va_list __args) __attribute__((__format__(printf, 2, 0))); void clearerr_unlocked(FILE* _Nonnull __fp) __attribute__((__availability__(android,strict,introduced=23))); int feof_unlocked(FILE* _Nonnull __fp) __attribute__((__availability__(android,strict,introduced=23))); int ferror_unlocked(FILE* _Nonnull __fp) __attribute__((__availability__(android,strict,introduced=23))); # 371 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/stdio.h" 1 3 4 # 35 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/stdio.h" 3 4 char* __fgets_chk(char*, int, FILE*, size_t) __attribute__((__availability__(android,strict,introduced=17))); # 49 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/stdio.h" 3 4 static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) __attribute__((__format__(printf, 3, 0))) int vsnprintf(char* const __attribute__((pass_object_size(1))) dest, size_t size, const char* format, va_list ap) __attribute__((diagnose_as_builtin(__builtin_vsnprintf, 1, 2, 3, 4))) __attribute__((overloadable)) { return __builtin___vsnprintf_chk(dest, size, 0, __builtin_object_size(((dest)), (1)), format, ap); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) __attribute__((__format__(printf, 2, 0))) int vsprintf(char* const __attribute__((pass_object_size(1))) dest, const char* format, va_list ap) __attribute__((overloadable)) { return 
/*
 * bits/fortify/stdio.h, continued. The text below opens with the return
 * expression of the vsprintf wrapper whose signature is on the previous line:
 * it forwards to __builtin___vsprintf_chk with flag 0 and
 * __builtin_object_size(dest, 1).
 *
 *  - sprintf(dest, format), two-argument overload: declaration only, marked
 *    unavailable. Its enable_if condition holds when the object size of dest
 *    is known (not (size_t)-1) and smaller than __builtin_strlen(format), so
 *    such a call is rejected with "format string will always overflow
 *    destination buffer".
 *  - sprintf(dest, format, ...), variadic overload: packs the arguments into
 *    a va_list and forwards to __builtin___vsprintf_chk with the same bound.
 *  - snprintf variadic overload: only the signature is here; the attributes
 *    and body follow on the next line.
 */
__builtin___vsprintf_chk(dest, 0, __builtin_object_size(((dest)), (1)), format, ap); } static __attribute__((unused)) int sprintf(char* dest, const char* format) __attribute__((overloadable)) __attribute__((enable_if(((__builtin_object_size(((dest)), (1))) != ((size_t) -1) && (__builtin_object_size(((dest)), (1))) < (__builtin_strlen(format))), "format string will always overflow destination buffer"))) __attribute__((unavailable("format string will always overflow destination buffer"))); static __inline__ __attribute__((__format__(printf, 2, 3))) int sprintf(char* const __attribute__((pass_object_size(1))) dest, const char* format, ...) __attribute__((overloadable)) { va_list va; __builtin_va_start(va, format); int result = __builtin___vsprintf_chk(dest, 0, __builtin_object_size(((dest)), (1)), format, va); __builtin_va_end(va); return result; } static __inline__ __attribute__((__format__(printf, 3, 4))) int snprintf(char* const __attribute__((pass_object_size(1))) dest, size_t size, const char* format, ...) 
/*
 * bits/fortify/stdio.h, continued. The text below opens with the attributes
 * and body of the snprintf overload whose signature is on the previous line:
 * it builds a va_list and forwards to __builtin___vsnprintf_chk with flag 0
 * and __builtin_object_size(dest, 1).
 *
 *  - fread / fwrite (overloadable, always_inline). Two diagnose_if attributes
 *    raise compile-time errors:
 *      1. (unsigned int)-1 / size < count, i.e. size * count would overflow;
 *      2. the object size of buf (type 0) is known and smaller than
 *         size * count.
 *    The body calls (&fread)(...) / (&fwrite)(...).
 *    NOTE(review): taking the address presumably selects the plain library
 *    function instead of recursing into this overload -- confirm against the
 *    bionic fortify sources.
 *  - fgets (overloadable, always_inline): only the signature is here; its
 *    diagnostics and body follow on the next line.
 */
__attribute__((diagnose_as_builtin(__builtin_snprintf, 1, 2, 3))) __attribute__((overloadable)) { va_list va; __builtin_va_start(va, format); int result = __builtin___vsnprintf_chk(dest, size, 0, __builtin_object_size(((dest)), (1)), format, va); __builtin_va_end(va); return result; } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) size_t fread(void* const __attribute__((pass_object_size(0))) buf, size_t size, size_t count, FILE* stream) __attribute__((overloadable)) __attribute__((diagnose_if(((unsigned int)-1 / (size) < (count)), "in call to 'fread', size * count overflows", "error"))) __attribute__((diagnose_if(((__builtin_object_size(((buf)), (0))) != ((size_t) -1) && (__builtin_object_size(((buf)), (0))) < (size * count)), "in call to 'fread', size * count is too large for the given buffer", "error"))) { # 110 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/stdio.h" 3 4 return (&fread)(buf, size, count, stream); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) size_t fwrite(const void* const __attribute__((pass_object_size(0))) buf, size_t size, size_t count, FILE* stream) __attribute__((overloadable)) __attribute__((diagnose_if(((unsigned int)-1 / (size) < (count)), "in call to 'fwrite', size * count overflows", "error"))) __attribute__((diagnose_if(((__builtin_object_size(((buf)), (0))) != ((size_t) -1) && (__builtin_object_size(((buf)), (0))) < (size * count)), "in call to 'fwrite', size * count is too large for the given buffer", "error"))) { # 127 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/stdio.h" 3 4 return (&fwrite)(buf, size, count, stream); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) char* fgets(char* const __attribute__((pass_object_size(1))) dest, int size, FILE* stream) __attribute__((overloadable)) 
/*
 * End of bits/fortify/stdio.h, then the <malloc.h> allocator prototypes.
 * The text below opens with the diagnostics and body of the fgets overload
 * whose signature is on the previous line.
 *
 *  - fgets diagnostics (compile-time errors): size < 0, or the object size of
 *    dest (type 1) is known and smaller than size.
 *  - fgets body: bos = __builtin_object_size(dest, 1). The call goes to
 *    __fgets_chk(dest, size, stream, bos) unless bos is unknown
 *    ((size_t)-1), or size is a compile-time constant with bos >= size and
 *    size >= 0; in those cases it goes to (&fgets)(dest, size, stream).
 *  - malloc, calloc, realloc, memalign: declared with __alloc_size__ naming
 *    the size parameter(s) and __warn_unused_result__; malloc, calloc and
 *    memalign additionally carry __malloc__.
 *  - free.
 *  - The trailing "# 104" is the first token of a line marker that continues
 *    on the next line.
 */
__attribute__((diagnose_if(size < 0, "in call to 'fgets', size should not be negative", "error"))) __attribute__((diagnose_if(((__builtin_object_size(((dest)), (1))) != ((size_t) -1) && (__builtin_object_size(((dest)), (1))) < (size)), "in call to 'fgets', size is larger than the destination buffer", "error"))) { size_t bos = __builtin_object_size(((dest)), (1)); if (!((bos) == ((size_t) -1) || (__builtin_constant_p((size_t)size) && bos >= (size_t)size && (size >= 0)))) { return __fgets_chk(dest, size, stream, bos); } return (&fgets)(dest, size, stream); } # 372 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdio.h" 2 3 4 # 31 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 2 3 4 # 43 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 3 4 void* _Nullable malloc(size_t __byte_count) __attribute__((__malloc__)) __attribute__((__alloc_size__(1))) __attribute__((__warn_unused_result__)); # 52 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 3 4 void* _Nullable calloc(size_t __item_count, size_t __item_size) __attribute__((__malloc__)) __attribute__((__alloc_size__(1,2))) __attribute__((__warn_unused_result__)); # 61 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 3 4 void* _Nullable realloc(void* _Nullable __ptr, size_t __byte_count) __attribute__((__alloc_size__(2))) __attribute__((__warn_unused_result__)); # 83 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 3 4 void free(void* _Nullable __ptr); # 94 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 3 4 void* _Nullable memalign(size_t __alignment, size_t __byte_count) __attribute__((__malloc__)) __attribute__((__alloc_size__(2))) __attribute__((__warn_unused_result__)); # 104 
/*
 * Rest of <malloc.h>, then the start of <stdlib.h>. The text below opens with
 * the file-name part of a line marker whose "# 104" is on the previous line.
 *
 *  - malloc_usable_size (introduced=17).
 *  - struct mallinfo and struct mallinfo2: both hold the same ten size_t
 *    fields (arena, ordblks, smblks, hblks, hblkhd, usmblks, fsmblks,
 *    uordblks, fordblks, keepcost).
 *  - mallinfo2() is declared with __asm__("mallinfo"), so at link level it
 *    refers to the same symbol as mallinfo().
 *  - malloc_info (introduced=23).
 *  - enum HeapTaggingLevel: NONE = 0, TBI = 1, ASYNC = 2, SYNC = 3.
 *  - abort, exit, _Exit, quick_exit are __noreturn__; atexit and
 *    at_quick_exit register void(void) callbacks. _Exit and at_quick_exit
 *    are introduced=21.
 *  - quick_exit: its availability attribute follows on the next line.
 */
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 3 4 size_t malloc_usable_size(const void* _Nullable __ptr) __attribute__((__availability__(android,strict,introduced=17))); # 132 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 3 4 struct mallinfo { size_t arena; size_t ordblks; size_t smblks; size_t hblks; size_t hblkhd; size_t usmblks; size_t fsmblks; size_t uordblks; size_t fordblks; size_t keepcost; }; struct mallinfo mallinfo(void); struct mallinfo2 { size_t arena; size_t ordblks; size_t smblks; size_t hblks; size_t hblkhd; size_t usmblks; size_t fsmblks; size_t uordblks; size_t fordblks; size_t keepcost; }; struct mallinfo2 mallinfo2(void) __asm__("mallinfo"); # 181 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 3 4 int malloc_info(int __must_be_zero, FILE* _Nonnull __fp) __attribute__((__availability__(android,strict,introduced=23))); # 305 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/malloc.h" 3 4 enum HeapTaggingLevel { M_HEAP_TAGGING_LEVEL_NONE = 0, M_HEAP_TAGGING_LEVEL_TBI = 1, M_HEAP_TAGGING_LEVEL_ASYNC = 2, M_HEAP_TAGGING_LEVEL_SYNC = 3, }; # 35 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 2 3 4 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 4 # 36 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 2 3 4 __attribute__((__noreturn__)) void abort(void) __attribute__((__nomerge__)); __attribute__((__noreturn__)) void exit(int __status); __attribute__((__noreturn__)) void _Exit(int __status) __attribute__((__availability__(android,strict,introduced=21))); int atexit(void (* _Nonnull __fn)(void)); int at_quick_exit(void (* _Nonnull __fn)(void)) __attribute__((__availability__(android,strict,introduced=21))); void quick_exit(int __status) __attribute__((__noreturn__)) 
/*
 * <stdlib.h> prototypes, continued. The text below opens with the
 * availability attribute (introduced=21) that completes the quick_exit
 * declaration begun on the previous line.
 *
 *  - Environment: getenv, putenv, setenv, unsetenv, clearenv.
 *  - Temporary names/files: mkdtemp; mktemp (tagged deprecated with the
 *    message "mktemp is unsafe, use mkstemp or tmpfile instead"); mkostemp,
 *    mkostemp64, mkostemps, mkostemps64, mkstemps64 (introduced=23);
 *    mkstemp64 (introduced=21); mkstemp, mkstemps.
 *  - Integer parsing: strtol, strtoll, strtoul, strtoull.
 *  - posix_memalign (introduced=17).
 *  - The trailing "double" is the return type of a declaration that
 *    continues on the next line.
 */
__attribute__((__availability__(android,strict,introduced=21))); char* _Nullable getenv(const char* _Nonnull __name); int putenv(char* _Nonnull __assignment); int setenv(const char* _Nonnull __name, const char* _Nonnull __value, int __overwrite); int unsetenv(const char* _Nonnull __name); int clearenv(void); char* _Nullable mkdtemp(char* _Nonnull __template); char* _Nullable mktemp(char* _Nonnull __template) __attribute__((deprecated("mktemp is unsafe, use mkstemp or tmpfile instead"))); int mkostemp64(char* _Nonnull __template, int __flags) __attribute__((__availability__(android,strict,introduced=23))); int mkostemp(char* _Nonnull __template, int __flags) __attribute__((__availability__(android,strict,introduced=23))); int mkostemps64(char* _Nonnull __template, int __suffix_length, int __flags) __attribute__((__availability__(android,strict,introduced=23))); int mkostemps(char* _Nonnull __template, int __suffix_length, int __flags) __attribute__((__availability__(android,strict,introduced=23))); int mkstemp64(char* _Nonnull __template) __attribute__((__availability__(android,strict,introduced=21))); int mkstemp(char* _Nonnull __template); int mkstemps64(char* _Nonnull __template, int __flags) __attribute__((__availability__(android,strict,introduced=23))); int mkstemps(char* _Nonnull __template, int __flags); long strtol(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr, int __base); long long strtoll(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr, int __base); unsigned long strtoul(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr, int __base); unsigned long long strtoull(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr, int __base); int posix_memalign(void* _Nullable * _Nullable __memptr, size_t __alignment, size_t __size) __attribute__((__availability__(android,strict,introduced=17))); # 108 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 3 4 double 
/*
 * <stdlib.h> prototypes, continued. The text below opens with the declarator
 * of strtod; its "double" return type is on the previous line.
 *
 *  - strtold is declared with __asm__("strtod"), so on this target it links
 *    against the strtod symbol.
 *  - atoi, atol, atoll: tagged __pure__.
 *  - realpath: tagged __warn_unused_result__; system.
 *  - bsearch and qsort take a comparator over two const void pointers.
 *  - Random numbers: arc4random, arc4random_uniform, arc4random_buf,
 *    rand_r (introduced=21), the *rand48 family (lcong48 is introduced=23),
 *    initstate / setstate (introduced=21).
 *  - getpt; posix_openpt, whose availability attribute follows on the next
 *    line.
 */
strtod(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr); long double strtold(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr) __asm__("strtod") __attribute__((__availability__(android,strict,introduced=3))); int atoi(const char* _Nonnull __s) __attribute__((__pure__)); long atol(const char* _Nonnull __s) __attribute__((__pure__)); long long atoll(const char* _Nonnull __s) __attribute__((__pure__)); __attribute__((__warn_unused_result__)) char* _Nullable realpath(const char* _Nonnull __path, char* _Nullable __resolved); int system(const char* _Nonnull __command); void* _Nullable bsearch(const void* _Nonnull __key, const void* _Nullable __base, size_t __nmemb, size_t __size, int (* _Nonnull __comparator)(const void* _Nonnull __lhs, const void* _Nonnull __rhs)); void qsort(void* _Nullable __base, size_t __nmemb, size_t __size, int (* _Nonnull __comparator)(const void* _Nullable __lhs, const void* _Nullable __rhs)); uint32_t arc4random(void); uint32_t arc4random_uniform(uint32_t __upper_bound); void arc4random_buf(void* _Nonnull __buf, size_t __n); int rand_r(unsigned int* _Nonnull __seed_ptr) __attribute__((__availability__(android,strict,introduced=21))); double drand48(void); double erand48(unsigned short __xsubi[_Nonnull 3]); long jrand48(unsigned short __xsubi[_Nonnull 3]); void lcong48(unsigned short __param[_Nonnull 7]) __attribute__((__availability__(android,strict,introduced=23))); long lrand48(void); long mrand48(void); long nrand48(unsigned short __xsubi[_Nonnull 3]); unsigned short* _Nonnull seed48(unsigned short __seed16v[_Nonnull 3]); void srand48(long __seed); char* _Nullable initstate(unsigned int __seed, char* _Nonnull __state, size_t __n) __attribute__((__availability__(android,strict,introduced=21))); char* _Nullable setstate(char* _Nonnull __state) __attribute__((__availability__(android,strict,introduced=21))); int getpt(void); int posix_openpt(int __flags) 
/*
 * <stdlib.h> prototypes, continued, plus bits/fortify/stdlib.h. The text
 * below opens with the availability attribute (introduced=21) that completes
 * the posix_openpt declaration begun on the previous line.
 *
 *  - ptsname, ptsname_r, unlockpt.
 *  - div_t / ldiv_t / lldiv_t: { quot, rem } pairs of int / long int /
 *    long long int, returned by div / ldiv / lldiv (all tagged __const__).
 *  - getprogname, setprogname (introduced=21).
 *  - Multibyte helpers: mblen, mbstowcs, mbtowc, wctomb, wcstombs,
 *    __ctype_get_mb_cur_max (introduced=21).
 *  - Fortified realpath redeclaration: diagnose_if raises a compile-time
 *    error when path is null, and when the object size of resolved is known
 *    and smaller than 4096 (the message refers to this bound as PATH_MAX).
 *  - The trailing "#" is the first token of a line marker that continues on
 *    the next line.
 */
__attribute__((__availability__(android,strict,introduced=21))); char* _Nullable ptsname(int __fd); int ptsname_r(int __fd, char* _Nonnull __buf, size_t __n); int unlockpt(int __fd); typedef struct { int quot; int rem; } div_t; div_t div(int __numerator, int __denominator) __attribute__((__const__)); typedef struct { long int quot; long int rem; } ldiv_t; ldiv_t ldiv(long __numerator, long __denominator) __attribute__((__const__)); typedef struct { long long int quot; long long int rem; } lldiv_t; lldiv_t lldiv(long long __numerator, long long __denominator) __attribute__((__const__)); # 214 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 3 4 const char* _Nullable getprogname(void) __attribute__((__availability__(android,strict,introduced=21))); void setprogname(const char* _Nonnull __name) __attribute__((__availability__(android,strict,introduced=21))); int mblen(const char* _Nullable __s, size_t __n) ; size_t mbstowcs(wchar_t* _Nullable __dst, const char* _Nullable __src, size_t __n) ; int mbtowc(wchar_t* _Nullable __wc_ptr, const char* _Nullable __s, size_t __n) ; int wctomb(char* _Nullable __dst, wchar_t __wc) ; size_t wcstombs(char* _Nullable __dst, const wchar_t* _Nullable __src, size_t __n) ; size_t __ctype_get_mb_cur_max(void) __attribute__((__availability__(android,strict,introduced=21))); # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/stdlib.h" 1 3 4 # 38 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/stdlib.h" 3 4 char* realpath(const char* path, char* resolved) __attribute__((diagnose_if(!path, "'realpath': NULL path is never correct; flipped arguments?", "error"))) __attribute__((diagnose_if(((__builtin_object_size(((resolved)), (1))) != ((size_t) -1) && (__builtin_object_size(((resolved)), (1))) < (4096)), "'realpath' output parameter must be NULL or a pointer to a buffer " "with >= PATH_MAX bytes", "error"))); # 
/*
 * <stdlib.h> prototypes, continued. The text below opens with the line
 * number and file name of a line marker whose "#" is on the previous line.
 *
 *  - abs, labs, llabs: tagged __const__, introduced=19.
 *  - strtof, atof, rand, srand, random, srandom, grantpt: introduced=21.
 *  - Locale-taking parsers strtoll_l, strtoull_l, strtold_l: introduced=21.
 *  - The closing line markers enter android/legacy_stdlib_inlines.h; the
 *    last one ("# 36") continues on the next line.
 */
235 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 2 3 4 int abs(int __x) __attribute__((__const__)) __attribute__((__availability__(android,strict,introduced=19))); long labs(long __x) __attribute__((__const__)) __attribute__((__availability__(android,strict,introduced=19))); long long llabs(long long __x) __attribute__((__const__)) __attribute__((__availability__(android,strict,introduced=19))); float strtof(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr) __attribute__((__availability__(android,strict,introduced=21))); double atof(const char* _Nonnull __s) __attribute__((__pure__)) __attribute__((__availability__(android,strict,introduced=21))); int rand(void) __attribute__((__availability__(android,strict,introduced=21))); void srand(unsigned int __seed) __attribute__((__availability__(android,strict,introduced=21))); long random(void) __attribute__((__availability__(android,strict,introduced=21))); void srandom(unsigned int __seed) __attribute__((__availability__(android,strict,introduced=21))); int grantpt(int __fd) __attribute__((__availability__(android,strict,introduced=21))); long long strtoll_l(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr, int __base, locale_t _Nonnull __l) __attribute__((__availability__(android,strict,introduced=21))); unsigned long long strtoull_l(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr, int __base, locale_t _Nonnull __l) __attribute__((__availability__(android,strict,introduced=21))); long double strtold_l(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr, locale_t _Nonnull __l) __attribute__((__availability__(android,strict,introduced=21))); # 271 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/android/legacy_stdlib_inlines.h" 1 3 4 # 36 
/*
 * android/legacy_stdlib_inlines.h, then the start of <string.h> via
 * bits/strcasecmp.h. The text below opens with the file-name part of a line
 * marker whose "# 36" is on the previous line.
 *
 *  - strtod_l, strtof_l, strtol_l: static inline shims that ignore the
 *    locale argument __l and call strtod / strtof / strtol with the
 *    remaining arguments.
 *  - The "# 20 "ggml.c" 2" marker returns to ggml.c before <string.h> is
 *    entered.
 *  - strcasecmp, strncasecmp: tagged __pure__.
 *  - strcasecmp_l (introduced=23); strncasecmp_l, whose availability
 *    attribute follows on the next line.
 */
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/android/legacy_stdlib_inlines.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 1 3 4 # 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/android/legacy_stdlib_inlines.h" 2 3 4 static __inline double strtod_l(const char* __s, char** __end_ptr, locale_t __l) { return strtod(__s, __end_ptr); } static __inline float strtof_l(const char* __s, char** __end_ptr, locale_t __l) { return strtof(__s, __end_ptr); } static __inline long strtol_l(const char* __s, char** __end_ptr, int __base, locale_t __l) { return strtol(__s, __end_ptr, __base); } # 272 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/stdlib.h" 2 3 4 # 20 "ggml.c" 2 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/string.h" 1 3 4 # 33 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/string.h" 3 4 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 4 # 34 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/string.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/strcasecmp.h" 1 3 4 # 49 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/strcasecmp.h" 3 4 int strcasecmp(const char* __s1, const char* __s2) __attribute__((__pure__)); int strcasecmp_l(const char* __s1, const char* __s2, locale_t __l) __attribute__((__pure__)) __attribute__((__availability__(android,strict,introduced=23))); # 68 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/strcasecmp.h" 3 4 int strncasecmp(const char* __s1, const char* __s2, size_t __n) __attribute__((__pure__)); int strncasecmp_l(const char* __s1, const char* __s2, size_t __n, locale_t __l) __attribute__((__pure__)) 
/*
 * <string.h> prototypes. The text below opens with the availability
 * attribute (introduced=23) that completes the strncasecmp_l declaration
 * begun on the previous line.
 *
 *  - Memory primitives: memccpy, memchr, memrchr, memcmp, memcpy, memmove,
 *    memset, memmem. memcpy is declared here without parameter names.
 *  - strchr / strrchr and their checked entry points __strchr_chk /
 *    __strrchr_chk (introduced=18), which take an extra size_t.
 *  - strlen and __strlen_chk (introduced=17), which takes an extra size_t.
 *  - strcmp.
 *  - stpcpy: its availability attribute follows on the next line.
 */
__attribute__((__availability__(android,strict,introduced=23))); # 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/string.h" 2 3 4 void* _Nullable memccpy(void* _Nonnull __dst, const void* _Nonnull __src, int __stop_char, size_t __n); void* _Nullable memchr(const void* _Nonnull __s, int __ch, size_t __n) __attribute__((__pure__)); void* _Nullable memrchr(const void* _Nonnull __s, int __ch, size_t __n) __attribute__((__pure__)); int memcmp(const void* _Nonnull __lhs, const void* _Nonnull __rhs, size_t __n) __attribute__((__pure__)); void* _Nonnull memcpy(void* _Nonnull, const void* _Nonnull, size_t); void* _Nonnull memmove(void* _Nonnull __dst, const void* _Nonnull __src, size_t __n); void* _Nonnull memset(void* _Nonnull __dst, int __ch, size_t __n); # 84 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/string.h" 3 4 void* _Nullable memmem(const void* _Nonnull __haystack, size_t __haystack_size, const void* _Nonnull __needle, size_t __needle_size) __attribute__((__pure__)); char* _Nullable strchr(const char* _Nonnull __s, int __ch) __attribute__((__pure__)); char* _Nullable __strchr_chk(const char* _Nonnull __s, int __ch, size_t __n) __attribute__((__availability__(android,strict,introduced=18))); # 109 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/string.h" 3 4 char* _Nullable strrchr(const char* _Nonnull __s, int __ch) __attribute__((__pure__)); char* _Nullable __strrchr_chk(const char* _Nonnull __s, int __ch, size_t __n) __attribute__((__availability__(android,strict,introduced=18))); size_t strlen(const char* _Nonnull __s) __attribute__((__pure__)); size_t __strlen_chk(const char* _Nonnull __s, size_t __n) __attribute__((__availability__(android,strict,introduced=17))); int strcmp(const char* _Nonnull __lhs, const char* _Nonnull __rhs) __attribute__((__pure__)); char* _Nonnull stpcpy(char* _Nonnull __dst, const char* _Nonnull __src) 
/*
 * <string.h> prototypes, continued. The text below opens with the
 * availability attribute (introduced=21) that completes the stpcpy
 * declaration begun on the previous line.
 *
 *  - Copy / concatenate: strcpy, strcat, strncat, strncpy,
 *    stpncpy (introduced=21), strlcat, strlcpy, strdup, strndup.
 *  - Search / tokenize: strstr, strcasestr, strtok, strtok_r, strcspn,
 *    strpbrk, strsep.
 *  - Errors: strerror, strerror_l (introduced=23), strerror_r. strerror_r is
 *    declared here as returning int.
 *  - Length / compare: strnlen, strncmp.
 *  - strspn: its parameter list continues on the next line.
 */
__attribute__((__availability__(android,strict,introduced=21))); char* _Nonnull strcpy(char* _Nonnull __dst, const char* _Nonnull __src); char* _Nonnull strcat(char* _Nonnull __dst, const char* _Nonnull __src); char* _Nullable strdup(const char* _Nonnull __s); char* _Nullable strstr(const char* _Nonnull __haystack, const char* _Nonnull __needle) __attribute__((__pure__)); char* _Nullable strcasestr(const char* _Nonnull __haystack, const char* _Nonnull __needle) __attribute__((__pure__)); char* _Nullable strtok(char* _Nullable __s, const char* _Nonnull __delimiter); char* _Nullable strtok_r(char* _Nullable __s, const char* _Nonnull __delimiter, char* _Nonnull * _Nonnull __pos_ptr); char* _Nonnull strerror(int __errno_value); char* _Nonnull strerror_l(int __errno_value, locale_t _Nonnull __l) __attribute__((__availability__(android,strict,introduced=23))); int strerror_r(int __errno_value, char* _Nonnull __buf, size_t __n); size_t strnlen(const char* _Nonnull __s, size_t __n) __attribute__((__pure__)); char* _Nonnull strncat(char* _Nonnull __dst, const char* _Nonnull __src, size_t __n); char* _Nullable strndup(const char* _Nonnull __s, size_t __n); int strncmp(const char* _Nonnull __lhs, const char* _Nonnull __rhs, size_t __n) __attribute__((__pure__)); char* _Nonnull stpncpy(char* _Nonnull __dst, const char* _Nonnull __src, size_t __n) __attribute__((__availability__(android,strict,introduced=21))); char* _Nonnull strncpy(char* _Nonnull __dst, const char* _Nonnull __src, size_t __n); size_t strlcat(char* _Nonnull __dst, const char* _Nonnull __src, size_t __n); size_t strlcpy(char* _Nonnull __dst, const char* _Nonnull __src, size_t __n); size_t strcspn(const char* _Nonnull __s, const char* _Nonnull __reject) __attribute__((__pure__)); char* _Nullable strpbrk(const char* _Nonnull __s, const char* _Nonnull __accept) __attribute__((__pure__)); char* _Nullable strsep(char* _Nullable * _Nonnull __s_ptr, const char* _Nonnull __delimiter); size_t strspn(const char* _Nonnull 
/*
 * End of the <string.h> prototypes and start of bits/fortify/string.h. The
 * text below opens with the parameter tail of the strspn declaration begun
 * on the previous line.
 *
 *  - strsignal, strcoll, strxfrm; strcoll_l and strxfrm_l (introduced=21).
 *  - Checked entry points used by the fortified wrappers:
 *    __memchr_chk, __memrchr_chk (introduced=23); __stpncpy_chk2,
 *    __strncpy_chk2 (introduced=21); __strlcpy_chk, __strlcat_chk
 *    (introduced=17).
 *  - __memrchr_real: extern declaration bound to the symbol "memrchr" via
 *    __asm__, giving a second name for that function.
 *  - memcpy (overloadable, always_inline): only the signature is here; the
 *    attributes and body follow on the next line.
 */
__s, const char* _Nonnull __accept); char* _Nonnull strsignal(int __signal); int strcoll(const char* _Nonnull __lhs, const char* _Nonnull __rhs) __attribute__((__pure__)); size_t strxfrm(char* _Null_unspecified __dst, const char* _Nonnull __src, size_t __n); int strcoll_l(const char* _Nonnull __lhs, const char* _Nonnull __rhs, locale_t _Nonnull __l) __attribute__((__pure__)) __attribute__((__availability__(android,strict,introduced=21))); size_t strxfrm_l(char* _Null_unspecified __dst, const char* _Nonnull __src, size_t __n, locale_t _Nonnull __l) __attribute__((__availability__(android,strict,introduced=21))); # 208 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/string.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/string.h" 1 3 4 # 35 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/string.h" 3 4 void* __memchr_chk(const void*, int, size_t, size_t) __attribute__((__availability__(android,strict,introduced=23))); void* __memrchr_chk(const void*, int, size_t, size_t) __attribute__((__availability__(android,strict,introduced=23))); char* __stpncpy_chk2(char*, const char*, size_t, size_t, size_t) __attribute__((__availability__(android,strict,introduced=21))); char* __strncpy_chk2(char*, const char*, size_t, size_t, size_t) __attribute__((__availability__(android,strict,introduced=21))); size_t __strlcpy_chk(char*, const char*, size_t, size_t) __attribute__((__availability__(android,strict,introduced=17))); size_t __strlcat_chk(char*, const char*, size_t, size_t) __attribute__((__availability__(android,strict,introduced=17))); extern void* __memrchr_real(const void*, int, size_t) __asm__("memrchr"); static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) void* memcpy(void* const dst __attribute__((pass_object_size(0))), const void* src, size_t copy_amount) 
/*
 * bits/fortify/string.h, continued. The text below opens with the attributes
 * and body of the memcpy overload whose signature is on the previous line.
 *
 *  - memcpy / memmove: forward to __builtin___memcpy_chk /
 *    __builtin___memmove_chk, passing __builtin_object_size(dst, 0) as the
 *    destination bound.
 *  - stpcpy / strcpy: diagnose_if raises a compile-time error when the
 *    object size of dst (type 1) is known and <= __builtin_strlen(src); the
 *    body forwards to __builtin___stpcpy_chk / __builtin___strcpy_chk with
 *    that object size.
 *  - strcat (overloadable, always_inline): the signature and the first
 *    operand of its diagnose_if condition are here; the rest follows on the
 *    next line.
 */
__attribute__((diagnose_as_builtin(__builtin_memcpy, 1, 2, 3))) __attribute__((overloadable)) { return __builtin___memcpy_chk(dst, src, copy_amount, __builtin_object_size(((dst)), (0))); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) void* memmove(void* const dst __attribute__((pass_object_size(0))), const void* src, size_t len) __attribute__((diagnose_as_builtin(__builtin_memmove, 1, 2, 3))) __attribute__((overloadable)) { return __builtin___memmove_chk(dst, src, len, __builtin_object_size(((dst)), (0))); } # 92 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/string.h" 3 4 static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) char* stpcpy(char* const dst __attribute__((pass_object_size(1))), const char* src) __attribute__((overloadable)) __attribute__((diagnose_if(((__builtin_object_size(((dst)), (1))) != ((size_t) -1) && (__builtin_object_size(((dst)), (1))) <= (__builtin_strlen(src))), "'stpcpy' called with string bigger than buffer", "error"))) { return __builtin___stpcpy_chk(dst, src, __builtin_object_size(((dst)), (1))); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) char* strcpy(char* const dst __attribute__((pass_object_size(1))), const char* src) __attribute__((diagnose_as_builtin(__builtin_strcpy, 1, 2))) __attribute__((overloadable)) __attribute__((diagnose_if(((__builtin_object_size(((dst)), (1))) != ((size_t) -1) && (__builtin_object_size(((dst)), (1))) <= (__builtin_strlen(src))), "'strcpy' called with string bigger than buffer", "error"))) { return __builtin___strcpy_chk(dst, src, __builtin_object_size(((dst)), (1))); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) char* strcat(char* const dst __attribute__((pass_object_size(1))), const char* src) __attribute__((overloadable)) __attribute__((diagnose_if(((__builtin_object_size(((dst)), (1))) 
/*
 * bits/fortify/string.h, continued. The text below opens with the remainder
 * of the diagnose_if condition and the body of the strcat overload begun on
 * the previous line: a compile-time error when the object size of dst is
 * known and <= __builtin_strlen(src); the body forwards to
 * __builtin___strcat_chk.
 *
 *  - strncat: forwards to __builtin___strncat_chk with
 *    __builtin_object_size(dst, 1).
 *  - memset: diagnose_if emits a warning when c is non-zero and n is zero
 *    (the message suggests swapped arguments); the body forwards to
 *    __builtin___memset_chk with __builtin_object_size(s, 0).
 *  - memchr: uses __builtin_memchr when the object size of s is unknown, or
 *    when n is a compile-time constant with bos >= n; otherwise it calls
 *    __memchr_chk(s, c, n, bos).
 *  - __memrchr_fortify: same decision, choosing between __memrchr_real and
 *    __memrchr_chk.
 *  - The trailing qualifiers and "char*" begin a wrapper whose name and body
 *    are on the next line.
 */
!= ((size_t) -1) && (__builtin_object_size(((dst)), (1))) <= (__builtin_strlen(src))), "'strcat' called with string bigger than buffer", "error"))) { return __builtin___strcat_chk(dst, src, __builtin_object_size(((dst)), (1))); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) char* strncat(char* const dst __attribute__((pass_object_size(1))), const char* src, size_t n) __attribute__((diagnose_as_builtin(__builtin_strncat, 1, 2, 3))) __attribute__((overloadable)) { return __builtin___strncat_chk(dst, src, n, __builtin_object_size(((dst)), (1))); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) void* memset(void* const s __attribute__((pass_object_size(0))), int c, size_t n) __attribute__((overloadable)) __attribute__((diagnose_as_builtin(__builtin_memset, 1, 2, 3))) __attribute__((diagnose_if(c && !n, "'memset' will set 0 bytes; maybe the arguments got flipped?", "warning"))) { return __builtin___memset_chk(s, c, n, __builtin_object_size(((s)), (0))); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) void* memchr(const void* const s __attribute__((pass_object_size(1))), int c, size_t n) __attribute__((overloadable)) { size_t bos = __builtin_object_size(((s)), (1)); if ((((bos)) == ((size_t) -1) || (__builtin_constant_p((n)) && (bos) >= (n) && (1)))) { return __builtin_memchr(s, c, n); } return __memchr_chk(s, c, n, bos); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) void* __memrchr_fortify(const void* const __attribute__((pass_object_size(1))) s, int c, size_t n) __attribute__((overloadable)) { size_t bos = __builtin_object_size(((s)), (1)); if ((((bos)) == ((size_t) -1) || (__builtin_constant_p((n)) && (bos) >= (n) && (1)))) { return __memrchr_real(s, c, n); } return __memrchr_chk(s, c, n, bos); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) char* 
/*
 * bits/fortify/string.h, continued. The text below opens with the declarator
 * of the stpncpy overload; its qualifiers and return type are on the
 * previous line.
 *
 *  - stpncpy / strncpy: compute the object sizes of both dst and src
 *    (type 1). When the source size is unknown ((size_t)-1) they forward to
 *    __builtin___stpncpy_chk / __builtin___strncpy_chk with the destination
 *    size only; otherwise they call __stpncpy_chk2 / __strncpy_chk2 with
 *    both sizes.
 *  - strlcpy / strlcat: diagnose_if raises a compile-time error when the
 *    object size of dst is known and smaller than the size argument; the
 *    bodies call __strlcpy_chk / __strlcat_chk with the object size appended.
 *  - The argument list of the __strlcat_chk call continues on the next line.
 */
stpncpy(char* const dst __attribute__((pass_object_size(1))), const char* const src __attribute__((pass_object_size(1))), size_t n) __attribute__((diagnose_as_builtin(__builtin_stpncpy, 1, 2, 3))) __attribute__((overloadable)) { size_t bos_dst = __builtin_object_size(((dst)), (1)); size_t bos_src = __builtin_object_size(((src)), (1)); if (bos_src == ((size_t) -1)) { return __builtin___stpncpy_chk(dst, src, n, bos_dst); } return __stpncpy_chk2(dst, src, n, bos_dst, bos_src); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) char* strncpy(char* const dst __attribute__((pass_object_size(1))), const char* const src __attribute__((pass_object_size(1))), size_t n) __attribute__((diagnose_as_builtin(__builtin_strncpy, 1, 2, 3))) __attribute__((overloadable)) { size_t bos_dst = __builtin_object_size(((dst)), (1)); size_t bos_src = __builtin_object_size(((src)), (1)); if (bos_src == ((size_t) -1)) { return __builtin___strncpy_chk(dst, src, n, bos_dst); } return __strncpy_chk2(dst, src, n, bos_dst, bos_src); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) size_t strlcpy(char* const dst __attribute__((pass_object_size(1))), const char* src, size_t size) __attribute__((overloadable)) __attribute__((diagnose_if(((__builtin_object_size(((dst)), (1))) != ((size_t) -1) && (__builtin_object_size(((dst)), (1))) < (size)), "'strlcpy' called with size bigger than buffer", "error"))) { return __strlcpy_chk(dst, src, size, __builtin_object_size(((dst)), (1))); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) size_t strlcat(char* const dst __attribute__((pass_object_size(1))), const char* src, size_t size) __attribute__((overloadable)) __attribute__((diagnose_if(((__builtin_object_size(((dst)), (1))) != ((size_t) -1) && (__builtin_object_size(((dst)), (1))) < (size)), "'strlcat' called with size bigger than buffer", "error"))) { return __strlcat_chk(dst, src, 
/*
 * End of bits/fortify/string.h, then the start of <inttypes.h>. The text
 * below opens with the last two arguments of the __strlcat_chk call begun on
 * the previous line.
 *
 *  - strlen: always calls __strlen_chk(s, __builtin_object_size(s, 0)).
 *  - strchr / strrchr: call __strchr_chk / __strrchr_chk when the object
 *    size of s (type 1) is known, otherwise __builtin_strchr /
 *    __builtin_strrchr.
 *  - memrchr: forwards to __memrchr_fortify.
 *  - The "# 21 "ggml.c" 2" marker returns to ggml.c before <inttypes.h> is
 *    entered.
 *  - imaxdiv_t: { quot, rem } pair of intmax_t.
 *  - imaxabs: tagged __const__; its availability attribute follows on the
 *    next line.
 */
size, __builtin_object_size(((dst)), (1))); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) size_t strlen(const char* const s __attribute__((pass_object_size(0)))) __attribute__((overloadable)) { return __strlen_chk(s, __builtin_object_size(((s)), (0))); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) char* strchr(const char* const s __attribute__((pass_object_size(1))), int c) __attribute__((overloadable)) { size_t bos = __builtin_object_size(((s)), (1)); if (bos != ((size_t) -1)) { return __strchr_chk(s, c, bos); } return __builtin_strchr(s, c); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) char* strrchr(const char* const s __attribute__((pass_object_size(1))), int c) __attribute__((overloadable)) { size_t bos = __builtin_object_size(((s)), (1)); if (bos != ((size_t) -1)) { return __strrchr_chk(s, c, bos); } return __builtin_strrchr(s, c); } # 279 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/string.h" 3 4 static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) void* memrchr(const void* const __attribute__((pass_object_size(1))) s, int c, size_t n) __attribute__((overloadable)) { return __memrchr_fortify(s, c, n); } # 209 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/string.h" 2 3 4 # 21 "ggml.c" 2 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/inttypes.h" 1 3 # 21 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/inttypes.h" 3 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/inttypes.h" 1 3 4 # 324 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/inttypes.h" 3 4 typedef struct { intmax_t quot; intmax_t rem; } imaxdiv_t; intmax_t imaxabs(intmax_t __i) __attribute__((__const__)) 
/*
 * Rest of <inttypes.h>, then <pthread.h> -> <sched.h> -> <linux/sched.h>.
 * The text below opens with the availability attribute (introduced=19) that
 * completes the imaxabs declaration begun on the previous line.
 *
 *  - imaxdiv (introduced=19); strtoimax, strtoumax; wcstoimax, wcstoumax
 *    (introduced=21).
 *  - The "# 23 "ggml.c" 2" and "# 100 "ggml.c"" markers return to ggml.c
 *    before <pthread.h> is entered.
 *  - struct clone_args: eleven __u64 members, each declared with
 *    aligned(8): flags, pidfd, child_tid, parent_tid, exit_signal, stack,
 *    stack_size, tls, set_tid, set_tid_size, cgroup.
 *  - The trailing "# 38" is the start of a line marker that continues on the
 *    next line.
 */
__attribute__((__availability__(android,strict,introduced=19))); imaxdiv_t imaxdiv(intmax_t __numerator, intmax_t __denominator) __attribute__((__const__)) __attribute__((__availability__(android,strict,introduced=19))); intmax_t strtoimax(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr, int __base); uintmax_t strtoumax(const char* _Nonnull __s, char* _Nullable * _Nullable __end_ptr, int __base); intmax_t wcstoimax(const wchar_t* _Nonnull __s, wchar_t* _Nullable * _Nullable __end_ptr, int __base) __attribute__((__availability__(android,strict,introduced=21))); uintmax_t wcstoumax(const wchar_t* _Nonnull __s, wchar_t* _Nullable * _Nullable __end_ptr, int __base) __attribute__((__availability__(android,strict,introduced=21))); # 22 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/inttypes.h" 2 3 # 23 "ggml.c" 2 # 100 "ggml.c" # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 1 3 4 # 38 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sched.h" 1 3 4 # 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sched.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/sched.h" 1 3 4 # 51 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/sched.h" 3 4 struct clone_args { __u64 __attribute__((aligned(8))) flags; __u64 __attribute__((aligned(8))) pidfd; __u64 __attribute__((aligned(8))) child_tid; __u64 __attribute__((aligned(8))) parent_tid; __u64 __attribute__((aligned(8))) exit_signal; __u64 __attribute__((aligned(8))) stack; __u64 __attribute__((aligned(8))) stack_size; __u64 __attribute__((aligned(8))) tls; __u64 __attribute__((aligned(8))) set_tid; __u64 __attribute__((aligned(8))) set_tid_size; __u64 __attribute__((aligned(8))) cgroup; }; # 38 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sched.h" 2 3 4 # 97 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sched.h" 3 4 struct sched_param { int sched_priority; }; int sched_setscheduler(pid_t __pid, int __policy, const struct sched_param* _Nonnull __param); # 116 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sched.h" 3 4 int sched_getscheduler(pid_t __pid); int sched_yield(void); int sched_get_priority_max(int __policy); int sched_get_priority_min(int __policy); int sched_setparam(pid_t __pid, const struct sched_param* _Nonnull __param); int sched_getparam(pid_t __pid, struct sched_param* _Nonnull __param); int sched_rr_get_interval(pid_t __pid, struct timespec* _Nonnull __quantum); # 39 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 2 3 4 enum { PTHREAD_MUTEX_NORMAL = 0, PTHREAD_MUTEX_RECURSIVE = 1, PTHREAD_MUTEX_ERRORCHECK = 2, PTHREAD_MUTEX_ERRORCHECK_NP = PTHREAD_MUTEX_ERRORCHECK, PTHREAD_MUTEX_RECURSIVE_NP = PTHREAD_MUTEX_RECURSIVE, PTHREAD_MUTEX_DEFAULT = PTHREAD_MUTEX_NORMAL }; # 65 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 enum { PTHREAD_RWLOCK_PREFER_READER_NP = 0, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP = 1, }; # 97 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 int pthread_atfork(void (* _Nullable __prepare)(void), void (* _Nullable __parent)(void), void (* _Nullable __child)(void)); int pthread_attr_destroy(pthread_attr_t* _Nonnull __attr); int pthread_attr_getdetachstate(const pthread_attr_t* _Nonnull __attr, int* _Nonnull __state); int pthread_attr_getguardsize(const pthread_attr_t* _Nonnull __attr, size_t* _Nonnull __size); int pthread_attr_getschedparam(const pthread_attr_t* _Nonnull __attr, struct sched_param* _Nonnull __param); int pthread_attr_getschedpolicy(const pthread_attr_t* 
_Nonnull __attr, int* _Nonnull __policy); int pthread_attr_getscope(const pthread_attr_t* _Nonnull __attr, int* _Nonnull __scope); int pthread_attr_getstack(const pthread_attr_t* _Nonnull __attr, void* _Nullable * _Nonnull __addr, size_t* _Nonnull __size); int pthread_attr_getstacksize(const pthread_attr_t* _Nonnull __attr, size_t* _Nonnull __size); int pthread_attr_init(pthread_attr_t* _Nonnull __attr); int pthread_attr_setdetachstate(pthread_attr_t* _Nonnull __attr, int __state); int pthread_attr_setguardsize(pthread_attr_t* _Nonnull __attr, size_t __size); int pthread_attr_setschedparam(pthread_attr_t* _Nonnull __attr, const struct sched_param* _Nonnull __param); int pthread_attr_setschedpolicy(pthread_attr_t* _Nonnull __attr, int __policy); int pthread_attr_setscope(pthread_attr_t* _Nonnull __attr, int __scope); int pthread_attr_setstack(pthread_attr_t* _Nonnull __attr, void* _Nonnull __addr, size_t __size); int pthread_attr_setstacksize(pthread_attr_t* _Nonnull __addr, size_t __size); int pthread_condattr_destroy(pthread_condattr_t* _Nonnull __attr); int pthread_condattr_getclock(const pthread_condattr_t* _Nonnull __attr, clockid_t* _Nonnull __clock) __attribute__((__availability__(android,strict,introduced=21))); int pthread_condattr_getpshared(const pthread_condattr_t* _Nonnull __attr, int* _Nonnull __shared); int pthread_condattr_init(pthread_condattr_t* _Nonnull __attr); int pthread_condattr_setclock(pthread_condattr_t* _Nonnull __attr, clockid_t __clock) __attribute__((__availability__(android,strict,introduced=21))); int pthread_condattr_setpshared(pthread_condattr_t* _Nonnull __attr, int __shared); int pthread_cond_broadcast(pthread_cond_t* _Nonnull __cond); int pthread_cond_destroy(pthread_cond_t* _Nonnull __cond); int pthread_cond_init(pthread_cond_t* _Nonnull __cond, const pthread_condattr_t* _Nullable __attr); int pthread_cond_signal(pthread_cond_t* _Nonnull __cond); int pthread_cond_timedwait(pthread_cond_t* _Nonnull __cond, pthread_mutex_t* 
_Nonnull __mutex, const struct timespec* _Nullable __timeout); # 164 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 int pthread_cond_timedwait_monotonic_np(pthread_cond_t* _Nonnull __cond, pthread_mutex_t* _Nonnull __mutex, const struct timespec* _Nullable __timeout) ; int pthread_cond_wait(pthread_cond_t* _Nonnull __cond, pthread_mutex_t* _Nonnull __mutex); #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wbuiltin-requires-header" int pthread_create(pthread_t* _Nonnull __pthread_ptr, pthread_attr_t const* _Nullable __attr, void* _Nonnull (* _Nonnull __start_routine)(void* _Nonnull), void* _Nullable); #pragma clang diagnostic pop int pthread_detach(pthread_t __pthread); void pthread_exit(void* _Nullable __return_value) __attribute__((__noreturn__)); int pthread_equal(pthread_t __lhs, pthread_t __rhs); int pthread_getattr_np(pthread_t __pthread, pthread_attr_t* _Nonnull __attr); int pthread_getcpuclockid(pthread_t __pthread, clockid_t* _Nonnull __clock); void* _Nullable pthread_getspecific(pthread_key_t __key); pid_t pthread_gettid_np(pthread_t __pthread) __attribute__((__availability__(android,strict,introduced=21))); int pthread_join(pthread_t __pthread, void* _Nullable * _Nullable __return_value_ptr); int pthread_key_create(pthread_key_t* _Nonnull __key_ptr, void (* _Nullable __key_destructor)(void* _Nullable)); int pthread_key_delete(pthread_key_t __key); int pthread_mutexattr_destroy(pthread_mutexattr_t* _Nonnull __attr); int pthread_mutexattr_getpshared(const pthread_mutexattr_t* _Nonnull __attr, int* _Nonnull __shared); int pthread_mutexattr_gettype(const pthread_mutexattr_t* _Nonnull __attr, int* _Nonnull __type); int pthread_mutexattr_init(pthread_mutexattr_t* _Nonnull __attr); int pthread_mutexattr_setpshared(pthread_mutexattr_t* _Nonnull __attr, int __shared); int pthread_mutexattr_settype(pthread_mutexattr_t* _Nonnull __attr, int __type); # 230 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 int pthread_mutex_destroy(pthread_mutex_t* _Nonnull __mutex); int pthread_mutex_init(pthread_mutex_t* _Nonnull __mutex, const pthread_mutexattr_t* _Nullable __attr); int pthread_mutex_lock(pthread_mutex_t* _Nonnull __mutex); int pthread_mutex_timedlock(pthread_mutex_t* _Nonnull __mutex, const struct timespec* _Nullable __timeout) __attribute__((__availability__(android,strict,introduced=21))); # 255 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 int pthread_mutex_trylock(pthread_mutex_t* _Nonnull __mutex); int pthread_mutex_unlock(pthread_mutex_t* _Nonnull __mutex); int pthread_once(pthread_once_t* _Nonnull __once, void (* _Nonnull __init_routine)(void)); int pthread_rwlockattr_init(pthread_rwlockattr_t* _Nonnull __attr); int pthread_rwlockattr_destroy(pthread_rwlockattr_t* _Nonnull __attr); int pthread_rwlockattr_getpshared(const pthread_rwlockattr_t* _Nonnull __attr, int* _Nonnull __shared); int pthread_rwlockattr_setpshared(pthread_rwlockattr_t* _Nonnull __attr, int __shared); int pthread_rwlockattr_getkind_np(const pthread_rwlockattr_t* _Nonnull __attr, int* _Nonnull __kind) __attribute__((__availability__(android,strict,introduced=23))); int pthread_rwlockattr_setkind_np(pthread_rwlockattr_t* _Nonnull __attr, int __kind) __attribute__((__availability__(android,strict,introduced=23))); # 280 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 int pthread_rwlock_destroy(pthread_rwlock_t* _Nonnull __rwlock); int pthread_rwlock_init(pthread_rwlock_t* _Nonnull __rwlock, const pthread_rwlockattr_t* _Nullable __attr); int pthread_rwlock_rdlock(pthread_rwlock_t* _Nonnull __rwlock); int pthread_rwlock_timedrdlock(pthread_rwlock_t* _Nonnull __rwlock, const struct timespec* _Nullable __timeout); int pthread_rwlock_timedwrlock(pthread_rwlock_t* _Nonnull __rwlock, const struct timespec* 
_Nullable __timeout); int pthread_rwlock_tryrdlock(pthread_rwlock_t* _Nonnull __rwlock); int pthread_rwlock_trywrlock(pthread_rwlock_t* _Nonnull __rwlock); int pthread_rwlock_unlock(pthread_rwlock_t* _Nonnull __rwlock); int pthread_rwlock_wrlock(pthread_rwlock_t* _Nonnull __rwlock); # 325 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 pthread_t pthread_self(void) __attribute__((__const__)); # 335 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 int pthread_setname_np(pthread_t __pthread, const char* _Nonnull __name); # 348 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 int pthread_setschedparam(pthread_t __pthread, int __policy, const struct sched_param* _Nonnull __param); int pthread_getschedparam(pthread_t __pthread, int* _Nonnull __policy, struct sched_param* _Nonnull __param); # 377 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/pthread.h" 3 4 int pthread_setspecific(pthread_key_t __key, const void* _Nullable __value); typedef void (* _Nullable __pthread_cleanup_func_t)(void* _Nullable); typedef struct __pthread_cleanup_t { struct __pthread_cleanup_t* _Nullable __cleanup_prev; __pthread_cleanup_func_t _Nullable __cleanup_routine; void* _Nullable __cleanup_arg; } __pthread_cleanup_t; void __pthread_cleanup_push(__pthread_cleanup_t* _Nonnull c, __pthread_cleanup_func_t _Nullable, void* _Nullable); void __pthread_cleanup_pop(__pthread_cleanup_t* _Nonnull, int); # 101 "ggml.c" 2 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stdatomic.h" 1 3 # 131 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stdatomic.h" 3 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/bits/stdatomic.h" 1 3 # 43 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/bits/stdatomic.h" 3 # 1 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 # 44 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/bits/stdatomic.h" 2 3 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/uchar.h" 1 3 4 # 36 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/uchar.h" 3 4 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 4 # 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/uchar.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/mbstate_t.h" 1 3 4 # 42 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/mbstate_t.h" 3 4 typedef struct { unsigned char __seq[4]; } mbstate_t; # 39 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/uchar.h" 2 3 4 typedef unsigned short char16_t; typedef unsigned int char32_t; # 66 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/uchar.h" 3 4 size_t c16rtomb(char* _Nullable __buf, char16_t __ch16, mbstate_t* _Nullable __ps) __attribute__((__availability__(android,strict,introduced=21))); # 77 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/uchar.h" 3 4 size_t c32rtomb(char* _Nullable __buf, char32_t __ch32, mbstate_t* _Nullable __ps) __attribute__((__availability__(android,strict,introduced=21))); size_t mbrtoc16(char16_t* _Nullable __ch16, const char* _Nullable __s, size_t __n, mbstate_t* _Nullable __ps) __attribute__((__availability__(android,strict,introduced=21))); size_t mbrtoc32(char32_t* _Nullable __ch32, const char* _Nullable __s, size_t __n, mbstate_t* _Nullable __ps) __attribute__((__availability__(android,strict,introduced=21))); # 50 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/bits/stdatomic.h" 2 3 # 128 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/bits/stdatomic.h" 3 typedef enum { memory_order_relaxed = 0, memory_order_consume = 1, memory_order_acquire = 2, memory_order_release = 3, memory_order_acq_rel = 4, memory_order_seq_cst = 5 } memory_order; static __inline void atomic_thread_fence(memory_order __order __attribute__((unused))) { __c11_atomic_thread_fence(__order); } static __inline void atomic_signal_fence(memory_order __order __attribute__((unused))) { __c11_atomic_signal_fence(__order); } # 159 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/bits/stdatomic.h" 3 typedef _Atomic(_Bool) atomic_bool; typedef _Atomic(char) atomic_char; typedef _Atomic(signed char) atomic_schar; typedef _Atomic(unsigned char) atomic_uchar; typedef _Atomic(short) atomic_short; typedef _Atomic(unsigned short) atomic_ushort; typedef _Atomic(int) atomic_int; typedef _Atomic(unsigned int) atomic_uint; typedef _Atomic(long) atomic_long; typedef _Atomic(unsigned long) atomic_ulong; typedef _Atomic(long long) atomic_llong; typedef _Atomic(unsigned long long) atomic_ullong; typedef _Atomic(char16_t) atomic_char16_t; typedef _Atomic(char32_t) atomic_char32_t; typedef _Atomic(wchar_t) atomic_wchar_t; typedef _Atomic(int_least8_t) atomic_int_least8_t; typedef _Atomic(uint_least8_t) atomic_uint_least8_t; typedef _Atomic(int_least16_t) atomic_int_least16_t; typedef _Atomic(uint_least16_t) atomic_uint_least16_t; typedef _Atomic(int_least32_t) atomic_int_least32_t; typedef _Atomic(uint_least32_t) atomic_uint_least32_t; typedef _Atomic(int_least64_t) atomic_int_least64_t; typedef _Atomic(uint_least64_t) atomic_uint_least64_t; typedef _Atomic(int_fast8_t) atomic_int_fast8_t; typedef _Atomic(uint_fast8_t) atomic_uint_fast8_t; typedef _Atomic(int_fast16_t) atomic_int_fast16_t; typedef _Atomic(uint_fast16_t) atomic_uint_fast16_t; typedef _Atomic(int_fast32_t) atomic_int_fast32_t; typedef _Atomic(uint_fast32_t) atomic_uint_fast32_t; typedef 
/*
 * Preprocessed remainder of clang's bits/stdatomic.h, one typedef that the
 * line markers attribute to ggml.c, and the start of linux/stat.h.
 *
 * The leading tokens finish a typedef that starts on the previous physical
 * line; the rest of the _Atomic(...) typedef list follows.
 *
 * atomic_flag is a struct holding a single atomic_bool member, __flag.
 *   atomic_flag_test_and_set_explicit(obj, order)
 *       - exchanges 1 into obj->__flag via __c11_atomic_exchange with the
 *         given order and returns that builtin's result.
 *   atomic_flag_clear_explicit(obj, order)
 *       - stores 0 into obj->__flag via __c11_atomic_store.
 *   atomic_flag_test_and_set(obj) / atomic_flag_clear(obj)
 *       - call the _explicit variants with memory_order_seq_cst.
 *
 * thread_ret_t is a typedef for void *.
 *
 * struct statx_timestamp is { __s64 tv_sec; __u32 tv_nsec; __s32 __reserved; }.
 * struct statx opens here; its remaining members are on the next physical
 * line.
 */
_Atomic(int_fast64_t) atomic_int_fast64_t; typedef _Atomic(uint_fast64_t) atomic_uint_fast64_t; typedef _Atomic(intptr_t) atomic_intptr_t; typedef _Atomic(uintptr_t) atomic_uintptr_t; typedef _Atomic(size_t) atomic_size_t; typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t; typedef _Atomic(intmax_t) atomic_intmax_t; typedef _Atomic(uintmax_t) atomic_uintmax_t; # 266 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/bits/stdatomic.h" 3 typedef struct { atomic_bool __flag; } atomic_flag; static __inline _Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *__object, memory_order __order) { return (__c11_atomic_exchange(&__object->__flag, 1, __order)); } static __inline void atomic_flag_clear_explicit(volatile atomic_flag *__object, memory_order __order) { __c11_atomic_store(&__object->__flag, 0, __order); } static __inline _Bool atomic_flag_test_and_set(volatile atomic_flag *__object) { return (atomic_flag_test_and_set_explicit(__object, memory_order_seq_cst)); } static __inline void atomic_flag_clear(volatile atomic_flag *__object) { atomic_flag_clear_explicit(__object, memory_order_seq_cst); } # 132 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stdatomic.h" 2 3 # 102 "ggml.c" 2 typedef void * thread_ret_t; # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/stat.h" 1 3 4 # 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/stat.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/stat.h" 1 3 4 # 54 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/linux/stat.h" 3 4 struct statx_timestamp { __s64 tv_sec; __u32 tv_nsec; __s32 __reserved; }; struct statx { __u32 stx_mask; __u32 stx_blksize; __u64 stx_attributes; __u32 stx_nlink; __u32 stx_uid; __u32 stx_gid; __u16 stx_mode; __u16 __spare0[1]; __u64 stx_ino; __u64 stx_size; __u64 stx_blocks; __u64 stx_attributes_mask;
struct statx_timestamp stx_atime; struct statx_timestamp stx_btime; struct statx_timestamp stx_ctime; struct statx_timestamp stx_mtime; __u32 stx_rdev_major; __u32 stx_rdev_minor; __u32 stx_dev_major; __u32 stx_dev_minor; __u64 stx_mnt_id; __u32 stx_dio_mem_align; __u32 stx_dio_offset_align; __u64 __spare3[12]; }; # 38 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/stat.h" 2 3 4 # 102 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/stat.h" 3 4 struct stat { unsigned long long st_dev; unsigned char __pad0[4]; unsigned long __st_ino; unsigned int st_mode; nlink_t st_nlink; uid_t st_uid; gid_t st_gid; unsigned long long st_rdev; unsigned char __pad3[4]; long long st_size; unsigned long st_blksize; unsigned long long st_blocks; struct timespec st_atim; struct timespec st_mtim; struct timespec st_ctim; unsigned long long st_ino; }; struct stat64 { unsigned long long st_dev; unsigned char __pad0[4]; unsigned long __st_ino; unsigned int st_mode; nlink_t st_nlink; uid_t st_uid; gid_t st_gid; unsigned long long st_rdev; unsigned char __pad3[4]; long long st_size; unsigned long st_blksize; unsigned long long st_blocks; struct timespec st_atim; struct timespec st_mtim; struct timespec st_ctim; unsigned long long st_ino; }; # 139 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/stat.h" 3 4 int chmod(const char* _Nonnull __path, mode_t __mode); int fchmod(int __fd, mode_t __mode); int mkdir(const char* _Nonnull __path, mode_t __mode); int fstat(int __fd, struct stat* _Nonnull __buf); int fstat64(int __fd, struct stat64* _Nonnull __buf) __asm__("fstat") __attribute__((__availability__(android,strict,introduced=3))); int fstatat(int __dir_fd, const char* _Nonnull __path, struct stat* _Nonnull __buf, int __flags); int fstatat64(int __dir_fd, const char* _Nonnull __path, struct stat64* _Nonnull __buf, int __flags) __asm__("fstatat") 
/*
 * Preprocessed sys/stat.h prototypes with bits/fortify/stat.h spliced in
 * (per the embedded line markers).
 *
 * The leading attribute completes a declaration that starts on the previous
 * physical line. The 64-suffixed variants visible here (lstat64, stat64) are
 * bound with __asm__ to the same symbol names as lstat and stat.
 *
 * umask is declared twice:
 *   - a plain prototype, mode_t umask(mode_t __mask);
 *   - an overloadable, always_inline wrapper whose body returns
 *     __umask_chk(mode). It carries a diagnose_if attribute with condition
 *     (mode & ~0777), message "'umask' called with invalid mode" and
 *     severity "error", plus enable_if(1, "").
 * __umask_real is declared with __asm__("umask") but is not called anywhere
 * on this line.
 *
 * NOTE(review): enable_if(1, "") presumably only influences overload ranking
 * against the plain prototype -- confirm against clang's attribute reference.
 *
 * The trailing "# 204" is the first half of a line marker that continues on
 * the next physical line.
 */
__attribute__((__availability__(android,strict,introduced=3))); int lstat(const char* _Nonnull __path, struct stat* _Nonnull __buf); int lstat64(const char* _Nonnull __path, struct stat64* _Nonnull __buf) __asm__("lstat") __attribute__((__availability__(android,strict,introduced=3))); int stat(const char* _Nonnull __path, struct stat* _Nonnull __buf); int stat64(const char* _Nonnull __path, struct stat64* _Nonnull __buf) __asm__("stat") __attribute__((__availability__(android,strict,introduced=3))); int mknod(const char* _Nonnull __path, mode_t __mode, dev_t __dev); mode_t umask(mode_t __mask); # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/stat.h" 1 3 4 # 33 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/stat.h" 3 4 mode_t __umask_chk(mode_t) __attribute__((__availability__(android,strict,introduced=18))); mode_t __umask_real(mode_t mode) __asm__("umask"); static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) mode_t umask(mode_t mode) __attribute__((overloadable)) __attribute__((enable_if(1, ""))) __attribute__((diagnose_if(mode & ~0777, "'umask' called with invalid mode", "error"))) { return __umask_chk(mode); } # 157 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/stat.h" 2 3 4 int mkfifo(const char* _Nonnull __path, mode_t __mode) __attribute__((__availability__(android,strict,introduced=21))); int mkfifoat(int __dir_fd, const char* _Nonnull __path, mode_t __mode) __attribute__((__availability__(android,strict,introduced=23))); int fchmodat(int __dir_fd, const char* _Nonnull __path, mode_t __mode, int __flags); int mkdirat(int __dir_fd, const char* _Nonnull __path, mode_t __mode); int mknodat(int __dir_fd, const char* _Nonnull __path, mode_t __mode, dev_t __dev) __attribute__((__availability__(android,strict,introduced=21))); # 204
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/stat.h" 3 4 int utimensat(int __dir_fd, const char* _Null_unspecified __path, const struct timespec __times[_Nullable 2], int __flags); # 220 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/sys/stat.h" 3 4 int futimens(int __fd, const struct timespec __times[_Nullable 2]) __attribute__((__availability__(android,strict,introduced=19))); # 107 "ggml.c" 2 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 1 3 4 # 31 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/stddef.h" 1 3 4 # 32 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fcntl.h" 1 3 4 # 46 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fcntl.h" 3 4 int fcntl(int __fd, int __cmd, ...); # 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/getopt.h" 1 3 4 # 41 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/getopt.h" 3 4 int getopt(int __argc, char* const __argv[], const char* __options); extern char* optarg; extern int optind; extern int opterr; extern int optopt; # 38 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/ioctl.h" 1 3 4 # 43 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/ioctl.h" 3 4 int ioctl(int __fd, int __request, ...); # 60 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/ioctl.h" 3 4 int ioctl(int __fd, unsigned __request, ...) __attribute__((overloadable)) __attribute__((enable_if(1, ""))) __asm__("ioctl"); # 39 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/lockf.h" 1 3 4 # 40 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/sysconf.h" 1 3 4 # 193 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/sysconf.h" 3 4 long sysconf(int __name); # 43 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 2 3 4 # 76 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 extern char* _Nullable * _Nullable environ; __attribute__((__noreturn__)) void _exit(int __status); pid_t fork(void); pid_t vfork(void) __attribute__((__returns_twice__)); pid_t getpid(void); pid_t gettid(void) __attribute__((__const__)); pid_t getpgid(pid_t __pid); int setpgid(pid_t __pid, pid_t __pgid); pid_t getppid(void); pid_t getpgrp(void); int setpgrp(void); pid_t getsid(pid_t __pid) __attribute__((__availability__(android,strict,introduced=17))); pid_t setsid(void); int execv(const char* _Nonnull __path, char* _Nullable const* _Nullable __argv); int execvp(const char* _Nonnull __file, char* _Nullable const* _Nullable __argv); int execvpe(const char* _Nonnull __file, char* _Nullable const* _Nullable __argv, char* _Nullable const* _Nullable __envp) __attribute__((__availability__(android,strict,introduced=21))); int execve(const char* _Nonnull __file, char* _Nullable const* _Nullable __argv, char* _Nullable const* _Nullable __envp); int execl(const char* _Nonnull __path, const char* _Nullable __arg0, 
...) __attribute__((__sentinel__)); int execlp(const char* _Nonnull __file, const char* _Nullable __arg0, ...) __attribute__((__sentinel__)); int execle(const char* _Nonnull __path, const char* _Nullable __arg0, ... ) __attribute__((__sentinel__(1))); int nice(int __incr); # 125 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 int setegid(gid_t __gid); # 136 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 int seteuid(uid_t __uid); # 147 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 int setgid(gid_t __gid); # 158 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 int setregid(gid_t __rgid, gid_t __egid); # 169 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 int setresgid(gid_t __rgid, gid_t __egid, gid_t __sgid); # 180 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 int setresuid(uid_t __ruid, uid_t __euid, uid_t __suid); # 191 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 int setreuid(uid_t __ruid, uid_t __euid); # 202 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 int setuid(uid_t __uid); uid_t getuid(void); uid_t geteuid(void); gid_t getgid(void); gid_t getegid(void); int getgroups(int __size, gid_t* _Nullable __list); int setgroups(size_t __size, const gid_t* _Nullable __list); int getresuid(uid_t* _Nonnull __ruid, uid_t* _Nonnull __euid, uid_t* _Nonnull __suid); int getresgid(gid_t* _Nonnull __rgid, gid_t* _Nonnull __egid, gid_t* _Nonnull __sgid); char* _Nullable getlogin(void); long fpathconf(int __fd, int __name); long pathconf(const char* _Nonnull __path, int __name); int access(const char* _Nonnull __path, int __mode); int faccessat(int __dirfd, const char* _Nonnull __path, int __mode, 
int __flags); int link(const char* _Nonnull __old_path, const char* _Nonnull __new_path); int linkat(int __old_dir_fd, const char* _Nonnull __old_path, int __new_dir_fd, const char* _Nonnull __new_path, int __flags) __attribute__((__availability__(android,strict,introduced=21))); int unlink(const char* _Nonnull __path); int unlinkat(int __dirfd, const char* _Nonnull __path, int __flags); int chdir(const char* _Nonnull __path); int fchdir(int __fd); int rmdir(const char* _Nonnull __path); int pipe(int __fds[_Nonnull 2]); int chroot(const char* _Nonnull __path); int symlink(const char* _Nonnull __old_path, const char* _Nonnull __new_path); int symlinkat(const char* _Nonnull __old_path, int __new_dir_fd, const char* _Nonnull __new_path) __attribute__((__availability__(android,strict,introduced=21))); ssize_t readlink(const char* _Nonnull __path, char* _Nonnull __buf, size_t __buf_size); ssize_t readlinkat(int __dir_fd, const char* _Nonnull __path, char* _Nonnull __buf, size_t __buf_size) __attribute__((__availability__(android,strict,introduced=21))); int chown(const char* _Nonnull __path, uid_t __owner, gid_t __group); int fchown(int __fd, uid_t __owner, gid_t __group); int fchownat(int __dir_fd, const char* _Nonnull __path, uid_t __owner, gid_t __group, int __flags); int lchown(const char* _Nonnull __path, uid_t __owner, gid_t __group); char* _Nullable getcwd(char* _Nullable __buf, size_t __size); void sync(void); # 268 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 int close(int __fd); # 280 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 ssize_t read(int __fd, void* _Null_unspecified __buf, size_t __count); # 292 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 ssize_t write(int __fd, const void* _Null_unspecified __buf, size_t __count); int dup(int __old_fd); int dup2(int __old_fd, int __new_fd); int dup3(int __old_fd, int 
__new_fd, int __flags) __attribute__((__availability__(android,strict,introduced=21))); int fsync(int __fd); int fdatasync(int __fd); # 316 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 int truncate(const char* _Nonnull __path, off_t __length); off_t lseek(int __fd, off_t __offset, int __whence); ssize_t pread(int __fd, void* _Nonnull __buf, size_t __count, off_t __offset); ssize_t pwrite(int __fd, const void* _Nonnull __buf, size_t __count, off_t __offset); int ftruncate(int __fd, off_t __length); int truncate64(const char* _Nonnull __path, off64_t __length) __attribute__((__availability__(android,strict,introduced=21))); off64_t lseek64(int __fd, off64_t __offset, int __whence); ssize_t pread64(int __fd, void* _Nonnull __buf, size_t __count, off64_t __offset); ssize_t pwrite64(int __fd, const void* _Nonnull __buf, size_t __count, off64_t __offset); int ftruncate64(int __fd, off64_t __length); int pause(void); unsigned int alarm(unsigned int __seconds); unsigned int sleep(unsigned int __seconds); int usleep(useconds_t __microseconds); int gethostname(char* _Nonnull _buf, size_t __buf_size); int sethostname(const char* _Nonnull __name, size_t __n) __attribute__((__availability__(android,strict,introduced=23))); int brk(void* _Nonnull __addr); void* _Nullable sbrk(ptrdiff_t __increment); int isatty(int __fd); char* _Nullable ttyname(int __fd); int ttyname_r(int __fd, char* _Nonnull __buf, size_t __buf_size); int acct(const char* _Nullable __path); int getpagesize(void) __attribute__((__availability__(android,strict,introduced=21))); long syscall(long __number, ...); int daemon(int __no_chdir, int __no_close); int cacheflush(long __addr, long __nbytes, long __cache); pid_t tcgetpgrp(int __fd); int tcsetpgrp(int __fd, pid_t __pid); # 428 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 3 4 # 1 
"/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/unistd.h" 1 3 4 # 40 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/unistd.h" 3 4 ssize_t __pread_chk(int, void*, size_t, off_t, size_t) __attribute__((__availability__(android,strict,introduced=23))); ssize_t __pread_real(int, void*, size_t, off_t) __asm__("pread"); ssize_t __pread64_chk(int, void*, size_t, off64_t, size_t) __attribute__((__availability__(android,strict,introduced=23))); ssize_t __pread64_real(int, void*, size_t, off64_t) __asm__("pread64"); ssize_t __pwrite_real(int, const void*, size_t, off_t) __asm__("pwrite"); ssize_t __pwrite64_real(int, const void*, size_t, off64_t) __asm__("pwrite64"); ssize_t __read_chk(int, void*, size_t, size_t) __attribute__((__availability__(android,strict,introduced=21))); # 78 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/unistd.h" 3 4 ssize_t __readlink_chk(const char*, char*, size_t, size_t) __attribute__((__availability__(android,strict,introduced=23))); ssize_t __readlinkat_chk(int dirfd, const char*, char*, size_t, size_t) __attribute__((__availability__(android,strict,introduced=23))); # 104 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/fortify/unistd.h" 3 4 static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) char* getcwd(char* const __attribute__((pass_object_size(1))) buf, size_t size) __attribute__((overloadable)) __attribute__((diagnose_if((((__builtin_object_size(((buf)), (1)))) != ((size_t) -1) && ((__builtin_object_size(((buf)), (1)))) < ((size))), "in call to '" "getcwd" "', '" "size" "' bytes overflows the given object", "error"))) { return (&getcwd)(buf, size); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) ssize_t pread(int fd, void* const __attribute__((pass_object_size(0))) buf, size_t count, 
/*
 * Preprocessed fortify wrappers for the unistd I/O calls. Every wrapper here
 * is static __inline__, always_inline, no_stack_protector and overloadable,
 * and carries two diagnose_if attributes with severity "error":
 *   1. the byte count exceeds 2147483647 ("must be <= SSIZE_MAX");
 *   2. __builtin_object_size of the buffer is not (size_t)-1 and is smaller
 *      than the byte count ("bytes overflows the given object").
 *
 * The leading tokens finish the pread wrapper whose signature starts on the
 * previous physical line. The final readlinkat wrapper runs past the last
 * line of this span and is not described here.
 *
 * Dispatch as written in the bodies:
 *   pread, pread64
 *       bos = __builtin_object_size(buf, 0). The unchecked symbol
 *       (__pread_real or __pread64_real) is called only when BOTH hold:
 *         a) bos == (size_t)-1, or count is a compile-time constant with
 *            count <= bos and bos <= 2147483647;
 *         b) count is a compile-time constant and count <= 2147483647.
 *       Otherwise __pread_chk or __pread64_chk is called with bos appended.
 *   read
 *       Same test; __read_chk(fd, buf, count, bos) versus (&read)(...).
 *   readlink
 *       Same test applied to size, with object-size type 1;
 *       __readlink_chk(path, buf, size, bos) versus (&readlink)(...).
 *   pwrite, pwrite64, write
 *       No run-time test: the bodies call __pwrite_real, __pwrite64_real and
 *       (&write) directly, so only the diagnose_if checks apply.
 *
 * NOTE(review): the (&name)(...) call form presumably binds to the plain
 * prototype rather than to this overload -- confirm against clang's
 * documentation of the overloadable attribute.
 */
off_t offset) __attribute__((overloadable)) __attribute__((diagnose_if((count) > 2147483647, "in call to '" "pread" "', '" "count" "' must be <= SSIZE_MAX", "error"))) __attribute__((diagnose_if((((__builtin_object_size(((buf)), (0)))) != ((size_t) -1) && ((__builtin_object_size(((buf)), (0)))) < ((count))), "in call to '" "pread" "', '" "count" "' bytes overflows the given object", "error"))) { size_t bos = __builtin_object_size(((buf)), (0)); if (!(((((bos)) == ((size_t) -1) || (__builtin_constant_p((count)) && (bos) >= (count) && ((bos) <= 2147483647))) && __builtin_constant_p(count) && (count) <= 2147483647))) { return __pread_chk(fd, buf, count, offset, bos); } return __pread_real(fd, buf, count, offset); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) ssize_t pread64(int fd, void* const __attribute__((pass_object_size(0))) buf, size_t count, off64_t offset) __attribute__((overloadable)) __attribute__((diagnose_if((count) > 2147483647, "in call to '" "pread64" "', '" "count" "' must be <= SSIZE_MAX", "error"))) __attribute__((diagnose_if((((__builtin_object_size(((buf)), (0)))) != ((size_t) -1) && ((__builtin_object_size(((buf)), (0)))) < ((count))), "in call to '" "pread64" "', '" "count" "' bytes overflows the given object", "error"))) { size_t bos = __builtin_object_size(((buf)), (0)); if (!(((((bos)) == ((size_t) -1) || (__builtin_constant_p((count)) && (bos) >= (count) && ((bos) <= 2147483647))) && __builtin_constant_p(count) && (count) <= 2147483647))) { return __pread64_chk(fd, buf, count, offset, bos); } return __pread64_real(fd, buf, count, offset); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) ssize_t pwrite(int fd, const void* const __attribute__((pass_object_size(0))) buf, size_t count, off_t offset) __attribute__((overloadable)) __attribute__((diagnose_if((count) > 2147483647, "in call to '" "pwrite" "', '" "count" "' must be <= SSIZE_MAX", "error")))
__attribute__((diagnose_if((((__builtin_object_size(((buf)), (0)))) != ((size_t) -1) && ((__builtin_object_size(((buf)), (0)))) < ((count))), "in call to '" "pwrite" "', '" "count" "' bytes overflows the given object", "error"))) { return __pwrite_real(fd, buf, count, offset); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) ssize_t pwrite64(int fd, const void* const __attribute__((pass_object_size(0))) buf, size_t count, off64_t offset) __attribute__((overloadable)) __attribute__((diagnose_if((count) > 2147483647, "in call to '" "pwrite64" "', '" "count" "' must be <= SSIZE_MAX", "error"))) __attribute__((diagnose_if((((__builtin_object_size(((buf)), (0)))) != ((size_t) -1) && ((__builtin_object_size(((buf)), (0)))) < ((count))), "in call to '" "pwrite64" "', '" "count" "' bytes overflows the given object", "error"))) { return __pwrite64_real(fd, buf, count, offset); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) ssize_t read(int fd, void* const __attribute__((pass_object_size(0))) buf, size_t count) __attribute__((overloadable)) __attribute__((diagnose_if((count) > 2147483647, "in call to '" "read" "', '" "count" "' must be <= SSIZE_MAX", "error"))) __attribute__((diagnose_if((((__builtin_object_size(((buf)), (0)))) != ((size_t) -1) && ((__builtin_object_size(((buf)), (0)))) < ((count))), "in call to '" "read" "', '" "count" "' bytes overflows the given object", "error"))) { size_t bos = __builtin_object_size(((buf)), (0)); if (!(((((bos)) == ((size_t) -1) || (__builtin_constant_p((count)) && (bos) >= (count) && ((bos) <= 2147483647))) && __builtin_constant_p(count) && (count) <= 2147483647))) { return __read_chk(fd, buf, count, bos); } return (&read)(fd, buf, count); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) ssize_t write(int fd, const void* const __attribute__((pass_object_size(0))) buf, size_t count)
__attribute__((overloadable)) __attribute__((diagnose_if((count) > 2147483647, "in call to '" "write" "', '" "count" "' must be <= SSIZE_MAX", "error"))) __attribute__((diagnose_if((((__builtin_object_size(((buf)), (0)))) != ((size_t) -1) && ((__builtin_object_size(((buf)), (0)))) < ((count))), "in call to '" "write" "', '" "count" "' bytes overflows the given object", "error"))) { return (&write)(fd, buf, count); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) ssize_t readlink(const char* path, char* const __attribute__((pass_object_size(1))) buf, size_t size) __attribute__((overloadable)) __attribute__((diagnose_if((size) > 2147483647, "in call to '" "readlink" "', '" "size" "' must be <= SSIZE_MAX", "error"))) __attribute__((diagnose_if((((__builtin_object_size(((buf)), (1)))) != ((size_t) -1) && ((__builtin_object_size(((buf)), (1)))) < ((size))), "in call to '" "readlink" "', '" "size" "' bytes overflows the given object", "error"))) { size_t bos = __builtin_object_size(((buf)), (1)); if (!(((((bos)) == ((size_t) -1) || (__builtin_constant_p((size)) && (bos) >= (size) && ((bos) <= 2147483647))) && __builtin_constant_p(size) && (size) <= 2147483647))) { return __readlink_chk(path, buf, size, bos); } return (&readlink)(path, buf, size); } static __inline__ __attribute__((no_stack_protector)) __attribute__((__always_inline__)) ssize_t readlinkat(int dirfd, const char* path, char* const __attribute__((pass_object_size(1))) buf, size_t size) __attribute__((overloadable)) __attribute__((diagnose_if((size) > 2147483647, "in call to '" "readlinkat" "', '" "size" "' must be <= SSIZE_MAX", "error"))) __attribute__((diagnose_if((((__builtin_object_size(((buf)), (1)))) != ((size_t) -1) && ((__builtin_object_size(((buf)), (1)))) < ((size))), "in call to '" "readlinkat" "', '" "size" "' bytes overflows the given object", "error"))) { size_t bos = __builtin_object_size(((buf)), (1)); if (!(((((bos)) == ((size_t) -1) ||
(__builtin_constant_p((size)) && (bos) >= (size) && ((bos) <= 2147483647))) && __builtin_constant_p(size) && (size) <= 2147483647))) { return __readlinkat_chk(dirfd, path, buf, size, bos); } return (&readlinkat)(dirfd, path, buf, size); } # 429 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 2 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/android/legacy_unistd_inlines.h" 1 3 4 # 36 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/android/legacy_unistd_inlines.h" 3 4 # 1 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/swab.h" 1 3 4 # 41 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/bits/swab.h" 3 4 static __inline void swab(const void* __void_src, void* __void_dst, ssize_t __byte_count) { const uint8_t* __src = ((const uint8_t*) (__void_src)); uint8_t* __dst = ((uint8_t*) (__void_dst)); while (__byte_count > 1) { uint8_t x = *__src++; uint8_t y = *__src++; *__dst++ = y; *__dst++ = x; __byte_count -= 2; } } # 37 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/android/legacy_unistd_inlines.h" 2 3 4 # 435 "/usr/local/hijklf/home/abcdefg/g/ndk/prebuilts/ndk/platform/sysroot/usr/include/unistd.h" 2 3 4 # 108 "ggml.c" 2 # 200 "ggml.c" inline static void * ggml_aligned_malloc(size_t size) { if (size == 0) { printf("WARNING: Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n"); return ((void*)0); } void * aligned_memory = ((void*)0); int result = posix_memalign(&aligned_memory, 4, size); if (result != 0) { const char *error_desc = "unknown allocation error"; switch (result) { case 22: error_desc = "invalid alignment value"; break; case 12: error_desc = "insufficient memory"; break; } printf("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0)); return ((void*)0); } return aligned_memory; } # 
281 "ggml.c" typedef double ggml_float; # 292 "ggml.c" # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 1 3 # 37 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 # 1 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_bf16.h" 1 3 # 14 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_bf16.h" 3 typedef __bf16 bfloat16_t; # 38 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 2 3 typedef __bf16 bfloat16_t; typedef float float32_t; typedef __fp16 float16_t; # 51 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 typedef int8_t poly8_t; typedef int16_t poly16_t; typedef int64_t poly64_t; typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t; typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t; typedef __attribute__((neon_vector_type(2))) int32_t int32x2_t; typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; typedef __attribute__((neon_vector_type(1))) int64_t int64x1_t; typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t; typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t; typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t; typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; typedef 
__attribute__((neon_vector_type(2))) float32_t float32x2_t; typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t; typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t; typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; typedef __attribute__((neon_polyvector_type(1))) poly64_t poly64x1_t; typedef __attribute__((neon_polyvector_type(2))) poly64_t poly64x2_t; typedef struct int8x8x2_t { int8x8_t val[2]; } int8x8x2_t; typedef struct int8x16x2_t { int8x16_t val[2]; } int8x16x2_t; typedef struct int16x4x2_t { int16x4_t val[2]; } int16x4x2_t; typedef struct int16x8x2_t { int16x8_t val[2]; } int16x8x2_t; typedef struct int32x2x2_t { int32x2_t val[2]; } int32x2x2_t; typedef struct int32x4x2_t { int32x4_t val[2]; } int32x4x2_t; typedef struct int64x1x2_t { int64x1_t val[2]; } int64x1x2_t; typedef struct int64x2x2_t { int64x2_t val[2]; } int64x2x2_t; typedef struct uint8x8x2_t { uint8x8_t val[2]; } uint8x8x2_t; typedef struct uint8x16x2_t { uint8x16_t val[2]; } uint8x16x2_t; typedef struct uint16x4x2_t { uint16x4_t val[2]; } uint16x4x2_t; typedef struct uint16x8x2_t { uint16x8_t val[2]; } uint16x8x2_t; typedef struct uint32x2x2_t { uint32x2_t val[2]; } uint32x2x2_t; typedef struct uint32x4x2_t { uint32x4_t val[2]; } uint32x4x2_t; typedef struct uint64x1x2_t { uint64x1_t val[2]; } uint64x1x2_t; typedef struct uint64x2x2_t { uint64x2_t val[2]; } uint64x2x2_t; typedef struct float16x4x2_t { float16x4_t val[2]; } float16x4x2_t; typedef struct float16x8x2_t { float16x8_t val[2]; } float16x8x2_t; typedef struct float32x2x2_t { float32x2_t val[2]; } float32x2x2_t; typedef struct float32x4x2_t { float32x4_t val[2]; } float32x4x2_t; # 176 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 typedef struct poly8x8x2_t { poly8x8_t val[2]; } poly8x8x2_t; typedef struct 
poly8x16x2_t { poly8x16_t val[2]; } poly8x16x2_t; typedef struct poly16x4x2_t { poly16x4_t val[2]; } poly16x4x2_t; typedef struct poly16x8x2_t { poly16x8_t val[2]; } poly16x8x2_t; typedef struct poly64x1x2_t { poly64x1_t val[2]; } poly64x1x2_t; typedef struct poly64x2x2_t { poly64x2_t val[2]; } poly64x2x2_t; typedef struct int8x8x3_t { int8x8_t val[3]; } int8x8x3_t; typedef struct int8x16x3_t { int8x16_t val[3]; } int8x16x3_t; typedef struct int16x4x3_t { int16x4_t val[3]; } int16x4x3_t; typedef struct int16x8x3_t { int16x8_t val[3]; } int16x8x3_t; typedef struct int32x2x3_t { int32x2_t val[3]; } int32x2x3_t; typedef struct int32x4x3_t { int32x4_t val[3]; } int32x4x3_t; typedef struct int64x1x3_t { int64x1_t val[3]; } int64x1x3_t; typedef struct int64x2x3_t { int64x2_t val[3]; } int64x2x3_t; typedef struct uint8x8x3_t { uint8x8_t val[3]; } uint8x8x3_t; typedef struct uint8x16x3_t { uint8x16_t val[3]; } uint8x16x3_t; typedef struct uint16x4x3_t { uint16x4_t val[3]; } uint16x4x3_t; typedef struct uint16x8x3_t { uint16x8_t val[3]; } uint16x8x3_t; typedef struct uint32x2x3_t { uint32x2_t val[3]; } uint32x2x3_t; typedef struct uint32x4x3_t { uint32x4_t val[3]; } uint32x4x3_t; typedef struct uint64x1x3_t { uint64x1_t val[3]; } uint64x1x3_t; typedef struct uint64x2x3_t { uint64x2_t val[3]; } uint64x2x3_t; typedef struct float16x4x3_t { float16x4_t val[3]; } float16x4x3_t; typedef struct float16x8x3_t { float16x8_t val[3]; } float16x8x3_t; typedef struct float32x2x3_t { float32x2_t val[3]; } float32x2x3_t; typedef struct float32x4x3_t { float32x4_t val[3]; } float32x4x3_t; # 290 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 typedef struct poly8x8x3_t { poly8x8_t val[3]; } poly8x8x3_t; typedef struct poly8x16x3_t { poly8x16_t val[3]; } poly8x16x3_t; typedef struct poly16x4x3_t { poly16x4_t val[3]; } poly16x4x3_t; typedef struct poly16x8x3_t { poly16x8_t val[3]; } poly16x8x3_t; typedef struct poly64x1x3_t { poly64x1_t val[3]; } 
poly64x1x3_t; typedef struct poly64x2x3_t { poly64x2_t val[3]; } poly64x2x3_t; typedef struct int8x8x4_t { int8x8_t val[4]; } int8x8x4_t; typedef struct int8x16x4_t { int8x16_t val[4]; } int8x16x4_t; typedef struct int16x4x4_t { int16x4_t val[4]; } int16x4x4_t; typedef struct int16x8x4_t { int16x8_t val[4]; } int16x8x4_t; typedef struct int32x2x4_t { int32x2_t val[4]; } int32x2x4_t; typedef struct int32x4x4_t { int32x4_t val[4]; } int32x4x4_t; typedef struct int64x1x4_t { int64x1_t val[4]; } int64x1x4_t; typedef struct int64x2x4_t { int64x2_t val[4]; } int64x2x4_t; typedef struct uint8x8x4_t { uint8x8_t val[4]; } uint8x8x4_t; typedef struct uint8x16x4_t { uint8x16_t val[4]; } uint8x16x4_t; typedef struct uint16x4x4_t { uint16x4_t val[4]; } uint16x4x4_t; typedef struct uint16x8x4_t { uint16x8_t val[4]; } uint16x8x4_t; typedef struct uint32x2x4_t { uint32x2_t val[4]; } uint32x2x4_t; typedef struct uint32x4x4_t { uint32x4_t val[4]; } uint32x4x4_t; typedef struct uint64x1x4_t { uint64x1_t val[4]; } uint64x1x4_t; typedef struct uint64x2x4_t { uint64x2_t val[4]; } uint64x2x4_t; typedef struct float16x4x4_t { float16x4_t val[4]; } float16x4x4_t; typedef struct float16x8x4_t { float16x8_t val[4]; } float16x8x4_t; typedef struct float32x2x4_t { float32x2_t val[4]; } float32x2x4_t; typedef struct float32x4x4_t { float32x4_t val[4]; } float32x4x4_t; # 404 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 typedef struct poly8x8x4_t { poly8x8_t val[4]; } poly8x8x4_t; typedef struct poly8x16x4_t { poly8x16_t val[4]; } poly8x16x4_t; typedef struct poly16x4x4_t { poly16x4_t val[4]; } poly16x4x4_t; typedef struct poly16x8x4_t { poly16x8_t val[4]; } poly16x8x4_t; typedef struct poly64x1x4_t { poly64x1_t val[4]; } poly64x1x4_t; typedef struct poly64x2x4_t { poly64x2_t val[4]; } poly64x2x4_t; typedef __attribute__((neon_vector_type(4))) bfloat16_t bfloat16x4_t; typedef __attribute__((neon_vector_type(8))) bfloat16_t bfloat16x8_t; typedef struct 
bfloat16x4x2_t { bfloat16x4_t val[2]; } bfloat16x4x2_t; typedef struct bfloat16x8x2_t { bfloat16x8_t val[2]; } bfloat16x8x2_t; typedef struct bfloat16x4x3_t { bfloat16x4_t val[3]; } bfloat16x4x3_t; typedef struct bfloat16x8x3_t { bfloat16x8_t val[3]; } bfloat16x8x3_t; typedef struct bfloat16x4x4_t { bfloat16x4_t val[4]; } bfloat16x4x4_t; typedef struct bfloat16x8x4_t { bfloat16x8_t val[4]; } bfloat16x8x4_t; # 1722 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vabdq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 1744 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vabdq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 1766 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vabdq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 1788 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vabdq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 1810 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vabdq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = 
(float32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } # 1827 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vabdq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 1849 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vabdq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vabdq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 1871 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vabd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 1893 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vabd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 1915 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vabd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 1937 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vabd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) 
__builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 1959 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vabd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 1976 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vabd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 1998 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vabd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vabd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 2020 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vabsq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 32); return __ret; } # 2036 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vabsq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 41); return __ret; } # 2052 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vabsq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 34); return __ret; } # 2068 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vabsq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vabsq_v((int8x16_t)__p0, 33); return __ret; } # 2084 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vabs_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vabs_v((int8x8_t)__p0, 0); return __ret; } # 2100 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vabs_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vabs_v((int8x8_t)__p0, 9); return __ret; } # 2116 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vabs_s32(int32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vabs_v((int8x8_t)__p0, 2); return __ret; } # 2132 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vabs_s16(int16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vabs_v((int8x8_t)__p0, 1); return __ret; } # 2148 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 + __p1; return __ret; } # 2165 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 + 
__p1; return __ret; } # 2182 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 + __p1; return __ret; } # 2199 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 + __p1; return __ret; } # 2216 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 + __p1; return __ret; } # 2233 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vaddq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __p0 + __p1; return __ret; } # 2250 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 + __p1; return __ret; } # 2267 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 + __p1; return __ret; } # 2284 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 + __p1; return __ret; } # 2301 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 + __p1; return __ret; } # 2318 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 + __p1; return __ret; } # 2334 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vadd_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 + __p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 + __p1; return __ret; } # 2357 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 + __p1; return __ret; } # 2374 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vadd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __p0 + __p1; return __ret; } # 2391 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 + __p1; return __ret; } # 2407 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vadd_s64(int64x1_t __p0, 
int64x1_t __p1) { int64x1_t __ret; __ret = __p0 + __p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 + __p1; return __ret; } # 2430 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vadd_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } # 2446 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly64x1_t vadd_p64(poly64x1_t __p0, poly64x1_t __p1) { poly64x1_t __ret; __ret = (poly64x1_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 6); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vadd_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4_t __ret; __ret = (poly16x4_t) __builtin_neon_vadd_v((int8x8_t)__p0, (int8x8_t)__p1, 5); return __ret; } # 2469 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vaddq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } # 2486 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly64x2_t vaddq_p64(poly64x2_t __p0, poly64x2_t __p1) { poly64x2_t __ret; __ret = (poly64x2_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 38); return __ret; } # 2503 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t 
vaddq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8_t __ret; __ret = (poly16x8_t) __builtin_neon_vaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 37); return __ret; } # 2520 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vaddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } # 2542 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vaddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } # 2564 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vaddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } # 2586 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vaddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } # 2608 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vaddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } # 2630 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) 
int8x8_t vaddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vaddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } # 2652 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vandq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 & __p1; return __ret; } # 2669 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vandq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 & __p1; return __ret; } # 2686 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vandq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 & __p1; return __ret; } # 2703 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vandq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 & __p1; return __ret; } # 2720 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vandq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 & __p1; return __ret; } # 2737 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vandq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 & __p1; return __ret; } # 2754 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vandq_s64(int64x2_t __p0, 
int64x2_t __p1) { int64x2_t __ret; __ret = __p0 & __p1; return __ret; } # 2771 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vandq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 & __p1; return __ret; } # 2788 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vand_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 & __p1; return __ret; } # 2805 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vand_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 & __p1; return __ret; } # 2821 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vand_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 & __p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vand_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 & __p1; return __ret; } # 2844 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vand_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 & __p1; return __ret; } # 2861 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vand_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 & __p1; return __ret; } # 2877 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int64x1_t vand_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 & __p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vand_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 & __p1; return __ret; } # 2900 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vbicq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 & ~__p1; return __ret; } # 2917 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vbicq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 & ~__p1; return __ret; } # 2934 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vbicq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 & ~__p1; return __ret; } # 2951 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vbicq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 & ~__p1; return __ret; } # 2968 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vbicq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 & ~__p1; return __ret; } # 2985 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vbicq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 & ~__p1; return __ret; } # 3002 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vbicq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 & ~__p1; return __ret; } # 3019 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vbicq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 & ~__p1; return __ret; } # 3036 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vbic_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 & ~__p1; return __ret; } # 3053 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vbic_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 & ~__p1; return __ret; } # 3069 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vbic_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 & ~__p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vbic_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 & ~__p1; return __ret; } # 3092 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vbic_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 & ~__p1; return __ret; } # 3109 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vbic_s32(int32x2_t __p0, 
int32x2_t __p1) { int32x2_t __ret; __ret = __p0 & ~__p1; return __ret; } # 3125 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vbic_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 & ~__p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vbic_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 & ~__p1; return __ret; } # 3148 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vbsl_p8(uint8x8_t __p0, poly8x8_t __p1, poly8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 4); return __ret; } # 3166 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vbsl_p16(uint16x4_t __p0, poly16x4_t __p1, poly16x4_t __p2) { poly16x4_t __ret; __ret = (poly16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 5); return __ret; } # 3184 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vbslq_p8(uint8x16_t __p0, poly8x16_t __p1, poly8x16_t __p2) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 36); return __ret; } # 3202 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vbslq_p16(uint16x8_t __p0, poly16x8_t __p1, poly16x8_t __p2) { poly16x8_t __ret; __ret = (poly16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 37); return __ret; } # 3220 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vbslq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 48); return __ret; } # 3238 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vbslq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } # 3256 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vbslq_u64(uint64x2_t __p0, uint64x2_t __p1, uint64x2_t __p2) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 51); return __ret; } # 3274 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vbslq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 49); return __ret; } # 3292 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vbslq_s8(uint8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); return __ret; } # 3310 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) 
float32x4_t vbslq_f32(uint32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } # 3328 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vbslq_s32(uint32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } # 3346 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vbslq_s64(uint64x2_t __p0, int64x2_t __p1, int64x2_t __p2) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 35); return __ret; } # 3364 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vbslq_s16(uint16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vbslq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); return __ret; } # 3382 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vbsl_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 16); return __ret; } # 3400 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vbsl_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, 
(int8x8_t)__p2, 18); return __ret; } # 3417 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vbsl_u64(uint64x1_t __p0, uint64x1_t __p1, uint64x1_t __p2) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 19); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vbsl_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 17); return __ret; } # 3441 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vbsl_s8(uint8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 0); return __ret; } # 3459 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vbsl_f32(uint32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } # 3477 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vbsl_s32(uint32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } # 3494 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vbsl_s64(uint64x1_t __p0, int64x1_t __p1, int64x1_t __p2) { int64x1_t __ret; 
__ret = (int64x1_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 3); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vbsl_s16(uint16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vbsl_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); return __ret; } # 3518 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcageq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcageq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 3535 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcage_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcage_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 3552 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcagtq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcagtq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 3569 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcagt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcagt_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 3586 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcaleq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) 
__builtin_neon_vcaleq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 3603 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcale_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcale_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 3620 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcaltq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcaltq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 3637 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcalt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcalt_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 3654 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vceq_p8(poly8x8_t __p0, poly8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 == __p1); return __ret; } # 3671 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vceqq_p8(poly8x16_t __p0, poly8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 == __p1); return __ret; } # 3688 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vceqq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 == __p1); return __ret; } # 3705 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vceqq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 == __p1); return __ret; } # 3722 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vceqq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 == __p1); return __ret; } # 3739 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vceqq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 == __p1); return __ret; } # 3756 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vceqq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 == __p1); return __ret; } # 3773 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vceqq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 == __p1); return __ret; } # 3790 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vceqq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 == __p1); return __ret; } # 3807 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vceq_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 == __p1); return 
__ret; } # 3824 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vceq_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 == __p1); return __ret; } # 3841 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vceq_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 == __p1); return __ret; } # 3858 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vceq_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 == __p1); return __ret; } # 3875 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vceq_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 == __p1); return __ret; } # 3892 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vceq_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 == __p1); return __ret; } # 3909 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vceq_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 == __p1); return __ret; } # 3926 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vcgeq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 >= __p1); 
return __ret; } # 3943 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcgeq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 >= __p1); return __ret; } # 3960 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vcgeq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 >= __p1); return __ret; } # 3977 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vcgeq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 >= __p1); return __ret; } # 3994 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcgeq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 >= __p1); return __ret; } # 4011 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcgeq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 >= __p1); return __ret; } # 4028 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vcgeq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 >= __p1); return __ret; } # 4045 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vcge_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = 
(uint8x8_t)(__p0 >= __p1); return __ret; } # 4062 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcge_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 >= __p1); return __ret; } # 4079 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vcge_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 >= __p1); return __ret; } # 4096 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vcge_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 >= __p1); return __ret; } # 4113 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcge_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 >= __p1); return __ret; } # 4130 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcge_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 >= __p1); return __ret; } # 4147 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vcge_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 >= __p1); return __ret; } # 4164 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vcgtq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; 
__ret = (uint8x16_t)(__p0 > __p1); return __ret; } # 4181 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcgtq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 > __p1); return __ret; } # 4198 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vcgtq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 > __p1); return __ret; } # 4215 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vcgtq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 > __p1); return __ret; } # 4232 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcgtq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 > __p1); return __ret; } # 4249 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcgtq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 > __p1); return __ret; } # 4266 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vcgtq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 > __p1); return __ret; } # 4283 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vcgt_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t 
__ret; __ret = (uint8x8_t)(__p0 > __p1); return __ret; } # 4300 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcgt_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 > __p1); return __ret; } # 4317 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vcgt_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 > __p1); return __ret; } # 4334 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vcgt_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 > __p1); return __ret; } # 4351 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcgt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 > __p1); return __ret; } # 4368 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcgt_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 > __p1); return __ret; } # 4385 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vcgt_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 > __p1); return __ret; } # 4402 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vcleq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t 
__ret; __ret = (uint8x16_t)(__p0 <= __p1); return __ret; } # 4419 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcleq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 <= __p1); return __ret; } # 4436 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vcleq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 <= __p1); return __ret; } # 4453 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vcleq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 <= __p1); return __ret; } # 4470 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcleq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 <= __p1); return __ret; } # 4487 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcleq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 <= __p1); return __ret; } # 4504 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vcleq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 <= __p1); return __ret; } # 4521 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vcle_u8(uint8x8_t __p0, uint8x8_t __p1) 
{ uint8x8_t __ret; __ret = (uint8x8_t)(__p0 <= __p1); return __ret; } # 4538 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcle_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 <= __p1); return __ret; } # 4555 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vcle_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 <= __p1); return __ret; } # 4572 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vcle_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 <= __p1); return __ret; } # 4589 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcle_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 <= __p1); return __ret; } # 4606 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcle_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 <= __p1); return __ret; } # 4623 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vcle_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 <= __p1); return __ret; } # 4640 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vclsq_u8(uint8x16_t __p0) { int8x16_t 
__ret; __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 32); return __ret; } # 4656 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vclsq_u32(uint32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 34); return __ret; } # 4672 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vclsq_u16(uint16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 33); return __ret; } # 4688 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vclsq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 32); return __ret; } # 4704 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vclsq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 34); return __ret; } # 4720 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vclsq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vclsq_v((int8x16_t)__p0, 33); return __ret; } # 4736 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vcls_u8(uint8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__p0, 0); return __ret; } # 4752 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int32x2_t vcls_u32(uint32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__p0, 2); return __ret; } # 4768 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vcls_u16(uint16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__p0, 1); return __ret; } # 4784 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vcls_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vcls_v((int8x8_t)__p0, 0); return __ret; } # 4800 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vcls_s32(int32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vcls_v((int8x8_t)__p0, 2); return __ret; } # 4816 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vcls_s16(int16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcls_v((int8x8_t)__p0, 1); return __ret; } # 4832 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vcltq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 < __p1); return __ret; } # 4849 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcltq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 < __p1); return __ret; } # 4866 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vcltq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 < __p1); return __ret; } # 4883 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vcltq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0 < __p1); return __ret; } # 4900 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcltq_f32(float32x4_t __p0, float32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 < __p1); return __ret; } # 4917 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcltq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0 < __p1); return __ret; } # 4934 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vcltq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 < __p1); return __ret; } # 4951 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vclt_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 < __p1); return __ret; } # 4968 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vclt_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 < __p1); return __ret; } # 
4985 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vclt_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 < __p1); return __ret; } # 5002 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vclt_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0 < __p1); return __ret; } # 5019 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vclt_f32(float32x2_t __p0, float32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 < __p1); return __ret; } # 5036 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vclt_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0 < __p1); return __ret; } # 5053 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vclt_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 < __p1); return __ret; } # 5070 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vclzq_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 48); return __ret; } # 5086 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vclzq_u32(uint32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) 
__builtin_neon_vclzq_v((int8x16_t)__p0, 50); return __ret; } # 5102 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vclzq_u16(uint16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 49); return __ret; } # 5118 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vclzq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 32); return __ret; } # 5134 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vclzq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 34); return __ret; } # 5150 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vclzq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vclzq_v((int8x16_t)__p0, 33); return __ret; } # 5166 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vclz_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vclz_v((int8x8_t)__p0, 16); return __ret; } # 5182 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vclz_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vclz_v((int8x8_t)__p0, 18); return __ret; } # 5198 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, 
__nodebug__)) uint16x4_t vclz_u16(uint16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vclz_v((int8x8_t)__p0, 17); return __ret; } # 5214 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vclz_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vclz_v((int8x8_t)__p0, 0); return __ret; } # 5230 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vclz_s32(int32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vclz_v((int8x8_t)__p0, 2); return __ret; } # 5246 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vclz_s16(int16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vclz_v((int8x8_t)__p0, 1); return __ret; } # 5262 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vcnt_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 4); return __ret; } # 5278 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vcntq_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 36); return __ret; } # 5294 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vcntq_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 48); return __ret; } # 5310 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vcntq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vcntq_v((int8x16_t)__p0, 32); return __ret; } # 5326 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vcnt_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 16); return __ret; } # 5342 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vcnt_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vcnt_v((int8x8_t)__p0, 0); return __ret; } # 5358 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vcombine_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } # 5375 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vcombine_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } # 5392 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vcombine_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } # 5414 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 
static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcombine_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3); return __ret; } # 5436 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vcombine_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); return __ret; } # 5451 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vcombine_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } # 5473 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vcombine_s8(int8x8_t __p0, int8x8_t __p1) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } # 5495 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vcombine_f32(float32x2_t __p0, float32x2_t __p1) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3); return __ret; } # 5517 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vcombine_f16(float16x4_t __p0, float16x4_t __p1) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } # 5539 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, 
__nodebug__)) int32x4_t vcombine_s32(int32x2_t __p0, int32x2_t __p1) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3); return __ret; } # 5561 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vcombine_s64(int64x1_t __p0, int64x1_t __p1) { int64x2_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1); return __ret; } # 5576 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vcombine_s16(int16x4_t __p0, int16x4_t __p1) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } # 5670 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vcvtq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__p0, 50); return __ret; } # 5686 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vcvtq_f32_s32(int32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcvtq_f32_v((int8x16_t)__p0, 34); return __ret; } # 5702 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vcvt_f32_u32(uint32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcvt_f32_v((int8x8_t)__p0, 18); return __ret; } # 5718 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vcvt_f32_s32(int32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) 
__builtin_neon_vcvt_f32_v((int8x8_t)__p0, 2); return __ret; } # 5878 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vcvtq_s32_f32(float32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vcvtq_s32_v((int8x16_t)__p0, 34); return __ret; } # 5894 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vcvt_s32_f32(float32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vcvt_s32_v((int8x8_t)__p0, 2); return __ret; } # 5910 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vcvtq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vcvtq_u32_v((int8x16_t)__p0, 50); return __ret; } # 5926 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vcvt_u32_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vcvt_u32_v((int8x8_t)__p0, 18); return __ret; } # 6348 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vdup_n_p8(poly8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 6363 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vdup_n_p16(poly16_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 6378 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) poly8x16_t vdupq_n_p8(poly8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 6393 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vdupq_n_p16(poly16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 6408 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vdupq_n_u8(uint8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 6423 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vdupq_n_u32(uint32_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 6438 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vdupq_n_u64(uint64_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) {__p0, __p0}; return __ret; } # 6453 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vdupq_n_u16(uint16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 6468 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vdupq_n_s8(int8_t __p0) { int8x16_t __ret; __ret = (int8x16_t) 
{__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 6483 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vdupq_n_f32(float32_t __p0) { float32x4_t __ret; __ret = (float32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 6515 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vdupq_n_s32(int32_t __p0) { int32x4_t __ret; __ret = (int32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 6530 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vdupq_n_s64(int64_t __p0) { int64x2_t __ret; __ret = (int64x2_t) {__p0, __p0}; return __ret; } # 6545 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vdupq_n_s16(int16_t __p0) { int16x8_t __ret; __ret = (int16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 6560 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vdup_n_u8(uint8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 6575 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vdup_n_u32(uint32_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) {__p0, __p0}; return __ret; } # 6589 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t 
vdup_n_u64(uint64_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) {__p0}; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vdup_n_u16(uint16_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 6610 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vdup_n_s8(int8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 6625 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vdup_n_f32(float32_t __p0) { float32x2_t __ret; __ret = (float32x2_t) {__p0, __p0}; return __ret; } # 6657 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vdup_n_s32(int32_t __p0) { int32x2_t __ret; __ret = (int32x2_t) {__p0, __p0}; return __ret; } # 6671 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vdup_n_s64(int64_t __p0) { int64x1_t __ret; __ret = (int64x1_t) {__p0}; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vdup_n_s16(int16_t __p0) { int16x4_t __ret; __ret = (int16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 6692 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t veorq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6709 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) 
uint32x4_t veorq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6726 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t veorq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6743 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t veorq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6760 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t veorq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6777 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t veorq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6794 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t veorq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6811 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t veorq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6828 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t veor_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 ^ __p1; return 
__ret; } # 6845 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t veor_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6861 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t veor_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 ^ __p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t veor_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6884 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t veor_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6901 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t veor_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 ^ __p1; return __ret; } # 6917 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t veor_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 ^ __p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t veor_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 ^ __p1; return __ret; } # 7374 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vget_high_p8(poly8x16_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 
15); return __ret; } # 7395 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vget_high_p16(poly16x8_t __p0) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } # 7411 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vget_high_u8(uint8x16_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } # 7432 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vget_high_u32(uint32x4_t __p0) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 2, 3); return __ret; } # 7453 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vget_high_u64(uint64x2_t __p0) { uint64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1); return __ret; } # 7468 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vget_high_u16(uint16x8_t __p0) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } # 7489 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vget_high_s8(int8x16_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 8, 9, 10, 11, 12, 13, 14, 15); return __ret; } # 7510 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t 
vget_high_f32(float32x4_t __p0) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 2, 3); return __ret; } # 7531 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vget_high_f16(float16x8_t __p0) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } # 7552 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vget_high_s32(int32x4_t __p0) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 2, 3); return __ret; } # 7573 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vget_high_s64(int64x2_t __p0) { int64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1); return __ret; } # 7588 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vget_high_s16(int16x8_t __p0) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } # 8081 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vget_low_p8(poly8x16_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } # 8097 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vget_low_p16(poly16x8_t __p0) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } # 8113 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static 
__inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vget_low_u8(uint8x16_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } # 8129 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vget_low_u32(uint32x4_t __p0) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1); return __ret; } # 8145 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vget_low_u64(uint64x2_t __p0) { uint64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0); return __ret; } # 8160 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vget_low_u16(uint16x8_t __p0) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } # 8176 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vget_low_s8(int8x16_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } # 8192 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vget_low_f32(float32x4_t __p0) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1); return __ret; } # 8208 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vget_low_f16(float16x8_t __p0) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } # 8224 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vget_low_s32(int32x4_t __p0) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1); return __ret; } # 8240 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vget_low_s64(int64x2_t __p0) { int64x1_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0); return __ret; } # 8255 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vget_low_s16(int16x8_t __p0) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } # 8271 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 8288 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 8305 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 8322 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int8x16_t vhaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 8339 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vhaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 8356 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vhaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 8373 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 8390 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 8407 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 8424 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int8x8_t vhadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 8441 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vhadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 8458 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vhadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 8475 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vhsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 8492 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vhsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 8509 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vhsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 8526 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int8x16_t vhsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 8543 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vhsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 8560 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vhsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vhsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 8577 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vhsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 8594 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vhsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 8611 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vhsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 8628 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int8x8_t vhsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 8645 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vhsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 8662 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vhsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vhsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 13973 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vmaxq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 13990 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmaxq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 14007 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmaxq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 14024 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int8x16_t vmaxq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 14041 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vmaxq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } # 14058 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmaxq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 14075 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmaxq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vmaxq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 14092 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 14109 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmax_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 14126 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) uint16x4_t vmax_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 14143 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vmax_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 14160 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vmax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 14177 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmax_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 14194 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vmax_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vmax_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 14211 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vminq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 14228 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) uint32x4_t vminq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 14245 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vminq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 14262 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vminq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 14279 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vminq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } # 14296 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vminq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 14313 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vminq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vminq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 14330 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static 
__inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vmin_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 14347 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmin_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 14364 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vmin_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 14381 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vmin_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 14398 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vmin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 14415 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmin_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 14432 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int16x4_t vmin_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vmin_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 14449 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vmlaq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14467 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmlaq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14485 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmlaq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14503 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vmlaq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14521 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14539 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmlaq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } 
# 14557 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmlaq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14575 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vmla_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14593 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmla_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14611 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vmla_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14629 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vmla_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14647 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14665 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmla_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t 
__p2) { int32x2_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14683 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vmla_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = __p0 + __p1 * __p2; return __ret; } # 14941 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmlaq_n_u32(uint32x4_t __p0, uint32x4_t __p1, uint32_t __p2) { uint32x4_t __ret; __ret = __p0 + __p1 * (uint32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } # 14958 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmlaq_n_u16(uint16x8_t __p0, uint16x8_t __p1, uint16_t __p2) { uint16x8_t __ret; __ret = __p0 + __p1 * (uint16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; return __ret; } # 14975 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vmlaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; __ret = __p0 + __p1 * (float32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } # 14992 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmlaq_n_s32(int32x4_t __p0, int32x4_t __p1, int32_t __p2) { int32x4_t __ret; __ret = __p0 + __p1 * (int32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } # 15009 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmlaq_n_s16(int16x8_t __p0, int16x8_t __p1, int16_t __p2) { int16x8_t __ret; __ret = __p0 + __p1 * (int16x8_t) 
{__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; return __ret; } # 15026 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmla_n_u32(uint32x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint32x2_t __ret; __ret = __p0 + __p1 * (uint32x2_t) {__p2, __p2}; return __ret; } # 15043 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vmla_n_u16(uint16x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint16x4_t __ret; __ret = __p0 + __p1 * (uint16x4_t) {__p2, __p2, __p2, __p2}; return __ret; } # 15060 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vmla_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; __ret = __p0 + __p1 * (float32x2_t) {__p2, __p2}; return __ret; } # 15077 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmla_n_s32(int32x2_t __p0, int32x2_t __p1, int32_t __p2) { int32x2_t __ret; __ret = __p0 + __p1 * (int32x2_t) {__p2, __p2}; return __ret; } # 15094 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vmla_n_s16(int16x4_t __p0, int16x4_t __p1, int16_t __p2) { int16x4_t __ret; __ret = __p0 + __p1 * (int16x4_t) {__p2, __p2, __p2, __p2}; return __ret; } # 15111 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vmlsq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15129 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmlsq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15147 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmlsq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15165 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vmlsq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15183 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vmlsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15201 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmlsq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15219 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmlsq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15237 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vmls_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t 
__p2) { uint8x8_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15255 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmls_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15273 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vmls_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15291 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vmls_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15309 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vmls_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15327 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmls_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15345 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vmls_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = __p0 - __p1 * __p2; return __ret; } # 15603 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, 
__nodebug__)) uint32x4_t vmlsq_n_u32(uint32x4_t __p0, uint32x4_t __p1, uint32_t __p2) { uint32x4_t __ret; __ret = __p0 - __p1 * (uint32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } # 15620 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmlsq_n_u16(uint16x8_t __p0, uint16x8_t __p1, uint16_t __p2) { uint16x8_t __ret; __ret = __p0 - __p1 * (uint16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; return __ret; } # 15637 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vmlsq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; __ret = __p0 - __p1 * (float32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } # 15654 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmlsq_n_s32(int32x4_t __p0, int32x4_t __p1, int32_t __p2) { int32x4_t __ret; __ret = __p0 - __p1 * (int32x4_t) {__p2, __p2, __p2, __p2}; return __ret; } # 15671 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmlsq_n_s16(int16x8_t __p0, int16x8_t __p1, int16_t __p2) { int16x8_t __ret; __ret = __p0 - __p1 * (int16x8_t) {__p2, __p2, __p2, __p2, __p2, __p2, __p2, __p2}; return __ret; } # 15688 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmls_n_u32(uint32x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint32x2_t __ret; __ret = __p0 - __p1 * (uint32x2_t) {__p2, __p2}; return __ret; } # 15705 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) uint16x4_t vmls_n_u16(uint16x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint16x4_t __ret; __ret = __p0 - __p1 * (uint16x4_t) {__p2, __p2, __p2, __p2}; return __ret; } # 15722 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vmls_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; __ret = __p0 - __p1 * (float32x2_t) {__p2, __p2}; return __ret; } # 15739 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmls_n_s32(int32x2_t __p0, int32x2_t __p1, int32_t __p2) { int32x2_t __ret; __ret = __p0 - __p1 * (int32x2_t) {__p2, __p2}; return __ret; } # 15756 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vmls_n_s16(int16x4_t __p0, int16x4_t __p1, int16_t __p2) { int16x4_t __ret; __ret = __p0 - __p1 * (int16x4_t) {__p2, __p2, __p2, __p2}; return __ret; } # 15773 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vmov_n_p8(poly8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 15788 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vmov_n_p16(poly16_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 15803 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vmovq_n_p8(poly8_t __p0) { poly8x16_t __ret; __ret = 
(poly8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 15818 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vmovq_n_p16(poly16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 15833 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vmovq_n_u8(uint8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 15848 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmovq_n_u32(uint32_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 15863 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vmovq_n_u64(uint64_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) {__p0, __p0}; return __ret; } # 15878 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmovq_n_u16(uint16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 15893 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vmovq_n_s8(int8_t __p0) { int8x16_t __ret; __ret = (int8x16_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return 
__ret; } # 15908 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vmovq_n_f32(float32_t __p0) { float32x4_t __ret; __ret = (float32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 15940 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmovq_n_s32(int32_t __p0) { int32x4_t __ret; __ret = (int32x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 15955 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vmovq_n_s64(int64_t __p0) { int64x2_t __ret; __ret = (int64x2_t) {__p0, __p0}; return __ret; } # 15970 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmovq_n_s16(int16_t __p0) { int16x8_t __ret; __ret = (int16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 15985 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vmov_n_u8(uint8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 16000 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmov_n_u32(uint32_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) {__p0, __p0}; return __ret; } # 16014 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vmov_n_u64(uint64_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) {__p0}; return __ret; } static 
__inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vmov_n_u16(uint16_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 16035 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vmov_n_s8(int8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 16050 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vmov_n_f32(float32_t __p0) { float32x2_t __ret; __ret = (float32x2_t) {__p0, __p0}; return __ret; } # 16082 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmov_n_s32(int32_t __p0) { int32x2_t __ret; __ret = (int32x2_t) {__p0, __p0}; return __ret; } # 16096 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vmov_n_s64(int64_t __p0) { int64x1_t __ret; __ret = (int64x1_t) {__p0}; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vmov_n_s16(int16_t __p0) { int16x4_t __ret; __ret = (int16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 16117 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmovl_u8(uint8x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 49); return __ret; } # 16138 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vmovl_u32(uint32x2_t __p0) { uint64x2_t __ret; __ret = 
(uint64x2_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 51); return __ret; } # 16159 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmovl_u16(uint16x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 50); return __ret; } # 16180 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmovl_s8(int8x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 33); return __ret; } # 16201 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vmovl_s32(int32x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 35); return __ret; } # 16222 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmovl_s16(int16x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vmovl_v((int8x8_t)__p0, 34); return __ret; } # 16243 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 17); return __ret; } # 16264 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 18); return __ret; } # 16285 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) uint8x8_t vmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 16); return __ret; } # 16306 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vmovn_s32(int32x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 1); return __ret; } # 16327 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmovn_s64(int64x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 2); return __ret; } # 16348 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vmovn_s16(int16x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vmovn_v((int8x16_t)__p0, 0); return __ret; } # 16369 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vmulq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 * __p1; return __ret; } # 16386 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmulq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 * __p1; return __ret; } # 16403 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmulq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 * __p1; return __ret; } # 16420 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 
static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vmulq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 * __p1; return __ret; } # 16437 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vmulq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __p0 * __p1; return __ret; } # 16454 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmulq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 * __p1; return __ret; } # 16471 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmulq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 * __p1; return __ret; } # 16488 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vmul_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 * __p1; return __ret; } # 16505 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmul_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 * __p1; return __ret; } # 16522 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vmul_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 * __p1; return __ret; } # 16539 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vmul_s8(int8x8_t 
__p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 * __p1; return __ret; } # 16556 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vmul_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __p0 * __p1; return __ret; } # 16573 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmul_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 * __p1; return __ret; } # 16590 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vmul_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 * __p1; return __ret; } # 16607 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vmul_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vmul_v((int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } # 16624 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vmulq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16_t __ret; __ret = (poly8x16_t) __builtin_neon_vmulq_v((int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } # 16851 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmulq_n_u32(uint32x4_t __p0, uint32_t __p1) { uint32x4_t __ret; __ret = __p0 * (uint32x4_t) {__p1, __p1, __p1, __p1}; return __ret; } # 16867 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) uint16x8_t vmulq_n_u16(uint16x8_t __p0, uint16_t __p1) { uint16x8_t __ret; __ret = __p0 * (uint16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}; return __ret; } # 16883 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vmulq_n_f32(float32x4_t __p0, float32_t __p1) { float32x4_t __ret; __ret = __p0 * (float32x4_t) {__p1, __p1, __p1, __p1}; return __ret; } # 16899 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmulq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; __ret = __p0 * (int32x4_t) {__p1, __p1, __p1, __p1}; return __ret; } # 16915 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmulq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; __ret = __p0 * (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}; return __ret; } # 16931 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmul_n_u32(uint32x2_t __p0, uint32_t __p1) { uint32x2_t __ret; __ret = __p0 * (uint32x2_t) {__p1, __p1}; return __ret; } # 16947 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vmul_n_u16(uint16x4_t __p0, uint16_t __p1) { uint16x4_t __ret; __ret = __p0 * (uint16x4_t) {__p1, __p1, __p1, __p1}; return __ret; } # 16963 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vmul_n_f32(float32x2_t __p0, float32_t __p1) { float32x2_t 
__ret; __ret = __p0 * (float32x2_t) {__p1, __p1}; return __ret; } # 16979 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmul_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; __ret = __p0 * (int32x2_t) {__p1, __p1}; return __ret; } # 16995 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vmul_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; __ret = __p0 * (int16x4_t) {__p1, __p1, __p1, __p1}; return __ret; } # 17011 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vmull_p8(poly8x8_t __p0, poly8x8_t __p1) { poly16x8_t __ret; __ret = (poly16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 37); return __ret; } # 17033 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmull_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 49); return __ret; } # 17055 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vmull_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 51); return __ret; } # 17077 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmull_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 50); return __ret; } # 17099 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmull_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 33); return __ret; } # 17121 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 35); return __ret; } # 17143 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vmull_v((int8x8_t)__p0, (int8x8_t)__p1, 34); return __ret; } # 17249 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vmull_n_u32(uint32x2_t __p0, uint32_t __p1) { uint64x2_t __ret; __ret = vmull_u32(__p0, (uint32x2_t) {__p1, __p1}); return __ret; } # 17270 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmull_n_u16(uint16x4_t __p0, uint16_t __p1) { uint32x4_t __ret; __ret = vmull_u16(__p0, (uint16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } # 17291 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; __ret = vmull_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } # 17312 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 
3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; __ret = vmull_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } # 17333 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vmvn_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = ~__p0; return __ret; } # 17349 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vmvnq_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = ~__p0; return __ret; } # 17365 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vmvnq_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = ~__p0; return __ret; } # 17381 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmvnq_u32(uint32x4_t __p0) { uint32x4_t __ret; __ret = ~__p0; return __ret; } # 17397 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmvnq_u16(uint16x8_t __p0) { uint16x8_t __ret; __ret = ~__p0; return __ret; } # 17413 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vmvnq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = ~__p0; return __ret; } # 17429 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmvnq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = ~__p0; return __ret; } # 17445 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmvnq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = ~__p0; return __ret; } # 17461 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vmvn_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = ~__p0; return __ret; } # 17477 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vmvn_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = ~__p0; return __ret; } # 17493 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vmvn_u16(uint16x4_t __p0) { uint16x4_t __ret; __ret = ~__p0; return __ret; } # 17509 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vmvn_s8(int8x8_t __p0) { int8x8_t __ret; __ret = ~__p0; return __ret; } # 17525 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vmvn_s32(int32x2_t __p0) { int32x2_t __ret; __ret = ~__p0; return __ret; } # 17541 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vmvn_s16(int16x4_t __p0) { int16x4_t __ret; __ret = ~__p0; return __ret; } # 17557 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vnegq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = -__p0; return __ret; } # 17573 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vnegq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = -__p0; return __ret; } # 17589 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vnegq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = -__p0; return __ret; } # 17605 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vnegq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = -__p0; return __ret; } # 17621 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vneg_s8(int8x8_t __p0) { int8x8_t __ret; __ret = -__p0; return __ret; } # 17637 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vneg_f32(float32x2_t __p0) { float32x2_t __ret; __ret = -__p0; return __ret; } # 17653 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vneg_s32(int32x2_t __p0) { int32x2_t __ret; __ret = -__p0; return __ret; } # 17669 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vneg_s16(int16x4_t __p0) { int16x4_t __ret; __ret = -__p0; return __ret; } # 17685 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vornq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 | ~__p1; 
return __ret; } # 17702 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vornq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17719 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vornq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17736 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vornq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17753 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vornq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17770 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vornq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17787 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vornq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17804 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vornq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17821 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vorn_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17838 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vorn_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17854 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vorn_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 | ~__p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vorn_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17877 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vorn_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17894 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vorn_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17910 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vorn_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 | ~__p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vorn_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 | ~__p1; return __ret; } # 17933 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vorrq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 | __p1; return __ret; } # 17950 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vorrq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 | __p1; return __ret; } # 17967 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vorrq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 | __p1; return __ret; } # 17984 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vorrq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 | __p1; return __ret; } # 18001 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vorrq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 | __p1; return __ret; } # 18018 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vorrq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 | __p1; return __ret; } # 18035 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vorrq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 | __p1; return __ret; } # 18052 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 
static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vorrq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 | __p1; return __ret; } # 18069 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vorr_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 | __p1; return __ret; } # 18086 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vorr_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 | __p1; return __ret; } # 18102 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vorr_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 | __p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vorr_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 | __p1; return __ret; } # 18125 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vorr_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 | __p1; return __ret; } # 18142 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vorr_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 | __p1; return __ret; } # 18158 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vorr_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 | __p1; return __ret; } static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int16x4_t vorr_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 | __p1; return __ret; } # 18181 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vpadalq_u8(uint16x8_t __p0, uint8x16_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 18198 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vpadalq_u32(uint64x2_t __p0, uint32x4_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } # 18215 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vpadalq_u16(uint32x4_t __p0, uint16x8_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 18232 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vpadalq_s8(int16x8_t __p0, int8x16_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 18249 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vpadalq_s32(int64x2_t __p0, int32x4_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } # 18266 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, 
__nodebug__)) int32x4_t vpadalq_s16(int32x4_t __p0, int16x8_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vpadalq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 18283 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vpadal_u8(uint16x4_t __p0, uint8x8_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 18300 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vpadal_u32(uint64x1_t __p0, uint32x2_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } # 18315 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vpadal_u16(uint32x2_t __p0, uint16x4_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 18332 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vpadal_s8(int16x4_t __p0, int8x8_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 18349 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vpadal_s32(int64x1_t __p0, int32x2_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } # 18364 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int32x2_t vpadal_s16(int32x2_t __p0, int16x4_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vpadal_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 18381 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vpadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 18398 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vpadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 18415 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vpadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 18432 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vpadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 18449 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vpadd_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 18466 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int32x2_t vpadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 18483 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vpadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vpadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 18500 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vpaddlq_u8(uint8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 49); return __ret; } # 18516 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vpaddlq_u32(uint32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 51); return __ret; } # 18532 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vpaddlq_u16(uint16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 50); return __ret; } # 18548 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vpaddlq_s8(int8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 33); return __ret; } # 18564 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vpaddlq_s32(int32x4_t __p0) { int64x2_t __ret; __ret = 
(int64x2_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 35); return __ret; } # 18580 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vpaddlq_s16(int16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vpaddlq_v((int8x16_t)__p0, 34); return __ret; } # 18596 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vpaddl_u8(uint8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 17); return __ret; } # 18612 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vpaddl_u32(uint32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 19); return __ret; } # 18627 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vpaddl_u16(uint16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 18); return __ret; } # 18643 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vpaddl_s8(int8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 1); return __ret; } # 18659 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vpaddl_s32(int32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 3); return __ret; } # 18674 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int32x2_t vpaddl_s16(int16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vpaddl_v((int8x8_t)__p0, 2); return __ret; } # 18690 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vpmax_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 18707 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vpmax_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 18724 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vpmax_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 18741 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vpmax_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 18758 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vpmax_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 18775 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, 
__nodebug__)) int32x2_t vpmax_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 18792 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vpmax_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vpmax_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 18809 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vpmin_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 18826 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vpmin_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 18843 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vpmin_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 18860 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vpmin_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 18877 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) 
float32x2_t vpmin_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 18894 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vpmin_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 18911 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vpmin_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vpmin_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 18928 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vqabsq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 32); return __ret; } # 18944 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqabsq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 34); return __ret; } # 18960 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vqabsq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqabsq_v((int8x16_t)__p0, 33); return __ret; } # 18976 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vqabs_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) 
__builtin_neon_vqabs_v((int8x8_t)__p0, 0); return __ret; } # 18992 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vqabs_s32(int32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 2); return __ret; } # 19008 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vqabs_s16(int16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqabs_v((int8x8_t)__p0, 1); return __ret; } # 19024 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 19041 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 19058 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } # 19075 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return 
__ret; } # 19092 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vqaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 19109 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 19126 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vqaddq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } # 19143 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vqaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 19160 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vqadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 19177 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return 
__ret; } # 19193 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vqadd_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vqadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 19216 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vqadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 19233 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vqadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 19249 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vqadd_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vqadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 19272 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vqdmlal_s32(int64x2_t __p0, 
int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 35); return __ret; } # 19295 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqdmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmlal_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 34); return __ret; } # 19366 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vqdmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = vqdmlal_s32(__p0, __p1, (int32x2_t) {__p2, __p2}); return __ret; } # 19388 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqdmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = vqdmlal_s16(__p0, __p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } # 19410 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vqdmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 35); return __ret; } # 19433 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqdmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmlsl_v((int8x16_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 34); return __ret; } # 19504 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vqdmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = vqdmlsl_s32(__p0, __p1, (int32x2_t) {__p2, __p2}); return __ret; } # 19526 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqdmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = vqdmlsl_s16(__p0, __p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } # 19548 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 19570 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vqdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 19592 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vqdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 19614 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vqdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } 
# 19636 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; __ret = vqdmulhq_s32(__p0, (int32x4_t) {__p1, __p1, __p1, __p1}); return __ret; } # 19652 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vqdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; __ret = vqdmulhq_s16(__p0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); return __ret; } # 19668 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vqdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; __ret = vqdmulh_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } # 19684 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vqdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; __ret = vqdmulh_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } # 19700 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vqdmull_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)__p1, 35); return __ret; } # 19722 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqdmull_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqdmull_v((int8x8_t)__p0, (int8x8_t)__p1, 34); return __ret; } # 19786 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vqdmull_n_s32(int32x2_t __p0, int32_t __p1) { int64x2_t __ret; __ret = vqdmull_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } # 19807 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqdmull_n_s16(int16x4_t __p0, int16_t __p1) { int32x4_t __ret; __ret = vqdmull_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } # 19828 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vqmovn_u32(uint32x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 17); return __ret; } # 19849 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vqmovn_u64(uint64x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 18); return __ret; } # 19870 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vqmovn_u16(uint16x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 16); return __ret; } # 19891 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vqmovn_s32(int32x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 1); return __ret; } # 19912 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t 
vqmovn_s64(int64x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 2); return __ret; } # 19933 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vqmovn_s16(int16x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqmovn_v((int8x16_t)__p0, 0); return __ret; } # 19954 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vqmovun_s32(int32x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 17); return __ret; } # 19975 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vqmovun_s64(int64x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 18); return __ret; } # 19996 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vqmovun_s16(int16x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqmovun_v((int8x16_t)__p0, 16); return __ret; } # 20017 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vqnegq_s8(int8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 32); return __ret; } # 20033 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqnegq_s32(int32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 34); return __ret; } # 20049 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vqnegq_s16(int16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqnegq_v((int8x16_t)__p0, 33); return __ret; } # 20065 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vqneg_s8(int8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 0); return __ret; } # 20081 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vqneg_s32(int32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 2); return __ret; } # 20097 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vqneg_s16(int16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqneg_v((int8x8_t)__p0, 1); return __ret; } # 20113 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqrdmulhq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 20135 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vqrdmulhq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqrdmulhq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 20157 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int32x2_t vqrdmulh_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 20179 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vqrdmulh_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrdmulh_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 20201 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqrdmulhq_n_s32(int32x4_t __p0, int32_t __p1) { int32x4_t __ret; __ret = vqrdmulhq_s32(__p0, (int32x4_t) {__p1, __p1, __p1, __p1}); return __ret; } # 20217 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vqrdmulhq_n_s16(int16x8_t __p0, int16_t __p1) { int16x8_t __ret; __ret = vqrdmulhq_s16(__p0, (int16x8_t) {__p1, __p1, __p1, __p1, __p1, __p1, __p1, __p1}); return __ret; } # 20233 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vqrdmulh_n_s32(int32x2_t __p0, int32_t __p1) { int32x2_t __ret; __ret = vqrdmulh_s32(__p0, (int32x2_t) {__p1, __p1}); return __ret; } # 20249 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vqrdmulh_n_s16(int16x4_t __p0, int16_t __p1) { int16x4_t __ret; __ret = vqrdmulh_s16(__p0, (int16x4_t) {__p1, __p1, __p1, __p1}); return __ret; } # 20265 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, 
__nodebug__)) uint8x16_t vqrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 20282 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vqrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 20299 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vqrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } # 20316 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vqrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 20333 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vqrshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 20350 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqrshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 20367 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int64x2_t vqrshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } # 20384 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vqrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 20401 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vqrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 20418 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vqrshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 20434 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vqrshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vqrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 20457 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vqrshl_s8(int8x8_t __p0, 
int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 20474 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vqrshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 20490 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vqrshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vqrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 20729 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vqshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 20746 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vqshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 20763 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vqshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, 
(int8x16_t)__p1, 51); return __ret; } # 20780 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vqshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 20797 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vqshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 20814 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 20831 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vqshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } # 20848 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vqshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 20865 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vqshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) 
__builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 20882 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vqshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 20898 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vqshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vqshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 20921 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vqshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 20938 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vqshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 20954 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vqshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int16x4_t vqshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 21589 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vqsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 21606 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vqsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 21623 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vqsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } # 21640 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vqsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 21657 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vqsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 21674 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 
static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vqsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 21691 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vqsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } # 21708 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vqsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqsubq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 21725 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vqsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 21742 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vqsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 21758 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vqsub_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t 
vqsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 21781 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vqsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 21798 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vqsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 21814 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vqsub_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vqsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqsub_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 21837 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vraddhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } # 21859 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vraddhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) 
__builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } # 21881 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vraddhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } # 21903 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vraddhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } # 21925 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vraddhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } # 21947 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vraddhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vraddhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } # 21969 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vrecpeq_u32(uint32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vrecpeq_v((int8x16_t)__p0, 50); return __ret; } # 21985 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vrecpeq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) 
__builtin_neon_vrecpeq_v((int8x16_t)__p0, 41); return __ret; } # 22001 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vrecpe_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 18); return __ret; } # 22017 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vrecpe_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrecpe_v((int8x8_t)__p0, 9); return __ret; } # 22033 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vrecpsq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrecpsq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } # 22050 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vrecps_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrecps_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 22067 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vrev16_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } # 22083 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vrev16q_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); return __ret; 
} # 22099 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vrev16q_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); return __ret; } # 22115 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vrev16q_s8(int8x16_t __p0) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); return __ret; } # 22131 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vrev16_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } # 22147 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vrev16_s8(int8x8_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } # 22163 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vrev32_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } # 22179 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vrev32_p16(poly16x4_t __p0) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } # 22195 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) poly8x16_t vrev32q_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); return __ret; } # 22211 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vrev32q_p16(poly16x8_t __p0) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } # 22227 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vrev32q_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); return __ret; } # 22243 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vrev32q_u16(uint16x8_t __p0) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } # 22259 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vrev32q_s8(int8x16_t __p0) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); return __ret; } # 22275 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vrev32q_s16(int16x8_t __p0) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2, 5, 4, 7, 6); return __ret; } # 22291 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t 
vrev32_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } # 22307 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vrev32_u16(uint16x4_t __p0) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } # 22323 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vrev32_s8(int8x8_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } # 22339 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vrev32_s16(int16x4_t __p0) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } # 22355 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vrev64_p8(poly8x8_t __p0) { poly8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } # 22371 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vrev64_p16(poly16x4_t __p0) { poly16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); return __ret; } # 22387 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vrev64q_p8(poly8x16_t __p0) { poly8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); return __ret; } # 22403 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vrev64q_p16(poly16x8_t __p0) { poly16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } # 22419 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vrev64q_u8(uint8x16_t __p0) { uint8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); return __ret; } # 22435 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vrev64q_u32(uint32x4_t __p0) { uint32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } # 22451 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vrev64q_u16(uint16x8_t __p0) { uint16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } # 22467 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vrev64q_s8(int8x16_t __p0) { int8x16_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); return __ret; } # 22483 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vrev64q_f32(float32x4_t __p0) { float32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } # 22499 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int32x4_t vrev64q_s32(int32x4_t __p0) { int32x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0, 3, 2); return __ret; } # 22515 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vrev64q_s16(int16x8_t __p0) { int16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } # 22531 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vrev64_u8(uint8x8_t __p0) { uint8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } # 22547 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vrev64_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0); return __ret; } # 22563 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vrev64_u16(uint16x4_t __p0) { uint16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); return __ret; } # 22579 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vrev64_s8(int8x8_t __p0) { int8x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } # 22595 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vrev64_f32(float32x2_t __p0) { float32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0); return __ret; } # 22611 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vrev64_s32(int32x2_t __p0) { int32x2_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 1, 0); return __ret; } # 22627 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vrev64_s16(int16x4_t __p0) { int16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); return __ret; } # 22643 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vrhaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 22660 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vrhaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 22677 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vrhaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 22694 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vrhaddq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 22711 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vrhaddq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 22728 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vrhaddq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vrhaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 22745 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vrhadd_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 22762 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vrhadd_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 22779 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vrhadd_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 22796 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vrhadd_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 
22813 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vrhadd_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 22830 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vrhadd_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vrhadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 22847 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vrshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 22864 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vrshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 22881 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vrshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } # 22898 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vrshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); 
return __ret; } # 22915 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vrshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 22932 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vrshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 22949 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vrshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } # 22966 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vrshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vrshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 22983 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vrshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 23000 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vrshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); 
return __ret; } # 23016 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vrshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vrshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 23039 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vrshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 23056 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vrshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 23072 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vrshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vrshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vrshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 23503 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vrsqrteq_u32(uint32x4_t 
__p0) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 50); return __ret; } # 23519 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vrsqrteq_f32(float32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrsqrteq_v((int8x16_t)__p0, 41); return __ret; } # 23535 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vrsqrte_u32(uint32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 18); return __ret; } # 23551 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vrsqrte_f32(float32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrsqrte_v((int8x8_t)__p0, 9); return __ret; } # 23567 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vrsqrtsq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vrsqrtsq_v((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } # 23584 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vrsqrts_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vrsqrts_v((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 23909 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vrsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) 
__builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } # 23931 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vrsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } # 23953 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vrsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } # 23975 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vrsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } # 23997 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vrsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } # 24019 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vrsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vrsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } # 24595 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vshlq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; 
__ret = (uint8x16_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 24612 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vshlq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 24629 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vshlq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } # 24646 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vshlq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 24663 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vshlq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 24680 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vshlq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 24697 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vshlq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t 
__ret; __ret = (int64x2_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } # 24714 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vshlq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vshlq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 24731 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vshl_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 24748 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vshl_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 24764 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vshl_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vshl_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 24787 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vshl_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 24804 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vshl_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 24820 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vshl_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vshl_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vshl_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 29879 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vsubq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = __p0 - __p1; return __ret; } # 29896 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vsubq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = __p0 - __p1; return __ret; } # 29913 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vsubq_u64(uint64x2_t __p0, uint64x2_t __p1) { uint64x2_t __ret; __ret = __p0 - __p1; return __ret; } # 29930 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vsubq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = __p0 - __p1; return __ret; } # 29947 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vsubq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16_t __ret; __ret = __p0 - __p1; return __ret; } # 29964 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vsubq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = __p0 - __p1; return __ret; } # 29981 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vsubq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4_t __ret; __ret = __p0 - __p1; return __ret; } # 29998 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vsubq_s64(int64x2_t __p0, int64x2_t __p1) { int64x2_t __ret; __ret = __p0 - __p1; return __ret; } # 30015 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vsubq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8_t __ret; __ret = __p0 - __p1; return __ret; } # 30032 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vsub_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = __p0 - __p1; return __ret; } # 30049 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vsub_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = __p0 - __p1; return __ret; } # 30065 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 
static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vsub_u64(uint64x1_t __p0, uint64x1_t __p1) { uint64x1_t __ret; __ret = __p0 - __p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vsub_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = __p0 - __p1; return __ret; } # 30088 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vsub_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = __p0 - __p1; return __ret; } # 30105 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vsub_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = __p0 - __p1; return __ret; } # 30122 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vsub_s32(int32x2_t __p0, int32x2_t __p1) { int32x2_t __ret; __ret = __p0 - __p1; return __ret; } # 30138 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vsub_s64(int64x1_t __p0, int64x1_t __p1) { int64x1_t __ret; __ret = __p0 - __p1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vsub_s16(int16x4_t __p0, int16x4_t __p1) { int16x4_t __ret; __ret = __p0 - __p1; return __ret; } # 30161 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vsubhn_u32(uint32x4_t __p0, uint32x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 17); return __ret; } # 30183 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vsubhn_u64(uint64x2_t __p0, uint64x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 18); return __ret; } # 30205 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vsubhn_u16(uint16x8_t __p0, uint16x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 16); return __ret; } # 30227 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vsubhn_s32(int32x4_t __p0, int32x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 1); return __ret; } # 30249 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vsubhn_s64(int64x2_t __p0, int64x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 2); return __ret; } # 30271 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vsubhn_s16(int16x8_t __p0, int16x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vsubhn_v((int8x16_t)__p0, (int8x16_t)__p1, 0); return __ret; } # 30293 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vsubl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = vmovl_u8(__p0) - vmovl_u8(__p1); return __ret; } # 30310 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vsubl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = vmovl_u32(__p0) - vmovl_u32(__p1); return __ret; } # 30327 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vsubl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = vmovl_u16(__p0) - vmovl_u16(__p1); return __ret; } # 30344 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vsubl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = vmovl_s8(__p0) - vmovl_s8(__p1); return __ret; } # 30361 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vsubl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = vmovl_s32(__p0) - vmovl_s32(__p1); return __ret; } # 30378 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vsubl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = vmovl_s16(__p0) - vmovl_s16(__p1); return __ret; } # 30395 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vsubw_u8(uint16x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = __p0 - vmovl_u8(__p1); return __ret; } # 30412 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vsubw_u32(uint64x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = __p0 - 
vmovl_u32(__p1); return __ret; } # 30429 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vsubw_u16(uint32x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = __p0 - vmovl_u16(__p1); return __ret; } # 30446 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vsubw_s8(int16x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = __p0 - vmovl_s8(__p1); return __ret; } # 30463 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vsubw_s32(int64x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = __p0 - vmovl_s32(__p1); return __ret; } # 30480 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vsubw_s16(int32x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = __p0 - vmovl_s16(__p1); return __ret; } # 30497 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vtbl1_p8(poly8x8_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } # 30514 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtbl1_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 30531 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t 
vtbl1_s8(int8x8_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbl1_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 30548 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vtbl2_p8(poly8x8x2_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p1, 4); return __ret; } # 30567 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtbl2_u8(uint8x8x2_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p1, 16); return __ret; } # 30586 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vtbl2_s8(int8x8x2_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbl2_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p1, 0); return __ret; } # 30605 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vtbl3_p8(poly8x8x3_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p1, 4); return __ret; } # 30625 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtbl3_u8(uint8x8x3_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p1, 16); return __ret; } # 
30645 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vtbl3_s8(int8x8x3_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbl3_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p1, 0); return __ret; } # 30665 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vtbl4_p8(poly8x8x4_t __p0, uint8x8_t __p1) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p0.val[3], (int8x8_t)__p1, 4); return __ret; } # 30686 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtbl4_u8(uint8x8x4_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p0.val[3], (int8x8_t)__p1, 16); return __ret; } # 30707 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vtbl4_s8(int8x8x4_t __p0, int8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbl4_v((int8x8_t)__p0.val[0], (int8x8_t)__p0.val[1], (int8x8_t)__p0.val[2], (int8x8_t)__p0.val[3], (int8x8_t)__p1, 0); return __ret; } # 30728 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vtbx1_p8(poly8x8_t __p0, poly8x8_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 4); return __ret; } # 30746 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtbx1_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 16); return __ret; } # 30764 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vtbx1_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbx1_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 0); return __ret; } # 30782 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vtbx2_p8(poly8x8_t __p0, poly8x8x2_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p2, 4); return __ret; } # 30802 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtbx2_u8(uint8x8_t __p0, uint8x8x2_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p2, 16); return __ret; } # 30822 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vtbx2_s8(int8x8_t __p0, int8x8x2_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbx2_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p2, 0); return __ret; } # 30842 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) poly8x8_t vtbx3_p8(poly8x8_t __p0, poly8x8x3_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p2, 4); return __ret; } # 30863 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtbx3_u8(uint8x8_t __p0, uint8x8x3_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p2, 16); return __ret; } # 30884 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vtbx3_s8(int8x8_t __p0, int8x8x3_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbx3_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p2, 0); return __ret; } # 30905 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vtbx4_p8(poly8x8_t __p0, poly8x8x4_t __p1, uint8x8_t __p2) { poly8x8_t __ret; __ret = (poly8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p1.val[3], (int8x8_t)__p2, 4); return __ret; } # 30927 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtbx4_u8(uint8x8_t __p0, uint8x8x4_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p1.val[3], (int8x8_t)__p2, 16); return __ret; 
} # 30949 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vtbx4_s8(int8x8_t __p0, int8x8x4_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vtbx4_v((int8x8_t)__p0, (int8x8_t)__p1.val[0], (int8x8_t)__p1.val[1], (int8x8_t)__p1.val[2], (int8x8_t)__p1.val[3], (int8x8_t)__p2, 0); return __ret; } # 30971 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8x2_t vtrn_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } # 30990 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4x2_t vtrn_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 5); return __ret; } # 31009 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16x2_t vtrnq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } # 31028 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8x2_t vtrnq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 37); return __ret; } # 31047 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16x2_t vtrnq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; 
__builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 31066 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4x2_t vtrnq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 31085 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8x2_t vtrnq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 31104 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16x2_t vtrnq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 31123 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4x2_t vtrnq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } # 31142 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4x2_t vtrnq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; __builtin_neon_vtrnq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 31161 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8x2_t vtrnq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; __builtin_neon_vtrnq_v(&__ret, 
(int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 31180 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8x2_t vtrn_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 31199 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2x2_t vtrn_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 31218 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4x2_t vtrn_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 31237 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8x2_t vtrn_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 31256 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2x2_t vtrn_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 31275 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2x2_t vtrn_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 31294 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4x2_t vtrn_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; __builtin_neon_vtrn_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 31313 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtst_p8(poly8x8_t __p0, poly8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 31330 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vtst_p16(poly16x4_t __p0, poly16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 31347 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vtstq_p8(poly8x16_t __p0, poly8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 31364 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vtstq_p16(poly16x8_t __p0, poly16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 31381 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vtstq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 31398 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vtstq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 31415 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vtstq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 31432 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vtstq_s8(int8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 31449 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vtstq_s32(int32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 31466 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vtstq_s16(int16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vtstq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 31483 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtst_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 
31500 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vtst_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 31517 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vtst_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 31534 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vtst_s8(int8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 31551 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vtst_s32(int32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 31568 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vtst_s16(int16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vtst_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 31585 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8x2_t vuzp_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } # 31604 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4x2_t vuzp_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 5); return __ret; } # 31623 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16x2_t vuzpq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } # 31642 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8x2_t vuzpq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 37); return __ret; } # 31661 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16x2_t vuzpq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 31680 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4x2_t vuzpq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 31699 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8x2_t vuzpq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 31718 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16x2_t vuzpq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 31737 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4x2_t vuzpq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } # 31756 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4x2_t vuzpq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 31775 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8x2_t vuzpq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; __builtin_neon_vuzpq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 31794 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8x2_t vuzp_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 31813 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2x2_t vuzp_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 31832 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4x2_t vuzp_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 31851 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8x2_t vuzp_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 31870 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2x2_t vuzp_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 31889 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2x2_t vuzp_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 31908 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4x2_t vuzp_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; __builtin_neon_vuzp_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 31927 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8x2_t vzip_p8(poly8x8_t __p0, poly8x8_t __p1) { poly8x8x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 4); return __ret; } # 31946 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4x2_t vzip_p16(poly16x4_t __p0, poly16x4_t __p1) { poly16x4x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 5); return __ret; } # 31965 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16x2_t vzipq_p8(poly8x16_t __p0, poly8x16_t __p1) { poly8x16x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 36); return __ret; } # 31984 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8x2_t vzipq_p16(poly16x8_t __p0, poly16x8_t __p1) { poly16x8x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 37); return __ret; } # 32003 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16x2_t vzipq_u8(uint8x16_t __p0, uint8x16_t __p1) { uint8x16x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } # 32022 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4x2_t vzipq_u32(uint32x4_t __p0, uint32x4_t __p1) { uint32x4x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } # 32041 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8x2_t vzipq_u16(uint16x8_t __p0, uint16x8_t __p1) { uint16x8x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 32060 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16x2_t vzipq_s8(int8x16_t __p0, int8x16_t __p1) { int8x16x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } # 32079 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4x2_t vzipq_f32(float32x4_t __p0, float32x4_t __p1) { float32x4x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } # 32098 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4x2_t vzipq_s32(int32x4_t __p0, int32x4_t __p1) { int32x4x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } # 32117 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8x2_t vzipq_s16(int16x8_t __p0, int16x8_t __p1) { int16x8x2_t __ret; __builtin_neon_vzipq_v(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } # 32136 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8x2_t vzip_u8(uint8x8_t __p0, uint8x8_t __p1) { uint8x8x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } # 32155 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2x2_t vzip_u32(uint32x2_t __p0, uint32x2_t __p1) { uint32x2x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } # 32174 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4x2_t vzip_u16(uint16x4_t __p0, uint16x4_t __p1) { uint16x4x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 32193 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8x2_t vzip_s8(int8x8_t __p0, int8x8_t __p1) { int8x8x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } # 32212 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2x2_t vzip_f32(float32x2_t __p0, float32x2_t __p1) { float32x2x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 32231 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2x2_t vzip_s32(int32x2_t __p0, int32x2_t __p1) { int32x2x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } # 32250 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4x2_t vzip_s16(int16x4_t __p0, int16x4_t __p1) { int16x4x2_t __ret; __builtin_neon_vzip_v(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } # 32365 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32x4_t vbfdotq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfdotq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); 
return __ret; } # 32388 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32x2_t vbfdot_f32(float32x2_t __p0, bfloat16x4_t __p1, bfloat16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vbfdot_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } # 32523 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32x4_t vbfmlalbq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfmlalbq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } # 32546 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32x4_t vbfmlaltq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfmlaltq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } # 32569 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32x4_t vbfmmlaq_f32(float32x4_t __p0, bfloat16x8_t __p1, bfloat16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vbfmmlaq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } # 32587 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vcombine_bf16(bfloat16x4_t __p0, bfloat16x4_t __p1) { bfloat16x8_t __ret; __ret = 
__builtin_shufflevector(__p0, __p1, 0, 1, 2, 3, 4, 5, 6, 7); return __ret; } # 32615 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_134) { float32x4_t __ret_134; bfloat16x4_t __reint_134 = __p0_134; int32x4_t __reint1_134 = __extension__ ({ int32x4_t __ret; int16x4_t __s0 = *(int16x4_t *) &__reint_134; __ret = (int32x4_t) __builtin_neon_vshll_n_v((int8x8_t)__s0, 16, 34); __ret; }); __ret_134 = *(float32x4_t *) &__reint1_134; return __ret_134; } # 32641 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32_t vcvtah_f32_bf16(bfloat16_t __p0) { float32_t __ret; bfloat16_t __reint = __p0; int32_t __reint1 = *(int32_t *) &__reint << 16; __ret = *(float32_t *) &__reint1; return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16_t vcvth_bf16_f32(float32_t __p0) { bfloat16_t __ret; __ret = (bfloat16_t) __builtin_neon_vcvth_bf16_f32(__p0); return __ret; } # 32760 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vdupq_n_bf16(bfloat16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t) {__p0, __p0, __p0, __p0, __p0, __p0, __p0, __p0}; return __ret; } # 32775 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vdup_n_bf16(bfloat16_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t) {__p0, __p0, __p0, __p0}; return __ret; } # 32790 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vget_high_bf16(bfloat16x8_t __p0) { bfloat16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 4, 5, 6, 7); return __ret; } # 32857 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vget_low_bf16(bfloat16x8_t __p0) { bfloat16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 0, 1, 2, 3); return __ret; } # 33836 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("dotprod"))) uint32x4_t vdotq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vdotq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } # 33859 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("dotprod"))) int32x4_t vdotq_s32(int32x4_t __p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vdotq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } # 33882 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("dotprod"))) uint32x2_t vdot_u32(uint32x2_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vdot_u32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 18); return __ret; } # 33905 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) __attribute__((target("dotprod"))) int32x2_t vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vdot_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } # 34040 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vabdq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vabdq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } # 34057 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vabd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vabd_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 34074 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vabsq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vabsq_f16((int8x16_t)__p0, 40); return __ret; } # 34090 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vabs_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vabs_f16((int8x8_t)__p0, 8); return __ret; } # 34106 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vaddq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; 
__ret = __p0 + __p1; return __ret; } # 34123 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vadd_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __p0 + __p1; return __ret; } # 34140 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vbslq_f16(uint16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vbslq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } # 34158 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vbsl_f16(uint16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vbsl_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } # 34176 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcageq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcageq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 34193 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcage_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcage_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 34210 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcagtq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcagtq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 34227 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcagt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcagt_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 34244 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcaleq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcaleq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 34261 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcale_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcale_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 34278 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcaltq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcaltq_f16((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } # 34295 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ 
__attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcalt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcalt_f16((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } # 34312 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vceqq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 == __p1); return __ret; } # 34329 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vceq_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 == __p1); return __ret; } # 34346 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vceqzq_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vceqzq_f16((int8x16_t)__p0, 49); return __ret; } # 34362 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vceqz_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vceqz_f16((int8x8_t)__p0, 17); return __ret; } # 34378 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcgeq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 >= __p1); return __ret; } # 34395 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcge_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 >= __p1); return __ret; } # 34412 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcgezq_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcgezq_f16((int8x16_t)__p0, 49); return __ret; } # 34428 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcgez_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcgez_f16((int8x8_t)__p0, 17); return __ret; } # 34444 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcgtq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 > __p1); return __ret; } # 34461 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcgt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 > __p1); return __ret; } # 34478 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcgtzq_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcgtzq_f16((int8x16_t)__p0, 49); return __ret; 
} # 34494 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcgtz_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcgtz_f16((int8x8_t)__p0, 17); return __ret; } # 34510 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcleq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 <= __p1); return __ret; } # 34527 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcle_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 <= __p1); return __ret; } # 34544 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vclezq_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vclezq_f16((int8x16_t)__p0, 49); return __ret; } # 34560 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vclez_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vclez_f16((int8x8_t)__p0, 17); return __ret; } # 34576 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcltq_f16(float16x8_t __p0, float16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0 < __p1); 
return __ret; } # 34593 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vclt_f16(float16x4_t __p0, float16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0 < __p1); return __ret; } # 34610 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcltzq_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcltzq_f16((int8x16_t)__p0, 49); return __ret; } # 34626 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcltz_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcltz_f16((int8x8_t)__p0, 17); return __ret; } # 34642 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vcvtq_f16_u16(uint16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcvtq_f16_u16((int8x16_t)__p0, 49); return __ret; } # 34658 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vcvtq_f16_s16(int16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcvtq_f16_s16((int8x16_t)__p0, 33); return __ret; } # 34674 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vcvt_f16_u16(uint16x4_t __p0) { float16x4_t __ret; __ret = 
(float16x4_t) __builtin_neon_vcvt_f16_u16((int8x8_t)__p0, 17); return __ret; } # 34690 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vcvt_f16_s16(int16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcvt_f16_s16((int8x8_t)__p0, 1); return __ret; } # 34850 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) int16x8_t vcvtq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vcvtq_s16_f16((int8x16_t)__p0, 33); return __ret; } # 34866 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) int16x4_t vcvt_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcvt_s16_f16((int8x8_t)__p0, 1); return __ret; } # 34882 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcvtq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcvtq_u16_f16((int8x16_t)__p0, 49); return __ret; } # 34898 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcvt_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcvt_u16_f16((int8x8_t)__p0, 17); return __ret; } # 34914 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) 
__attribute__((target("fullfp16"))) int16x8_t vcvtaq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vcvtaq_s16_f16((int8x16_t)__p0, 33); return __ret; } # 34930 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) int16x4_t vcvta_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcvta_s16_f16((int8x8_t)__p0, 1); return __ret; } # 34946 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcvtaq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcvtaq_u16_f16((int8x16_t)__p0, 49); return __ret; } # 34962 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcvta_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcvta_u16_f16((int8x8_t)__p0, 17); return __ret; } # 34978 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) int16x8_t vcvtmq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vcvtmq_s16_f16((int8x16_t)__p0, 33); return __ret; } # 34994 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) int16x4_t vcvtm_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcvtm_s16_f16((int8x8_t)__p0, 1); return __ret; } # 35010 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcvtmq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcvtmq_u16_f16((int8x16_t)__p0, 49); return __ret; } # 35026 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcvtm_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcvtm_u16_f16((int8x8_t)__p0, 17); return __ret; } # 35042 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) int16x8_t vcvtnq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vcvtnq_s16_f16((int8x16_t)__p0, 33); return __ret; } # 35058 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) int16x4_t vcvtn_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcvtn_s16_f16((int8x8_t)__p0, 1); return __ret; } # 35074 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcvtnq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcvtnq_u16_f16((int8x16_t)__p0, 49); return __ret; } # 35090 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcvtn_u16_f16(float16x4_t __p0) { uint16x4_t 
__ret; __ret = (uint16x4_t) __builtin_neon_vcvtn_u16_f16((int8x8_t)__p0, 17); return __ret; } # 35106 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) int16x8_t vcvtpq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vcvtpq_s16_f16((int8x16_t)__p0, 33); return __ret; } # 35122 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) int16x4_t vcvtp_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vcvtp_s16_f16((int8x8_t)__p0, 1); return __ret; } # 35138 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x8_t vcvtpq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vcvtpq_u16_f16((int8x16_t)__p0, 49); return __ret; } # 35154 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) uint16x4_t vcvtp_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vcvtp_u16_f16((int8x8_t)__p0, 17); return __ret; } # 35212 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vfmaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vfmaq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } # 35235 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 
3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vfma_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vfma_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } # 35258 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vfmsq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = vfmaq_f16(__p0, -__p1, __p2); return __ret; } # 35276 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vfms_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = vfma_f16(__p0, -__p1, __p2); return __ret; } # 35294 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vmaxq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vmaxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } # 35311 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vmax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vmax_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 35328 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vminq_f16(float16x8_t 
__p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vminq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } # 35345 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vmin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vmin_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 35362 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vmulq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __p0 * __p1; return __ret; } # 35379 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vmul_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __p0 * __p1; return __ret; } # 35478 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vnegq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = -__p0; return __ret; } # 35494 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vneg_f16(float16x4_t __p0) { float16x4_t __ret; __ret = -__p0; return __ret; } # 35510 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vpadd_f16(float16x4_t __p0, float16x4_t __p1) { 
float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vpadd_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 35527 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vpmax_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vpmax_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 35544 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vpmin_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vpmin_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 35561 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vrecpeq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrecpeq_f16((int8x16_t)__p0, 40); return __ret; } # 35577 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vrecpe_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrecpe_f16((int8x8_t)__p0, 8); return __ret; } # 35593 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vrecpsq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrecpsq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } # 35610 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vrecps_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrecps_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 35627 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vrev64q_f16(float16x8_t __p0) { float16x8_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0, 7, 6, 5, 4); return __ret; } # 35643 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vrev64_f16(float16x4_t __p0) { float16x4_t __ret; __ret = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); return __ret; } # 35659 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vrsqrteq_f16(float16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrsqrteq_f16((int8x16_t)__p0, 40); return __ret; } # 35675 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vrsqrte_f16(float16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrsqrte_f16((int8x8_t)__p0, 8); return __ret; } # 35691 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vrsqrtsq_f16(float16x8_t __p0, 
float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vrsqrtsq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } # 35708 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vrsqrts_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vrsqrts_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 35725 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8_t vsubq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = __p0 - __p1; return __ret; } # 35742 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4_t vsub_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = __p0 - __p1; return __ret; } # 35759 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8x2_t vtrnq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; __builtin_neon_vtrnq_f16(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } # 35778 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4x2_t vtrn_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; __builtin_neon_vtrn_f16(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 35797 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static 
__inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8x2_t vuzpq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; __builtin_neon_vuzpq_f16(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } # 35816 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4x2_t vuzp_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; __builtin_neon_vuzp_f16(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 35835 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x8x2_t vzipq_f16(float16x8_t __p0, float16x8_t __p1) { float16x8x2_t __ret; __builtin_neon_vzipq_f16(&__ret, (int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } # 35854 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("fullfp16"))) float16x4x2_t vzip_f16(float16x4_t __p0, float16x4_t __p1) { float16x4x2_t __ret; __builtin_neon_vzip_f16(&__ret, (int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 35873 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("i8mm"))) uint32x4_t vmmlaq_u32(uint32x4_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vmmlaq_u32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 50); return __ret; } # 35891 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("i8mm"))) int32x4_t vmmlaq_s32(int32x4_t 
__p0, int8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vmmlaq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } # 35909 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("i8mm"))) int32x4_t vusdotq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vusdotq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } # 35932 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("i8mm"))) int32x2_t vusdot_s32(int32x2_t __p0, uint8x8_t __p1, int8x8_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vusdot_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } # 36007 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("i8mm"))) int32x4_t vusmmlaq_s32(int32x4_t __p0, uint8x16_t __p1, int8x16_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vusmmlaq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } # 36025 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.1a"))) int32x4_t vqrdmlahq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrdmlahq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } # 36048 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.1a"))) int16x8_t 
vqrdmlahq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqrdmlahq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); return __ret; } # 36071 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.1a"))) int32x2_t vqrdmlah_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrdmlah_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } # 36094 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.1a"))) int16x4_t vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrdmlah_s16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); return __ret; } # 36213 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.1a"))) int32x4_t vqrdmlshq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vqrdmlshq_s32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 34); return __ret; } # 36236 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.1a"))) int16x8_t vqrdmlshq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vqrdmlshq_s16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 33); return __ret; } # 36259 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) 
__attribute__((target("v8.1a"))) int32x2_t vqrdmlsh_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vqrdmlsh_s32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 2); return __ret; } # 36282 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.1a"))) int16x4_t vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vqrdmlsh_s16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 1); return __ret; } # 36401 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x2_t vcadd_rot270_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcadd_rot270_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 36418 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x2_t vcadd_rot90_f32(float32x2_t __p0, float32x2_t __p1) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcadd_rot90_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); return __ret; } # 36435 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x4_t vcaddq_rot270_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcaddq_rot270_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } # 36452 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) 
__attribute__((target("v8.3a"))) float32x4_t vcaddq_rot90_f32(float32x4_t __p0, float32x4_t __p1) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcaddq_rot90_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } # 36469 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x4_t vcmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } # 36492 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x2_t vcmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } # 36627 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x4_t vcmlaq_rot180_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_rot180_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } # 36650 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x2_t vcmla_rot180_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_rot180_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } # 36785 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 
static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x4_t vcmlaq_rot270_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_rot270_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } # 36808 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x2_t vcmla_rot270_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_rot270_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } # 36943 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x4_t vcmlaq_rot90_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcmlaq_rot90_f32((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } # 36966 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a"))) float32x2_t vcmla_rot90_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vcmla_rot90_f32((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } # 37101 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcadd_rot270_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcadd_rot270_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 
37118 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcadd_rot90_f16(float16x4_t __p0, float16x4_t __p1) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcadd_rot90_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); return __ret; } # 37135 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcaddq_rot270_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcaddq_rot270_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } # 37152 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcaddq_rot90_f16(float16x8_t __p0, float16x8_t __p1) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcaddq_rot90_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); return __ret; } # 37169 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } # 37192 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_f16((int8x8_t)__p0, (int8x8_t)__p1, 
(int8x8_t)__p2, 8); return __ret; } # 37327 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_rot180_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_rot180_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } # 37350 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_rot180_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_rot180_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } # 37485 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x8_t vcmlaq_rot270_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_rot270_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } # 37508 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_rot270_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_rot270_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } # 37643 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x8_t 
vcmlaq_rot90_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { float16x8_t __ret; __ret = (float16x8_t) __builtin_neon_vcmlaq_rot90_f16((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); return __ret; } # 37666 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("v8.3a,fullfp16"))) float16x4_t vcmla_rot90_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcmla_rot90_f16((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); return __ret; } # 37969 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_p16(poly16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_u8(uint8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_u32(uint32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_u64(uint64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_u16(uint16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_s8(int8x8_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_f32(float32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ 
__attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_f16(float16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_s32(int32x2_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_s64(int64x1_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vreinterpret_p16_p8(poly8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vreinterpret_p16_u8(uint8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vreinterpret_p16_u32(uint32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vreinterpret_p16_u64(uint64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vreinterpret_p16_u16(uint16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vreinterpret_p16_s8(int8x8_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vreinterpret_p16_f32(float32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) 
poly16x4_t vreinterpret_p16_f16(float16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vreinterpret_p16_s32(int32x2_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vreinterpret_p16_s64(int64x1_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vreinterpretq_p8_p16(poly16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vreinterpretq_p8_u8(uint8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vreinterpretq_p8_u32(uint32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vreinterpretq_p8_u64(uint64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vreinterpretq_p8_u16(uint16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vreinterpretq_p8_s8(int8x16_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vreinterpretq_p8_f32(float32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t 
vreinterpretq_p8_f16(float16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vreinterpretq_p8_s32(int32x4_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vreinterpretq_p8_s64(int64x2_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vreinterpretq_p16_p8(poly8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vreinterpretq_p16_u8(uint8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vreinterpretq_p16_u32(uint32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vreinterpretq_p16_u64(uint64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vreinterpretq_p16_u16(uint16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vreinterpretq_p16_s8(int8x16_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vreinterpretq_p16_f32(float32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t 
vreinterpretq_p16_f16(float16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vreinterpretq_p16_s32(int32x4_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vreinterpretq_p16_s64(int64x2_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vreinterpretq_u8_p8(poly8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vreinterpretq_u8_p16(poly16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vreinterpretq_u8_u32(uint32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vreinterpretq_u8_u64(uint64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vreinterpretq_u8_u16(uint16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vreinterpretq_u8_s8(int8x16_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vreinterpretq_u8_f32(float32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t 
vreinterpretq_u8_f16(float16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vreinterpretq_u8_s32(int32x4_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vreinterpretq_u8_s64(int64x2_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vreinterpretq_u32_p8(poly8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vreinterpretq_u32_p16(poly16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vreinterpretq_u32_u8(uint8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vreinterpretq_u32_u64(uint64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vreinterpretq_u32_u16(uint16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vreinterpretq_u32_s8(int8x16_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vreinterpretq_u32_f32(float32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t 
vreinterpretq_u32_f16(float16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vreinterpretq_u32_s32(int32x4_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vreinterpretq_u32_s64(int64x2_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vreinterpretq_u64_p8(poly8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vreinterpretq_u64_p16(poly16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vreinterpretq_u64_u8(uint8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vreinterpretq_u64_u32(uint32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vreinterpretq_u64_u16(uint16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vreinterpretq_u64_s8(int8x16_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vreinterpretq_u64_f32(float32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t 
vreinterpretq_u64_f16(float16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vreinterpretq_u64_s32(int32x4_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vreinterpretq_u64_s64(int64x2_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vreinterpretq_u16_p8(poly8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vreinterpretq_u16_p16(poly16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vreinterpretq_u16_u8(uint8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vreinterpretq_u16_u32(uint32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vreinterpretq_u16_u64(uint64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vreinterpretq_u16_s8(int8x16_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vreinterpretq_u16_f32(float32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t 
vreinterpretq_u16_f16(float16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vreinterpretq_u16_s32(int32x4_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vreinterpretq_u16_s64(int64x2_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_p8(poly8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_p16(poly16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_u8(uint8x16_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_u32(uint32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_u64(uint64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_u16(uint16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_f32(float32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_f16(float16x8_t __p0) { int8x16_t 
__ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_s32(int32x4_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_s64(int64x2_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_p8(poly8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_p16(poly16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_u8(uint8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_u32(uint32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_u64(uint64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_u16(uint16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_s8(int8x16_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_f16(float16x8_t __p0) { float32x4_t __ret; __ret = 
(float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_s32(int32x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_s64(int64x2_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_p8(poly8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_p16(poly16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_u8(uint8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_u32(uint32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_u64(uint64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_u16(uint16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_s8(int8x16_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_f32(float32x4_t __p0) { float16x8_t __ret; 
__ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_s32(int32x4_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_s64(int64x2_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_p8(poly8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_p16(poly16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_u8(uint8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_u32(uint32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_u64(uint64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_u16(uint16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_s8(int8x16_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_f32(float32x4_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return 
__ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_f16(float16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_s64(int64x2_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_p8(poly8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_p16(poly16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_u8(uint8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_u32(uint32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_u64(uint64x2_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_u16(uint16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_s8(int8x16_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_f32(float32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_f16(float16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_s32(int32x4_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vreinterpretq_s16_p16(poly16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vreinterpretq_s16_u8(uint8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vreinterpretq_s16_u32(uint32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vreinterpretq_s16_u64(uint64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vreinterpretq_s16_u16(uint16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vreinterpretq_s16_s8(int8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vreinterpretq_s16_f32(float32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) 
int16x8_t vreinterpretq_s16_f16(float16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vreinterpretq_s16_s32(int32x4_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vreinterpretq_s16_s64(int64x2_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_p16(poly16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_u32(uint32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_u64(uint64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_u16(uint16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_s8(int8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_f32(float32x2_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_f16(float16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_s32(int32x2_t __p0) { uint8x8_t __ret; __ret = 
(uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_s64(int64x1_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_p8(poly8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_p16(poly16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_u8(uint8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_u64(uint64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_u16(uint16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_s8(int8x8_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_f32(float32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_f16(float16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_s32(int32x2_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static 
__inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_s64(int64x1_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_p8(poly8x8_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_p16(poly16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_u8(uint8x8_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_u32(uint32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_u16(uint16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_s8(int8x8_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_f32(float32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_f16(float16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_s32(int32x2_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ 
__attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_s64(int64x1_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vreinterpret_u16_p8(poly8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vreinterpret_u16_p16(poly16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vreinterpret_u16_u8(uint8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vreinterpret_u16_u32(uint32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vreinterpret_u16_u64(uint64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vreinterpret_u16_s8(int8x8_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vreinterpret_u16_f32(float32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vreinterpret_u16_f16(float16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vreinterpret_u16_s32(int32x2_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, 
__nodebug__)) uint16x4_t vreinterpret_u16_s64(int64x1_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_p8(poly8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_p16(poly16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_u8(uint8x8_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_u32(uint32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_u64(uint64x1_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_u16(uint16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_f32(float32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_f16(float16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_s32(int32x2_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_s64(int64x1_t __p0) { int8x8_t __ret; __ret = 
(int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_p8(poly8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_p16(poly16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_u8(uint8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_u32(uint32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_u64(uint64x1_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_u16(uint16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_s8(int8x8_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_f16(float16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_s32(int32x2_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_s64(int64x1_t __p0) { float32x2_t __ret; __ret = 
(float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_p8(poly8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_p16(poly16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_u8(uint8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_u32(uint32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_u64(uint64x1_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_u16(uint16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_s8(int8x8_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_f32(float32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_s32(int32x2_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_s64(int64x1_t __p0) { float16x4_t __ret; __ret = 
(float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vreinterpret_f16_s16(int16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_p8(poly8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_p16(poly16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_u8(uint8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_u32(uint32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_u64(uint64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_u16(uint16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_s8(int8x8_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_f32(float32x2_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_f16(float16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_s64(int64x1_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ 
__attribute__((__always_inline__, __nodebug__)) int32x2_t vreinterpret_s32_s16(int16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vreinterpret_s64_p8(poly8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vreinterpret_s64_p16(poly16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vreinterpret_s64_u8(uint8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vreinterpret_s64_u32(uint32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vreinterpret_s64_u64(uint64x1_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vreinterpret_s64_u16(uint16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vreinterpret_s64_s8(int8x8_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vreinterpret_s64_f32(float32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vreinterpret_s64_f16(float16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t vreinterpret_s64_s32(int32x2_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x1_t 
vreinterpret_s64_s16(int16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_p16(poly16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_u8(uint8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_u32(uint32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_u64(uint64x1_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_u16(uint16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_s8(int8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_f32(float32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_f16(float16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_s32(int32x2_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vreinterpret_s16_s64(int64x1_t __p0) { int16x4_t __ret; __ret = 
(int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t __a32_vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t) __builtin_neon___a32_vcvt_bf16_f32((int8x16_t)__p0, 11); return __ret; } # 39311 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; __ret = __a32_vcvt_bf16_f32(__p0); return __ret; } # 39327 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloat16x8_t __p0, float32x4_t __p1) { bfloat16x8_t __ret; __ret = vcombine_bf16(__a32_vcvt_bf16_f32(__p1), vget_low_bf16(__p0)); return __ret; } # 39344 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; __ret = vcombine_bf16((bfloat16x4_t)(0ULL), __a32_vcvt_bf16_f32(__p0)); return __ret; } # 39359 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) poly8x8_t vreinterpret_p8_bf16(bfloat16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) poly64x1_t vreinterpret_p64_bf16(bfloat16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) poly16x4_t 
vreinterpret_p16_bf16(bfloat16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) poly8x16_t vreinterpretq_p8_bf16(bfloat16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) poly64x2_t vreinterpretq_p64_bf16(bfloat16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) poly16x8_t vreinterpretq_p16_bf16(bfloat16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) uint8x16_t vreinterpretq_u8_bf16(bfloat16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) uint32x4_t vreinterpretq_u32_bf16(bfloat16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) uint64x2_t vreinterpretq_u64_bf16(bfloat16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) uint16x8_t vreinterpretq_u16_bf16(bfloat16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) int8x16_t vreinterpretq_s8_bf16(bfloat16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32x4_t vreinterpretq_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; __ret = 
(float32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float16x8_t vreinterpretq_f16_bf16(bfloat16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) int32x4_t vreinterpretq_s32_bf16(bfloat16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) int64x2_t vreinterpretq_s64_bf16(bfloat16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) int16x8_t vreinterpretq_s16_bf16(bfloat16x8_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) uint8x8_t vreinterpret_u8_bf16(bfloat16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) uint32x2_t vreinterpret_u32_bf16(bfloat16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) uint64x1_t vreinterpret_u64_bf16(bfloat16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) uint16x4_t vreinterpret_u16_bf16(bfloat16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) int8x8_t vreinterpret_s8_bf16(bfloat16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); return __ret; } static __inline__ 
__attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32x2_t vreinterpret_f32_bf16(bfloat16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float16x4_t vreinterpret_f16_bf16(bfloat16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) int32x2_t vreinterpret_s32_bf16(bfloat16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) int64x1_t vreinterpret_s64_bf16(bfloat16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) int16x4_t vreinterpret_s16_bf16(bfloat16x4_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_p8(poly8x16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_p64(poly64x2_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_p16(poly16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u8(uint8x16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) 
__attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u32(uint32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u64(uint64x2_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_u16(uint16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s8(int8x16_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_f16(float16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s32(int32x4_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s64(int64x2_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x8_t vreinterpretq_bf16_s16(int16x8_t __p0) { bfloat16x8_t __ret; __ret = (bfloat16x8_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) 
bfloat16x4_t vreinterpret_bf16_p8(poly8x8_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_p64(poly64x1_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_p16(poly16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u8(uint8x8_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u32(uint32x2_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u64(uint64x1_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_u16(uint16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s8(int8x8_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_f32(float32x2_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t 
vreinterpret_bf16_f16(float16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s32(int32x2_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s64(int64x1_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) bfloat16x4_t vreinterpret_bf16_s16(int16x4_t __p0) { bfloat16x4_t __ret; __ret = (bfloat16x4_t)(__p0); return __ret; } static __inline__ __attribute__((__always_inline__, __nodebug__)) float16x4_t vcvt_f16_f32(float32x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vcvt_f16_f32((int8x16_t)__p0, 41); return __ret; } # 39643 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vcvt_f32_f16(float16x4_t __p0) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vcvt_f32_f16((int8x8_t)__p0, 8); return __ret; } # 41617 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vfmaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } # 41640 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vfma_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfma_v((int8x8_t)__p0, 
(int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } # 41663 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vfmaq_n_f32(float32x4_t __p0, float32x4_t __p1, float32_t __p2) { float32x4_t __ret; __ret = vfmaq_f32(__p0, __p1, (float32x4_t) {__p2, __p2, __p2, __p2}); return __ret; } # 41680 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vfma_n_f32(float32x2_t __p0, float32x2_t __p1, float32_t __p2) { float32x2_t __ret; __ret = vfma_f32(__p0, __p1, (float32x2_t) {__p2, __p2}); return __ret; } # 41697 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x4_t vfmsq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { float32x4_t __ret; __ret = vfmaq_f32(__p0, -__p1, __p2); return __ret; } # 41715 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) float32x2_t vfms_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { float32x2_t __ret; __ret = vfma_f32(__p0, -__p1, __p2); return __ret; } # 66733 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x16_t vabaq_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { uint8x16_t __ret; __ret = __p0 + vabdq_u8(__p1, __p2); return __ret; } # 66751 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vabaq_u32(uint32x4_t __p0, uint32x4_t __p1, uint32x4_t __p2) { uint32x4_t __ret; __ret = __p0 + vabdq_u32(__p1, __p2); return __ret; } # 66769 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vabaq_u16(uint16x8_t __p0, uint16x8_t __p1, uint16x8_t __p2) { uint16x8_t __ret; __ret = __p0 + vabdq_u16(__p1, __p2); return __ret; } # 66787 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x16_t vabaq_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { int8x16_t __ret; __ret = __p0 + vabdq_s8(__p1, __p2); return __ret; } # 66805 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vabaq_s32(int32x4_t __p0, int32x4_t __p1, int32x4_t __p2) { int32x4_t __ret; __ret = __p0 + vabdq_s32(__p1, __p2); return __ret; } # 66823 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vabaq_s16(int16x8_t __p0, int16x8_t __p1, int16x8_t __p2) { int16x8_t __ret; __ret = __p0 + vabdq_s16(__p1, __p2); return __ret; } # 66841 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint8x8_t vaba_u8(uint8x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint8x8_t __ret; __ret = __p0 + vabd_u8(__p1, __p2); return __ret; } # 66859 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x2_t vaba_u32(uint32x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint32x2_t __ret; __ret = __p0 + vabd_u32(__p1, __p2); return __ret; } # 66877 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x4_t 
vaba_u16(uint16x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint16x4_t __ret; __ret = __p0 + vabd_u16(__p1, __p2); return __ret; } # 66895 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int8x8_t vaba_s8(int8x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int8x8_t __ret; __ret = __p0 + vabd_s8(__p1, __p2); return __ret; } # 66913 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x2_t vaba_s32(int32x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int32x2_t __ret; __ret = __p0 + vabd_s32(__p1, __p2); return __ret; } # 66931 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x4_t vaba_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int16x4_t __ret; __ret = __p0 + vabd_s16(__p1, __p2); return __ret; } # 66949 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vabdl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t)(vmovl_u8((uint8x8_t)(vabd_u8(__p0, __p1)))); return __ret; } # 66971 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vabdl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t)(vmovl_u32((uint32x2_t)(vabd_u32(__p0, __p1)))); return __ret; } # 66993 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vabdl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t)(vmovl_u16((uint16x4_t)(vabd_u16(__p0, __p1)))); return __ret; } # 
67015 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vabdl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t)(vmovl_u8((uint8x8_t)(vabd_s8(__p0, __p1)))); return __ret; } # 67037 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vabdl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t)(vmovl_u32((uint32x2_t)(vabd_s32(__p0, __p1)))); return __ret; } # 67059 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vabdl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t)(vmovl_u16((uint16x4_t)(vabd_s16(__p0, __p1)))); return __ret; } # 67081 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vaddl_u8(uint8x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = vmovl_u8(__p0) + vmovl_u8(__p1); return __ret; } # 67098 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vaddl_u32(uint32x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = vmovl_u32(__p0) + vmovl_u32(__p1); return __ret; } # 67115 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vaddl_u16(uint16x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = vmovl_u16(__p0) + vmovl_u16(__p1); return __ret; } # 67132 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) 
int16x8_t vaddl_s8(int8x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = vmovl_s8(__p0) + vmovl_s8(__p1); return __ret; } # 67149 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vaddl_s32(int32x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = vmovl_s32(__p0) + vmovl_s32(__p1); return __ret; } # 67166 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vaddl_s16(int16x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = vmovl_s16(__p0) + vmovl_s16(__p1); return __ret; } # 67183 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vaddw_u8(uint16x8_t __p0, uint8x8_t __p1) { uint16x8_t __ret; __ret = __p0 + vmovl_u8(__p1); return __ret; } # 67200 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vaddw_u32(uint64x2_t __p0, uint32x2_t __p1) { uint64x2_t __ret; __ret = __p0 + vmovl_u32(__p1); return __ret; } # 67217 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vaddw_u16(uint32x4_t __p0, uint16x4_t __p1) { uint32x4_t __ret; __ret = __p0 + vmovl_u16(__p1); return __ret; } # 67234 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vaddw_s8(int16x8_t __p0, int8x8_t __p1) { int16x8_t __ret; __ret = __p0 + vmovl_s8(__p1); return __ret; } # 67251 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, 
__nodebug__)) int64x2_t vaddw_s32(int64x2_t __p0, int32x2_t __p1) { int64x2_t __ret; __ret = __p0 + vmovl_s32(__p1); return __ret; } # 67268 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vaddw_s16(int32x4_t __p0, int16x4_t __p1) { int32x4_t __ret; __ret = __p0 + vmovl_s16(__p1); return __ret; } # 67343 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmlal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; __ret = __p0 + vmull_u8(__p1, __p2); return __ret; } # 67366 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vmlal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; __ret = __p0 + vmull_u32(__p1, __p2); return __ret; } # 67389 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmlal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; __ret = __p0 + vmull_u16(__p1, __p2); return __ret; } # 67412 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmlal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; __ret = __p0 + vmull_s8(__p1, __p2); return __ret; } # 67435 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vmlal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = __p0 + vmull_s32(__p1, __p2); return __ret; } # 67458 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = __p0 + vmull_s16(__p1, __p2); return __ret; } # 67577 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vmlal_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint64x2_t __ret; __ret = __p0 + vmull_u32(__p1, (uint32x2_t) {__p2, __p2}); return __ret; } # 67599 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmlal_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint32x4_t __ret; __ret = __p0 + vmull_u16(__p1, (uint16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } # 67621 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vmlal_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = __p0 + vmull_s32(__p1, (int32x2_t) {__p2, __p2}); return __ret; } # 67643 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmlal_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = __p0 + vmull_s16(__p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } # 67665 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vmlsl_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; __ret = __p0 - vmull_u8(__p1, __p2); return __ret; } # 67688 
"../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vmlsl_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; __ret = __p0 - vmull_u32(__p1, __p2); return __ret; } # 67711 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vmlsl_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; __ret = __p0 - vmull_u16(__p1, __p2); return __ret; } # 67734 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vmlsl_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; __ret = __p0 - vmull_s8(__p1, __p2); return __ret; } # 67757 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vmlsl_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = __p0 - vmull_s32(__p1, __p2); return __ret; } # 67780 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = __p0 - vmull_s16(__p1, __p2); return __ret; } # 67899 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t vmlsl_n_u32(uint64x2_t __p0, uint32x2_t __p1, uint32_t __p2) { uint64x2_t __ret; __ret = __p0 - vmull_u32(__p1, (uint32x2_t) {__p2, __p2}); return __ret; } # 67921 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, 
__nodebug__)) uint32x4_t vmlsl_n_u16(uint32x4_t __p0, uint16x4_t __p1, uint16_t __p2) { uint32x4_t __ret; __ret = __p0 - vmull_u16(__p1, (uint16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } # 67943 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vmlsl_n_s32(int64x2_t __p0, int32x2_t __p1, int32_t __p2) { int64x2_t __ret; __ret = __p0 - vmull_s32(__p1, (int32x2_t) {__p2, __p2}); return __ret; } # 67965 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2) { int32x4_t __ret; __ret = __p0 - vmull_s16(__p1, (int16x4_t) {__p2, __p2, __p2, __p2}); return __ret; } # 68135 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32x4_t vcvtq_high_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; __ret = vcvt_f32_bf16(vget_high_bf16(__p0)); return __ret; } # 68151 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) __attribute__((target("bf16"))) float32x4_t vcvtq_low_f32_bf16(bfloat16x8_t __p0) { float32x4_t __ret; __ret = vcvt_f32_bf16(vget_low_bf16(__p0)); return __ret; } # 69470 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint16x8_t vabal_u8(uint16x8_t __p0, uint8x8_t __p1, uint8x8_t __p2) { uint16x8_t __ret; __ret = __p0 + vabdl_u8(__p1, __p2); return __ret; } # 69493 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint64x2_t 
vabal_u32(uint64x2_t __p0, uint32x2_t __p1, uint32x2_t __p2) { uint64x2_t __ret; __ret = __p0 + vabdl_u32(__p1, __p2); return __ret; } # 69516 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) uint32x4_t vabal_u16(uint32x4_t __p0, uint16x4_t __p1, uint16x4_t __p2) { uint32x4_t __ret; __ret = __p0 + vabdl_u16(__p1, __p2); return __ret; } # 69539 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int16x8_t vabal_s8(int16x8_t __p0, int8x8_t __p1, int8x8_t __p2) { int16x8_t __ret; __ret = __p0 + vabdl_s8(__p1, __p2); return __ret; } # 69562 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int64x2_t vabal_s32(int64x2_t __p0, int32x2_t __p1, int32x2_t __p2) { int64x2_t __ret; __ret = __p0 + vabdl_s32(__p1, __p2); return __ret; } # 69585 "../ndk/prebuilts/clang/host/linux-x86/clang-r487747b/lib/clang/17/include/arm_neon.h" 3 static __inline__ __attribute__((__always_inline__, __nodebug__)) int32x4_t vabal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2) { int32x4_t __ret; __ret = __p0 + vabdl_s16(__p1, __p2); return __ret; } # 293 "ggml.c" 2 # 453 "ggml.c" static ggml_fp16_t table_gelu_f16[1 << 16]; static ggml_fp16_t table_gelu_quick_f16[1 << 16]; static ggml_fp16_t table_silu_f16[1 << 16]; static ggml_fp16_t table_exp_f16[1 << 16]; static float table_f32_f16[1 << 16]; # 478 "ggml.c" static const uint64_t table_b2b_0[1 << 8] = { 0x0000000000000000 , 0x0000000000000010, 0x0000000000001000 , 0x0000000000001010, 0x0000000000100000 , 0x0000000000100010, 0x0000000000101000 , 0x0000000000101010, 0x0000000010000000 , 0x0000000010000010, 0x0000000010001000 , 0x0000000010001010, 0x0000000010100000 , 0x0000000010100010, 0x0000000010101000 , 0x0000000010101010, 
0x0000001000000000 , 0x0000001000000010, 0x0000001000001000 , 0x0000001000001010, 0x0000001000100000 , 0x0000001000100010, 0x0000001000101000 , 0x0000001000101010, 0x0000001010000000 , 0x0000001010000010, 0x0000001010001000 , 0x0000001010001010, 0x0000001010100000 , 0x0000001010100010, 0x0000001010101000 , 0x0000001010101010, 0x0000100000000000 , 0x0000100000000010, 0x0000100000001000 , 0x0000100000001010, 0x0000100000100000 , 0x0000100000100010, 0x0000100000101000 , 0x0000100000101010, 0x0000100010000000 , 0x0000100010000010, 0x0000100010001000 , 0x0000100010001010, 0x0000100010100000 , 0x0000100010100010, 0x0000100010101000 , 0x0000100010101010, 0x0000101000000000 , 0x0000101000000010, 0x0000101000001000 , 0x0000101000001010, 0x0000101000100000 , 0x0000101000100010, 0x0000101000101000 , 0x0000101000101010, 0x0000101010000000 , 0x0000101010000010, 0x0000101010001000 , 0x0000101010001010, 0x0000101010100000 , 0x0000101010100010, 0x0000101010101000 , 0x0000101010101010, 0x0010000000000000 , 0x0010000000000010, 0x0010000000001000 , 0x0010000000001010, 0x0010000000100000 , 0x0010000000100010, 0x0010000000101000 , 0x0010000000101010, 0x0010000010000000 , 0x0010000010000010, 0x0010000010001000 , 0x0010000010001010, 0x0010000010100000 , 0x0010000010100010, 0x0010000010101000 , 0x0010000010101010, 0x0010001000000000 , 0x0010001000000010, 0x0010001000001000 , 0x0010001000001010, 0x0010001000100000 , 0x0010001000100010, 0x0010001000101000 , 0x0010001000101010, 0x0010001010000000 , 0x0010001010000010, 0x0010001010001000 , 0x0010001010001010, 0x0010001010100000 , 0x0010001010100010, 0x0010001010101000 , 0x0010001010101010, 0x0010100000000000 , 0x0010100000000010, 0x0010100000001000 , 0x0010100000001010, 0x0010100000100000 , 0x0010100000100010, 0x0010100000101000 , 0x0010100000101010, 0x0010100010000000 , 0x0010100010000010, 0x0010100010001000 , 0x0010100010001010, 0x0010100010100000 , 0x0010100010100010, 0x0010100010101000 , 0x0010100010101010, 0x0010101000000000 , 
0x0010101000000010, 0x0010101000001000 , 0x0010101000001010, 0x0010101000100000 , 0x0010101000100010, 0x0010101000101000 , 0x0010101000101010, 0x0010101010000000 , 0x0010101010000010, 0x0010101010001000 , 0x0010101010001010, 0x0010101010100000 , 0x0010101010100010, 0x0010101010101000 , 0x0010101010101010, 0x1000000000000000 , 0x1000000000000010, 0x1000000000001000 , 0x1000000000001010, 0x1000000000100000 , 0x1000000000100010, 0x1000000000101000 , 0x1000000000101010, 0x1000000010000000 , 0x1000000010000010, 0x1000000010001000 , 0x1000000010001010, 0x1000000010100000 , 0x1000000010100010, 0x1000000010101000 , 0x1000000010101010, 0x1000001000000000 , 0x1000001000000010, 0x1000001000001000 , 0x1000001000001010, 0x1000001000100000 , 0x1000001000100010, 0x1000001000101000 , 0x1000001000101010, 0x1000001010000000 , 0x1000001010000010, 0x1000001010001000 , 0x1000001010001010, 0x1000001010100000 , 0x1000001010100010, 0x1000001010101000 , 0x1000001010101010, 0x1000100000000000 , 0x1000100000000010, 0x1000100000001000 , 0x1000100000001010, 0x1000100000100000 , 0x1000100000100010, 0x1000100000101000 , 0x1000100000101010, 0x1000100010000000 , 0x1000100010000010, 0x1000100010001000 , 0x1000100010001010, 0x1000100010100000 , 0x1000100010100010, 0x1000100010101000 , 0x1000100010101010, 0x1000101000000000 , 0x1000101000000010, 0x1000101000001000 , 0x1000101000001010, 0x1000101000100000 , 0x1000101000100010, 0x1000101000101000 , 0x1000101000101010, 0x1000101010000000 , 0x1000101010000010, 0x1000101010001000 , 0x1000101010001010, 0x1000101010100000 , 0x1000101010100010, 0x1000101010101000 , 0x1000101010101010, 0x1010000000000000 , 0x1010000000000010, 0x1010000000001000 , 0x1010000000001010, 0x1010000000100000 , 0x1010000000100010, 0x1010000000101000 , 0x1010000000101010, 0x1010000010000000 , 0x1010000010000010, 0x1010000010001000 , 0x1010000010001010, 0x1010000010100000 , 0x1010000010100010, 0x1010000010101000 , 0x1010000010101010, 0x1010001000000000 , 0x1010001000000010, 
0x1010001000001000 , 0x1010001000001010, 0x1010001000100000 , 0x1010001000100010, 0x1010001000101000 , 0x1010001000101010, 0x1010001010000000 , 0x1010001010000010, 0x1010001010001000 , 0x1010001010001010, 0x1010001010100000 , 0x1010001010100010, 0x1010001010101000 , 0x1010001010101010, 0x1010100000000000 , 0x1010100000000010, 0x1010100000001000 , 0x1010100000001010, 0x1010100000100000 , 0x1010100000100010, 0x1010100000101000 , 0x1010100000101010, 0x1010100010000000 , 0x1010100010000010, 0x1010100010001000 , 0x1010100010001010, 0x1010100010100000 , 0x1010100010100010, 0x1010100010101000 , 0x1010100010101010, 0x1010101000000000 , 0x1010101000000010, 0x1010101000001000 , 0x1010101000001010, 0x1010101000100000 , 0x1010101000100010, 0x1010101000101000 , 0x1010101000101010, 0x1010101010000000 , 0x1010101010000010, 0x1010101010001000 , 0x1010101010001010, 0x1010101010100000 , 0x1010101010100010, 0x1010101010101000 , 0x1010101010101010 }; static const uint64_t table_b2b_1[1 << 8] = { 0x1010101010101010 , 0x1010101010101000, 0x1010101010100010 , 0x1010101010100000, 0x1010101010001010 , 0x1010101010001000, 0x1010101010000010 , 0x1010101010000000, 0x1010101000101010 , 0x1010101000101000, 0x1010101000100010 , 0x1010101000100000, 0x1010101000001010 , 0x1010101000001000, 0x1010101000000010 , 0x1010101000000000, 0x1010100010101010 , 0x1010100010101000, 0x1010100010100010 , 0x1010100010100000, 0x1010100010001010 , 0x1010100010001000, 0x1010100010000010 , 0x1010100010000000, 0x1010100000101010 , 0x1010100000101000, 0x1010100000100010 , 0x1010100000100000, 0x1010100000001010 , 0x1010100000001000, 0x1010100000000010 , 0x1010100000000000, 0x1010001010101010 , 0x1010001010101000, 0x1010001010100010 , 0x1010001010100000, 0x1010001010001010 , 0x1010001010001000, 0x1010001010000010 , 0x1010001010000000, 0x1010001000101010 , 0x1010001000101000, 0x1010001000100010 , 0x1010001000100000, 0x1010001000001010 , 0x1010001000001000, 0x1010001000000010 , 0x1010001000000000, 0x1010000010101010 , 
0x1010000010101000, 0x1010000010100010 , 0x1010000010100000, 0x1010000010001010 , 0x1010000010001000, 0x1010000010000010 , 0x1010000010000000, 0x1010000000101010 , 0x1010000000101000, 0x1010000000100010 , 0x1010000000100000, 0x1010000000001010 , 0x1010000000001000, 0x1010000000000010 , 0x1010000000000000, 0x1000101010101010 , 0x1000101010101000, 0x1000101010100010 , 0x1000101010100000, 0x1000101010001010 , 0x1000101010001000, 0x1000101010000010 , 0x1000101010000000, 0x1000101000101010 , 0x1000101000101000, 0x1000101000100010 , 0x1000101000100000, 0x1000101000001010 , 0x1000101000001000, 0x1000101000000010 , 0x1000101000000000, 0x1000100010101010 , 0x1000100010101000, 0x1000100010100010 , 0x1000100010100000, 0x1000100010001010 , 0x1000100010001000, 0x1000100010000010 , 0x1000100010000000, 0x1000100000101010 , 0x1000100000101000, 0x1000100000100010 , 0x1000100000100000, 0x1000100000001010 , 0x1000100000001000, 0x1000100000000010 , 0x1000100000000000, 0x1000001010101010 , 0x1000001010101000, 0x1000001010100010 , 0x1000001010100000, 0x1000001010001010 , 0x1000001010001000, 0x1000001010000010 , 0x1000001010000000, 0x1000001000101010 , 0x1000001000101000, 0x1000001000100010 , 0x1000001000100000, 0x1000001000001010 , 0x1000001000001000, 0x1000001000000010 , 0x1000001000000000, 0x1000000010101010 , 0x1000000010101000, 0x1000000010100010 , 0x1000000010100000, 0x1000000010001010 , 0x1000000010001000, 0x1000000010000010 , 0x1000000010000000, 0x1000000000101010 , 0x1000000000101000, 0x1000000000100010 , 0x1000000000100000, 0x1000000000001010 , 0x1000000000001000, 0x1000000000000010 , 0x1000000000000000, 0x0010101010101010 , 0x0010101010101000, 0x0010101010100010 , 0x0010101010100000, 0x0010101010001010 , 0x0010101010001000, 0x0010101010000010 , 0x0010101010000000, 0x0010101000101010 , 0x0010101000101000, 0x0010101000100010 , 0x0010101000100000, 0x0010101000001010 , 0x0010101000001000, 0x0010101000000010 , 0x0010101000000000, 0x0010100010101010 , 0x0010100010101000, 
0x0010100010100010 , 0x0010100010100000, 0x0010100010001010 , 0x0010100010001000, 0x0010100010000010 , 0x0010100010000000, 0x0010100000101010 , 0x0010100000101000, 0x0010100000100010 , 0x0010100000100000, 0x0010100000001010 , 0x0010100000001000, 0x0010100000000010 , 0x0010100000000000, 0x0010001010101010 , 0x0010001010101000, 0x0010001010100010 , 0x0010001010100000, 0x0010001010001010 , 0x0010001010001000, 0x0010001010000010 , 0x0010001010000000, 0x0010001000101010 , 0x0010001000101000, 0x0010001000100010 , 0x0010001000100000, 0x0010001000001010 , 0x0010001000001000, 0x0010001000000010 , 0x0010001000000000, 0x0010000010101010 , 0x0010000010101000, 0x0010000010100010 , 0x0010000010100000, 0x0010000010001010 , 0x0010000010001000, 0x0010000010000010 , 0x0010000010000000, 0x0010000000101010 , 0x0010000000101000, 0x0010000000100010 , 0x0010000000100000, 0x0010000000001010 , 0x0010000000001000, 0x0010000000000010 , 0x0010000000000000, 0x0000101010101010 , 0x0000101010101000, 0x0000101010100010 , 0x0000101010100000, 0x0000101010001010 , 0x0000101010001000, 0x0000101010000010 , 0x0000101010000000, 0x0000101000101010 , 0x0000101000101000, 0x0000101000100010 , 0x0000101000100000, 0x0000101000001010 , 0x0000101000001000, 0x0000101000000010 , 0x0000101000000000, 0x0000100010101010 , 0x0000100010101000, 0x0000100010100010 , 0x0000100010100000, 0x0000100010001010 , 0x0000100010001000, 0x0000100010000010 , 0x0000100010000000, 0x0000100000101010 , 0x0000100000101000, 0x0000100000100010 , 0x0000100000100000, 0x0000100000001010 , 0x0000100000001000, 0x0000100000000010 , 0x0000100000000000, 0x0000001010101010 , 0x0000001010101000, 0x0000001010100010 , 0x0000001010100000, 0x0000001010001010 , 0x0000001010001000, 0x0000001010000010 , 0x0000001010000000, 0x0000001000101010 , 0x0000001000101000, 0x0000001000100010 , 0x0000001000100000, 0x0000001000001010 , 0x0000001000001000, 0x0000001000000010 , 0x0000001000000000, 0x0000000010101010 , 0x0000000010101000, 0x0000000010100010 , 
0x0000000010100000, 0x0000000010001010 , 0x0000000010001000, 0x0000000010000010 , 0x0000000010000000, 0x0000000000101010 , 0x0000000000101000, 0x0000000000100010 , 0x0000000000100000, 0x0000000000001010 , 0x0000000000001000, 0x0000000000000010 , 0x0000000000000000 };
# 500 "ggml.c"
/*
 * Widen one ggml_fp16_t value to float using plain C casts.
 * NOTE(review): ggml_fp16_t is declared outside this chunk; the cast form
 * suggests it is a native half-precision arithmetic type on this build --
 * confirm.
 */
float ggml_fp16_to_fp32(ggml_fp16_t x) {
    return (float) ((float) (x));
}

/* Narrow one float to ggml_fp16_t via the implicit conversion on return. */
ggml_fp16_t ggml_fp32_to_fp16(float x) {
    return (x);
}

/* Convert the n elements of x to float, one at a time, storing them in y. */
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n) {
    for (int i = 0; i < n; i++) {
        y[i] = ((float) (x[i]));
    }
}

/* Convert the n floats of x to ggml_fp16_t, one at a time, storing them in y. */
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
    int i = 0;
# 528 "ggml.c"
    for (; i < n; i++) {
        y[i] = (x[i]);
    }
}
# 561 "ggml.c"
/* Timer initialisation hook; the body is empty in this build. */
void ggml_time_init(void) {}

/*
 * Read clock id 1 with clock_gettime() and return it in whole milliseconds
 * (seconds * 1000 + nanoseconds / 1000000).
 * The return value of clock_gettime() is not checked.
 * NOTE(review): clock id 1 is presumably the expansion of CLOCK_MONOTONIC --
 * confirm against the platform <time.h>.
 */
int64_t ggml_time_ms(void) {
    struct timespec ts;
    clock_gettime(1, &ts);
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
}

/*
 * Same clock read as ggml_time_ms(), returned in whole microseconds
 * (seconds * 1000000 + nanoseconds / 1000).
 */
int64_t ggml_time_us(void) {
    struct timespec ts;
    clock_gettime(1, &ts);
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
}

/* Return the value of clock() widened to int64_t. */
int64_t ggml_cycles(void) {
    return clock();
}

/*
 * Return the constant 1000000/1000 (= 1000).
 * NOTE(review): presumably the expansion of CLOCKS_PER_SEC/1000 -- confirm.
 */
int64_t ggml_cycles_per_ms(void) {
    return 1000000/1000;
}
# 610 "ggml.c"
/*
 * Number of floats that fit in 64 bytes.
 * NOTE(review): 64 is presumably the cache-line size in bytes -- confirm.
 */
static const size_t CACHE_LINE_SIZE_F32 = 64/sizeof(float);
# 838 "ggml.c"
/*
 * Horizontal add for a 4 x int32 vector: reads lanes 0..3 of v one by one and
 * returns their sum. Each __extension__ ({ ... }) group is a single lane read.
 */
inline static int32_t vaddvq_s32(int32x4_t v) {
    return
        __extension__ ({ int32_t __ret; int32x4_t __s0 = v; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 0); __ret; }) +
        __extension__ ({ int32_t __ret; int32x4_t __s0 = v; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 1); __ret; }) +
        __extension__ ({ int32_t __ret; int32x4_t __s0 = v; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 2); __ret; }) +
        __extension__ ({ int32_t __ret; int32x4_t __s0 = v; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 3); __ret; });
}

/*
 * Horizontal add for a 4 x float vector: same lane-by-lane sum as vaddvq_s32,
 * using the f32 lane read.
 */
inline static float vaddvq_f32(float32x4_t v) {
    return
        __extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 0); __ret; }) +
        __extension__ ({ float32_t
__ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 1); __ret; }) + __extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 2); __ret; }) + __extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 3); __ret; }); } inline static float vmaxvq_f32(float32x4_t v) { return ((((__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 0); __ret; })) > (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 1); __ret; })) ? (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 0); __ret; })) : (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 1); __ret; })))) > (((__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 2); __ret; })) > (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 3); __ret; })) ? (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 2); __ret; })) : (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 3); __ret; })))) ? (((__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 0); __ret; })) > (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 1); __ret; })) ? 
(__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 0); __ret; })) : (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 1); __ret; })))) : (((__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 2); __ret; })) > (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 3); __ret; })) ? (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 2); __ret; })) : (__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 3); __ret; }))))); } inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) { int32x4_t res; res[0] = roundf(__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 0); __ret; })); res[1] = roundf(__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 1); __ret; })); res[2] = roundf(__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 2); __ret; })); res[3] = roundf(__extension__ ({ float32_t __ret; float32x4_t __s0 = v; __ret = (float32_t) __builtin_neon_vgetq_lane_f32((float32x4_t)__s0, 3); __ret; })); return res; } typedef struct { ggml_fp16_t d; uint8_t qs[32 / 2]; } block_q4_0; _Static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + 32 / 2, "wrong q4_0 block size/padding"); typedef struct { ggml_fp16_t d; ggml_fp16_t m; uint8_t qs[32 / 2]; } block_q4_1; _Static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_fp16_t) + 32 / 2, "wrong q4_1 block size/padding"); typedef struct { ggml_fp16_t d; uint8_t qh[4]; uint8_t qs[32 / 2]; 
} block_q5_0;
/* Layout check: the struct must have exactly the size of its fields (no padding). */
_Static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + 32 / 2, "wrong q5_0 block size/padding");

/*
 * One q5_1 block of 32 values, as written by quantize_row_q5_1_reference:
 *   d  - step size, m - block minimum,
 *   qh - the fifth (high) bit of each of the 32 quants, packed into 4 bytes,
 *   qs - the low four bits of the quants, two per byte.
 */
typedef struct {
    ggml_fp16_t d;
    ggml_fp16_t m;
    uint8_t qh[4];
    uint8_t qs[32 / 2];
} block_q5_1;
_Static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + 32 / 2, "wrong q5_1 block size/padding");

/* One q8_0 block: step size d followed by 32 signed 8-bit quants. */
typedef struct {
    ggml_fp16_t d;
    int8_t qs[32];
} block_q8_0;
_Static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + 32, "wrong q8_0 block size/padding");

/*
 * One q8_1 block: float step size d, float s, and 32 signed 8-bit quants.
 * The q8_1 quantizers in this file store s = d * (sum of the block's quants).
 */
typedef struct {
    float d;
    float s;
    int8_t qs[32];
} block_q8_1;
_Static_assert(sizeof(block_q8_1) == 2*sizeof(float) + 32, "wrong q8_1 block size/padding");

/*
 * Quantize k floats from x into k/32 q4_0 blocks in y (scalar reference).
 *
 * Per block: find the element with the largest magnitude (keeping its sign),
 * set d = max / -8 so that element scales to -8, then store each scaled value
 * offset by +8.5 and truncated, clamped to at most 15. Byte j of qs holds
 * element j in its low nibble and element j + 16 in its high nibble.
 * Any trailing k % 32 elements are not processed.
 * NOTE(review): ((void) (0)) looks like a compiled-out assert -- confirm
 * against the unpreprocessed source.
 */
static void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k) {
    static const int qk = 32;
    ((void) (0));
    const int nb = k / qk;
    for (int i = 0; i < nb; i++) {
        float amax = 0.0f;
        float max = 0.0f;
        for (int j = 0; j < qk; j++) {
            const float v = x[i*qk + j];
            if (amax < fabsf(v)) {
                amax = fabsf(v);
                max = v;
            }
        }
        const float d = max / -8;
        /* id stays 0 when d is 0, which avoids dividing by zero */
        const float id = d ? 1.0f/d : 0.0f;
        y[i].d = (d);
        for (int j = 0; j < qk/2; ++j) {
            const float x0 = x[i*qk + 0 + j]*id;
            const float x1 = x[i*qk + qk/2 + j]*id;
            const uint8_t xi0 = ((15) < ((int8_t)(x0 + 8.5f)) ? (15) : ((int8_t)(x0 + 8.5f)));
            const uint8_t xi1 = ((15) < ((int8_t)(x1 + 8.5f)) ? (15) : ((int8_t)(x1 + 8.5f)));
            y[i].qs[j] = xi0;
            y[i].qs[j] |= xi1 << 4;
        }
    }
}

/* q4_0 quantizer taking an untyped destination; forwards to the reference version. */
static void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
    quantize_row_q4_0_reference(x, y, k);
}

/*
 * Quantize k floats from x into k/32 q4_1 blocks in y (scalar reference).
 *
 * Per block: take the minimum and maximum, set d = (max - min) / 15 and
 * m = min, then store (value - min) / d offset by +0.5 and truncated, clamped
 * to at most 15. Nibble layout matches q4_0 (element j low, j + 16 high).
 */
static void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k) {
    const int qk = 32;
    ((void) (0));
    const int nb = k / qk;
    for (int i = 0; i < nb; i++) {
        float min = 3.40282347e+38F;
        float max = -3.40282347e+38F;
        for (int j = 0; j < qk; j++) {
            const float v = x[i*qk + j];
            if (v < min) min = v;
            if (v > max) max = v;
        }
        const float d = (max - min) / ((1 << 4) - 1);
        const float id = d ? 1.0f/d : 0.0f;
        y[i].d = (d);
        y[i].m = (min);
        for (int j = 0; j < qk/2; ++j) {
            const float x0 = (x[i*qk + 0 + j] - min)*id;
            const float x1 = (x[i*qk + qk/2 + j] - min)*id;
            const uint8_t xi0 = ((15) < ((int8_t)(x0 + 0.5f)) ? (15) : ((int8_t)(x0 + 0.5f)));
            const uint8_t xi1 = ((15) < ((int8_t)(x1 + 0.5f)) ? (15) : ((int8_t)(x1 + 0.5f)));
            y[i].qs[j] = xi0;
            y[i].qs[j] |= xi1 << 4;
        }
    }
}

/* q4_1 quantizer taking an untyped destination; forwards to the reference version. */
static void quantize_row_q4_1(const float * restrict x, void * restrict y, int k) {
    quantize_row_q4_1_reference(x, y, k);
}

/*
 * Quantize k floats from x into k/32 q5_0 blocks in y (scalar reference).
 *
 * Same scheme as q4_0 with five bits: d = max / -16, values offset by +16.5,
 * truncated and clamped to at most 31. The low four bits go into qs (element j
 * low nibble, element j + 16 high nibble); the fifth bit of element j goes to
 * bit j of qh and that of element j + 16 to bit j + 16.
 */
static void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k) {
    static const int qk = 32;
    ((void) (0));
    const int nb = k / qk;
    for (int i = 0; i < nb; i++) {
        float amax = 0.0f;
        float max = 0.0f;
        for (int j = 0; j < qk; j++) {
            const float v = x[i*qk + j];
            if (amax < fabsf(v)) {
                amax = fabsf(v);
                max = v;
            }
        }
        const float d = max / -16;
        const float id = d ? 1.0f/d : 0.0f;
        y[i].d = (d);
        uint32_t qh = 0;
        for (int j = 0; j < qk/2; ++j) {
            const float x0 = x[i*qk + 0 + j]*id;
            const float x1 = x[i*qk + qk/2 + j]*id;
            const uint8_t xi0 = ((31) < ((int8_t)(x0 + 16.5f)) ? (31) : ((int8_t)(x0 + 16.5f)));
            const uint8_t xi1 = ((31) < ((int8_t)(x1 + 16.5f)) ? (31) : ((int8_t)(x1 + 16.5f)));
            y[i].qs[j] = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4);
            qh |= ((xi0 & 0x10) >> 4) << (j + 0);
            qh |= ((xi1 & 0x10) >> 4) << (j + qk/2);
        }
        /* qh is a byte array in the struct, so the word is copied rather than assigned */
        memcpy(&y[i].qh, &qh, sizeof(qh));
    }
}

/* q5_0 quantizer taking an untyped destination; forwards to the reference version. */
static void quantize_row_q5_0(const float * restrict x, void * restrict y, int k) {
    quantize_row_q5_0_reference(x, y, k);
}

/*
 * Quantize k floats from x into k/32 q5_1 blocks in y (scalar reference).
 *
 * Per block: d = (max - min) / 31, m = min, each value stored as
 * (uint8_t)((value - min) / d + 0.5). Unlike the q5_0 path there is no
 * explicit clamp here. Bit packing into qs/qh is the same as q5_0.
 */
static void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k) {
    const int qk = 32;
    ((void) (0));
    const int nb = k / qk;
    for (int i = 0; i < nb; i++) {
        float min = 3.40282347e+38F;
        float max = -3.40282347e+38F;
        for (int j = 0; j < qk; j++) {
            const float v = x[i*qk + j];
            if (v < min) min = v;
            if (v > max) max = v;
        }
        const float d = (max - min) / ((1 << 5) - 1);
        const float id = d ? 1.0f/d : 0.0f;
        y[i].d = (d);
        y[i].m = (min);
        uint32_t qh = 0;
        for (int j = 0; j < qk/2; ++j) {
            const float x0 = (x[i*qk + 0 + j] - min)*id;
            const float x1 = (x[i*qk + qk/2 + j] - min)*id;
            const uint8_t xi0 = (uint8_t)(x0 + 0.5f);
            const uint8_t xi1 = (uint8_t)(x1 + 0.5f);
            y[i].qs[j] = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4);
            qh |= ((xi0 & 0x10) >> 4) << (j + 0);
            qh |= ((xi1 & 0x10) >> 4) << (j + qk/2);
        }
        memcpy(&y[i].qh, &qh, sizeof(y[i].qh));
    }
}

/* q5_1 quantizer taking an untyped destination; forwards to the reference version. */
static void quantize_row_q5_1(const float * restrict x, void * restrict y, int k) {
    quantize_row_q5_1_reference(x, y, k);
}

/*
 * Quantize k floats from x into k/32 q8_0 blocks in y (scalar reference).
 *
 * Per block: d = (largest absolute value) / 127, and each element is stored
 * as roundf(value / d); an all-zero block gets id = 0 and therefore all-zero
 * quants.
 */
static void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k) {
    ((void) (0));
    const int nb = k / 32;
    for (int i = 0; i < nb; i++) {
        float amax = 0.0f;
        for (int j = 0; j < 32; j++) {
            const float v = x[i*32 + j];
            amax = ((amax) > (fabsf(v)) ? (amax) : (fabsf(v)));
        }
        const float d = amax / ((1 << 7) - 1);
        const float id = d ? 1.0f/d : 0.0f;
        y[i].d = (d);
        for (int j = 0; j < 32; ++j) {
            const float x0 = x[i*32 + j]*id;
            y[i].qs[j] = roundf(x0);
        }
    }
}

/*
 * Vectorised q8_0 quantizer: same per-block scheme as the reference version,
 * working on eight 4-float vectors per block.
 */
static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
    ((void) (0));
    ((void) (0));
    const int nb = k / 32;
    block_q8_0 * restrict y = vy;
    for (int i = 0; i < nb; i++) {
        float32x4_t srcv [8];
        float32x4_t asrcv[8];
        float32x4_t amaxv[8];
        /* load the 32 block values, then take their absolute values */
        for (int j = 0; j < 8; j++) srcv[j] = __extension__ ({ float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vld1q_v(x + i*32 + 4*j, 41); __ret; });
        for (int j = 0; j < 8; j++) asrcv[j] = vabsq_f32(srcv[j]);
        /* pairwise max tree: 8 vectors -> 4 -> 2 -> 1, result left in amaxv[0] */
        for (int j = 0; j < 4; j++) amaxv[2*j] = vmaxq_f32(asrcv[2*j], asrcv[2*j+1]);
        for (int j = 0; j < 2; j++) amaxv[4*j] = vmaxq_f32(amaxv[4*j], amaxv[4*j+2]);
        for (int j = 0; j < 1; j++) amaxv[8*j] = vmaxq_f32(amaxv[8*j], amaxv[8*j+4]);
        const float amax = vmaxvq_f32(amaxv[0]);
        const float d = amax / ((1 << 7) - 1);
        const float id = d ?
1.0f/d : 0.0f;
        /* remainder of quantize_row_q8_0's per-block loop: store the scale, then the quants */
        y[i].d = (d);
        for (int j = 0; j < 8; j++) {
            const float32x4_t v = vmulq_n_f32(srcv[j], id);
            const int32x4_t vi = vcvtnq_s32_f32(v);
            /* write the four converted lanes out as consecutive quants */
            y[i].qs[4*j + 0] = __extension__ ({ int32_t __ret; int32x4_t __s0 = vi; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 0); __ret; });
            y[i].qs[4*j + 1] = __extension__ ({ int32_t __ret; int32x4_t __s0 = vi; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 1); __ret; });
            y[i].qs[4*j + 2] = __extension__ ({ int32_t __ret; int32x4_t __s0 = vi; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 2); __ret; });
            y[i].qs[4*j + 3] = __extension__ ({ int32_t __ret; int32x4_t __s0 = vi; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 3); __ret; });
        }
    }
# 1276 "ggml.c"
}

/*
 * Quantize k floats from x into k/32 q8_1 blocks in y (scalar reference).
 *
 * Per block: d = (largest absolute value) / 127, each element is stored as
 * roundf(value / d), and s is set to d times the integer sum of the stored
 * quants. Any trailing k % 32 elements are not processed.
 * NOTE(review): ((void) (0)) looks like a compiled-out assert -- confirm
 * against the unpreprocessed source.
 */
static void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k) {
    ((void) (0));
    ((void) (0));
    const int nb = k / 32;
    for (int i = 0; i < nb; i++) {
        float amax = 0.0f;
        for (int j = 0; j < 32; j++) {
            const float v = x[i*32 + j];
            amax = ((amax) > (fabsf(v)) ? (amax) : (fabsf(v)));
        }
        const float d = amax / ((1 << 7) - 1);
        /* id stays 0 when d is 0, which avoids dividing by zero */
        const float id = d ? 1.0f/d : 0.0f;
        y[i].d = d;
        int sum = 0;
        for (int j = 0; j < 32/2; ++j) {
            const float v0 = x[i*32 + j]*id;
            const float v1 = x[i*32 + 32/2 + j]*id;
            y[i].qs[ j] = roundf(v0);
            y[i].qs[32/2 + j] = roundf(v1);
            /* the sum uses the stored int8 values, i.e. after rounding */
            sum += y[i].qs[ j];
            sum += y[i].qs[32/2 + j];
        }
        y[i].s = sum*d;
    }
}

/*
 * Vectorised q8_1 quantizer: same per-block scheme as the reference version,
 * working on eight 4-float vectors per block.
 */
static void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
    ((void) (0));
    const int nb = k / 32;
    block_q8_1 * restrict y = vy;
    for (int i = 0; i < nb; i++) {
        float32x4_t srcv [8];
        float32x4_t asrcv[8];
        float32x4_t amaxv[8];
        /* load the 32 block values, then take their absolute values */
        for (int j = 0; j < 8; j++) srcv[j] = __extension__ ({ float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vld1q_v(x + i*32 + 4*j, 41); __ret; });
        for (int j = 0; j < 8; j++) asrcv[j] = vabsq_f32(srcv[j]);
        /* pairwise max tree: 8 vectors -> 4 -> 2 -> 1, result left in amaxv[0] */
        for (int j = 0; j < 4; j++) amaxv[2*j] = vmaxq_f32(asrcv[2*j], asrcv[2*j+1]);
        for (int j = 0; j < 2; j++) amaxv[4*j] = vmaxq_f32(amaxv[4*j], amaxv[4*j+2]);
        for (int j = 0; j < 1; j++) amaxv[8*j] = vmaxq_f32(amaxv[8*j], amaxv[8*j+4]);
        const float amax = vmaxvq_f32(amaxv[0]);
        const float d = amax / ((1 << 7) - 1);
        const float id = d ?
1.0f/d : 0.0f;
        /* remainder of quantize_row_q8_1's per-block loop */
        y[i].d = d;
        /* accv accumulates the converted lanes so their total can be stored in s */
        int32x4_t accv = vdupq_n_s32(0);
        for (int j = 0; j < 8; j++) {
            const float32x4_t v = vmulq_n_f32(srcv[j], id);
            const int32x4_t vi = vcvtnq_s32_f32(v);
            y[i].qs[4*j + 0] = __extension__ ({ int32_t __ret; int32x4_t __s0 = vi; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 0); __ret; });
            y[i].qs[4*j + 1] = __extension__ ({ int32_t __ret; int32x4_t __s0 = vi; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 1); __ret; });
            y[i].qs[4*j + 2] = __extension__ ({ int32_t __ret; int32x4_t __s0 = vi; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 2); __ret; });
            y[i].qs[4*j + 3] = __extension__ ({ int32_t __ret; int32x4_t __s0 = vi; __ret = (int32_t) __builtin_neon_vgetq_lane_i32((int32x4_t)__s0, 3); __ret; });
            accv = vaddq_s32(accv, vi);
        }
        y[i].s = d * vaddvq_s32(accv);
    }
# 1494 "ggml.c"
}

/*
 * Expand k/32 q4_0 blocks from x into k floats in y.
 * Each nibble has 8 subtracted and is multiplied by the block's d; the low
 * nibble of byte j becomes element j, the high nibble element j + 16.
 * NOTE(review): ((void) (0)) looks like a compiled-out assert -- confirm
 * against the unpreprocessed source.
 */
static void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k) {
    static const int qk = 32;
    ((void) (0));
    const int nb = k / qk;
    for (int i = 0; i < nb; i++) {
        const float d = ((float) (x[i].d));
        for (int j = 0; j < qk/2; ++j) {
            const int x0 = (x[i].qs[j] & 0x0F) - 8;
            const int x1 = (x[i].qs[j] >> 4) - 8;
            y[i*qk + j + 0 ] = x0*d;
            y[i*qk + j + qk/2] = x1*d;
        }
    }
}

/*
 * Expand k/32 q4_1 blocks from x into k floats in y.
 * Each nibble is multiplied by the block's d and has the block's m added;
 * nibble-to-element mapping is the same as q4_0.
 */
static void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k) {
    static const int qk = 32;
    ((void) (0));
    const int nb = k / qk;
    for (int i = 0; i < nb; i++) {
        const float d = ((float) (x[i].d));
        const float m = ((float) (x[i].m));
        for (int j = 0; j < qk/2; ++j) {
            const int x0 = (x[i].qs[j] & 0x0F);
            const int x1 = (x[i].qs[j] >> 4);
            y[i*qk + j + 0 ] = x0*d + m;
            y[i*qk + j + qk/2] = x1*d + m;
        }
    }
}

/*
 * Expand k/32 q5_0 blocks from x into k floats in y.
 * Each 5-bit quant is rebuilt from a nibble of qs plus one bit of qh, has 16
 * subtracted, and is multiplied by the block's d.
 */
static void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k) {
    static const int qk = 32;
    ((void) (0));
    const int nb = k / qk;
    for (int i = 0; i < nb; i++) {
        const float d = ((float) (x[i].d));
        uint32_t qh;
        memcpy(&qh, x[i].qh, sizeof(qh));
        for (int j = 0; j < qk/2; ++j) {
            /* xh_0: bit j of qh moved to bit 4 */
            const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
            /* xh_1: shifting by j + 12 and masking bit 4 selects bit j + 16 of qh */
            const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10;
            const int32_t x0 = ((x[i].qs[j] & 0x0F) | xh_0) - 16;
            const int32_t x1 = ((x[i].qs[j] >> 4) | xh_1) - 16;
            y[i*qk + j + 0 ] = x0*d;
            y[i*qk + j + qk/2] = x1*d;
        }
    }
}

/*
 * Expand k/32 q5_1 blocks from x into k floats in y.
 * Quants are rebuilt as in q5_0 (nibble plus one qh bit) but without the -16
 * offset; each is multiplied by d and has m added.
 */
static void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k) {
    static const int qk = 32;
    ((void) (0));
    const int nb = k / qk;
    for (int i = 0; i < nb; i++) {
        const float d = ((float) (x[i].d));
        const float m = ((float) (x[i].m));
        uint32_t qh;
        memcpy(&qh, x[i].qh, sizeof(qh));
        for (int j = 0; j < qk/2; ++j) {
            const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
            const uint8_t xh_1 = ((qh >> (j + 12)) ) & 0x10;
            const int x0 = (x[i].qs[j] & 0x0F) | xh_0;
            const int x1 = (x[i].qs[j] >> 4) | xh_1;
            y[i*qk + j + 0 ] = x0*d + m;
            y[i*qk + j + qk/2] = x1*d + m;
        }
    }
}

/*
 * Expand k/32 q8_0 blocks (passed as an untyped pointer) into k floats in y:
 * every stored int8 quant is multiplied by its block's d.
 */
static void dequantize_row_q8_0(const void * restrict vx, float * restrict y, int k) {
    static const int qk = 32;
    ((void) (0));
    const int nb = k / qk;
    const block_q8_0 * restrict x = vx;
    for (int i = 0; i < nb; i++) {
        const float d = ((float) (x[i].d));
        for (int j = 0; j < qk; ++j) {
            y[i*qk + j] = x[i].qs[j]*d;
        }
    }
}

/* Forward declarations of the dot-product kernels, so they can be named before their definitions. */
static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y);
static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y);
static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy);
static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict
vy); static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { [GGML_TYPE_I8] = { .type_name = "i8", .blck_size = 1, .type_size = sizeof(int8_t), .is_quantized = 0, }, [GGML_TYPE_I16] = { .type_name = "i16", .blck_size = 1, .type_size = sizeof(int16_t), .is_quantized = 0, }, [GGML_TYPE_I32] = { .type_name = "i32", .blck_size = 1, .type_size = sizeof(int32_t), .is_quantized = 0, }, [GGML_TYPE_F32] = { .type_name = "f32", .blck_size = 1, .type_size = sizeof(float), .is_quantized = 0, .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32, .vec_dot_type = GGML_TYPE_F32, }, [GGML_TYPE_F16] = { .type_name = "f16", .blck_size = 1, .type_size = sizeof(ggml_fp16_t), .is_quantized = 0, .to_float = (ggml_to_float_t) ggml_fp16_to_fp32_row, .from_float = (ggml_from_float_t) ggml_fp32_to_fp16_row, .from_float_reference = (ggml_from_float_t) ggml_fp32_to_fp16_row, .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16, .vec_dot_type = GGML_TYPE_F16, }, [GGML_TYPE_Q4_0] = { .type_name = "q4_0", .blck_size = 32, .type_size = sizeof(block_q4_0), .is_quantized = 1, .to_float = (ggml_to_float_t) dequantize_row_q4_0, .from_float = quantize_row_q4_0, .from_float_reference = (ggml_from_float_t) quantize_row_q4_0_reference, .vec_dot = ggml_vec_dot_q4_0_q8_0, .vec_dot_type = GGML_TYPE_Q8_0, }, [GGML_TYPE_Q4_1] = { .type_name = "q4_1", .blck_size = 32, .type_size = sizeof(block_q4_1), .is_quantized = 1, .to_float = (ggml_to_float_t) dequantize_row_q4_1, .from_float = quantize_row_q4_1, .from_float_reference = (ggml_from_float_t) quantize_row_q4_1_reference, .vec_dot = ggml_vec_dot_q4_1_q8_1, .vec_dot_type = GGML_TYPE_Q8_1, }, [GGML_TYPE_Q5_0] = { .type_name = "q5_0", .blck_size = 32, .type_size = sizeof(block_q5_0), .is_quantized = 1, .to_float = (ggml_to_float_t) dequantize_row_q5_0, .from_float = quantize_row_q5_0, .from_float_reference = (ggml_from_float_t) quantize_row_q5_0_reference, .vec_dot = ggml_vec_dot_q5_0_q8_0, .vec_dot_type = GGML_TYPE_Q8_0, }, [GGML_TYPE_Q5_1] = { .type_name = "q5_1", 
.blck_size = 32, .type_size = sizeof(block_q5_1), .is_quantized = 1, .to_float = (ggml_to_float_t) dequantize_row_q5_1, .from_float = quantize_row_q5_1, .from_float_reference = (ggml_from_float_t) quantize_row_q5_1_reference, .vec_dot = ggml_vec_dot_q5_1_q8_1, .vec_dot_type = GGML_TYPE_Q8_1, }, [GGML_TYPE_Q8_0] = { .type_name = "q8_0", .blck_size = 32, .type_size = sizeof(block_q8_0), .is_quantized = 1, .to_float = dequantize_row_q8_0, .from_float = quantize_row_q8_0, .from_float_reference = (ggml_from_float_t) quantize_row_q8_0_reference, .vec_dot = ggml_vec_dot_q8_0_q8_0, .vec_dot_type = GGML_TYPE_Q8_0, }, [GGML_TYPE_Q8_1] = { .type_name = "q8_1", .blck_size = 32, .type_size = sizeof(block_q8_1), .is_quantized = 1, .from_float = quantize_row_q8_1, .from_float_reference = (ggml_from_float_t) quantize_row_q8_1_reference, .vec_dot_type = GGML_TYPE_Q8_1, }, [GGML_TYPE_Q2_K] = { .type_name = "q2_K", .blck_size = 256, .type_size = sizeof(block_q2_K), .is_quantized = 1, .to_float = (ggml_to_float_t) dequantize_row_q2_K, .from_float = quantize_row_q2_K, .from_float_reference = (ggml_from_float_t) quantize_row_q2_K_reference, .vec_dot = ggml_vec_dot_q2_K_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, }, [GGML_TYPE_Q3_K] = { .type_name = "q3_K", .blck_size = 256, .type_size = sizeof(block_q3_K), .is_quantized = 1, .to_float = (ggml_to_float_t) dequantize_row_q3_K, .from_float = quantize_row_q3_K, .from_float_reference = (ggml_from_float_t) quantize_row_q3_K_reference, .vec_dot = ggml_vec_dot_q3_K_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, }, [GGML_TYPE_Q4_K] = { .type_name = "q4_K", .blck_size = 256, .type_size = sizeof(block_q4_K), .is_quantized = 1, .to_float = (ggml_to_float_t) dequantize_row_q4_K, .from_float = quantize_row_q4_K, .from_float_reference = (ggml_from_float_t) quantize_row_q4_K_reference, .vec_dot = ggml_vec_dot_q4_K_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, }, [GGML_TYPE_Q5_K] = { .type_name = "q5_K", .blck_size = 256, .type_size = sizeof(block_q5_K), .is_quantized = 1, 
.to_float = (ggml_to_float_t) dequantize_row_q5_K, .from_float = quantize_row_q5_K, .from_float_reference = (ggml_from_float_t) quantize_row_q5_K_reference, .vec_dot = ggml_vec_dot_q5_K_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, },
    [GGML_TYPE_Q6_K] = { .type_name = "q6_K", .blck_size = 256, .type_size = sizeof(block_q6_K), .is_quantized = 1, .to_float = (ggml_to_float_t) dequantize_row_q6_K, .from_float = quantize_row_q6_K, .from_float_reference = (ggml_from_float_t) quantize_row_q6_K_reference, .vec_dot = ggml_vec_dot_q6_K_q8_K, .vec_dot_type = GGML_TYPE_Q8_K, },
    /* the q8_K entry sets only from_float; its other function pointers are left zero-initialised */
    [GGML_TYPE_Q8_K] = { .type_name = "q8_K", .blck_size = 256, .type_size = sizeof(block_q8_K), .is_quantized = 1, .from_float = quantize_row_q8_K, }
};

/*
 * Return (by value) the type_traits entry for `type`.
 * If `type` is not below GGML_TYPE_COUNT, a "GGML_ASSERT" line is printed to
 * stderr and the process is aborted.
 */
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
    do {
        if (!(type < GGML_TYPE_COUNT)) {
            fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 1786, "type < GGML_TYPE_COUNT");
            abort();
        }
    } while (0);
    return type_traits[type];
}
# 2314 "ggml.c"
/* Fill x[0..n) with v (int8 elements). */
inline static void ggml_vec_set_i8(const int n, int8_t * x, const int8_t v) { for (int i = 0; i < n; ++i) x[i] = v; }

/* Fill x[0..n) with v (int16 elements). */
inline static void ggml_vec_set_i16(const int n, int16_t * x, const int16_t v) { for (int i = 0; i < n; ++i) x[i] = v; }

/* Fill x[0..n) with v (int32 elements). */
inline static void ggml_vec_set_i32(const int n, int32_t * x, const int32_t v) { for (int i = 0; i < n; ++i) x[i] = v; }

/*
 * Fill x[0..n) with v.
 * NOTE(review): v is declared int32_t rather than ggml_fp16_t, so each store
 * converts the integer to the element type -- confirm this is intended.
 */
inline static void ggml_vec_set_f16(const int n, ggml_fp16_t * x, const int32_t v) { for (int i = 0; i < n; ++i) x[i] = v; }

/* z[i] = x[i] + y[i] for i in [0, n). */
inline static void ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] + y[i]; }

/* z[i] = x[i] + v for i in [0, n). */
inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) { for (int i = 0; i < n; ++i) z[i] = x[i] + v; }

/* y[i] += x[i] for i in [0, n). */
inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] += x[i]; }

/* y[i] += v for i in [0, n). */
inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] += v; }
/* z[i] = x[i] - y[i] for i in [0, n). */
inline static void ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] - y[i]; }

/* Fill x[0..n) with v. */
inline static void ggml_vec_set_f32 (const int n, float * x, const float v) { for (int i = 0; i < n; ++i) x[i] = v; }

/* y[i] = x[i] for i in [0, n). */
inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]; }

/* y[i] = -x[i] for i in [0, n). */
inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = -x[i]; }

/* z[i] = x[i] * y[i] for i in [0, n). */
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }

/* z[i] = x[i] / y[i] for i in [0, n); no check for a zero divisor. */
inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; }

/*
 * Dot product of two float vectors of length n; the result is written to *s.
 *
 * The first np = n rounded down to a multiple of 16 elements are processed 16
 * at a time using four 4-lane accumulators; the remaining n - np elements are
 * added with a scalar loop.
 */
static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y) {
    float sumf = 0.0f;
    const int np = (n & ~(16 - 1));
    /* only sum[0] has an explicit initialiser; the rest are zero-initialised by the aggregate rules */
    float32x4_t sum[(16/4)] = { vdupq_n_f32(0.0f) };
    float32x4_t ax[(16/4)];
    float32x4_t ay[(16/4)];
    for (int i = 0; i < np; i += 16) {
        for (int j = 0; j < (16/4); j++) {
            ax[j] = __extension__ ({ float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vld1q_v(x + i + j*4, 41); __ret; });
            ay[j] = __extension__ ({ float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vld1q_v(y + i + j*4, 41); __ret; });
            sum[j] = vfmaq_f32(sum[j], ax[j], ay[j]);
        }
    }
    /*
     * Fold the four accumulators into sum[0] by repeated halving
     * (offset 2, then 1, then 0 -- the last pass does nothing), then add the
     * four lanes of sum[0] together.
     */
    {
        int offset = (16/4) >> 1;
        for (int i = 0; i < offset; ++i) { sum[i] = vaddq_f32(sum[i], sum[offset+i]); }
        offset >>= 1;
        for (int i = 0; i < offset; ++i) { sum[i] = vaddq_f32(sum[i], sum[offset+i]); }
        offset >>= 1;
        for (int i = 0; i < offset; ++i) { sum[i] = vaddq_f32(sum[i], sum[offset+i]); }
        sumf = vaddvq_f32(sum[0]);
    };
    /* scalar tail for the elements past the last full group of 16 */
    for (int i = np; i < n; ++i) {
        sumf += x[i]*y[i];
    }
# 2367 "ggml.c"
    *s = sumf;
}

/*
 * Dot product of two ggml_fp16_t vectors of length n, written to *s as float.
 * Uses the same 16-element grouping as ggml_vec_dot_f32 (np is n rounded down
 * to a multiple of 16).
 */
static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y) {
    ggml_float sumf = 0.0;
    const int np = (n & ~(16 - 1));
float32x4_t sum[(16/4)] = { vdupq_n_f32(0.0f) }; float32x4_t ax[(16/4)]; float32x4_t ay[(16/4)]; for (int i = 0; i < np; i += 16) { for (int j = 0; j < (16/4); j++) { ax[j] = vcvt_f32_f16(__extension__ ({ float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vld1_v(x + i + j*4, 8); __ret; })); ay[j] = vcvt_f32_f16(__extension__ ({ float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vld1_v(y + i + j*4, 8); __ret; })); sum[j] = vfmaq_f32(sum[j], ax[j], ay[j]); } } { int offset = (16/4) >> 1; for (int i = 0; i < offset; ++i) { sum[i] = vaddq_f32(sum[i], sum[offset+i]); } offset >>= 1; for (int i = 0; i < offset; ++i) { sum[i] = vaddq_f32(sum[i], sum[offset+i]); } offset >>= 1; for (int i = 0; i < offset; ++i) { sum[i] = vaddq_f32(sum[i], sum[offset+i]); } sumf = vaddvq_f32(sum[0]); }; for (int i = np; i < n; ++i) { sumf += (ggml_float)(((float) (x[i]))*((float) (y[i]))); } *s = sumf; }
/*
 * ggml_vec_dot_q4_0_q8_0: dot product of a block_q4_0 row (vx) with a
 * block_q8_0 row (vy); the float result is written to *s.
 *
 *   n  - number of values; nb = n / 32 (integer division) blocks are read
 *        from each input
 *   s  - receives the result
 *   vx - read as an array of block_q4_0
 *   vy - read as an array of block_q8_0
 *
 * nb must be even because the loop consumes two blocks per iteration;
 * otherwise the expanded GGML_ASSERT prints to stderr and calls abort().
 *
 * Per block: the 16 bytes of x->qs are split into low nibbles (AND 0x0F)
 * and high nibbles (right shift by 4), 8 is subtracted from every nibble,
 * the low nibbles are multiplied with y->qs[0..15] and the high nibbles
 * with y->qs[16..31] (vmull_s8), the 16-bit products are pairwise widened
 * and added into int32 lanes, converted to float and accumulated with the
 * scale (float)x->d * (float)y->d. Even and odd blocks use separate
 * accumulators (sumv0 / sumv1) that are reduced and added at the end.
 *
 * NOTE(review): the __builtin_neon_* statement expressions are presumably
 * the expansions of vld1q_u8 / vld1q_s8 / vshrq_n_u8 from arm_neon.h;
 * the trailing integer is a compiler-internal type code - confirm before
 * touching them.
 */
static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { const int qk = 32; const int nb = n / qk; ((void) (0)); const block_q4_0 * restrict x = vx; const block_q8_0 * restrict y = vy; float32x4_t sumv0 = vdupq_n_f32(0.0f); float32x4_t sumv1 = vdupq_n_f32(0.0f); do { if (!(nb % 2 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 2419, "nb % 2 == 0"); abort(); } } while (0); for (int i = 0; i < nb; i += 2) { const block_q4_0 * restrict x0 = &x[i + 0]; const block_q4_0 * restrict x1 = &x[i + 1]; const block_q8_0 * restrict y0 = &y[i + 0]; const block_q8_0 * restrict y1 = &y[i + 1]; const uint8x16_t m4b = vdupq_n_u8(0x0F); const int8x16_t s8b = vdupq_n_s8(0x8); const uint8x16_t v0_0 = __extension__ ({ uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vld1q_v(x0->qs, 48); __ret; }); const uint8x16_t v0_1 = __extension__ ({ uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vld1q_v(x1->qs, 48); __ret; }); const int8x16_t v0_0l = vreinterpretq_s8_u8(vandq_u8 (v0_0, m4b)); const int8x16_t v0_0h = 
vreinterpretq_s8_u8(__extension__ ({ uint8x16_t __ret; uint8x16_t __s0 = v0_0; __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, 4, 48); __ret; })); const int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); const int8x16_t v0_1h = vreinterpretq_s8_u8(__extension__ ({ uint8x16_t __ret; uint8x16_t __s0 = v0_1; __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, 4, 48); __ret; })); const int8x16_t v0_0ls = vsubq_s8(v0_0l, s8b); const int8x16_t v0_0hs = vsubq_s8(v0_0h, s8b); const int8x16_t v0_1ls = vsubq_s8(v0_1l, s8b); const int8x16_t v0_1hs = vsubq_s8(v0_1h, s8b); const int8x16_t v1_0l = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y0->qs, 32); __ret; }); const int8x16_t v1_0h = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y0->qs + 16, 32); __ret; }); const int8x16_t v1_1l = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y1->qs, 32); __ret; }); const int8x16_t v1_1h = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y1->qs + 16, 32); __ret; }); # 2458 "ggml.c" const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0ls), vget_low_s8 (v1_0l)); const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0ls), vget_high_s8(v1_0l)); const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hs), vget_low_s8 (v1_0h)); const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0hs), vget_high_s8(v1_0h)); const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1ls), vget_low_s8 (v1_1l)); const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1ls), vget_high_s8(v1_1l)); const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1hs), vget_low_s8 (v1_1h)); const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1hs), vget_high_s8(v1_1h)); const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h)); const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h)); const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h)); const int32x4_t ph1 = 
vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h)); sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), ((float) (x0->d))*((float) (y0->d))); sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), ((float) (x1->d))*((float) (y1->d))); } *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1); # 2710 "ggml.c" }
/*
 * ggml_vec_dot_q4_1_q8_1: dot product of a block_q4_1 row (vx) with a
 * block_q8_1 row (vy); the float result is written to *s.
 *
 * Same block walk as the q4_0 x q8_0 kernel (32 values per block, two
 * blocks per iteration, abort() through the expanded GGML_ASSERT when the
 * block count is odd), with two differences visible in the code:
 *   - the nibbles are used as-is (no subtraction of 8);
 *   - every block additionally contributes (float)x->m * y->s, which is
 *     accumulated in the scalar summs and added to the final result.
 * The integer dot product of each block is scaled by (float)x->d * y->d.
 */
static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { const int qk = 32; const int nb = n / qk; ((void) (0)); const block_q4_1 * restrict x = vx; const block_q8_1 * restrict y = vy; float32x4_t sumv0 = vdupq_n_f32(0.0f); float32x4_t sumv1 = vdupq_n_f32(0.0f); float summs = 0; do { if (!(nb % 2 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 2728, "nb % 2 == 0"); abort(); } } while (0); for (int i = 0; i < nb; i += 2) { const block_q4_1 * restrict x0 = &x[i + 0]; const block_q4_1 * restrict x1 = &x[i + 1]; const block_q8_1 * restrict y0 = &y[i + 0]; const block_q8_1 * restrict y1 = &y[i + 1]; summs += ((float) (x0->m)) * y0->s + ((float) (x1->m)) * y1->s; const uint8x16_t m4b = vdupq_n_u8(0x0F); const uint8x16_t v0_0 = __extension__ ({ uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vld1q_v(x0->qs, 48); __ret; }); const uint8x16_t v0_1 = __extension__ ({ uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vld1q_v(x1->qs, 48); __ret; }); const int8x16_t v0_0l = vreinterpretq_s8_u8(vandq_u8 (v0_0, m4b)); const int8x16_t v0_0h = vreinterpretq_s8_u8(__extension__ ({ uint8x16_t __ret; uint8x16_t __s0 = v0_0; __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, 4, 48); __ret; })); const int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); const int8x16_t v0_1h = vreinterpretq_s8_u8(__extension__ ({ uint8x16_t __ret; uint8x16_t __s0 = v0_1; __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, 4, 48); __ret; })); const int8x16_t v1_0l = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y0->qs, 32); __ret; }); const 
int8x16_t v1_0h = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y0->qs + 16, 32); __ret; }); const int8x16_t v1_1l = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y1->qs, 32); __ret; }); const int8x16_t v1_1h = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y1->qs + 16, 32); __ret; }); # 2762 "ggml.c" const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0l), vget_low_s8 (v1_0l)); const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0l), vget_high_s8(v1_0l)); const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0h), vget_low_s8 (v1_0h)); const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0h), vget_high_s8(v1_0h)); const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1l), vget_low_s8 (v1_1l)); const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1l), vget_high_s8(v1_1l)); const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1h), vget_low_s8 (v1_1h)); const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1h), vget_high_s8(v1_1h)); const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h)); const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h)); const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h)); const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h)); sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), ((float) (x0->d))*y0->d); sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), ((float) (x1->d))*y1->d); } *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs; # 2868 "ggml.c" }
/*
 * ggml_vec_dot_q5_0_q8_0: dot product of a block_q5_0 row (vx) with a
 * block_q8_0 row (vy); the float result is written to *s.
 *
 * 32 values per block, two blocks per iteration; an odd block count
 * triggers the expanded GGML_ASSERT (message on stderr, then abort()).
 *
 * Per block:
 *   - the 4 bytes of x->qh are copied into a uint32_t with memcpy;
 *   - each byte of that word indexes table_b2b_1, giving one uint64_t
 *     (8 bytes) per input byte; the four results are loaded as two
 *     int8x16_t vectors (qhl / qhh);
 *   - x->qs is split into low nibbles (AND 0x0F) and high nibbles
 *     (right shift by 4) and the qh vectors are SUBTRACTED from them;
 *   - the results are multiplied with y->qs[0..15] / y->qs[16..31],
 *     widened, summed, converted to float and scaled by
 *     (float)x->d * (float)y->d.
 *
 * NOTE(review): table_b2b_1 is defined outside this view; it presumably
 * expands every bit of the index into one byte - confirm its contents
 * before changing the subtraction.
 */
static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { const int qk = 32; const int nb = n / qk; ((void) (0)); ((void) (0)); const block_q5_0 * restrict x = vx; const block_q8_0 * restrict y = vy; float32x4_t sumv0 = vdupq_n_f32(0.0f); float32x4_t sumv1 = vdupq_n_f32(0.0f); uint32_t qh0; uint32_t qh1; uint64_t tmp0[4]; uint64_t tmp1[4]; do { if (!(nb % 2 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: 
%s\n", "ggml.c", 2890, "nb % 2 == 0"); abort(); } } while (0); for (int i = 0; i < nb; i += 2) { const block_q5_0 * restrict x0 = &x[i]; const block_q5_0 * restrict x1 = &x[i + 1]; const block_q8_0 * restrict y0 = &y[i]; const block_q8_0 * restrict y1 = &y[i + 1]; const uint8x16_t m4b = vdupq_n_u8(0x0F); memcpy(&qh0, x0->qh, sizeof(qh0)); memcpy(&qh1, x1->qh, sizeof(qh1)); tmp0[0] = table_b2b_1[(qh0 >> 0) & 0xFF]; tmp0[1] = table_b2b_1[(qh0 >> 8) & 0xFF]; tmp0[2] = table_b2b_1[(qh0 >> 16) & 0xFF]; tmp0[3] = table_b2b_1[(qh0 >> 24) ]; tmp1[0] = table_b2b_1[(qh1 >> 0) & 0xFF]; tmp1[1] = table_b2b_1[(qh1 >> 8) & 0xFF]; tmp1[2] = table_b2b_1[(qh1 >> 16) & 0xFF]; tmp1[3] = table_b2b_1[(qh1 >> 24) ]; const int8x16_t qhl0 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v((const int8_t *)(tmp0 + 0), 32); __ret; }); const int8x16_t qhh0 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v((const int8_t *)(tmp0 + 2), 32); __ret; }); const int8x16_t qhl1 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v((const int8_t *)(tmp1 + 0), 32); __ret; }); const int8x16_t qhh1 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v((const int8_t *)(tmp1 + 2), 32); __ret; }); const uint8x16_t v0_0 = __extension__ ({ uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vld1q_v(x0->qs, 48); __ret; }); const uint8x16_t v0_1 = __extension__ ({ uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vld1q_v(x1->qs, 48); __ret; }); int8x16_t v0_0l = vreinterpretq_s8_u8(vandq_u8 (v0_0, m4b)); int8x16_t v0_0h = vreinterpretq_s8_u8(__extension__ ({ uint8x16_t __ret; uint8x16_t __s0 = v0_0; __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, 4, 48); __ret; })); int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); int8x16_t v0_1h = vreinterpretq_s8_u8(__extension__ ({ uint8x16_t __ret; uint8x16_t __s0 = v0_1; __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, 4, 
48); __ret; })); const int8x16_t v0_0lf = vsubq_s8(v0_0l, qhl0); const int8x16_t v0_0hf = vsubq_s8(v0_0h, qhh0); const int8x16_t v0_1lf = vsubq_s8(v0_1l, qhl1); const int8x16_t v0_1hf = vsubq_s8(v0_1h, qhh1); const int8x16_t v1_0l = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y0->qs, 32); __ret; }); const int8x16_t v1_0h = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y0->qs + 16, 32); __ret; }); const int8x16_t v1_1l = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y1->qs, 32); __ret; }); const int8x16_t v1_1h = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y1->qs + 16, 32); __ret; }); # 2947 "ggml.c" const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0lf), vget_low_s8 (v1_0l)); const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0lf), vget_high_s8(v1_0l)); const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hf), vget_low_s8 (v1_0h)); const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0hf), vget_high_s8(v1_0h)); const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1lf), vget_low_s8 (v1_1l)); const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1lf), vget_high_s8(v1_1l)); const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1hf), vget_low_s8 (v1_1h)); const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1hf), vget_high_s8(v1_1h)); const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h)); const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h)); const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h)); const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h)); sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), ((float) (x0->d))*((float) (y0->d))); sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), ((float) (x1->d))*((float) (y1->d))); } *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1); # 3178 "ggml.c" }
/*
 * ggml_vec_dot_q5_1_q8_1: dot product of a block_q5_1 row (vx) with a
 * block_q8_1 row (vy); the float result is written to *s.
 * Parameters follow the other ggml_vec_dot_qX_qY kernels: n values,
 * result pointer s, quantized inputs vx / vy.
 */
static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restrict vx, const void * 
restrict vy) {
/*
 * Body of ggml_vec_dot_q5_1_q8_1.
 *
 * 32 values per block, two blocks per iteration; an odd block count
 * triggers the expanded GGML_ASSERT (message on stderr, then abort()).
 *
 * Per block:
 *   - (float)x->m * y->s is accumulated into summs0 (even blocks) or
 *     summs1 (odd blocks);
 *   - the 4 bytes of x->qh are copied into a uint32_t with memcpy and
 *     each byte is looked up in table_b2b_0, giving 4 x uint64_t that are
 *     loaded as two int8x16_t vectors;
 *   - x->qs is split into low nibbles (AND 0x0F) and high nibbles (right
 *     shift by 4) and the looked-up vectors are OR-ed into them;
 *   - the results are multiplied with y->qs[0..15] / y->qs[16..31],
 *     widened, summed, converted to float and scaled by (float)x->d * y->d.
 * The result is the sum of both vector accumulators plus summs0 + summs1.
 *
 * NOTE(review): table_b2b_0 is defined outside this view; it presumably
 * maps every set bit of the index to the fifth bit of one byte - confirm.
 */
 const int qk = 32; const int nb = n / qk; ((void) (0)); ((void) (0)); const block_q5_1 * restrict x = vx; const block_q8_1 * restrict y = vy; float32x4_t sumv0 = vdupq_n_f32(0.0f); float32x4_t sumv1 = vdupq_n_f32(0.0f); float summs0 = 0.0f; float summs1 = 0.0f; uint32_t qh0; uint32_t qh1; uint64_t tmp0[4]; uint64_t tmp1[4]; do { if (!(nb % 2 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 3203, "nb % 2 == 0"); abort(); } } while (0); for (int i = 0; i < nb; i += 2) { const block_q5_1 * restrict x0 = &x[i]; const block_q5_1 * restrict x1 = &x[i + 1]; const block_q8_1 * restrict y0 = &y[i]; const block_q8_1 * restrict y1 = &y[i + 1]; const uint8x16_t m4b = vdupq_n_u8(0x0F); summs0 += ((float) (x0->m)) * y0->s; summs1 += ((float) (x1->m)) * y1->s; memcpy(&qh0, x0->qh, sizeof(qh0)); memcpy(&qh1, x1->qh, sizeof(qh1)); tmp0[0] = table_b2b_0[(qh0 >> 0) & 0xFF]; tmp0[1] = table_b2b_0[(qh0 >> 8) & 0xFF]; tmp0[2] = table_b2b_0[(qh0 >> 16) & 0xFF]; tmp0[3] = table_b2b_0[(qh0 >> 24) ]; tmp1[0] = table_b2b_0[(qh1 >> 0) & 0xFF]; tmp1[1] = table_b2b_0[(qh1 >> 8) & 0xFF]; tmp1[2] = table_b2b_0[(qh1 >> 16) & 0xFF]; tmp1[3] = table_b2b_0[(qh1 >> 24) ]; const int8x16_t qhl0 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v((const int8_t *)(tmp0 + 0), 32); __ret; }); const int8x16_t qhh0 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v((const int8_t *)(tmp0 + 2), 32); __ret; }); const int8x16_t qhl1 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v((const int8_t *)(tmp1 + 0), 32); __ret; }); const int8x16_t qhh1 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v((const int8_t *)(tmp1 + 2), 32); __ret; }); const uint8x16_t v0_0 = __extension__ ({ uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vld1q_v(x0->qs, 48); __ret; }); const uint8x16_t v0_1 = __extension__ ({ uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vld1q_v(x1->qs, 
48); __ret; }); const int8x16_t v0_0l = vreinterpretq_s8_u8(vandq_u8 (v0_0, m4b)); const int8x16_t v0_0h = vreinterpretq_s8_u8(__extension__ ({ uint8x16_t __ret; uint8x16_t __s0 = v0_0; __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, 4, 48); __ret; })); const int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b)); const int8x16_t v0_1h = vreinterpretq_s8_u8(__extension__ ({ uint8x16_t __ret; uint8x16_t __s0 = v0_1; __ret = (uint8x16_t) __builtin_neon_vshrq_n_v((int8x16_t)__s0, 4, 48); __ret; })); const int8x16_t v0_0lf = vorrq_s8(v0_0l, qhl0); const int8x16_t v0_0hf = vorrq_s8(v0_0h, qhh0); const int8x16_t v0_1lf = vorrq_s8(v0_1l, qhl1); const int8x16_t v0_1hf = vorrq_s8(v0_1h, qhh1); const int8x16_t v1_0l = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y0->qs, 32); __ret; }); const int8x16_t v1_0h = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y0->qs + 16, 32); __ret; }); const int8x16_t v1_1l = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y1->qs, 32); __ret; }); const int8x16_t v1_1h = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y1->qs + 16, 32); __ret; }); # 3263 "ggml.c" const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0lf), vget_low_s8 (v1_0l)); const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0lf), vget_high_s8(v1_0l)); const int16x8_t ph0l = vmull_s8(vget_low_s8 (v0_0hf), vget_low_s8 (v1_0h)); const int16x8_t ph0h = vmull_s8(vget_high_s8(v0_0hf), vget_high_s8(v1_0h)); const int16x8_t pl1l = vmull_s8(vget_low_s8 (v0_1lf), vget_low_s8 (v1_1l)); const int16x8_t pl1h = vmull_s8(vget_high_s8(v0_1lf), vget_high_s8(v1_1l)); const int16x8_t ph1l = vmull_s8(vget_low_s8 (v0_1hf), vget_low_s8 (v1_1h)); const int16x8_t ph1h = vmull_s8(vget_high_s8(v0_1hf), vget_high_s8(v1_1h)); const int32x4_t pl0 = vaddq_s32(vpaddlq_s16(pl0l), vpaddlq_s16(pl0h)); const int32x4_t ph0 = vaddq_s32(vpaddlq_s16(ph0l), vpaddlq_s16(ph0h)); const 
int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h)); const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h)); sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), ((float) (x0->d))*y0->d); sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), ((float) (x1->d))*y1->d); } *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs0 + summs1; # 3500 "ggml.c" }
/*
 * ggml_vec_dot_q8_0_q8_0: dot product of two block_q8_0 rows (vx, vy);
 * the float result is written to *s.
 *
 * 32 values per block, two blocks per iteration; an odd block count
 * triggers the expanded GGML_ASSERT (message on stderr, then abort()).
 * For every block the 32 int8 values of x->qs and y->qs are multiplied
 * element-wise (vmull_s8), pairwise widened and summed to int32,
 * converted to float and scaled by (float)x->d * (float)y->d.
 */
static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restrict vx, const void * restrict vy) { const int qk = 32; const int nb = n / qk; ((void) (0)); const block_q8_0 * restrict x = vx; const block_q8_0 * restrict y = vy; float32x4_t sumv0 = vdupq_n_f32(0.0f); float32x4_t sumv1 = vdupq_n_f32(0.0f); do { if (!(nb % 2 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 3515, "nb % 2 == 0"); abort(); } } while (0); for (int i = 0; i < nb; i += 2) { const block_q8_0 * restrict x0 = &x[i + 0]; const block_q8_0 * restrict x1 = &x[i + 1]; const block_q8_0 * restrict y0 = &y[i + 0]; const block_q8_0 * restrict y1 = &y[i + 1]; const int8x16_t x0_0 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(x0->qs, 32); __ret; }); const int8x16_t x0_1 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(x0->qs + 16, 32); __ret; }); const int8x16_t x1_0 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(x1->qs, 32); __ret; }); const int8x16_t x1_1 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(x1->qs + 16, 32); __ret; }); const int8x16_t y0_0 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y0->qs, 32); __ret; }); const int8x16_t y0_1 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y0->qs + 16, 32); __ret; }); const int8x16_t y1_0 = __extension__ ({ int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y1->qs, 32); __ret; }); const int8x16_t y1_1 = __extension__ ({ int8x16_t 
__ret; __ret = (int8x16_t) __builtin_neon_vld1q_v(y1->qs + 16, 32); __ret; }); # 3543 "ggml.c" const int16x8_t p0_0 = vmull_s8(vget_low_s8 (x0_0), vget_low_s8 (y0_0)); const int16x8_t p0_1 = vmull_s8(vget_high_s8(x0_0), vget_high_s8(y0_0)); const int16x8_t p0_2 = vmull_s8(vget_low_s8 (x0_1), vget_low_s8 (y0_1)); const int16x8_t p0_3 = vmull_s8(vget_high_s8(x0_1), vget_high_s8(y0_1)); const int16x8_t p1_0 = vmull_s8(vget_low_s8 (x1_0), vget_low_s8 (y1_0)); const int16x8_t p1_1 = vmull_s8(vget_high_s8(x1_0), vget_high_s8(y1_0)); const int16x8_t p1_2 = vmull_s8(vget_low_s8 (x1_1), vget_low_s8 (y1_1)); const int16x8_t p1_3 = vmull_s8(vget_high_s8(x1_1), vget_high_s8(y1_1)); const int32x4_t p0 = vaddq_s32(vpaddlq_s16(p0_0), vpaddlq_s16(p0_1)); const int32x4_t p1 = vaddq_s32(vpaddlq_s16(p0_2), vpaddlq_s16(p0_3)); const int32x4_t p2 = vaddq_s32(vpaddlq_s16(p1_0), vpaddlq_s16(p1_1)); const int32x4_t p3 = vaddq_s32(vpaddlq_s16(p1_2), vpaddlq_s16(p1_3)); sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(p0, p1)), ((float) (x0->d))*((float) (y0->d))); sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(p2, p3)), ((float) (x1->d))*((float) (y1->d))); } *s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1); # 3622 "ggml.c" }
/*
 * ggml_vec_dot_f16_unroll: computes two fp16 dot products against the
 * same vector y in one pass.
 *
 *   n  - number of elements per vector
 *   xs - byte stride between the two x rows: row k starts at
 *        (char *) xv + k*xs
 *   s  - receives the two results, s[0] and s[1]
 *   xv - base address of the x rows (ggml_fp16_t data)
 *   y  - the shared right-hand vector
 *
 * The first np = n rounded down to a multiple of 16 elements are handled
 * in chunks of 16: four groups of 4 halves are converted to float
 * (vcvt_f32_f16) and accumulated with vfmaq_f32 into four partial sums
 * per row. The partial sums are then folded pairwise and reduced with
 * vaddvq_f32. The remaining n - np elements are accumulated one by one in
 * ggml_float precision before the results are stored as float.
 */
inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * restrict s, void * restrict xv, ggml_fp16_t * restrict y) { ggml_float sumf[2] = { 0.0 }; ggml_fp16_t * restrict x[2]; for (int i = 0; i < 2; ++i) { x[i] = (ggml_fp16_t *) ((char *) xv + i*xs); } const int np = (n & ~(16 - 1)); float32x4_t sum[2][(16/4)] = { { vdupq_n_f32(0.0f) } }; float32x4_t ax[(16/4)]; float32x4_t ay[(16/4)]; for (int i = 0; i < np; i += 16) { for (int j = 0; j < (16/4); j++) { ay[j] = vcvt_f32_f16(__extension__ ({ float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vld1_v(y + i + j*4, 8); __ret; })); for (int k = 0; k < 2; ++k) { ax[j] = vcvt_f32_f16(__extension__ ({ float16x4_t __ret; __ret = (float16x4_t) __builtin_neon_vld1_v(x[k] + i + j*4, 8); __ret; })); 
sum[k][j] = vfmaq_f32(sum[k][j], ax[j], ay[j]); } } } for (int k = 0; k < 2; ++k) { { int offset = (16/4) >> 1; for (int i = 0; i < offset; ++i) { sum[k][i] = vaddq_f32(sum[k][i], sum[k][offset+i]); } offset >>= 1; for (int i = 0; i < offset; ++i) { sum[k][i] = vaddq_f32(sum[k][i], sum[k][offset+i]); } offset >>= 1; for (int i = 0; i < offset; ++i) { sum[k][i] = vaddq_f32(sum[k][i], sum[k][offset+i]); } sumf[k] = vaddvq_f32(sum[k][0]); }; } for (int i = np; i < n; ++i) { for (int j = 0; j < 2; ++j) { sumf[j] += (ggml_float)(((float) (x[j][i]))*((float) (y[i]))); } } # 3674 "ggml.c" for (int i = 0; i < 2; ++i) { s[i] = sumf[i]; } }
/*
 * ggml_vec_mad_f32: y[i] += x[i]*v for i in [0, n).
 * The first np = n rounded down to a multiple of 16 elements are processed
 * 16 at a time (four float32x4_t load / vfmaq_f32 / store groups); the
 * remaining elements use the scalar expression.
 */
inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float * restrict x, const float v) { const int np = (n & ~(16 - 1)); float32x4_t vx = vdupq_n_f32(v); float32x4_t ax[(16/4)]; float32x4_t ay[(16/4)]; for (int i = 0; i < np; i += 16) { for (int j = 0; j < (16/4); j++) { ax[j] = __extension__ ({ float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vld1q_v(x + i + j*4, 41); __ret; }); ay[j] = __extension__ ({ float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vld1q_v(y + i + j*4, 41); __ret; }); ay[j] = vfmaq_f32(ay[j], ax[j], vx); __extension__ ({ float32x4_t __s1 = ay[j]; __builtin_neon_vst1q_v(y + i + j*4, (int8x16_t)__s1, 41); }); } } for (int i = np; i < n; ++i) { y[i] += x[i]*v; } }
/*
 * ggml_vec_scale_f32: y[i] *= v for i in [0, n), in place.
 * Same 16-wide vector loop plus scalar tail as ggml_vec_mad_f32, using
 * vmulq_f32 instead of a fused multiply-add.
 */
inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { const int np = (n & ~(16 - 1)); float32x4_t vx = vdupq_n_f32(v); float32x4_t ay[(16/4)]; for (int i = 0; i < np; i += 16) { for (int j = 0; j < (16/4); j++) { ay[j] = __extension__ ({ float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vld1q_v(y + i + j*4, 41); __ret; }); ay[j] = vmulq_f32(ay[j], vx); __extension__ ({ float32x4_t __s1 = ay[j]; __builtin_neon_vst1q_v(y + i + j*4, (int8x16_t)__s1, 41); }); } } for (int i = np; i < n; ++i) { y[i] *= v; } }
/* ggml_vec_norm_f32: *s = sqrtf(dot(x, x)) over n elements. */
inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { 
ggml_vec_dot_f32(n, s, x, x); *s = sqrtf(*s); }

/* y[i] = x[i]*x[i] for i in [0, n). */
inline static void ggml_vec_sqr_f32 (const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = x[i]*x[i];
    }
}

/* y[i] = sqrtf(x[i]) for i in [0, n). */
inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = sqrtf(x[i]);
    }
}

/* y[i] = logf(x[i]) for i in [0, n). */
inline static void ggml_vec_log_f32 (const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = logf(x[i]);
    }
}

/* y[i] = fabsf(x[i]) for i in [0, n). */
inline static void ggml_vec_abs_f32 (const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = fabsf(x[i]);
    }
}

/* Sign function: y[i] is 1 for positive, -1 for negative and 0 otherwise. */
inline static void ggml_vec_sgn_f32 (const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f);
    }
}

/* Step function: y[i] is 1 where x[i] > 0 and 0 everywhere else. */
inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = (x[i] > 0.f) ? 1.f : 0.f;
    }
}

/* y[i] = tanhf(x[i]) for i in [0, n). */
inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = tanhf(x[i]);
    }
}

/*
 * ELU: y[i] = x[i] for x[i] > 0, otherwise exp(x[i]) - 1.
 *
 * The negative branch uses expm1f() rather than expf(x) - 1: for small
 * |x| the latter subtracts two nearly equal numbers and loses most of its
 * significant bits, while expm1f() keeps full float precision.
 */
inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = (x[i] > 0.f) ? x[i] : expm1f(x[i]);
    }
}

/* ReLU: y[i] = x[i] where x[i] > 0, otherwise 0. */
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 
x[i] : 0.f; } static const float GELU_COEF_A = 0.044715f; static const float GELU_QUICK_COEF = -1.702f; static const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f; inline static float ggml_gelu_f32(float x) { return 0.5f*x*(1.0f + tanhf(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x))); } inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { const uint16_t * i16 = (const uint16_t *) x; for (int i = 0; i < n; ++i) { y[i] = table_gelu_f16[i16[i]]; } } inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) { uint16_t t; for (int i = 0; i < n; ++i) { ggml_fp16_t fp16 = (x[i]); memcpy(&t, &fp16, sizeof(uint16_t)); y[i] = ((float) (table_gelu_f16[t])); } } # 3785 "ggml.c" inline static float ggml_gelu_quick_f32(float x) { return x*(1.0f/(1.0f+expf(GELU_QUICK_COEF*x))); } # 3797 "ggml.c" inline static void ggml_vec_gelu_quick_f32(const int n, float * y, const float * x) { uint16_t t; for (int i = 0; i < n; ++i) { ggml_fp16_t fp16 = (x[i]); memcpy(&t, &fp16, sizeof(uint16_t)); y[i] = ((float) (table_gelu_quick_f16[t])); } } # 3814 "ggml.c" inline static float ggml_silu_f32(float x) { return x/(1.0f + expf(-x)); } # 3826 "ggml.c" inline static void ggml_vec_silu_f32(const int n, float * y, const float * x) { uint16_t t; for (int i = 0; i < n; ++i) { ggml_fp16_t fp16 = (x[i]); memcpy(&t, &fp16, sizeof(uint16_t)); y[i] = ((float) (table_silu_f16[t])); } } # 3842 "ggml.c" inline static float ggml_silu_backward_f32(float x, float dy) { const float s = 1.0f/(1.0f + expf(-x)); return dy*s*(1.0f + x*(1.0f - s)); } inline static void ggml_vec_silu_backward_f32(const int n, float * dx, const float * x, const float * dy) { for (int i = 0; i < n; ++i) { ggml_fp16_t fp16 = (x[i]); float usedx = ((float) (fp16)); dx[i] = ggml_silu_backward_f32(usedx, dy[i]); } } # 3865 "ggml.c" inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) { ggml_float sum = 0.0; for (int i = 0; i < n; ++i) { sum += 
(ggml_float)x[i]; } *s = sum; } inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) { ggml_float sum = 0.0; for (int i = 0; i < n; ++i) { sum += (ggml_float)x[i]; } *s = sum; } inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) { float sum = 0.0f; for (int i = 0; i < n; ++i) { sum += ((float) (x[i])); } *s = sum; } inline static void ggml_vec_max_f32(const int n, float * s, const float * x) { float max = -__builtin_inff(); for (int i = 0; i < n; ++i) { max = ((max) > (x[i]) ? (max) : (x[i])); } *s = max; } inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x) { ggml_vec_norm_f32(n, s, x); *s = 1.f/(*s); } inline static void ggml_vec_argmax_f32(const int n, int * s, const float * x) { float max = -__builtin_inff(); int idx = 0; for (int i = 0; i < n; ++i) { max = ((max) > (x[i]) ? (max) : (x[i])); if (max == x[i]) { idx = i; } } *s = idx; } static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "NONE", "DUP", "ADD", "ADD1", "ACC", "SUB", "MUL", "DIV", "SQR", "SQRT", "LOG", "SUM", "SUM_ROWS", "MEAN", "ARGMAX", "REPEAT", "REPEAT_BACK", "CONCAT", "SILU_BACK", "NORM", "RMS_NORM", "RMS_NORM_BACK", "GROUP_NORM", "MUL_MAT", "OUT_PROD", "SCALE", "SET", "CPY", "CONT", "RESHAPE", "VIEW", "PERMUTE", "TRANSPOSE", "GET_ROWS", "GET_ROWS_BACK", "DIAG", "DIAG_MASK_INF", "DIAG_MASK_ZERO", "SOFT_MAX", "SOFT_MAX_BACK", "ROPE", "ROPE_BACK", "ALIBI", "CLAMP", "CONV_1D", "CONV_2D", "CONV_TRANSPOSE_2D", "POOL_1D", "POOL_2D", "UPSCALE", "FLASH_ATTN", "FLASH_FF", "FLASH_ATTN_BACK", "WIN_PART", "WIN_UNPART", "GET_REL_POS", "ADD_REL_POS", "UNARY", "MAP_UNARY", "MAP_BINARY", "MAP_CUSTOM1_F32", "MAP_CUSTOM2_F32", "MAP_CUSTOM3_F32", "MAP_CUSTOM1", "MAP_CUSTOM2", "MAP_CUSTOM3", "CROSS_ENTROPY_LOSS", "CROSS_ENTROPY_LOSS_BACK", }; _Static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68"); static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "none", "x", "x+y", "x+y", "view(x,nb,offset)+=y->x", "x-y", 
"x*y", "x/y", "x^2", "√x", "log(x)", "Σx", "Σx_k", "Σx/n", "argmax(x)", "repeat(x)", "repeat_back(x)", "concat(x, y)", "silu_back(x)", "norm(x)", "rms_norm(x)", "rms_norm_back(x)", "group_norm(x)", "X*Y", "X*Y", "x*v", "y-\\>view(x)", "x-\\>y", "cont(x)", "reshape(x)", "view(x)", "permute(x)", "transpose(x)", "get_rows(x)", "get_rows_back(x)", "diag(x)", "diag_mask_inf(x)", "diag_mask_zero(x)", "soft_max(x)", "soft_max_back(x)", "rope(x)", "rope_back(x)", "alibi(x)", "clamp(x)", "conv_1d(x)", "conv_2d(x)", "conv_transpose_2d(x)", "pool_1d(x)", "pool_2d(x)", "upscale(x)", "flash_attn(x)", "flash_ff(x)", "flash_attn_back(x)", "win_part(x)", "win_unpart(x)", "get_rel_pos(x)", "add_rel_pos(x)", "unary(x)", "f(x)", "f(x,y)", "custom_f32(x)", "custom_f32(x,y)", "custom_f32(x,y,z)", "custom(x)", "custom(x,y)", "custom(x,y,z)", "cross_entropy_loss(x,y)", "cross_entropy_loss_back(x,y)", }; _Static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68"); _Static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); _Static_assert(sizeof(struct ggml_object)%4 == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN"); _Static_assert(sizeof(struct ggml_tensor)%4 == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN"); # 4100 "ggml.c" static _Bool GGML_OP_HAS_INIT [GGML_OP_COUNT] = { 0 }; static _Bool GGML_OP_HAS_FINALIZE[GGML_OP_COUNT] = { 0 }; static void ggml_setup_op_has_task_pass(void) { { _Bool * p = GGML_OP_HAS_INIT; p[GGML_OP_ACC ] = 1; p[GGML_OP_MUL_MAT ] = 1; p[GGML_OP_OUT_PROD ] = 1; p[GGML_OP_SET ] = 1; p[GGML_OP_GET_ROWS_BACK ] = 1; p[GGML_OP_DIAG_MASK_INF ] = 1; p[GGML_OP_DIAG_MASK_ZERO ] = 1; p[GGML_OP_CONV_1D ] = 1; p[GGML_OP_CONV_2D ] = 1; p[GGML_OP_CONV_TRANSPOSE_2D ] = 1; p[GGML_OP_FLASH_ATTN_BACK ] = 1; p[GGML_OP_CROSS_ENTROPY_LOSS ] = 1; p[GGML_OP_ADD_REL_POS ] = 1; } { _Bool * p = GGML_OP_HAS_FINALIZE; p[GGML_OP_CROSS_ENTROPY_LOSS ] = 1; } } struct ggml_context { size_t mem_size; void * mem_buffer; _Bool mem_buffer_owned; _Bool no_alloc; _Bool 
no_alloc_save; int n_objects; struct ggml_object * objects_begin; struct ggml_object * objects_end; struct ggml_scratch scratch; struct ggml_scratch scratch_save; }; struct ggml_context_container { _Bool used; struct ggml_context context; }; # 4162 "ggml.c" struct ggml_numa_node { uint32_t cpus[512]; uint32_t n_cpus; }; struct ggml_numa_nodes { struct ggml_numa_node nodes[8]; uint32_t n_nodes; uint32_t total_cpus; }; struct ggml_state { struct ggml_context_container contexts[64]; struct ggml_numa_nodes numa; }; static struct ggml_state g_state; static atomic_int g_state_barrier = 0; inline static void ggml_critical_section_start(void) { int processing = __c11_atomic_fetch_add(&g_state_barrier, 1, memory_order_seq_cst); while (processing > 0) { __c11_atomic_fetch_sub(&g_state_barrier, 1, memory_order_seq_cst); sched_yield(); processing = __c11_atomic_fetch_add(&g_state_barrier, 1, memory_order_seq_cst); } } inline static void ggml_critical_section_end(void) { __c11_atomic_fetch_sub(&g_state_barrier, 1, memory_order_seq_cst); } void ggml_numa_init(void) { if (g_state.numa.n_nodes > 0) { fprintf(stderr, "ggml_numa_init: NUMA already initialized\n"); return; } struct stat st; char path[256]; int rv; while (g_state.numa.n_nodes < 8) { rv = snprintf(path, sizeof(path), "/sys/devices/system/node/node%u", g_state.numa.n_nodes); do { if (!(rv > 0 && (unsigned)rv < sizeof(path))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4219, "rv > 0 && (unsigned)rv < sizeof(path)"); abort(); } } while (0); if (stat(path, &st) != 0) { break; } ++g_state.numa.n_nodes; } while (g_state.numa.total_cpus < 512) { rv = snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%u", g_state.numa.total_cpus); do { if (!(rv > 0 && (unsigned)rv < sizeof(path))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4227, "rv > 0 && (unsigned)rv < sizeof(path)"); abort(); } } while (0); if (stat(path, &st) != 0) { break; } ++g_state.numa.total_cpus; } ; if (g_state.numa.n_nodes < 1 || 
g_state.numa.total_cpus < 1) { g_state.numa.n_nodes = 0; return; } for (uint32_t n = 0; n < g_state.numa.n_nodes; ++n) { struct ggml_numa_node * node = &g_state.numa.nodes[n]; ; node->n_cpus = 0; for (uint32_t c = 0; c < g_state.numa.total_cpus; ++c) { rv = snprintf(path, sizeof(path), "/sys/devices/system/node/node%u/cpu%u", n, c); do { if (!(rv > 0 && (unsigned)rv < sizeof(path))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4245, "rv > 0 && (unsigned)rv < sizeof(path)"); abort(); } } while (0); if (stat(path, &st) == 0) { node->cpus[node->n_cpus++] = c; ; } } ; } if (ggml_is_numa()) { FILE *fptr = fopen("/proc/sys/kernel/numa_balancing", "r"); if (fptr != ((void*)0)) { char buf[42]; if (fgets(buf, sizeof(buf), fptr) && strncmp(buf, "0\n", sizeof(buf)) != 0) { printf("WARNING: /proc/sys/kernel/numa_balancing is enabled, this has been observed to impair performance\n"); } fclose(fptr); } } } _Bool ggml_is_numa(void) { return g_state.numa.n_nodes > 1; } void ggml_print_object(const struct ggml_object * obj) { printf(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n", obj->type, obj->offs, obj->size, (const void *) obj->next); } void ggml_print_objects(const struct ggml_context * ctx) { struct ggml_object * obj = ctx->objects_begin; printf("%s: objects in context %p:\n", __func__, (const void *) ctx); while (obj != ((void*)0)) { ggml_print_object(obj); obj = obj->next; } printf("%s: --- end ---\n", __func__); } int64_t ggml_nelements(const struct ggml_tensor * tensor) { _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function"); return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3]; } int64_t ggml_nrows(const struct ggml_tensor * tensor) { _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function"); return tensor->ne[1]*tensor->ne[2]*tensor->ne[3]; } size_t ggml_nbytes(const struct ggml_tensor * tensor) { size_t nbytes = tensor->ne[0]*tensor->nb[0]/ggml_blck_size(tensor->type); for (int i = 1; i < 4; ++i) 
{ nbytes += (tensor->ne[i] - 1)*tensor->nb[i]; } return nbytes; } size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) { return (((ggml_nbytes(tensor)) + (4) - 1) & ~((4) - 1)); } size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) { _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function"); return (nrows_split*tensor->ne[0]*ggml_type_size(tensor->type))/ggml_blck_size(tensor->type); } int ggml_blck_size(enum ggml_type type) { return type_traits[type].blck_size; } size_t ggml_type_size(enum ggml_type type) { return type_traits[type].type_size; } float ggml_type_sizef(enum ggml_type type) { return ((float)(type_traits[type].type_size))/type_traits[type].blck_size; } const char * ggml_type_name(enum ggml_type type) { return type_traits[type].type_name; } _Bool ggml_is_quantized(enum ggml_type type) { return type_traits[type].is_quantized; } const char * ggml_op_name(enum ggml_op op) { return GGML_OP_NAME[op]; } const char * ggml_op_symbol(enum ggml_op op) { return GGML_OP_SYMBOL[op]; } size_t ggml_element_size(const struct ggml_tensor * tensor) { return ggml_type_size(tensor->type); } static inline _Bool ggml_is_scalar(const struct ggml_tensor * tensor) { _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function"); return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1; } static inline _Bool ggml_is_vector(const struct ggml_tensor * tensor) { _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function"); return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1; } static inline _Bool ggml_is_matrix(const struct ggml_tensor * tensor) { _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function"); return tensor->ne[2] == 1 && tensor->ne[3] == 1; } static inline _Bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function"); return (t0->ne[0] == 
t1->ne[0]) && (t1->ne[2]%t0->ne[2] == 0) && (t1->ne[3]%t0->ne[3] == 0); } static inline _Bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function"); return (t0->ne[1] == t1->ne[1]) && (t0->ne[2] == t1->ne[2]) && (t0->ne[3] == t1->ne[3]); } enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) { enum ggml_type wtype = GGML_TYPE_COUNT; switch (ftype) { case GGML_FTYPE_ALL_F32: wtype = GGML_TYPE_F32; break; case GGML_FTYPE_MOSTLY_F16: wtype = GGML_TYPE_F16; break; case GGML_FTYPE_MOSTLY_Q4_0: wtype = GGML_TYPE_Q4_0; break; case GGML_FTYPE_MOSTLY_Q4_1: wtype = GGML_TYPE_Q4_1; break; case GGML_FTYPE_MOSTLY_Q5_0: wtype = GGML_TYPE_Q5_0; break; case GGML_FTYPE_MOSTLY_Q5_1: wtype = GGML_TYPE_Q5_1; break; case GGML_FTYPE_MOSTLY_Q8_0: wtype = GGML_TYPE_Q8_0; break; case GGML_FTYPE_MOSTLY_Q2_K: wtype = GGML_TYPE_Q2_K; break; case GGML_FTYPE_MOSTLY_Q3_K: wtype = GGML_TYPE_Q3_K; break; case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break; case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break; case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break; case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break; case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break; } do { if (!(wtype != GGML_TYPE_COUNT)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4410, "wtype != GGML_TYPE_COUNT"); abort(); } } while (0); return wtype; } size_t ggml_tensor_overhead(void) { return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE; } _Bool ggml_is_transposed(const struct ggml_tensor * tensor) { return tensor->nb[0] > tensor->nb[1]; } _Bool ggml_is_contiguous(const struct ggml_tensor * tensor) { _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function"); return tensor->nb[0] == ggml_type_size(tensor->type) && tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) && tensor->nb[2] == tensor->nb[1]*tensor->ne[1] && tensor->nb[3] == 
tensor->nb[2]*tensor->ne[2]; }

// Stride checks for dimensions 0, 2 and 3 only: nb[0] equals the type size and
// nb[2]/nb[3] equal the previous stride times the previous extent. nb[1] is free.
static inline _Bool ggml_is_contiguous_except_dim_1(const struct ggml_tensor * tensor) {
    _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function");

    if (tensor->nb[0] != ggml_type_size(tensor->type)) {
        return 0;
    }
    if (tensor->nb[2] != tensor->nb[1]*tensor->ne[1]) {
        return 0;
    }
    return tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
}

// True when any stride is larger than the stride of the next dimension.
_Bool ggml_is_permuted(const struct ggml_tensor * tensor) {
    _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function");

    for (int d = 0; d < 3; ++d) {
        if (tensor->nb[d] > tensor->nb[d + 1]) {
            return 1;
        }
    }
    return 0;
}

// Same three stride checks as ggml_is_contiguous_except_dim_1.
static inline _Bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
    _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function");

    if (tensor->nb[0] != ggml_type_size(tensor->type)) {
        return 0;
    }
    if (tensor->nb[2] != tensor->nb[1]*tensor->ne[1]) {
        return 0;
    }
    return tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
}

// True when all four extents of t0 and t1 match.
_Bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
    _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function");

    for (int d = 0; d < 4; ++d) {
        if (t0->ne[d] != t1->ne[d]) {
            return 0;
        }
    }
    return 1;
}

// True when every extent of t1 is an exact multiple of the matching extent of t0.
static inline _Bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
    _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function");

    for (int d = 0; d < 4; ++d) {
        if (t1->ne[d]%t0->ne[d] != 0) {
            return 0;
        }
    }
    return 1;
}

// ggml_can_repeat() with the extra requirement that ne[0] is equal in both tensors.
static inline _Bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
    _Static_assert(4 == 4, "GGML_MAX_DIMS is not 4 - update this function");

    if (t0->ne[0] != t1->ne[0]) {
        return 0;
    }
    return ggml_can_repeat(t0, t1);
}

// Adds 31 and clears the low five bits.
static inline int ggml_up32(int n) {
    const int bumped = n + 31;
    return bumped & ~31;
}

// Adds m - 1 and clears the bits below m; aborts unless (m & (m - 1)) == 0.
static inline int ggml_up(int n, int m) {
    if ((m & (m - 1)) != 0) {
        fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4494, "(m & (m - 1)) == 0");
        abort();
    }

    return (n + m - 1) & ~(m - 1);
}

struct 
ggml_context * ggml_init(struct ggml_init_params params) { ggml_critical_section_start(); static _Bool is_first_call = 1; if (is_first_call) { ggml_time_init(); { const uint64_t t_start = ggml_time_us(); (void)(t_start); ggml_fp16_t ii; for (int i = 0; i < (1 << 16); ++i) { uint16_t ui = i; memcpy(&ii, &ui, sizeof(ii)); const float f = table_f32_f16[i] = ((float) (ii)); table_gelu_f16[i] = (ggml_gelu_f32(f)); table_gelu_quick_f16[i] = (ggml_gelu_quick_f32(f)); table_silu_f16[i] = (ggml_silu_f32(f)); table_exp_f16[i] = (expf(f)); } const uint64_t t_end = ggml_time_us(); (void)(t_end); ; } { const uint64_t t_start = ggml_time_us(); (void)(t_start); g_state = (struct ggml_state) { { { 0 } }, { .n_nodes = 0, .total_cpus = 0, }, }; for (int i = 0; i < 64; ++i) { g_state.contexts[i].used = 0; } const uint64_t t_end = ggml_time_us(); (void)(t_end); ; } ggml_setup_op_has_task_pass(); is_first_call = 0; } struct ggml_context * ctx = ((void*)0); for (int i = 0; i < 64; i++) { if (!g_state.contexts[i].used) { g_state.contexts[i].used = 1; ctx = &g_state.contexts[i].context; ; break; } } if (ctx == ((void*)0)) { ; ggml_critical_section_end(); return ((void*)0); } if (params.mem_size == 0) { params.mem_size = 4; } const size_t mem_size = params.mem_buffer ? params.mem_size : (((params.mem_size) + (4) - 1) & ~((4) - 1)); *ctx = (struct ggml_context) { mem_size, params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size), params.mem_buffer ? 
0 : 1, params.no_alloc, params.no_alloc, 0, ((void*)0), ((void*)0), { 0, 0, ((void*)0), }, { 0, 0, ((void*)0), }, }; do { if (!(ctx->mem_buffer != ((void*)0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4607, "ctx->mem_buffer != NULL"); abort(); } } while (0); do { if (!(((uintptr_t) (ctx->mem_buffer))%4 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4609, "((uintptr_t) (ctx->mem_buffer))%GGML_MEM_ALIGN == 0"); abort(); } } while (0); ; ggml_critical_section_end(); return ctx; } void ggml_free(struct ggml_context * ctx) { ggml_critical_section_start(); _Bool found = 0; for (int i = 0; i < 64; i++) { if (&g_state.contexts[i].context == ctx) { g_state.contexts[i].used = 0; ; if (ctx->mem_buffer_owned) { free(ctx->mem_buffer); } found = 1; break; } } if (!found) { ; } ggml_critical_section_end(); } size_t ggml_used_mem(const struct ggml_context * ctx) { return ctx->objects_end == ((void*)0) ? 0 : ctx->objects_end->offs + ctx->objects_end->size; } size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) { const size_t result = ctx->scratch.data ? 
ctx->scratch.offs : 0; ctx->scratch = scratch; return result; } _Bool ggml_get_no_alloc(struct ggml_context * ctx) { return ctx->no_alloc; } void ggml_set_no_alloc(struct ggml_context * ctx, _Bool no_alloc) { ctx->no_alloc = no_alloc; } void * ggml_get_mem_buffer(const struct ggml_context * ctx) { return ctx->mem_buffer; } size_t ggml_get_mem_size(const struct ggml_context * ctx) { return ctx->mem_size; } size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) { size_t max_size = 0; struct ggml_object * obj = ctx->objects_begin; while (obj != ((void*)0)) { if (obj->type == GGML_OBJECT_TENSOR) { struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs); const size_t size = ggml_nbytes(tensor); if (max_size < size) { max_size = size; } } obj = obj->next; } return max_size; } static void ggml_scratch_save(struct ggml_context * ctx) { ctx->no_alloc_save = ctx->no_alloc; ctx->no_alloc = 0; ctx->scratch_save = ctx->scratch; ctx->scratch.data = ((void*)0); } static void ggml_scratch_load(struct ggml_context * ctx) { ctx->no_alloc = ctx->no_alloc_save; ctx->scratch = ctx->scratch_save; } static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) { struct ggml_object * obj_cur = ctx->objects_end; const size_t cur_offs = obj_cur == ((void*)0) ? 0 : obj_cur->offs; const size_t cur_size = obj_cur == ((void*)0) ? 
0 : obj_cur->size; const size_t cur_end = cur_offs + cur_size; size_t size_needed = (((size) + (4) - 1) & ~((4) - 1)); char * const mem_buffer = ctx->mem_buffer; struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end); if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) { printf("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n", __func__, cur_end + size_needed, ctx->mem_size); ((void) (0)); return ((void*)0); } *obj_new = (struct ggml_object) { .offs = cur_end + GGML_OBJECT_SIZE, .size = size_needed, .next = ((void*)0), .type = type, }; do { if (!(((uintptr_t) (mem_buffer + obj_new->offs))%4 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4748, "((uintptr_t) (mem_buffer + obj_new->offs))%GGML_MEM_ALIGN == 0"); abort(); } } while (0); if (obj_cur != ((void*)0)) { obj_cur->next = obj_new; } else { ctx->objects_begin = obj_new; } ctx->objects_end = obj_new; return obj_new; } static struct ggml_tensor * ggml_new_tensor_impl( struct ggml_context * ctx, enum ggml_type type, int n_dims, const int64_t * ne, struct ggml_tensor * view_src, size_t view_offs) { ((void) (0)); if (view_src != ((void*)0) && view_src->view_src != ((void*)0)) { view_offs += view_src->view_offs; view_src = view_src->view_src; } size_t data_size = ggml_type_size(type)*(ne[0]/ggml_blck_size(type)); for (int i = 1; i < n_dims; i++) { data_size *= ne[i]; } do { if (!(view_src == ((void*)0) || data_size + view_offs <= ggml_nbytes(view_src))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4785, "view_src == NULL || data_size + view_offs <= ggml_nbytes(view_src)"); abort(); } } while (0); void * data = view_src != ((void*)0) ? 
view_src->data : ((void*)0); if (data != ((void*)0)) { data = (char *) data + view_offs; } size_t obj_alloc_size = 0; if (view_src == ((void*)0) && !ctx->no_alloc) { if (ctx->scratch.data != ((void*)0)) { if (ctx->scratch.offs + data_size > ctx->scratch.size) { printf("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n", __func__, ctx->scratch.offs + data_size, ctx->scratch.size); ((void) (0)); return ((void*)0); } data = (char * const) ctx->scratch.data + ctx->scratch.offs; ctx->scratch.offs += data_size; } else { obj_alloc_size = data_size; } } struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size); struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs); *result = (struct ggml_tensor) { type, GGML_BACKEND_CPU, n_dims, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, GGML_OP_NONE, { 0 }, 0, ((void*)0), { ((void*)0) }, 0, 0, 0, view_src, view_offs, obj_alloc_size > 0 ? (void *)(result + 1) : data, { 0 }, ((void*)0), { 0 }, }; for (int i = 0; i < n_dims; i++) { result->ne[i] = ne[i]; } result->nb[0] = ggml_type_size(type); result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type)); for (int i = 2; i < 4; i++) { result->nb[i] = result->nb[i - 1]*result->ne[i - 1]; } ctx->n_objects++; return result; } struct ggml_tensor * ggml_new_tensor( struct ggml_context * ctx, enum ggml_type type, int n_dims, const int64_t * ne) { return ggml_new_tensor_impl(ctx, type, n_dims, ne, ((void*)0), 0); } struct ggml_tensor * ggml_new_tensor_1d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0) { return ggml_new_tensor(ctx, type, 1, &ne0); } struct ggml_tensor * ggml_new_tensor_2d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1) { const int64_t ne[2] = { ne0, ne1 }; return ggml_new_tensor(ctx, type, 2, ne); } struct ggml_tensor * ggml_new_tensor_3d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t 
ne2) { const int64_t ne[3] = { ne0, ne1, ne2 }; return ggml_new_tensor(ctx, type, 3, ne); } struct ggml_tensor * ggml_new_tensor_4d( struct ggml_context * ctx, enum ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; return ggml_new_tensor(ctx, type, 4, ne); } struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) { ggml_scratch_save(ctx); struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 1); ggml_scratch_load(ctx); ggml_set_i32(result, value); return result; } struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) { ggml_scratch_save(ctx); struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1); ggml_scratch_load(ctx); ggml_set_f32(result, value); return result; } struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) { return ggml_new_tensor(ctx, src->type, src->n_dims, src->ne); } static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) { do { if (!(tensor != ((void*)0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4933, "tensor != NULL"); abort(); } } while (0); ((void) (0)); memcpy(tensor->op_params, params, params_size); } static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) { ((void) (0)); return ((const int32_t *)(tensor->op_params))[i]; } static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) { ((void) (0)); ((int32_t *)(tensor->op_params))[i] = value; } struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) { memset(tensor->data, 0, ggml_nbytes(tensor)); return tensor; } struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) { const int n = ggml_nrows(tensor); const int nc = tensor->ne[0]; const size_t n1 = tensor->nb[1]; char * const data = tensor->data; switch (tensor->type) { case GGML_TYPE_I8: { ((void) (0)); for (int i 
= 0; i < n; i++) { ggml_vec_set_i8(nc, (int8_t *)(data + i*n1), value); } } break; case GGML_TYPE_I16: { ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_set_i16(nc, (int16_t *)(data + i*n1), value); } } break; case GGML_TYPE_I32: { ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_set_i32(nc, (int32_t *)(data + i*n1), value); } } break; case GGML_TYPE_F16: { ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), (value)); } } break; case GGML_TYPE_F32: { ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_set_f32(nc, (float *)(data + i*n1), value); } } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 4998, "false"); abort(); } } while (0); } break; } return tensor; } struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) { const int n = ggml_nrows(tensor); const int nc = tensor->ne[0]; const size_t n1 = tensor->nb[1]; char * const data = tensor->data; switch (tensor->type) { case GGML_TYPE_I8: { ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_set_i8(nc, (int8_t *)(data + i*n1), value); } } break; case GGML_TYPE_I16: { ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_set_i16(nc, (int16_t *)(data + i*n1), value); } } break; case GGML_TYPE_I32: { ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_set_i32(nc, (int32_t *)(data + i*n1), value); } } break; case GGML_TYPE_F16: { ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), (value)); } } break; case GGML_TYPE_F32: { ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_set_f32(nc, (float *)(data + i*n1), value); } } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5050, "false"); abort(); } } while (0); } break; } return tensor; } int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) { switch (tensor->type) { case GGML_TYPE_I8: { do { if (!(tensor->nb[0] == sizeof(int8_t))) { fprintf(stderr, "GGML_ASSERT: 
%s:%d: %s\n", "ggml.c", 5061, "tensor->nb[0] == sizeof(int8_t)"); abort(); } } while (0); return ((int8_t *)(tensor->data))[i]; } break; case GGML_TYPE_I16: { do { if (!(tensor->nb[0] == sizeof(int16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5066, "tensor->nb[0] == sizeof(int16_t)"); abort(); } } while (0); return ((int16_t *)(tensor->data))[i]; } break; case GGML_TYPE_I32: { do { if (!(tensor->nb[0] == sizeof(int32_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5071, "tensor->nb[0] == sizeof(int32_t)"); abort(); } } while (0); return ((int32_t *)(tensor->data))[i]; } break; case GGML_TYPE_F16: { do { if (!(tensor->nb[0] == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5076, "tensor->nb[0] == sizeof(ggml_fp16_t)"); abort(); } } while (0); return ((float) (((ggml_fp16_t *)(tensor->data))[i])); } break; case GGML_TYPE_F32: { do { if (!(tensor->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5081, "tensor->nb[0] == sizeof(float)"); abort(); } } while (0); return ((float *)(tensor->data))[i]; } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5086, "false"); abort(); } } while (0); } break; } return 0.0f; } void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) { switch (tensor->type) { case GGML_TYPE_I8: { do { if (!(tensor->nb[0] == sizeof(int8_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5097, "tensor->nb[0] == sizeof(int8_t)"); abort(); } } while (0); ((int8_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_I16: { do { if (!(tensor->nb[0] == sizeof(int16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5102, "tensor->nb[0] == sizeof(int16_t)"); abort(); } } while (0); ((int16_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_I32: { do { if (!(tensor->nb[0] == sizeof(int32_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5107, "tensor->nb[0] == 
sizeof(int32_t)"); abort(); } } while (0); ((int32_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_F16: { do { if (!(tensor->nb[0] == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5112, "tensor->nb[0] == sizeof(ggml_fp16_t)"); abort(); } } while (0); ((ggml_fp16_t *)(tensor->data))[i] = (value); } break; case GGML_TYPE_F32: { do { if (!(tensor->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5117, "tensor->nb[0] == sizeof(float)"); abort(); } } while (0); ((float *)(tensor->data))[i] = value; } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5122, "false"); abort(); } } while (0); } break; } } float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) { switch (tensor->type) { case GGML_TYPE_I8: { do { if (!(tensor->nb[0] == sizeof(int8_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5131, "tensor->nb[0] == sizeof(int8_t)"); abort(); } } while (0); return ((int8_t *)(tensor->data))[i]; } break; case GGML_TYPE_I16: { do { if (!(tensor->nb[0] == sizeof(int16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5136, "tensor->nb[0] == sizeof(int16_t)"); abort(); } } while (0); return ((int16_t *)(tensor->data))[i]; } break; case GGML_TYPE_I32: { do { if (!(tensor->nb[0] == sizeof(int32_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5141, "tensor->nb[0] == sizeof(int32_t)"); abort(); } } while (0); return ((int32_t *)(tensor->data))[i]; } break; case GGML_TYPE_F16: { do { if (!(tensor->nb[0] == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5146, "tensor->nb[0] == sizeof(ggml_fp16_t)"); abort(); } } while (0); return ((float) (((ggml_fp16_t *)(tensor->data))[i])); } break; case GGML_TYPE_F32: { do { if (!(tensor->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5151, "tensor->nb[0] == sizeof(float)"); abort(); } } while (0); return ((float 
*)(tensor->data))[i]; } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5156, "false"); abort(); } } while (0); } break; } return 0.0f; } void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) { switch (tensor->type) { case GGML_TYPE_I8: { do { if (!(tensor->nb[0] == sizeof(int8_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5167, "tensor->nb[0] == sizeof(int8_t)"); abort(); } } while (0); ((int8_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_I16: { do { if (!(tensor->nb[0] == sizeof(int16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5172, "tensor->nb[0] == sizeof(int16_t)"); abort(); } } while (0); ((int16_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_I32: { do { if (!(tensor->nb[0] == sizeof(int32_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5177, "tensor->nb[0] == sizeof(int32_t)"); abort(); } } while (0); ((int32_t *)(tensor->data))[i] = value; } break; case GGML_TYPE_F16: { do { if (!(tensor->nb[0] == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5182, "tensor->nb[0] == sizeof(ggml_fp16_t)"); abort(); } } while (0); ((ggml_fp16_t *)(tensor->data))[i] = (value); } break; case GGML_TYPE_F32: { do { if (!(tensor->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5187, "tensor->nb[0] == sizeof(float)"); abort(); } } while (0); ((float *)(tensor->data))[i] = value; } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5192, "false"); abort(); } } while (0); } break; } } void * ggml_get_data(const struct ggml_tensor * tensor) { return tensor->data; } float * ggml_get_data_f32(const struct ggml_tensor * tensor) { ((void) (0)); return (float *)(tensor->data); } enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) { do { if (!(tensor->op == GGML_OP_UNARY)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5207, 
"tensor->op == GGML_OP_UNARY"); abort(); } } while (0); return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0); } const char * ggml_get_name(const struct ggml_tensor * tensor) { return tensor->name; } struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) { strncpy(tensor->name, name, sizeof(tensor->name)); tensor->name[sizeof(tensor->name) - 1] = '\0'; return tensor; } struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) { va_list args; __builtin_va_start(args, fmt); vsnprintf(tensor->name, sizeof(tensor->name), fmt, args); __builtin_va_end(args); return tensor; } struct ggml_tensor * ggml_view_tensor( struct ggml_context * ctx, struct ggml_tensor * src) { struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src, 0); ggml_format_name(result, "%s (view)", src->name); for (int i = 0; i < 4; i++) { result->nb[i] = src->nb[i]; } return result; } struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) { struct ggml_object * obj = ctx->objects_begin; char * const mem_buffer = ctx->mem_buffer; while (obj != ((void*)0)) { if (obj->type == GGML_OBJECT_TENSOR) { struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs); if (strcmp(cur->name, name) == 0) { return cur; } } obj = obj->next; } return ((void*)0); } static struct ggml_tensor * ggml_dup_impl( struct ggml_context * ctx, struct ggml_tensor * a, _Bool inplace) { _Bool is_node = 0; if (!inplace && (a->grad)) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_DUP; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_dup( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_dup_impl(ctx, a, 0); } struct ggml_tensor * ggml_dup_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_dup_impl(ctx, a, 1); } static struct ggml_tensor * ggml_add_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, _Bool inplace) { do { if (!(ggml_can_repeat_rows(b, a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5305, "ggml_can_repeat_rows(b, a)"); abort(); } } while (0); _Bool is_node = 0; if (!inplace && (a->grad || b->grad)) { do { if (!(ggml_are_same_shape(a, b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5311, "ggml_are_same_shape(a, b)"); abort(); } } while (0); is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_ADD; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_add( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_add_impl(ctx, a, b, 0); } struct ggml_tensor * ggml_add_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_add_impl(ctx, a, b, 1); } static struct ggml_tensor * ggml_add1_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, _Bool inplace) { do { if (!(ggml_is_scalar(b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5346, "ggml_is_scalar(b)"); abort(); } } while (0); do { if (!(ggml_is_padded_1d(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5347, "ggml_is_padded_1d(a)"); abort(); } } while (0); _Bool is_node = 0; if (a->grad || b->grad) { is_node = 1; } struct ggml_tensor * result = inplace ? 
ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_ADD1; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_add1( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_add1_impl(ctx, a, b, 0); } struct ggml_tensor * ggml_add1_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_add1_impl(ctx, a, b, 1); } static struct ggml_tensor * ggml_acc_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, _Bool inplace) { do { if (!(ggml_nelements(b) <= ggml_nelements(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5390, "ggml_nelements(b) <= ggml_nelements(a)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5391, "ggml_is_contiguous(a)"); abort(); } } while (0); do { if (!(a->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5392, "a->type == GGML_TYPE_F32"); abort(); } } while (0); do { if (!(b->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5393, "b->type == GGML_TYPE_F32"); abort(); } } while (0); _Bool is_node = 0; if (!inplace && (a->grad || b->grad)) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 }; ggml_set_op_params(result, params, sizeof(params)); result->op = GGML_OP_ACC; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_acc( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset) { return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, 0); } struct ggml_tensor * ggml_acc_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset) { return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, 1); } static struct ggml_tensor * ggml_sub_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, _Bool inplace) { do { if (!(ggml_are_same_shape(a, b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5443, "ggml_are_same_shape(a, b)"); abort(); } } while (0); _Bool is_node = 0; if (!inplace && (a->grad || b->grad)) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_SUB; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_sub( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_sub_impl(ctx, a, b, 0); } struct ggml_tensor * ggml_sub_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_sub_impl(ctx, a, b, 1); } static struct ggml_tensor * ggml_mul_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, _Bool inplace) { do { if (!(ggml_can_repeat_rows(b, a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5484, "ggml_can_repeat_rows(b, a)"); abort(); } } while (0); _Bool is_node = 0; if (!inplace && (a->grad || b->grad)) { do { if (!(ggml_are_same_shape(a, b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5490, "ggml_are_same_shape(a, b)"); abort(); } } while (0); is_node = 1; } if (inplace) { do { if (!(!is_node)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5495, "!is_node"); abort(); } } while (0); } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_MUL; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_mul( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_mul_impl(ctx, a, b, 0); } struct ggml_tensor * ggml_mul_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_mul_impl(ctx, a, b, 1); } static struct ggml_tensor * ggml_div_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, _Bool inplace) { do { if (!(ggml_are_same_shape(a, b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5529, "ggml_are_same_shape(a, b)"); abort(); } } while (0); _Bool is_node = 0; if (!inplace && (a->grad || b->grad)) { is_node = 1; } if (inplace) { do { if (!(!is_node)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5538, "!is_node"); abort(); } } while (0); } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_DIV; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_div( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_div_impl(ctx, a, b, 0); } struct ggml_tensor * ggml_div_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_div_impl(ctx, a, b, 1); } static struct ggml_tensor * ggml_sqr_impl( struct ggml_context * ctx, struct ggml_tensor * a, _Bool inplace) { _Bool is_node = 0; if (!inplace && (a->grad)) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_SQR; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_sqr( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_sqr_impl(ctx, a, 0); } struct ggml_tensor * ggml_sqr_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_sqr_impl(ctx, a, 1); } static struct ggml_tensor * ggml_sqrt_impl( struct ggml_context * ctx, struct ggml_tensor * a, _Bool inplace) { _Bool is_node = 0; if (!inplace && (a->grad)) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_SQRT; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_sqrt( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_sqrt_impl(ctx, a, 0); } struct ggml_tensor * ggml_sqrt_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_sqrt_impl(ctx, a, 1); } static struct ggml_tensor * ggml_log_impl( struct ggml_context * ctx, struct ggml_tensor * a, _Bool inplace) { _Bool is_node = 0; if (!inplace && (a->grad)) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_LOG; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_log( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_log_impl(ctx, a, 0); } struct ggml_tensor * ggml_log_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_log_impl(ctx, a, 1); } struct ggml_tensor * ggml_sum( struct ggml_context * ctx, struct ggml_tensor * a) { _Bool is_node = 0; if (a->grad) { is_node = 1; } struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1); result->op = GGML_OP_SUM; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_sum_rows( struct ggml_context * ctx, struct ggml_tensor * a) { _Bool is_node = 0; if (a->grad) { is_node = 1; } int64_t ne[4] = {1,1,1,1}; for (int i=1; in_dims; ++i) { ne[i] = a->ne[i]; } struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, a->n_dims, ne); result->op = GGML_OP_SUM_ROWS; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_mean( struct ggml_context * ctx, struct ggml_tensor * a) { _Bool is_node = 0; if (a->grad) { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5719, "false"); abort(); } } while (0); is_node = 1; } int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] }; struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, ne); result->op = GGML_OP_MEAN; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_argmax( struct ggml_context * ctx, struct ggml_tensor * a) { do { if (!(ggml_is_matrix(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5738, "ggml_is_matrix(a)"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5742, "false"); abort(); } } while (0); is_node = 1; } int64_t ne[4] = { a->ne[1], 1, 1, 1 }; struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, ne); result->op = GGML_OP_ARGMAX; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_repeat( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { do { if (!(ggml_can_repeat(a, b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5762, "ggml_can_repeat(a, b)"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 1; } struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne); result->op = GGML_OP_REPEAT; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_repeat_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { do { if (!(ggml_can_repeat(b, a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5786, "ggml_can_repeat(b, a)"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 1; } if (ggml_are_same_shape(a, b) && !is_node) { return a; } struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, b->n_dims, b->ne); result->op = GGML_OP_REPEAT_BACK; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_concat( struct ggml_context* ctx, struct ggml_tensor* a, struct ggml_tensor* b) { do { if (!(a->ne[0] == b->ne[0] && a->ne[1] == b->ne[1] && a->ne[3] == b->ne[3])) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 5814, "a->ne[0] == b->ne[0] && a->ne[1] == b->ne[1] && a->ne[3] == b->ne[3]"); abort(); } } while (0); _Bool is_node = 0; if (a->grad || b->grad) { is_node = 1; } struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, a->ne[0], a->ne[1], a->ne[2] + b->ne[2], a->ne[3]); result->op = GGML_OP_CONCAT; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_abs( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary(ctx, a, GGML_UNARY_OP_ABS); } struct ggml_tensor * ggml_abs_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS); } struct ggml_tensor * ggml_sgn( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary(ctx, a, GGML_UNARY_OP_SGN); } struct ggml_tensor * ggml_sgn_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN); } struct ggml_tensor * ggml_neg( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary(ctx, a, GGML_UNARY_OP_NEG); } struct ggml_tensor * ggml_neg_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG); } struct ggml_tensor * ggml_step( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary(ctx, a, GGML_UNARY_OP_STEP); } struct ggml_tensor * ggml_step_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP); } struct ggml_tensor * ggml_tanh( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary(ctx, a, GGML_UNARY_OP_TANH); } struct ggml_tensor * ggml_tanh_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH); } struct ggml_tensor * ggml_elu( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary(ctx, a, GGML_UNARY_OP_ELU); } struct ggml_tensor * ggml_elu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU); } struct ggml_tensor * ggml_relu( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary(ctx, a, GGML_UNARY_OP_RELU); } struct ggml_tensor * ggml_relu_inplace( struct ggml_context * ctx, struct 
ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU); } struct ggml_tensor * ggml_gelu( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary(ctx, a, GGML_UNARY_OP_GELU); } struct ggml_tensor * ggml_gelu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU); } struct ggml_tensor * ggml_gelu_quick( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK); } struct ggml_tensor * ggml_gelu_quick_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK); } struct ggml_tensor * ggml_silu( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary(ctx, a, GGML_UNARY_OP_SILU); } struct ggml_tensor * ggml_silu_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU); } struct ggml_tensor * ggml_silu_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { _Bool is_node = 0; if (a->grad || b->grad) { is_node = 1; } struct ggml_tensor * result = ggml_dup_tensor(ctx, a); result->op = GGML_OP_SILU_BACK; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } static struct ggml_tensor * ggml_norm_impl( struct ggml_context * ctx, struct ggml_tensor * a, float eps, _Bool inplace) { _Bool is_node = 0; if (!inplace && (a->grad)) { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6005, "false"); abort(); } } while (0); is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); ggml_set_op_params(result, &eps, sizeof(eps)); result->op = GGML_OP_NORM; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_norm( struct ggml_context * ctx, struct ggml_tensor * a, float eps) { return ggml_norm_impl(ctx, a, eps, 0); } struct ggml_tensor * ggml_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, float eps) { return ggml_norm_impl(ctx, a, eps, 1); } static struct ggml_tensor * ggml_rms_norm_impl( struct ggml_context * ctx, struct ggml_tensor * a, float eps, _Bool inplace) { _Bool is_node = 0; if (!inplace && (a->grad)) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); ggml_set_op_params(result, &eps, sizeof(eps)); result->op = GGML_OP_RMS_NORM; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_rms_norm( struct ggml_context * ctx, struct ggml_tensor * a, float eps) { return ggml_rms_norm_impl(ctx, a, eps, 0); } struct ggml_tensor * ggml_rms_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, float eps) { return ggml_rms_norm_impl(ctx, a, eps, 1); } struct ggml_tensor * ggml_rms_norm_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, float eps) { _Bool is_node = 0; if (a->grad) { is_node = 1; } struct ggml_tensor * result = ggml_dup_tensor(ctx, a); ggml_set_op_params(result, &eps, sizeof(eps)); result->op = GGML_OP_RMS_NORM_BACK; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } static struct ggml_tensor * ggml_group_norm_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_groups, _Bool inplace) { _Bool is_node = 0; if (!inplace && (a->grad)) { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6108, "false"); abort(); } } while (0); is_node = 1; } struct ggml_tensor * result = inplace ? 
ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_GROUP_NORM; result->op_params[0] = n_groups; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = ((void*)0); return result; } struct ggml_tensor * ggml_group_norm( struct ggml_context * ctx, struct ggml_tensor * a, int n_groups) { return ggml_group_norm_impl(ctx, a, n_groups, 0); } struct ggml_tensor * ggml_group_norm_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_groups) { return ggml_group_norm_impl(ctx, a, n_groups, 1); } struct ggml_tensor * ggml_mul_mat( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { do { if (!(ggml_can_mul_mat(a, b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6143, "ggml_can_mul_mat(a, b)"); abort(); } } while (0); do { if (!(!ggml_is_transposed(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6144, "!ggml_is_transposed(a)"); abort(); } } while (0); _Bool is_node = 0; if (a->grad || b->grad) { is_node = 1; } const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] }; struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, ((a->n_dims) > (b->n_dims) ? (a->n_dims) : (b->n_dims)), ne); result->op = GGML_OP_MUL_MAT; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_out_prod( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { do { if (!(ggml_can_out_prod(a, b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6169, "ggml_can_out_prod(a, b)"); abort(); } } while (0); do { if (!(!ggml_is_transposed(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6170, "!ggml_is_transposed(a)"); abort(); } } while (0); _Bool is_node = 0; if (a->grad || b->grad) { is_node = 1; } const int64_t ne[4] = { a->ne[0], b->ne[0], a->ne[2], b->ne[3] }; struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, ((a->n_dims) < (b->n_dims) ? (a->n_dims) : (b->n_dims)), ne); result->op = GGML_OP_OUT_PROD; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } static struct ggml_tensor * ggml_scale_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, _Bool inplace) { do { if (!(ggml_is_scalar(b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6196, "ggml_is_scalar(b)"); abort(); } } while (0); do { if (!(ggml_is_padded_1d(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6197, "ggml_is_padded_1d(a)"); abort(); } } while (0); _Bool is_node = 0; if (a->grad || b->grad) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_SCALE; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_scale( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_scale_impl(ctx, a, b, 0); } struct ggml_tensor * ggml_scale_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_scale_impl(ctx, a, b, 1); } static struct ggml_tensor * ggml_set_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, _Bool inplace) { do { if (!(ggml_nelements(a) >= ggml_nelements(b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6240, "ggml_nelements(a) >= ggml_nelements(b)"); abort(); } } while (0); _Bool is_node = 0; if (a->grad || b->grad) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 }; ggml_set_op_params(result, params, sizeof(params)); result->op = GGML_OP_SET; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_set( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset) { return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, 0); } struct ggml_tensor * ggml_set_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset) { return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, 1); } struct ggml_tensor * ggml_set_1d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t offset) { return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, 0); } struct ggml_tensor * ggml_set_1d_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t offset) { return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, 1); } struct ggml_tensor * ggml_set_2d( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t offset) { return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, 0); } struct ggml_tensor * ggml_set_2d_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t offset) { return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, 0); } static struct ggml_tensor * ggml_cpy_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, _Bool inplace) { do { if (!(ggml_nelements(a) == ggml_nelements(b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6326, "ggml_nelements(a) == ggml_nelements(b)"); abort(); } } while (0); _Bool is_node = 0; if (!inplace && (a->grad || b->grad)) { is_node = 1; } struct ggml_tensor * result = ggml_view_tensor(ctx, b); if (strlen(b->name) > 0) { ggml_format_name(result, "%s (copy of %s)", b->name, a->name); } else { ggml_format_name(result, "%s (copy)", a->name); } result->op = 
GGML_OP_CPY; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_cpy( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_cpy_impl(ctx, a, b, 0); } struct ggml_tensor * ggml_cpy_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_cpy_impl(ctx, a, b, 1); } static struct ggml_tensor * ggml_cont_impl( struct ggml_context * ctx, struct ggml_tensor * a, _Bool inplace) { _Bool is_node = 0; if (!inplace && a->grad) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); ggml_format_name(result, "%s (cont)", a->name); result->op = GGML_OP_CONT; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_cont( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_cont_impl(ctx, a, 0); } struct ggml_tensor * ggml_cont_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_cont_impl(ctx, a, 1); } struct ggml_tensor * ggml_reshape( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { do { if (!(ggml_is_contiguous(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6404, "ggml_is_contiguous(a)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6405, "ggml_is_contiguous(b)"); abort(); } } while (0); do { if (!(ggml_nelements(a) == ggml_nelements(b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6406, "ggml_nelements(a) == ggml_nelements(b)"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 1; } if (b->grad) { } struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a, 0); ggml_format_name(result, "%s (reshaped)", a->name); result->op = GGML_OP_RESHAPE; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_reshape_1d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0) { do { if (!(ggml_is_contiguous(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6433, "ggml_is_contiguous(a)"); abort(); } } while (0); do { if (!(ggml_nelements(a) == ne0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6434, "ggml_nelements(a) == ne0"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 1; } const int64_t ne[1] = { ne0 }; struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0); ggml_format_name(result, "%s (reshaped)", a->name); result->op = GGML_OP_RESHAPE; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_reshape_2d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1) { do { if (!(ggml_is_contiguous(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6458, "ggml_is_contiguous(a)"); abort(); } } while (0); do { if (!(ggml_nelements(a) == ne0*ne1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6459, "ggml_nelements(a) == ne0*ne1"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 1; } const int64_t ne[2] = { ne0, ne1 }; struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0); ggml_format_name(result, "%s (reshaped)", a->name); result->op = GGML_OP_RESHAPE; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_reshape_3d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2) { do { if (!(ggml_is_contiguous(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6484, "ggml_is_contiguous(a)"); abort(); } } while (0); do { if (!(ggml_nelements(a) == ne0*ne1*ne2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6485, "ggml_nelements(a) == ne0*ne1*ne2"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 1; } const int64_t ne[3] = { ne0, ne1, ne2 }; struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0); ggml_format_name(result, "%s (reshaped)", a->name); result->op = GGML_OP_RESHAPE; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_reshape_4d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) { do { if (!(ggml_is_contiguous(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6511, "ggml_is_contiguous(a)"); abort(); } } while (0); do { if (!(ggml_nelements(a) == ne0*ne1*ne2*ne3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6512, "ggml_nelements(a) == ne0*ne1*ne2*ne3"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 1; } const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0); ggml_format_name(result, "%s (reshaped)", a->name); result->op = GGML_OP_RESHAPE; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } static struct ggml_tensor * ggml_view_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_dims, const int64_t * ne, size_t offset) { _Bool is_node = 0; if (a->grad) { is_node = 1; } struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset); ggml_format_name(result, "%s (view)", a->name); ggml_set_op_params(result, &offset, sizeof(offset)); result->op = GGML_OP_VIEW; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_view_1d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, size_t offset) { struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset); return result; } struct ggml_tensor * ggml_view_2d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, size_t nb1, size_t offset) { const int64_t ne[2] = { ne0, ne1 }; struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset); result->nb[1] = nb1; result->nb[2] = result->nb[1]*ne1; result->nb[3] = result->nb[2]; return result; } struct ggml_tensor * ggml_view_3d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, size_t nb1, size_t nb2, size_t offset) { const int64_t ne[3] = { ne0, ne1, ne2 }; struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset); result->nb[1] = nb1; result->nb[2] = nb2; result->nb[3] = result->nb[2]*ne2; return result; } struct ggml_tensor * ggml_view_4d( struct ggml_context * ctx, struct ggml_tensor * a, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3, size_t nb1, size_t nb2, size_t nb3, size_t offset) { const int64_t ne[4] = { ne0, ne1, ne2, ne3 }; struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset); result->nb[1] = nb1; result->nb[2] = nb2; result->nb[3] = nb3; return result; } struct ggml_tensor * ggml_permute( struct ggml_context * ctx, struct ggml_tensor * a, int 
axis0, int axis1, int axis2, int axis3) { do { if (!(axis0 >= 0 && axis0 < 4)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6647, "axis0 >= 0 && axis0 < GGML_MAX_DIMS"); abort(); } } while (0); do { if (!(axis1 >= 0 && axis1 < 4)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6648, "axis1 >= 0 && axis1 < GGML_MAX_DIMS"); abort(); } } while (0); do { if (!(axis2 >= 0 && axis2 < 4)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6649, "axis2 >= 0 && axis2 < GGML_MAX_DIMS"); abort(); } } while (0); do { if (!(axis3 >= 0 && axis3 < 4)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6650, "axis3 >= 0 && axis3 < GGML_MAX_DIMS"); abort(); } } while (0); do { if (!(axis0 != axis1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6652, "axis0 != axis1"); abort(); } } while (0); do { if (!(axis0 != axis2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6653, "axis0 != axis2"); abort(); } } while (0); do { if (!(axis0 != axis3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6654, "axis0 != axis3"); abort(); } } while (0); do { if (!(axis1 != axis2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6655, "axis1 != axis2"); abort(); } } while (0); do { if (!(axis1 != axis3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6656, "axis1 != axis3"); abort(); } } while (0); do { if (!(axis2 != axis3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6657, "axis2 != axis3"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 1; } struct ggml_tensor * result = ggml_view_tensor(ctx, a); ggml_format_name(result, "%s (permuted)", a->name); int ne[4]; int nb[4]; ne[axis0] = a->ne[0]; ne[axis1] = a->ne[1]; ne[axis2] = a->ne[2]; ne[axis3] = a->ne[3]; nb[axis0] = a->nb[0]; nb[axis1] = a->nb[1]; nb[axis2] = a->nb[2]; nb[axis3] = a->nb[3]; result->ne[0] = ne[0]; result->ne[1] = ne[1]; result->ne[2] = ne[2]; result->ne[3] = ne[3]; result->nb[0] = nb[0]; result->nb[1] = nb[1]; result->nb[2] 
= nb[2]; result->nb[3] = nb[3]; result->op = GGML_OP_PERMUTE; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; int32_t params[] = { axis0, axis1, axis2, axis3 }; ggml_set_op_params(result, params, sizeof(params)); return result; } struct ggml_tensor * ggml_transpose( struct ggml_context * ctx, struct ggml_tensor * a) { _Bool is_node = 0; if (a->grad) { is_node = 1; } struct ggml_tensor * result = ggml_view_tensor(ctx, a); ggml_format_name(result, "%s (transposed)", a->name); result->ne[0] = a->ne[1]; result->ne[1] = a->ne[0]; result->nb[0] = a->nb[1]; result->nb[1] = a->nb[0]; result->op = GGML_OP_TRANSPOSE; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_get_rows( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { do { if (!(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6734, "ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32"); abort(); } } while (0); _Bool is_node = 0; if (a->grad || b->grad) { is_node = 1; } struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, a->ne[0], b->ne[0]); result->op = GGML_OP_GET_ROWS; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_get_rows_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c) { do { if (!(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6761, "ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32"); abort(); } } while (0); do { if (!(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6762, "ggml_is_matrix(c) && (a->ne[0] == c->ne[0])"); abort(); } } while (0); _Bool is_node = 0; if (a->grad || b->grad) { is_node = 1; } struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]); result->op = GGML_OP_GET_ROWS_BACK; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; result->src[2] = c; return result; } struct ggml_tensor * ggml_diag( struct ggml_context * ctx, struct ggml_tensor * a) { do { if (!(a->ne[1] == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6788, "a->ne[1] == 1"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 1; } const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] }; struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, ((a->n_dims) > (2) ? (a->n_dims) : (2)), ne); result->op = GGML_OP_DIAG; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } static struct ggml_tensor * ggml_diag_mask_inf_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, _Bool inplace) { _Bool is_node = 0; if (a->grad) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); int32_t params[] = { n_past }; ggml_set_op_params(result, params, sizeof(params)); result->op = GGML_OP_DIAG_MASK_INF; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_diag_mask_inf( struct ggml_context * ctx, struct ggml_tensor * a, int n_past) { return ggml_diag_mask_inf_impl(ctx, a, n_past, 0); } struct ggml_tensor * ggml_diag_mask_inf_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past) { return ggml_diag_mask_inf_impl(ctx, a, n_past, 1); } static struct ggml_tensor * ggml_diag_mask_zero_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, _Bool inplace) { _Bool is_node = 0; if (a->grad) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); int32_t params[] = { n_past }; ggml_set_op_params(result, params, sizeof(params)); result->op = GGML_OP_DIAG_MASK_ZERO; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_diag_mask_zero( struct ggml_context * ctx, struct ggml_tensor * a, int n_past) { return ggml_diag_mask_zero_impl(ctx, a, n_past, 0); } struct ggml_tensor * ggml_diag_mask_zero_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past) { return ggml_diag_mask_zero_impl(ctx, a, n_past, 1); } static struct ggml_tensor * ggml_soft_max_impl( struct ggml_context * ctx, struct ggml_tensor * a, _Bool inplace) { _Bool is_node = 0; if (a->grad) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_SOFT_MAX; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_soft_max( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_soft_max_impl(ctx, a, 0); } struct ggml_tensor * ggml_soft_max_inplace( struct ggml_context * ctx, struct ggml_tensor * a) { return ggml_soft_max_impl(ctx, a, 1); } static struct ggml_tensor * ggml_soft_max_back_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, _Bool inplace) { _Bool is_node = 0; if (a->grad || b->grad) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); result->op = GGML_OP_SOFT_MAX_BACK; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_soft_max_back( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_soft_max_back_impl(ctx, a, b, 0); } struct ggml_tensor * ggml_soft_max_back_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { return ggml_soft_max_back_impl(ctx, a, b, 1); } static struct ggml_tensor * ggml_rope_impl( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx, float freq_base, float freq_scale, float xpos_base, _Bool xpos_down, _Bool inplace) { do { if (!(n_past >= 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 6969, "n_past >= 0"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); int32_t params[8] = { n_past, n_dims, mode, n_ctx }; memcpy(params + 4, &freq_base, sizeof(float)); memcpy(params + 5, &freq_scale, sizeof(float)); memcpy(params + 6, &xpos_base, sizeof(float)); memcpy(params + 7, &xpos_down, sizeof(_Bool)); ggml_set_op_params(result, params, sizeof(params)); result->op = GGML_OP_ROPE; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_rope( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx) { return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, 0.0f, 0, 0); } struct ggml_tensor * ggml_rope_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx) { return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, 0.0f, 0, 1); } struct ggml_tensor * ggml_rope_custom( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx, float freq_base, float freq_scale) { return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, 0.0f, 0, 0); } struct ggml_tensor * ggml_rope_custom_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx, float freq_base, float freq_scale) { return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, 0.0f, 0, 1); } struct ggml_tensor * ggml_rope_xpos_inplace( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, float base, _Bool down) { return ggml_rope_impl(ctx, a, n_past, n_dims, 0, 0, 10000.0f, 1.0f, base, down, 1); } struct ggml_tensor * ggml_rope_back( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_dims, int mode, int n_ctx, float freq_base, float freq_scale, float xpos_base, _Bool xpos_down) { do { if (!(n_past >= 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7059, "n_past >= 0"); abort(); } } while (0); do { if (!((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7060, "(mode & 4) == 0 && \"ggml_rope_back() for ChatGLM not implemented yet\""); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { is_node = 0; } struct ggml_tensor * result = ggml_dup_tensor(ctx, a); int32_t 
params[8] = { n_past, n_dims, mode, n_ctx }; memcpy(params + 4, &freq_base, sizeof(float)); memcpy(params + 5, &freq_scale, sizeof(float)); memcpy(params + 6, &xpos_base, sizeof(float)); memcpy(params + 7, &xpos_down, sizeof(_Bool)); ggml_set_op_params(result, params, sizeof(params)); result->op = GGML_OP_ROPE_BACK; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_alibi( struct ggml_context * ctx, struct ggml_tensor * a, int n_past, int n_head, float bias_max) { do { if (!(n_past >= 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7092, "n_past >= 0"); abort(); } } while (0); _Bool is_node = 0; if (a->grad) { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7096, "false"); abort(); } } while (0); is_node = 1; } struct ggml_tensor * result = ggml_view_tensor(ctx, a); int32_t op_params[3] = { n_past, n_head }; memcpy(op_params + 2, &bias_max, sizeof(float)); ggml_set_op_params(result, op_params, sizeof(op_params)); result->op = GGML_OP_ALIBI; result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_clamp( struct ggml_context * ctx, struct ggml_tensor * a, float min, float max) { _Bool is_node = 0; if (a->grad) { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7125, "false"); abort(); } } while (0); is_node = 1; } struct ggml_tensor * result = ggml_view_tensor(ctx, a); float params[] = { min, max }; ggml_set_op_params(result, params, sizeof(params)); result->op = GGML_OP_CLAMP; result->grad = is_node ? 
// Tail of ggml_clamp (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;

    return result;
}

// Output extent along one axis for a convolution:
//   (ins + 2*p - d*(ks - 1) - 1) / s + 1
// with ins = input extent, ks = kernel extent, s = stride, p = padding,
// d = dilation (names as used by the callers below). The division is integer
// division; s == 0 is not guarded against.
static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
}

// Creates a GGML_OP_CONV_1D node with sources a (src[0]) and b (src[1]).
//
// Aborts unless ggml_is_matrix(b) and a->ne[1] == b->ne[1]; also aborts
// whenever a->grad or b->grad is set (assertion on constant 0).
// The result is a new 2-D F32 tensor of extents
//   { conv_output_size(b->ne[0], a->ne[0], s0, p0, d0), a->ne[2] }.
// Op params are int32 { s0, p0, d0 }.
struct ggml_tensor * ggml_conv_1d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s0,
        int p0,
        int d0) {
    do { if (!(ggml_is_matrix(b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7155, "ggml_is_matrix(b)"); abort(); } } while (0);
    do { if (!(a->ne[1] == b->ne[1])) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7156, "a->ne[1] == b->ne[1]"); abort(); } } while (0);
    _Bool is_node = 0;

    if (a->grad || b->grad) {
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7160, "false"); abort(); } } while (0);
        is_node = 1;
    }

    // Only the first two entries are used: the tensor is created with 2 dims.
    const int64_t ne[4] = {
        ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
        a->ne[2], 1, 1,
    };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);

    int32_t params[] = { s0, p0, d0 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_CONV_1D;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_conv_1d (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_conv_1d with the padding fixed to a->ne[0] / 2 (integer division);
// stride s and dilation d are passed through.
struct ggml_tensor* ggml_conv_1d_ph(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s,
        int d) {
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
}

// Creates a GGML_OP_CONV_2D node with sources a (src[0]) and b (src[1]).
//
// Aborts unless a->ne[2] == b->ne[2]; also aborts whenever a->grad or b->grad
// is set (assertion on constant 0).
// The result is a new 4-D F32 tensor of extents
//   { conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
//     conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
//     a->ne[3], b->ne[3] }.
// Op params are int32 { s0, s1, p0, p1, d0, d1 }.
struct ggml_tensor * ggml_conv_2d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int s0,
        int s1,
        int p0,
        int p1,
        int d0,
        int d1) {

    do { if (!(a->ne[2] == b->ne[2])) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7205, "a->ne[2] == b->ne[2]"); abort(); } } while (0);
    _Bool is_node = 0;

    if (a->grad || b->grad) {
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7209, "false"); abort(); } } while (0);
        is_node = 1;
    }

    const int64_t ne[4] = {
        ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
        ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
        a->ne[3], b->ne[3],
    };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { s0, s1, p0, p1, d0, d1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_CONV_2D;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_conv_2d (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = b;

    return result;

}

// ggml_conv_2d with strides a->ne[0], a->ne[1], zero padding and dilation 1.
struct ggml_tensor * ggml_conv_2d_sk_p0(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
}

// ggml_conv_2d with stride 1, padding a->ne[0] / 2 and a->ne[1] / 2
// (integer division) and dilation 1.
struct ggml_tensor * ggml_conv_2d_s1_ph(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
}

// Output extent along one axis for a transposed convolution:
//   (ins - 1) * s - 2*p + ks
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
    return (ins - 1) * s - 2 * p + ks;
}

// Creates a GGML_OP_CONV_TRANSPOSE_2D node with sources a (src[0]) and
// b (src[1]); padding is fixed at 0 in both size computations.
//
// Aborts unless a->ne[3] == b->ne[2]; also aborts whenever a->grad or b->grad
// is set (assertion on constant 0).
// The result is a new 4-D F32 tensor of extents
//   { transpose_output_size(b->ne[0], a->ne[0], stride, 0),
//     transpose_output_size(b->ne[1], a->ne[1], stride, 0),
//     a->ne[2], b->ne[3] }.
// `stride` is stored as int32 op param 0.
struct ggml_tensor * ggml_conv_transpose_2d_p0(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        int stride) {
    do { if (!(a->ne[3] == b->ne[2])) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7261, "a->ne[3] == b->ne[2]"); abort(); } } while (0);

    _Bool is_node = 0;

    if (a->grad || b->grad) {
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7266, "false"); abort(); } } while (0);
        is_node = 1;
    }

    const int64_t ne[4] = {
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 ),
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 ),
        a->ne[2], b->ne[3],
    };

    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    ggml_set_op_params_i32(result, 0, stride);

    result->op = GGML_OP_CONV_TRANSPOSE_2D;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_conv_transpose_2d_p0 (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// Output extent along one axis for a pooling window:
//   (ins + 2*p - ks) / s + 1   (integer division; s == 0 is not guarded)
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
    return (ins + 2 * p - ks) / s + 1;
}

// Creates a GGML_OP_POOL_1D node with `a` as its only source.
//
// Aborts whenever a->grad is set (assertion on constant 0).
// The result is a new 2-D F32 tensor of extents
//   { pool_output_size(a->ne[0], k0, s0, p0), a->ne[1] }.
// Op params are int32 { op, k0, s0, p0 }.
struct ggml_tensor * ggml_pool_1d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        enum ggml_op_pool op,
        int k0,
        int s0,
        int p0) {

    _Bool is_node = 0;

    if (a->grad) {
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7307, "false"); abort(); } } while (0);
        is_node = 1;
    }

    // Third entry is implicitly zero; only two dims are passed on.
    const int64_t ne[3] = {
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
        a->ne[1],
    };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);

    int32_t params[] = { op, k0, s0, p0 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_POOL_1D;
    result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;

    return result;
}

// Creates a GGML_OP_POOL_2D node with `a` as its only source.
//
// Aborts whenever a->grad is set (assertion on constant 0).
// The result is a new 3-D F32 tensor of extents
//   { pool_output_size(a->ne[0], k0, s0, p0),
//     pool_output_size(a->ne[1], k1, s1, p1),
//     a->ne[2] }.
// Op params are int32 { op, k0, k1, s0, s1, p0, p1 }.
struct ggml_tensor * ggml_pool_2d(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        enum ggml_op_pool op,
        int k0,
        int k1,
        int s0,
        int s1,
        int p0,
        int p1) {

    _Bool is_node = 0;

    if (a->grad) {
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7343, "false"); abort(); } } while (0);
        is_node = 1;
    }

    const int64_t ne[3] = {
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
        a->ne[2],
    };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_POOL_2D;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_pool_2d (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;

    return result;
}

// Creates a GGML_OP_UPSCALE node with `a` as src[0] and src[1] set to NULL.
//
// Aborts whenever a->grad is set (assertion on constant 0).
// The result is a new 4-D tensor of a's type whose first two extents are
// a->ne[0] and a->ne[1] multiplied by scale_factor; extents 2 and 3 are copied.
// scale_factor is written directly into result->op_params[0].
static struct ggml_tensor * ggml_upscale_impl(
    struct ggml_context * ctx,
    struct ggml_tensor * a,
    int scale_factor) {
    _Bool is_node = 0;

    if (a->grad) {
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7373, "false"); abort(); } } while (0);
        is_node = 1;
    }

    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
            a->ne[0] * scale_factor,
            a->ne[1] * scale_factor,
            a->ne[2], a->ne[3]);

    result->op = GGML_OP_UPSCALE;
    result->op_params[0] = scale_factor;
    result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = ((void*)0);

    return result;
}

// Public entry point; forwards unchanged to ggml_upscale_impl.
struct ggml_tensor * ggml_upscale(
    struct ggml_context * ctx,
    struct ggml_tensor * a,
    int scale_factor) {
    return ggml_upscale_impl(ctx, a, scale_factor);
}

// Creates a GGML_OP_FLASH_ATTN node with sources q, k, v (src[0..2]).
//
// Aborts unless ggml_can_mul_mat(k, q).
// The result is a new F32 tensor with q's n_dims and extents.
// `masked` is stored as a single int32 op param (1 or 0).
// A grad tensor is created when any of q, k, v has a grad.
struct ggml_tensor * ggml_flash_attn(
        struct ggml_context * ctx,
        struct ggml_tensor * q,
        struct ggml_tensor * k,
        struct ggml_tensor * v,
        _Bool masked) {
    do { if (!(ggml_can_mul_mat(k, q))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7406, "ggml_can_mul_mat(k, q)"); abort(); } } while (0);

    _Bool is_node = 0;

    if (q->grad || k->grad || v->grad) {
        is_node = 1;
    }

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, q->n_dims, q->ne);

    int32_t t = masked ? 1 : 0;
    ggml_set_op_params(result, &t, sizeof(t));

    result->op = GGML_OP_FLASH_ATTN;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_flash_attn (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = q;
    result->src[1] = k;
    result->src[2] = v;

    return result;
}

// Creates a GGML_OP_FLASH_FF node with sources a, b0, b1, c0, c1 (src[0..4]).
//
// Aborts unless ggml_can_mul_mat(b0, a).
// The result is a new F32 tensor with a's n_dims and extents; no op params
// are set. A grad tensor is created when any of the five sources has a grad.
struct ggml_tensor * ggml_flash_ff(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b0,
        struct ggml_tensor * b1,
        struct ggml_tensor * c0,
        struct ggml_tensor * c1) {
    do { if (!(ggml_can_mul_mat(b0, a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7439, "ggml_can_mul_mat(b0, a)"); abort(); } } while (0);

    _Bool is_node = 0;

    if (a->grad || b0->grad || b1->grad || c0->grad || c1->grad) {
        is_node = 1;
    }

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne);

    result->op = GGML_OP_FLASH_FF;
    result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = b0;
    result->src[2] = b1;
    result->src[3] = c0;
    result->src[4] = c1;

    return result;
}

// Creates a GGML_OP_FLASH_ATTN_BACK node with sources q, k, v, d (src[0..3]).
//
// Aborts unless ggml_can_mul_mat(k, q) and the extents agree as asserted
// below, using D = q->ne[0], N = q->ne[1], M = k->ne[1], ne2 = q->ne[2],
// ne3 = q->ne[3]. The function body continues on the following lines.
struct ggml_tensor * ggml_flash_attn_back(
        struct ggml_context * ctx,
        struct ggml_tensor * q,
        struct ggml_tensor * k,
        struct ggml_tensor * v,
        struct ggml_tensor * d,
        _Bool masked) {
    do { if (!(ggml_can_mul_mat(k, q))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7471, "ggml_can_mul_mat(k, q)"); abort(); } } while (0);

    const int64_t D = q->ne[0];
    const int64_t N = q->ne[1];
    const int64_t M = k->ne[1];
    const int64_t ne2 = q->ne[2];
    const int64_t ne3 = q->ne[3];

    do { if (!(k->ne[0] == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7485, "k->ne[0] == D"); abort(); } } while (0);
    do { if (!(v->ne[0] == M)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7486, "v->ne[0] == M"); abort(); } } while (0);
    do { if (!(v->ne[1] == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7487, "v->ne[1] == D"); abort(); } } while (0);
    do { if (!(d->ne[0] == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7488, "d->ne[0] == D"); abort(); } } while (0);
    do { if (!(d->ne[1] == N)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7489, "d->ne[1] == N"); abort(); } } while (0);
    // (the next assertion's condition starts on the following line)
    do { if
// Interior of ggml_flash_attn_back: this line opens with the condition of an
// assertion whose `do { if` is on the previous line. The remaining checks
// require k, v and d to match q in extents 2 and 3.
(!(k->ne[2] == ne2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7490, "k->ne[2] == ne2"); abort(); } } while (0);
    do { if (!(k->ne[3] == ne3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7491, "k->ne[3] == ne3"); abort(); } } while (0);
    do { if (!(v->ne[2] == ne2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7492, "v->ne[2] == ne2"); abort(); } } while (0);
    do { if (!(v->ne[3] == ne3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7493, "v->ne[3] == ne3"); abort(); } } while (0);
    do { if (!(d->ne[2] == ne2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7494, "d->ne[2] == ne2"); abort(); } } while (0);
    do { if (!(d->ne[3] == ne3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7495, "d->ne[3] == ne3"); abort(); } } while (0);

    _Bool is_node = 0;

    if (q->grad || k->grad || v->grad) {
        is_node = 0; // assigned 0 on both paths: the result never gets a grad tensor
    }

    // Second extent is M + N + M.
    // NOTE(review): presumably room for the q, k and v gradients stacked in
    // one tensor — confirm against the compute kernel.
    int64_t ne[4] = {D,M+N+M,ne2,ne3};

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    // `masked` is stored as a single int32 op param (1 or 0).
    int32_t masked_i = masked ? 1 : 0;
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));

    result->op = GGML_OP_FLASH_ATTN_BACK;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_flash_attn_back (its body starts on the previous lines).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = q;
    result->src[1] = k;
    result->src[2] = v;
    result->src[3] = d;

    return result;
}

// Creates a GGML_OP_WIN_PART node with `a` as its only source.
//
// Aborts unless a->ne[3] == 1 and a is F32; also aborts whenever a->grad is
// set (assertion on constant 0).
// The result is a new 4-D F32 tensor of extents { a->ne[0], w, w, np }.
// Op params are int32 { npx, npy, w }.
struct ggml_tensor * ggml_win_part(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int w) {
    do { if (!(a->ne[3] == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7534, "a->ne[3] == 1"); abort(); } } while (0);
    do { if (!(a->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7535, "a->type == GGML_TYPE_F32"); abort(); } } while (0);

    _Bool is_node = 0;

    if (a->grad) {
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7540, "false"); abort(); } } while (0);
        is_node = 1;
    }

    // px / py: how far a->ne[1] / a->ne[2] fall short of the next multiple of
    // w (0 when already a multiple, for non-negative extents).
    const int px = (w - a->ne[1]%w)%w;
    const int py = (w - a->ne[2]%w)%w;

    // Number of w-sized steps along each padded axis, and their product.
    const int npx = (px + a->ne[1])/w;
    const int npy = (py + a->ne[2])/w;
    const int np = npx*npy;

    const int64_t ne[4] = { a->ne[0], w, w, np, };

    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

    int32_t params[] = { npx, npy, w };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_WIN_PART;
    result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;

    return result;
}

// Creates a GGML_OP_WIN_UNPART node with `a` as its only source.
//
// Aborts unless a is F32; also aborts whenever a->grad is set (assertion on
// constant 0).
// The result is a new 3-D F32 tensor of extents { a->ne[0], w0, h0 }.
// Op params are the single int32 { w }.
struct ggml_tensor * ggml_win_unpart(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int w0,
        int h0,
        int w) {
    do { if (!(a->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7574, "a->type == GGML_TYPE_F32"); abort(); } } while (0);

    _Bool is_node = 0;

    if (a->grad) {
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7579, "false"); abort(); } } while (0);
        is_node = 1;
    }

    // Fourth entry is unused: the tensor is created with 3 dims.
    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

    int32_t params[] = { w };
    ggml_set_op_params(result, params, sizeof(params));

    result->op = GGML_OP_WIN_UNPART;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_win_unpart (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;

    return result;
}

// Creates a GGML_OP_GET_REL_POS node with `a` as src[0] and src[1] = NULL.
//
// Aborts unless qh == kh and 2*max(qh, kh) - 1 == a->ne[1]; also aborts
// whenever a->grad is set (assertion on constant 0).
// The result is a new 3-D F16 tensor of extents { a->ne[0], kh, qh }.
struct ggml_tensor * ggml_get_rel_pos(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int qh,
        int kh) {
    do { if (!(qh == kh)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7603, "qh == kh"); abort(); } } while (0);
    do { if (!(2*((qh) > (kh) ? (qh) : (kh)) - 1 == a->ne[1])) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7604, "2*MAX(qh, kh) - 1 == a->ne[1]"); abort(); } } while (0);

    _Bool is_node = 0;

    if (a->grad) {
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7609, "false"); abort(); } } while (0);
        is_node = 1;
    }

    // Fourth entry is unused: the tensor is created with 3 dims.
    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);

    result->op = GGML_OP_GET_REL_POS;
    result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = ((void*)0);

    return result;
}

// Shared implementation behind ggml_add_rel_pos / ggml_add_rel_pos_inplace.
//
// Aborts unless pw and ph have the same shape, a / pw / ph are contiguous,
// pw and ph are F32, and the extent relations asserted below (continuing on
// the following line) hold.
static struct ggml_tensor * ggml_add_rel_pos_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * pw,
        struct ggml_tensor * ph,
        _Bool inplace) {
    do { if (!(ggml_are_same_shape(pw, ph))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7632, "ggml_are_same_shape(pw, ph)"); abort(); } } while (0);
    do { if (!(ggml_is_contiguous(a))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7633, "ggml_is_contiguous(a)"); abort(); } } while (0);
    do { if (!(ggml_is_contiguous(pw))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7634, "ggml_is_contiguous(pw)"); abort(); } } while (0);
    do { if (!(ggml_is_contiguous(ph))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7635, "ggml_is_contiguous(ph)"); abort(); } } while (0);
    do { if (!(ph->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7636, "ph->type == GGML_TYPE_F32"); abort(); } } while (0);
    // (this assertion's abort() and closing braces are on the following line)
    do { if (!(pw->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7637, "pw->type == GGML_TYPE_F32");
// Interior of ggml_add_rel_pos_impl: this line opens with the end of an
// assertion on pw->type that starts on the previous line.
abort(); } } while (0);
    do { if (!(pw->ne[3] == a->ne[2])) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7638, "pw->ne[3] == a->ne[2]"); abort(); } } while (0);
    do { if (!(pw->ne[0]*pw->ne[0] == a->ne[0])) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7639, "pw->ne[0]*pw->ne[0] == a->ne[0]"); abort(); } } while (0);
    do { if (!(pw->ne[1]*pw->ne[2] == a->ne[1])) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7640, "pw->ne[1]*pw->ne[2] == a->ne[1]"); abort(); } } while (0);

    _Bool is_node = 0;

    // A grad tensor is only created for the non-inplace variant.
    if (!inplace && (a->grad || pw->grad || ph->grad)) {
        is_node = 1;
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
    // The inplace flag is recorded as int32 op param 0.
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);

    result->op = GGML_OP_ADD_REL_POS;
    result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = pw;
    result->src[2] = ph;

    return result;
}

// ggml_add_rel_pos_impl with inplace = 0.
struct ggml_tensor * ggml_add_rel_pos(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * pw,
        struct ggml_tensor * ph) {
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, 0);
}

// ggml_add_rel_pos_impl with inplace = 1.
struct ggml_tensor * ggml_add_rel_pos_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * pw,
        struct ggml_tensor * ph) {
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, 1);
}

// Shared implementation behind ggml_unary / ggml_unary_inplace.
//
// Creates a GGML_OP_UNARY node with `a` as its only source. The result comes
// from ggml_view_tensor when inplace is set, otherwise from ggml_dup_tensor.
// `op` is stored as int32 op param 0. A grad tensor is only created for the
// non-inplace variant when a->grad is set.
static struct ggml_tensor * ggml_unary_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        enum ggml_unary_op op,
        _Bool inplace) {
    _Bool is_node = 0;

    if (!inplace && (a->grad)) {
        is_node = 1;
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params_i32(result, 0, (int32_t) op);

    result->op = GGML_OP_UNARY;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_unary_impl (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;

    return result;
}

// ggml_unary_impl with inplace = 0.
struct ggml_tensor * ggml_unary(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        enum ggml_unary_op op) {
    return ggml_unary_impl(ctx, a, op, 0);
}

// ggml_unary_impl with inplace = 1.
struct ggml_tensor * ggml_unary_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        enum ggml_unary_op op) {
    return ggml_unary_impl(ctx, a, op, 1);
}

// Shared implementation behind ggml_map_unary_f32 / ggml_map_unary_inplace_f32.
//
// Creates a GGML_OP_MAP_UNARY node with `a` as its only source. The result
// comes from ggml_view_tensor when inplace is set, otherwise from
// ggml_dup_tensor. The bytes of `fun` itself (sizeof(fun)) are stored as the
// op params. A grad tensor is only created for the non-inplace variant when
// a->grad is set.
static struct ggml_tensor * ggml_map_unary_impl_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        const ggml_unary_op_f32_t fun,
        _Bool inplace) {
    _Bool is_node = 0;

    if (!inplace && a->grad) {
        is_node = 1;
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

    result->op = GGML_OP_MAP_UNARY;
    result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;

    return result;
}

// ggml_map_unary_impl_f32 with inplace = 0.
struct ggml_tensor * ggml_map_unary_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        const ggml_unary_op_f32_t fun) {
    return ggml_map_unary_impl_f32(ctx, a, fun, 0);
}

// ggml_map_unary_impl_f32 with inplace = 1.
struct ggml_tensor * ggml_map_unary_inplace_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        const ggml_unary_op_f32_t fun) {
    return ggml_map_unary_impl_f32(ctx, a, fun, 1);
}

// Shared implementation behind ggml_map_binary_f32 / ggml_map_binary_inplace_f32.
//
// Aborts unless a and b have the same shape. Creates a GGML_OP_MAP_BINARY
// node; the bytes of `fun` are stored as the op params. A grad tensor is only
// created for the non-inplace variant when a or b has a grad.
static struct ggml_tensor * ggml_map_binary_impl_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        const ggml_binary_op_f32_t fun,
        _Bool inplace) {
    do { if (!(ggml_are_same_shape(a, b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7761, "ggml_are_same_shape(a, b)"); abort(); } } while (0);

    _Bool is_node = 0;

    if (!inplace && (a->grad || b->grad)) {
        is_node = 1;
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

    result->op = GGML_OP_MAP_BINARY;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_map_binary_impl_f32 (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_map_binary_impl_f32 with inplace = 0.
struct ggml_tensor * ggml_map_binary_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        const ggml_binary_op_f32_t fun) {
    return ggml_map_binary_impl_f32(ctx, a, b, fun, 0);
}

// ggml_map_binary_impl_f32 with inplace = 1.
struct ggml_tensor * ggml_map_binary_inplace_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        const ggml_binary_op_f32_t fun) {
    return ggml_map_binary_impl_f32(ctx, a, b, fun, 1);
}

// Shared implementation behind ggml_map_custom1_f32 and its inplace variant.
//
// Creates a GGML_OP_MAP_CUSTOM1_F32 node with `a` as its only source. The
// result comes from ggml_view_tensor when inplace is set, otherwise from
// ggml_dup_tensor. The bytes of `fun` are stored as the op params. A grad
// tensor is only created for the non-inplace variant when a->grad is set.
static struct ggml_tensor * ggml_map_custom1_impl_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        const ggml_custom1_op_f32_t fun,
        _Bool inplace) {
    _Bool is_node = 0;

    if (!inplace && a->grad) {
        is_node = 1;
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

    result->op = GGML_OP_MAP_CUSTOM1_F32;
    result->grad = is_node ? ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;

    return result;
}

// ggml_map_custom1_impl_f32 with inplace = 0.
struct ggml_tensor * ggml_map_custom1_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        const ggml_custom1_op_f32_t fun) {
    return ggml_map_custom1_impl_f32(ctx, a, fun, 0);
}

// ggml_map_custom1_impl_f32 with inplace = 1.
struct ggml_tensor * ggml_map_custom1_inplace_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        const ggml_custom1_op_f32_t fun) {
    return ggml_map_custom1_impl_f32(ctx, a, fun, 1);
}

// Shared implementation behind ggml_map_custom2_f32 and its inplace variant.
//
// Creates a GGML_OP_MAP_CUSTOM2_F32 node; the bytes of `fun` are stored as
// the op params. Unlike ggml_map_binary_impl_f32 there is no shape assertion.
// A grad tensor is only created for the non-inplace variant when a or b has a
// grad.
static struct ggml_tensor * ggml_map_custom2_impl_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        const ggml_custom2_op_f32_t fun,
        _Bool inplace) {
    _Bool is_node = 0;

    if (!inplace && (a->grad || b->grad)) {
        is_node = 1;
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

    result->op = GGML_OP_MAP_CUSTOM2_F32;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_map_custom2_impl_f32 (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// ggml_map_custom2_impl_f32 with inplace = 0.
struct ggml_tensor * ggml_map_custom2_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        const ggml_custom2_op_f32_t fun) {
    return ggml_map_custom2_impl_f32(ctx, a, b, fun, 0);
}

// ggml_map_custom2_impl_f32 with inplace = 1.
struct ggml_tensor * ggml_map_custom2_inplace_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        const ggml_custom2_op_f32_t fun) {
    return ggml_map_custom2_impl_f32(ctx, a, b, fun, 1);
}

// Shared implementation behind ggml_map_custom3_f32 and its inplace variant.
//
// Creates a GGML_OP_MAP_CUSTOM3_F32 node with sources a, b, c. The result
// comes from ggml_view_tensor when inplace is set, otherwise from
// ggml_dup_tensor. The bytes of `fun` are stored as the op params. A grad
// tensor is only created for the non-inplace variant when any source has a
// grad.
static struct ggml_tensor * ggml_map_custom3_impl_f32(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c,
        const ggml_custom3_op_f32_t fun,
        _Bool inplace) {
    _Bool is_node = 0;

    if (!inplace && (a->grad || b->grad || c->grad)) {
        is_node = 1;
    }

    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

    ggml_set_op_params(result, (const void *) &fun, sizeof(fun));

    result->op = GGML_OP_MAP_CUSTOM3_F32;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; result->src[2] = c; return result; } struct ggml_tensor * ggml_map_custom3_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, const ggml_custom3_op_f32_t fun) { return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, 0); } struct ggml_tensor * ggml_map_custom3_inplace_f32( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, const ggml_custom3_op_f32_t fun) { return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, 1); } struct ggml_map_custom1_op_params { ggml_custom1_op_t fun; int n_tasks; void * userdata; }; static struct ggml_tensor * ggml_map_custom1_impl( struct ggml_context * ctx, struct ggml_tensor * a, const ggml_custom1_op_t fun, int n_tasks, void * userdata, _Bool inplace) { do { if (!(n_tasks == -1 || n_tasks > 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7937, "n_tasks == GGML_N_TASKS_MAX || n_tasks > 0"); abort(); } } while (0); _Bool is_node = 0; if (!inplace && a->grad) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); struct ggml_map_custom1_op_params params = { fun, n_tasks, userdata }; ggml_set_op_params(result, (const void *) ¶ms, sizeof(params)); result->op = GGML_OP_MAP_CUSTOM1; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; return result; } struct ggml_tensor * ggml_map_custom1( struct ggml_context * ctx, struct ggml_tensor * a, const ggml_custom1_op_t fun, int n_tasks, void * userdata) { return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, 0); } struct ggml_tensor * ggml_map_custom1_inplace( struct ggml_context * ctx, struct ggml_tensor * a, const ggml_custom1_op_t fun, int n_tasks, void * userdata) { return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, 1); } struct ggml_map_custom2_op_params { ggml_custom2_op_t fun; int n_tasks; void * userdata; }; static struct ggml_tensor * ggml_map_custom2_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, const ggml_custom2_op_t fun, int n_tasks, void * userdata, _Bool inplace) { do { if (!(n_tasks == -1 || n_tasks > 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 7995, "n_tasks == GGML_N_TASKS_MAX || n_tasks > 0"); abort(); } } while (0); _Bool is_node = 0; if (!inplace && (a->grad || b->grad)) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); struct ggml_map_custom2_op_params params = { fun, n_tasks, userdata }; ggml_set_op_params(result, (const void *) ¶ms, sizeof(params)); result->op = GGML_OP_MAP_CUSTOM2; result->grad = is_node ? 
ggml_dup_tensor(ctx, result) : ((void*)0); result->src[0] = a; result->src[1] = b; return result; } struct ggml_tensor * ggml_map_custom2( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, const ggml_custom2_op_t fun, int n_tasks, void * userdata) { return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, 0); } struct ggml_tensor * ggml_map_custom2_inplace( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, const ggml_custom2_op_t fun, int n_tasks, void * userdata) { return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, 1); } struct ggml_map_custom3_op_params { ggml_custom3_op_t fun; int n_tasks; void * userdata; }; static struct ggml_tensor * ggml_map_custom3_impl( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_tensor * c, const ggml_custom3_op_t fun, int n_tasks, void * userdata, _Bool inplace) { do { if (!(n_tasks == -1 || n_tasks > 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8057, "n_tasks == GGML_N_TASKS_MAX || n_tasks > 0"); abort(); } } while (0); _Bool is_node = 0; if (!inplace && (a->grad || b->grad || c->grad)) { is_node = 1; } struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); struct ggml_map_custom3_op_params params = { fun, n_tasks, userdata }; ggml_set_op_params(result, (const void *) ¶ms, sizeof(params)); result->op = GGML_OP_MAP_CUSTOM3; result->grad = is_node ? 
// Tail of ggml_map_custom3_impl (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = c;

    return result;
}

// ggml_map_custom3_impl with inplace = 0.
struct ggml_tensor * ggml_map_custom3(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c,
        const ggml_custom3_op_t fun,
        int n_tasks,
        void * userdata) {
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, 0);
}

// ggml_map_custom3_impl with inplace = 1.
struct ggml_tensor * ggml_map_custom3_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c,
        const ggml_custom3_op_t fun,
        int n_tasks,
        void * userdata) {
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, 1);
}

// Creates a GGML_OP_CROSS_ENTROPY_LOSS node with sources a and b.
//
// Aborts unless a and b have the same shape.
// The result is a new 1-D tensor of a's type with a single element.
// A grad tensor is created when a or b has a grad.
struct ggml_tensor * ggml_cross_entropy_loss(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b) {
    do { if (!(ggml_are_same_shape(a, b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8113, "ggml_are_same_shape(a, b)"); abort(); } } while (0);
    _Bool is_node = 0;

    if (a->grad || b->grad) {
        is_node = 1;
    }

    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);

    result->op = GGML_OP_CROSS_ENTROPY_LOSS;
    // (the conditional expression continues on the following line)
    result->grad = is_node ?
// Tail of ggml_cross_entropy_loss (its head is on the previous line).
ggml_dup_tensor(ctx, result) : ((void*)0);
    result->src[0] = a;
    result->src[1] = b;

    return result;
}

// Creates a GGML_OP_CROSS_ENTROPY_LOSS_BACK node with sources a, b, c.
//
// Aborts unless a and b have the same shape and ggml_is_scalar(c).
// The result comes from ggml_dup_tensor(ctx, a); its grad is always NULL.
struct ggml_tensor * ggml_cross_entropy_loss_back(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
        struct ggml_tensor * c) {
    do { if (!(ggml_are_same_shape(a, b))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8137, "ggml_are_same_shape(a, b)"); abort(); } } while (0);
    do { if (!(ggml_is_scalar(c))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8138, "ggml_is_scalar(c)"); abort(); } } while (0);

    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

    result->op = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
    result->grad = ((void*)0);
    result->src[0] = a;
    result->src[1] = b;
    result->src[2] = c;

    return result;
}

// Marks `tensor` as a parameter and gives it a grad tensor obtained from
// ggml_dup_tensor(ctx, tensor).
//
// Aborts if the tensor already has a grad. Note that is_param is set before
// that check runs.
void ggml_set_param(
        struct ggml_context * ctx,
        struct ggml_tensor * tensor) {
    tensor->is_param = 1;

    do { if (!(tensor->grad == ((void*)0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8158, "tensor->grad == NULL"); abort(); } } while (0);
    tensor->grad = ggml_dup_tensor(ctx, tensor);
}

// Copies src0 into dst when both are contiguous and have the same type.
//
// Aborts unless the element counts match, both tensors are contiguous and
// the types are equal. Returns immediately for the GGML_TASK_INIT and
// GGML_TASK_FINALIZE phases. The partitioning by params->ith / params->nth
// and the actual copy follow on the next line.
static void ggml_compute_forward_dup_same_cont(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        struct ggml_tensor * dst) {
    do { if (!(ggml_nelements(dst) == ggml_nelements(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8168, "ggml_nelements(dst) == ggml_nelements(src0)"); abort(); } } while (0);
    do { if (!(ggml_is_contiguous(dst) && ggml_is_contiguous(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8169, "ggml_is_contiguous(dst) && ggml_is_contiguous(src0)"); abort(); } } while (0);
    do { if (!(src0->type == dst->type)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8170, "src0->type == dst->type"); abort(); } } while (0);

    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }

    const size_t nb00 = src0->nb[0];
    const size_t nb0 = dst->nb[0];

    const int ith = params->ith;
    const int nth = params->nth;

    // NOTE(review): the element count is held in an int here; ggml_nelements'
    // return type is not visible in this chunk — confirm no narrowing occurs.
    const int ne = ggml_nelements(dst);
const int dr = (ne + nth - 1) / nth; const int ie0 = dr * ith; const int ie1 = ((ie0 + dr) < (ne) ? (ie0 + dr) : (ne)); if (ie0 < ie1) { memcpy( ((char *) dst->data + ie0*nb0), ((char *) src0->data + ie0*nb00), (ie1 - ie0) * ggml_type_size(src0->type)); } } static void ggml_compute_forward_dup_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(ggml_nelements(dst) == ggml_nelements(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8200, "ggml_nelements(dst) == ggml_nelements(src0)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int ith = params->ith; const int nth = params->nth; if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) { ggml_compute_forward_dup_same_cont(params, src0, dst); return; } const int nr = ne01; const int dr = (nr + nth - 1) / nth; const int ir0 = dr * ith; const int ir1 = ((ir0 + dr) < (nr) ? 
(ir0 + dr) : (nr)); if (src0->type == dst->type && ne00 == ne0 && nb00 == ggml_type_size(src0->type) && nb0 == ggml_type_size(dst->type)) { const size_t rs = ne00*nb00; for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = ir0; i01 < ir1; i01++) { memcpy( ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3), ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03), rs); } } } return; } if (ggml_is_contiguous(dst)) { if (nb00 == sizeof(ggml_fp16_t)) { if (dst->type == GGML_TYPE_F16) { size_t id = 0; const size_t rs = ne00 * nb00; char * dst_ptr = (char *) dst->data; for (int i03 = 0; i03 < ne03; i03++) { for (int i02 = 0; i02 < ne02; i02++) { id += rs * ir0; for (int i01 = ir0; i01 < ir1; i01++) { const char * src0_ptr = (char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03; memcpy(dst_ptr + id, src0_ptr, rs); id += rs; } id += rs * (ne01 - ir1); } } } else if (dst->type == GGML_TYPE_F32) { size_t id = 0; float * dst_ptr = (float *) dst->data; for (int i03 = 0; i03 < ne03; i03++) { for (int i02 = 0; i02 < ne02; i02++) { id += ne00 * ir0; for (int i01 = ir0; i01 < ir1; i01++) { const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); for (int i00 = 0; i00 < ne00; i00++) { dst_ptr[id] = ((float) (src0_ptr[i00])); id++; } } id += ne00 * (ne01 - ir1); } } } else if (type_traits[dst->type].from_float) { ggml_from_float_t const quantize_row_q = type_traits[dst->type].from_float; float * src0_f32 = (float *) params->wdata + (ne00 + CACHE_LINE_SIZE_F32) * ith; size_t id = 0; size_t rs = nb0 * (ne00 / ggml_blck_size(dst->type)); char * dst_ptr = (char *) dst->data; for (int i03 = 0; i03 < ne03; i03++) { for (int i02 = 0; i02 < ne02; i02++) { id += rs * ir0; for (int i01 = ir0; i01 < ir1; i01++) { const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); for (int i00 = 0; i00 < ne00; i00++) { src0_f32[i00] = ((float) (src0_ptr[i00])); } 
quantize_row_q(src0_f32, dst_ptr + id, ne00); id += rs; } id += rs * (ne01 - ir1); } } } else { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8304, "false"); abort(); } } while (0); } } else { if (dst->type == GGML_TYPE_F32) { size_t id = 0; float * dst_ptr = (float *) dst->data; for (int i03 = 0; i03 < ne03; i03++) { for (int i02 = 0; i02 < ne02; i02++) { id += ne00 * ir0; for (int i01 = ir0; i01 < ir1; i01++) { for (int i00 = 0; i00 < ne00; i00++) { const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); dst_ptr[id] = ((float) (*src0_ptr)); id++; } } id += ne00 * (ne01 - ir1); } } } else if (dst->type == GGML_TYPE_F16) { size_t id = 0; ggml_fp16_t * dst_ptr = (ggml_fp16_t *) dst->data; for (int i03 = 0; i03 < ne03; i03++) { for (int i02 = 0; i02 < ne02; i02++) { id += ne00 * ir0; for (int i01 = ir0; i01 < ir1; i01++) { for (int i00 = 0; i00 < ne00; i00++) { const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); dst_ptr[id] = *src0_ptr; id++; } } id += ne00 * (ne01 - ir1); } } } else { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8346, "false"); abort(); } } while (0); } } return; } int64_t i10 = 0; int64_t i11 = 0; int64_t i12 = 0; int64_t i13 = 0; if (dst->type == GGML_TYPE_F16) { for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { i10 += ne00 * ir0; while (i10 >= ne0) { i10 -= ne0; if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } } } for (int64_t i01 = ir0; i01 < ir1; i01++) { for (int64_t i00 = 0; i00 < ne00; i00++) { const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3); memcpy(dst_ptr, src0_ptr, sizeof(ggml_fp16_t)); if (++i10 == ne00) { i10 = 0; if (++i11 == ne01) { i11 = 0; if (++i12 == ne02) { i12 = 0; 
if (++i13 == ne03) { i13 = 0; } } } } } } i10 += ne00 * (ne01 - ir1); while (i10 >= ne0) { i10 -= ne0; if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } } } } } } else if (dst->type == GGML_TYPE_F32) { for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { i10 += ne00 * ir0; while (i10 >= ne0) { i10 -= ne0; if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } } } for (int64_t i01 = ir0; i01 < ir1; i01++) { for (int64_t i00 = 0; i00 < ne00; i00++) { const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3); *(float *) dst_ptr = ((float) (*(const ggml_fp16_t *) src0_ptr)); if (++i10 == ne0) { i10 = 0; if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } } } } } i10 += ne00 * (ne01 - ir1); while (i10 >= ne0) { i10 -= ne0; if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } } } } } } else { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8463, "false"); abort(); } } while (0); } } static void ggml_compute_forward_dup_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(ggml_nelements(dst) == ggml_nelements(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8471, "ggml_nelements(dst) == ggml_nelements(src0)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 
= (src0)->nb[3]; (void)(nb03);;
    /* destination extents (neX) and byte strides (nbX) */
    const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);;
    const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;;

    const int ith = params->ith;
    const int nth = params->nth;

    /* both tensors contiguous and of the same type: plain contiguous copy */
    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
        ggml_compute_forward_dup_same_cont(params, src0, dst);
        return;
    }

    /* the ne01 rows of each (i02, i03) plane are split into nth slices of dr
     * rows; this call handles rows [ir0, ir1) */
    const int nr = ne01;

    const int dr = (nr + nth - 1) / nth;

    const int ir0 = dr * ith;
    const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr));

    /* same type, same row length, unit element stride on both sides:
     * one memcpy per row */
    if (src0->type == dst->type && ne00 == ne0 && nb00 == ggml_type_size(src0->type) && nb0 == ggml_type_size(dst->type)) {
        const size_t rs = ne00*nb00; /* bytes per row */
        for (int64_t i03 = 0; i03 < ne03; i03++) {
            for (int64_t i02 = 0; i02 < ne02; i02++) {
                for (int64_t i01 = ir0; i01 < ir1; i01++) {
                    memcpy( ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3), ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03), rs);
                }
            }
        }
        return;
    }

    /* contiguous destination: written through a linear position `id`, which
     * is advanced past the rows of other slices before and after the row loop */
    if (ggml_is_contiguous(dst)) {
        if (nb00 == sizeof(float)) {
            /* source rows have unit element stride */
            if (dst->type == GGML_TYPE_F32) {
                size_t id = 0; /* byte offset into dst */
                const size_t rs = ne00 * nb00;
                char * dst_ptr = (char *) dst->data;

                for (int i03 = 0; i03 < ne03; i03++) {
                    for (int i02 = 0; i02 < ne02; i02++) {
                        id += rs * ir0;
                        for (int i01 = ir0; i01 < ir1; i01++) {
                            const char * src0_ptr = (char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03;
                            memcpy(dst_ptr + id, src0_ptr, rs);
                            id += rs;
                        }
                        id += rs * (ne01 - ir1);
                    }
                }
            } else if (type_traits[dst->type].from_float) {
                /* dst type provides a from_float routine: convert row by row */
                ggml_from_float_t const quantize_row_q = type_traits[dst->type].from_float;

                size_t id = 0; /* byte offset into dst */
                size_t rs = nb0 * (ne00 / ggml_blck_size(dst->type)); /* bytes per dst row */
                char * dst_ptr = (char *) dst->data;

                for (int i03 = 0; i03 < ne03; i03++) {
                    for (int i02 = 0; i02 < ne02; i02++) {
                        id += rs * ir0;
                        for (int i01 = ir0; i01 < ir1; i01++) {
                            const float * src0_ptr = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
                            quantize_row_q(src0_ptr, dst_ptr + id, ne00);
                            id += rs;
                        }
                        id += rs * (ne01 - ir1);
                    }
                }
            } else {
                do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8551, "false"); abort(); } } while (0);
            }
        } else {
            /* strided source elements: address every element through nb00 */
            if (dst->type == GGML_TYPE_F32) {
                size_t id = 0; /* element offset into dst */
                float * dst_ptr = (float *) dst->data;

                for (int i03 = 0; i03 < ne03; i03++) {
                    for (int i02 = 0; i02 < ne02; i02++) {
                        id += ne00 * ir0;
                        for (int i01 = ir0; i01 < ir1; i01++) {
                            for (int i00 = 0; i00 < ne00; i00++) {
                                const float * src0_ptr = (float *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
                                dst_ptr[id] = *src0_ptr;
                                id++;
                            }
                        }
                        id += ne00 * (ne01 - ir1);
                    }
                }
            } else if (dst->type == GGML_TYPE_F16) {
                size_t id = 0; /* element offset into dst */
                ggml_fp16_t * dst_ptr = (ggml_fp16_t *) dst->data;

                for (int i03 = 0; i03 < ne03; i03++) {
                    for (int i02 = 0; i02 < ne02; i02++) {
                        id += ne00 * ir0;
                        for (int i01 = ir0; i01 < ir1; i01++) {
                            for (int i00 = 0; i00 < ne00; i00++) {
                                const float * src0_ptr = (float *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
                                /* float value converted by assignment to ggml_fp16_t */
                                dst_ptr[id] = (*src0_ptr);
                                id++;
                            }
                        }
                        id += ne00 * (ne01 - ir1);
                    }
                }
            } else {
                do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8593, "false"); abort(); } } while (0);
            }
        }
        return;
    }

    /* non-contiguous destination: (i10, i11, i12, i13) is the running 4-d
     * element index into dst, wrapping on the dst extents ne0..ne3 */
    int64_t i10 = 0;
    int64_t i11 = 0;
    int64_t i12 = 0;
    int64_t i13 = 0;

    if (dst->type == GGML_TYPE_F32) {
        for (int64_t i03 = 0; i03 < ne03; i03++) {
            for (int64_t i02 = 0; i02 < ne02; i02++) {
                /* skip the rows of this plane that precede our slice */
                i10 += ne00 * ir0;
                while (i10 >= ne0) {
                    i10 -= ne0;
                    if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } }
                }
                for (int64_t i01 = ir0; i01 < ir1; i01++) {
                    for (int64_t i00 = 0; i00 < ne00; i00++) {
                        const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
                        char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);

                        memcpy(dst_ptr, src0_ptr, sizeof(float));

                        /* advance the dst index by one element, with carry */
                        if (++i10 == ne0) { i10 = 0;
                            if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } }
                        }
                    }
                }
                /* skip the rows of this plane that follow our slice */
                i10 += ne00 * (ne01 - ir1);
                while (i10 >= ne0) {
                    i10 -= ne0;
                    if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } }
                }
            }
        }
    } else if (dst->type == GGML_TYPE_F16) {
        for (int64_t i03 = 0; i03 < ne03; i03++) {
            for (int64_t i02 = 0; i02 < ne02; i02++) {
                i10 += ne00 * ir0;
                while (i10 >= ne0) {
                    i10 -= ne0;
                    if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } }
                }
                for (int64_t i01 = ir0; i01 < ir1; i01++) {
                    for (int64_t i00 = 0; i00 < ne00; i00++) {
                        const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
                        char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);

                        /* float value converted by assignment to ggml_fp16_t */
                        *(ggml_fp16_t *) dst_ptr = (*(const float *) src0_ptr);

                        if (++i10 == ne0) { i10 = 0; if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } } }
                    }
                }
                i10 += ne00 * (ne01 - ir1);
                while (i10 >= ne0) {
                    i10 -= ne0;
                    if (++i11 == ne1) { i11 = 0; if (++i12 == ne2) { i12 = 0; if (++i13 == ne3) { i13 = 0; } } }
                }
            }
        }
    } else {
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8712, "false"); abort(); } } while (0);
    }
}

/*
 * Entry point of the dup/copy operation: picks an implementation from the
 * source type.
 *
 * Contiguous tensors of the same type go to
 * ggml_compute_forward_dup_same_cont regardless of type; otherwise only F16
 * and F32 sources are handled and any other source type aborts.
 */
static void ggml_compute_forward_dup(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        struct ggml_tensor * dst) {
    if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
        ggml_compute_forward_dup_same_cont(params, src0, dst);
        return;
    }
    switch (src0->type) {
        case GGML_TYPE_F16: { ggml_compute_forward_dup_f16(params, src0, dst); } break;
        case GGML_TYPE_F32: { ggml_compute_forward_dup_f32(params, src0, dst); } break;
        default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8735, "false"); abort(); } } while (0); } break;
    }
}

static void ggml_compute_forward_add_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct
ggml_tensor * src1, struct ggml_tensor * dst) {
    /* F32 addition: dst = src0 + src1, one row at a time.
     * Requires ggml_can_repeat_rows(src1, src0) and dst shaped like src0. */
    do { if (!(ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8747, "ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst)"); abort(); } } while (0);

    /* nothing to do in the init/finalize phases */
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; }

    const int ith = params->ith;
    const int nth = params->nth;

    const int nr = ggml_nrows(src0);

    /* extents (ne*) and byte strides (nb*) of src0, src1 and dst */
    const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);;
    const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);;
    const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);;
    const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);;
    const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);;
    const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;;

    /* rows of dst and src0 must be densely packed floats */
    do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8760, "nb0 == sizeof(float)"); abort(); } } while (0);
    do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8761, "nb00 == sizeof(float)"); abort(); } } while (0);

    /* rows are split into nth slices of dr rows; this call handles [ir0, ir1) */
    const int dr = (nr + nth - 1)/nth;

    const int ir0 = dr*ith;
    const int ir1 =
((ir0 + dr) < (nr) ? (ir0 + dr) : (nr));

    if (nb10 == sizeof(float)) {
        /* src1 rows are densely packed: add whole rows with ggml_vec_add_f32 */
        for (int ir = ir0; ir < ir1; ++ir) {
            /* decompose the flat row number into src0 indices (i03, i02, i01) */
            const int64_t i03 = ir/(ne02*ne01);
            const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
            const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);

            /* src1 indices wrap on src1's extents, so a smaller src1 is
             * reused along dimensions 1..3 */
            const int64_t i13 = i03 % ne13;
            const int64_t i12 = i02 % ne12;
            const int64_t i11 = i01 % ne11;

            float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
            float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
            float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);

            ggml_vec_add_f32(ne00, dst_ptr, src0_ptr, src1_ptr);
        }
    } else {
        /* src1 elements are strided: fetch each one through nb10 */
        for (int ir = ir0; ir < ir1; ++ir) {
            const int64_t i03 = ir/(ne02*ne01);
            const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
            const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);

            const int64_t i13 = i03 % ne13;
            const int64_t i12 = i02 % ne12;
            const int64_t i11 = i01 % ne11;

            float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 );
            float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);

            for (int i0 = 0; i0 < ne0; i0++) {
                float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i0*nb10);

                dst_ptr[i0] = src0_ptr[i0] + *src1_ptr;
            }
        }
    }
}

/*
 * Addition with an F16 src0, an F32 src1 and an F16 dst (types are asserted
 * in the body). All three tensors must have the same shape.
 */
static void ggml_compute_forward_add_f16_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
    do { if (!(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8822, "ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)"); abort(); } } while (0);

    /* nothing to do in the init/finalize phases */
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; }

    const int ith = params->ith;
    const int nth = params->nth;

    const int nr = ggml_nrows(src0);

    const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2];
(void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);;
    /* byte strides of src0, extents/strides of src1 and dst */
    const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);;
    const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);;
    const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);;
    const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);;
    const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;;

    /* type contract: F16 + F32 -> F16 */
    do { if (!(src0->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8835, "src0->type == GGML_TYPE_F16"); abort(); } } while (0);
    do { if (!(src1->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8836, "src1->type == GGML_TYPE_F32"); abort(); } } while (0);
    do { if (!(dst->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8837, "dst->type == GGML_TYPE_F16"); abort(); } } while (0);

    /* rows of dst and src0 must be densely packed ggml_fp16_t */
    do { if (!(nb0 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8839, "nb0 == sizeof(ggml_fp16_t)"); abort(); } } while (0);
    do { if (!(nb00 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8840, "nb00 == sizeof(ggml_fp16_t)"); abort(); } } while (0);

    /* rows are split into nth slices of dr rows; this call handles [ir0, ir1) */
    const int dr = (nr + nth - 1)/nth;

    const int ir0 = dr*ith;
    const int ir1 = ((ir0 + dr) < (nr) ?
(ir0 + dr) : (nr));

    if (nb10 == sizeof(float)) {
        for (int ir = ir0; ir < ir1; ++ir) {
            /* decompose the flat row number into (i3, i2, i1); the same
             * indices address all three tensors */
            const int i3 = ir/(ne2*ne1);
            const int i2 = (ir - i3*ne2*ne1)/ne1;
            const int i1 = (ir - i3*ne2*ne1 - i2*ne1);

            ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1);
            ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01);
            float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11);

            /* sum is formed in float and converted back on assignment */
            for (int i = 0; i < ne0; i++) {
                dst_ptr[i] = (((float) (src0_ptr[i])) + src1_ptr[i]);
            }
        }
    } else {
        /* src1 with strided elements is not supported */
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8867, "false"); abort(); } } while (0);
    }
}

/*
 * Addition with F16 src0, F16 src1 and F16 dst (types are asserted in the
 * body). All three tensors must have the same shape.
 */
static void ggml_compute_forward_add_f16_f16(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
    do { if (!(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8876, "ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)"); abort(); } } while (0);

    /* nothing to do in the init/finalize phases */
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; }

    const int ith = params->ith;
    const int nth = params->nth;

    const int nr = ggml_nrows(src0);

    const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);;
    const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);;
    const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);;
    const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 =
(src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);;
    /* destination extents and byte strides */
    const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);;
    const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;;

    /* type contract: F16 + F16 -> F16 */
    do { if (!(src0->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8889, "src0->type == GGML_TYPE_F16"); abort(); } } while (0);
    do { if (!(src1->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8890, "src1->type == GGML_TYPE_F16"); abort(); } } while (0);
    do { if (!(dst->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8891, "dst->type == GGML_TYPE_F16"); abort(); } } while (0);

    /* rows of dst and src0 must be densely packed ggml_fp16_t */
    do { if (!(nb0 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8893, "nb0 == sizeof(ggml_fp16_t)"); abort(); } } while (0);
    do { if (!(nb00 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8894, "nb00 == sizeof(ggml_fp16_t)"); abort(); } } while (0);

    /* rows are split into nth slices of dr rows; this call handles [ir0, ir1) */
    const int dr = (nr + nth - 1)/nth;

    const int ir0 = dr*ith;
    const int ir1 = ((ir0 + dr) < (nr) ?
(ir0 + dr) : (nr));

    if (nb10 == sizeof(ggml_fp16_t)) {
        for (int ir = ir0; ir < ir1; ++ir) {
            /* decompose the flat row number into (i3, i2, i1); the same
             * indices address all three tensors */
            const int i3 = ir/(ne2*ne1);
            const int i2 = (ir - i3*ne2*ne1)/ne1;
            const int i1 = (ir - i3*ne2*ne1 - i2*ne1);

            ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1);
            ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01);
            ggml_fp16_t * src1_ptr = (ggml_fp16_t *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11);

            /* both operands are widened to float, summed, and converted back
             * on assignment */
            for (int i = 0; i < ne0; i++) {
                dst_ptr[i] = (((float) (src0_ptr[i])) + ((float) (src1_ptr[i])));
            }
        }
    } else {
        /* src1 with strided elements is not supported */
        do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8921, "false"); abort(); } } while (0);
    }
}

/*
 * Addition of an F32 src1 onto a quantized src0, producing a dst of src0's
 * type (types are asserted in the body). All three tensors must have the
 * same shape.
 */
static void ggml_compute_forward_add_q_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
    do { if (!(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8930, "ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)"); abort(); } } while (0);

    /* nothing to do in the init/finalize phases */
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; }

    const int nr = ggml_nrows(src0);

    const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);;
    const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);;
    const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);;
    const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t
nb13 = (src1)->nb[3]; (void)(nb13);;
    /* destination extents and byte strides */
    const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);;
    const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;;

    const int ith = params->ith;
    const int nth = params->nth;

    /* row converters of src0's type, looked up in type_traits */
    const enum ggml_type type = src0->type;
    ggml_to_float_t const dequantize_row_q = type_traits[type].to_float;
    ggml_from_float_t const quantize_row_q = type_traits[type].from_float;

    /* src0 element stride equals its type size; src1 rows are packed floats */
    do { if (!(nb00 == ggml_type_size(type))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8948, "nb00 == ggml_type_size(type)"); abort(); } } while (0);
    do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8949, "nb10 == sizeof(float)"); abort(); } } while (0);

    /* dst strides must be non-decreasing from dimension 0 to 3 */
    do { if (!(nb0 <= nb1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8952, "nb0 <= nb1"); abort(); } } while (0);
    do { if (!(nb1 <= nb2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8953, "nb1 <= nb2"); abort(); } } while (0);
    do { if (!(nb2 <= nb3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8954, "nb2 <= nb3"); abort(); } } while (0);

    /* type contract: quantized + F32 -> same quantized type */
    do { if (!(ggml_is_quantized(src0->type))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8956, "ggml_is_quantized(src0->type)"); abort(); } } while (0);
    do { if (!(dst->type == src0->type)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8957, "dst->type == src0->type"); abort(); } } while (0);
    do { if (!(src1->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 8958, "src1->type == GGML_TYPE_F32"); abort(); } } while (0);

    /* rows are split into nth slices of dr rows; this call handles [ir0, ir1) */
    const int dr = (nr + nth - 1)/nth;

    const int ir0 = dr*ith;
    const int ir1 = ((ir0 + dr) < (nr) ?
(ir0 + dr) : (nr));

    /* per-slice float scratch row inside params->wdata */
    float * wdata = (float *) params->wdata + (ne00 + CACHE_LINE_SIZE_F32) * ith;

    for (int ir = ir0; ir < ir1; ++ir) {
        /* decompose the flat row number into src0 indices */
        const int i03 = ir/(ne02*ne01);
        const int i02 = (ir - i03*ne02*ne01)/ne01;
        const int i01 = (ir - i03*ne02*ne01 - i02*ne01);

        /* src1 and dst use the same indices as src0 */
        const int i13 = i03;
        const int i12 = i02;
        const int i11 = i01;

        const int i3 = i03;
        const int i2 = i02;
        const int i1 = i01;

        void * src0_row = (void *) ((char *) src0->data + (i01*nb01 + i02*nb02 + i03*nb03));
        float * src1_row = (float *)((char *) src1->data + (i11*nb11 + i12*nb12 + i13*nb13));
        void * dst_row = (void *) ((char *) dst->data + ( i1*nb1 + i2*nb2 + i3*nb3));

        ((void) (0));

        /* src0 row -> float scratch */
        dequantize_row_q(src0_row, wdata, ne00);
        /* combine with the src1 row (presumably wdata += src1_row; see
         * ggml_vec_acc_f32) */
        ggml_vec_acc_f32(ne00, wdata, src1_row);
        /* float scratch -> dst row in src0's type */
        quantize_row_q(wdata, dst_row, ne00);
    }
}

/*
 * Entry point of the add operation: picks an implementation from the type
 * of src0 (and, for F16, of src1).
 *
 * F32 -> add_f32; F16 with F16/F32 src1 -> add_f16_f16 / add_f16_f32;
 * the listed Q* types -> add_q_f32. Anything else aborts.
 */
static void ggml_compute_forward_add(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
    switch (src0->type) {
        case GGML_TYPE_F32: { ggml_compute_forward_add_f32(params, src0, src1, dst); } break;
        case GGML_TYPE_F16: {
            if (src1->type == GGML_TYPE_F16) {
                ggml_compute_forward_add_f16_f16(params, src0, src1, dst);
            } else if (src1->type == GGML_TYPE_F32) {
                ggml_compute_forward_add_f16_f32(params, src0, src1, dst);
            } else {
                do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9018, "false"); abort(); } } while (0);
            }
        } break;
        case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_1: case GGML_TYPE_Q8_0:
        case GGML_TYPE_Q2_K: case GGML_TYPE_Q3_K: case GGML_TYPE_Q4_K: case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: {
            ggml_compute_forward_add_q_f32(params, src0, src1, dst);
        } break;
        default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9036, "false"); abort(); } } while (0); } break;
    }
}

/*
 * Adds the scalar stored in src1 to every element of an F32 src0.
 * dst must have src0's shape and src1 must be a scalar (asserted in the body).
 */
static void ggml_compute_forward_add1_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
    do {
if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9048, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0);
    do { if (!(ggml_is_scalar(src1))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9049, "ggml_is_scalar(src1)"); abort(); } } while (0);

    /* nothing to do in the init/finalize phases */
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; }

    const int ith = params->ith;
    const int nth = params->nth;

    const int nr = ggml_nrows(src0);

    /* extents (ne*) and byte strides (nb*) of src0 and dst */
    const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);;
    const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);;
    const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);;
    const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;;

    /* rows of dst and src0 must be densely packed floats */
    do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9062, "nb0 == sizeof(float)"); abort(); } } while (0);
    do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9063, "nb00 == sizeof(float)"); abort(); } } while (0);

    /* rows are split into nth slices of dr rows; this call handles [ir0, ir1) */
    const int dr = (nr + nth - 1)/nth;

    const int ir0 = dr*ith;
    const int ir1 = ((ir0 + dr) < (nr) ?
(ir0 + dr) : (nr));

    for (int ir = ir0; ir < ir1; ++ir) {
        /* decompose the flat row number into (i3, i2, i1) */
        const int i3 = ir/(ne2*ne1);
        const int i2 = (ir - i3*ne2*ne1)/ne1;
        const int i1 = (ir - i3*ne2*ne1 - i2*ne1);

        /* preprocessor line marker: must remain on a line of its own */
# 9087 "ggml.c"
        /* dst row = src0 row + scalar read from src1->data */
        ggml_vec_add1_f32(ne0, (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), *(float *) src1->data);
    }
}

/*
 * Adds the F32 scalar stored in src1 to every element of an F16 src0,
 * writing an F16 dst (types are asserted in the body).
 * dst must have src0's shape and src1 must be a scalar.
 */
static void ggml_compute_forward_add1_f16_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
    do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9100, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0);
    do { if (!(ggml_is_scalar(src1))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9101, "ggml_is_scalar(src1)"); abort(); } } while (0);

    /* nothing to do in the init/finalize phases */
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; }

    /* the scalar addend, read once as a float */
    const float v = *(float *) src1->data;

    const int ith = params->ith;
    const int nth = params->nth;

    const int nr = ggml_nrows(src0);

    /* extents (ne*) and byte strides (nb*) of src0 and dst */
    const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);;
    const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);;
    const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);;
    const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;;

    /* type contract: F16 + F32 scalar -> F16 */
    do { if (!(src0->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9117, "src0->type == GGML_TYPE_F16"); abort(); } } while (0);
do { if (!(src1->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9118, "src1->type == GGML_TYPE_F32"); abort(); } } while (0); do { if (!(dst->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9119, "dst->type == GGML_TYPE_F16"); abort(); } } while (0); do { if (!(nb0 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9121, "nb0 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nb00 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9122, "nb00 == sizeof(ggml_fp16_t)"); abort(); } } while (0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); for (int ir = ir0; ir < ir1; ++ir) { const int i3 = ir/(ne2*ne1); const int i2 = (ir - i3*ne2*ne1)/ne1; const int i1 = (ir - i3*ne2*ne1 - i2*ne1); ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); for (int i = 0; i < ne0; i++) { dst_ptr[i] = (((float) (src0_ptr[i])) + v); } } } static void ggml_compute_forward_add1_f16_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9150, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); do { if (!(ggml_is_scalar(src1))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9151, "ggml_is_scalar(src1)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const float v = ((float) (*(ggml_fp16_t *) src1->data)); const int ith = params->ith; const int nth = params->nth; const int nr = ggml_nrows(src0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const 
int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; do { if (!(src0->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9167, "src0->type == GGML_TYPE_F16"); abort(); } } while (0); do { if (!(src1->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9168, "src1->type == GGML_TYPE_F16"); abort(); } } while (0); do { if (!(dst->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9169, "dst->type == GGML_TYPE_F16"); abort(); } } while (0); do { if (!(nb0 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9171, "nb0 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nb00 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9172, "nb00 == sizeof(ggml_fp16_t)"); abort(); } } while (0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? 
(ir0 + dr) : (nr)); for (int ir = ir0; ir < ir1; ++ir) { const int i3 = ir/(ne2*ne1); const int i2 = (ir - i3*ne2*ne1)/ne1; const int i1 = (ir - i3*ne2*ne1 - i2*ne1); ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); for (int i = 0; i < ne0; i++) { dst_ptr[i] = (((float) (src0_ptr[i])) + v); } } } static void ggml_compute_forward_add1_q_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9200, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); do { if (!(ggml_is_scalar(src1))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9201, "ggml_is_scalar(src1)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const float v = *(float *) src1->data; const int ith = params->ith; const int nth = params->nth; const int nr = ggml_nrows(src0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const enum ggml_type type = src0->type; ggml_to_float_t const dequantize_row_q = 
type_traits[type].to_float; ggml_from_float_t const quantize_row_q = type_traits[type].from_float; do { if (!(nb00 == ggml_type_size(type))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9222, "nb00 == ggml_type_size(type)"); abort(); } } while (0); do { if (!(nb0 <= nb1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9225, "nb0 <= nb1"); abort(); } } while (0); do { if (!(nb1 <= nb2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9226, "nb1 <= nb2"); abort(); } } while (0); do { if (!(nb2 <= nb3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9227, "nb2 <= nb3"); abort(); } } while (0); do { if (!(ggml_is_quantized(src0->type))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9229, "ggml_is_quantized(src0->type)"); abort(); } } while (0); do { if (!(dst->type == src0->type)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9230, "dst->type == src0->type"); abort(); } } while (0); do { if (!(src1->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9231, "src1->type == GGML_TYPE_F32"); abort(); } } while (0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? 
(ir0 + dr) : (nr)); float * wdata = (float *) params->wdata + (ne0 + CACHE_LINE_SIZE_F32) * ith; for (int ir = ir0; ir < ir1; ++ir) { const int i3 = ir/(ne2*ne1); const int i2 = (ir - i3*ne2*ne1)/ne1; const int i1 = (ir - i3*ne2*ne1 - i2*ne1); void * src0_row = (void *) ((char *) src0->data + (i1*nb01 + i2*nb02 + i3*nb03)); void * dst_row = (void *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb0 )); ((void) (0)); dequantize_row_q(src0_row, wdata, ne0); ggml_vec_acc1_f32(ne0, wdata, v); quantize_row_q(wdata, dst_row, ne0); } } static void ggml_compute_forward_add1( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_add1_f32(params, src0, src1, dst); } break; case GGML_TYPE_F16: { if (src1->type == GGML_TYPE_F16) { ggml_compute_forward_add1_f16_f16(params, src0, src1, dst); } else if (src1->type == GGML_TYPE_F32) { ggml_compute_forward_add1_f16_f32(params, src0, src1, dst); } else { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9281, "false"); abort(); } } while (0); } } break; case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_1: case GGML_TYPE_Q8_0: case GGML_TYPE_Q8_1: case GGML_TYPE_Q2_K: case GGML_TYPE_Q3_K: case GGML_TYPE_Q4_K: case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: { ggml_compute_forward_add1_q_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9300, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_acc_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9313, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(dst) && 
ggml_is_contiguous(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9314, "ggml_is_contiguous(dst) && ggml_is_contiguous(src0)"); abort(); } } while (0); /* view description is packed into dst->op_params as int32: [0..2] = byte strides nb1..nb3, [3] = byte offset, [4] = inplace flag */ size_t nb1 = ((int32_t *) dst->op_params)[0]; size_t nb2 = ((int32_t *) dst->op_params)[1]; size_t nb3 = ((int32_t *) dst->op_params)[2]; size_t offset = ((int32_t *) dst->op_params)[3]; _Bool inplace = (_Bool) ((int32_t *) dst->op_params)[4]; /* when not operating in place, the INIT pass seeds dst with a full copy of src0 */ if (!inplace && (params->type == GGML_TASK_INIT)) { memcpy( ((char *) dst->data), ((char *) src0->data), ggml_nbytes(dst)); } if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int ith = params->ith; const int nth = params->nth; const int nr = ggml_nrows(src1); const int nc = src1->ne[0]; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; /* src0 and dst are addressed with the view strides taken from op_params, not with their own nb[] */ const size_t nb0 = ggml_element_size(src0); const size_t nb00 = nb0; const size_t nb01 = nb1; const size_t nb02 = nb2; const size_t nb03 = nb3; /* bounds checks: the last element touched through the view must lie inside dst and src0 */ do { if (!(offset + (ne10 == 0 ? 0 : ne10-1)*nb0 + (ne11 == 0 ? 0 : ne11-1)*nb1 + (ne12 == 0 ? 0 : ne12-1)*nb2 + (ne13 == 0 ? 0 : ne13-1)*nb3 < ggml_nbytes(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9354, "offset + (ne10 == 0 ? 0 : ne10-1)*nb0 + (ne11 == 0 ? 0 : ne11-1)*nb1 + (ne12 == 0 ? 0 : ne12-1)*nb2 + (ne13 == 0 ? 0 : ne13-1)*nb3 < ggml_nbytes(dst)"); abort(); } } while (0); do { if (!(offset + (ne10 == 0 ? 0 : ne10-1)*nb00 + (ne11 == 0 ? 0 : ne11-1)*nb01 + (ne12 == 0 ? 0 : ne12-1)*nb02 + (ne13 == 0 ? 0 : ne13-1)*nb03 < ggml_nbytes(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9355, "offset + (ne10 == 0 ? 0 : ne10-1)*nb00 + (ne11 == 0 ? 
0 : ne11-1)*nb01 + (ne12 == 0 ? 0 : ne12-1)*nb02 + (ne13 == 0 ? 0 : ne13-1)*nb03 < ggml_nbytes(src0)"); abort(); } } while (0); do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9357, "nb10 == sizeof(float)"); abort(); } } while (0); /* per-thread row range [ir0, ir1) over the rows of src1 */ const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); for (int ir = ir0; ir < ir1; ++ir) { const int i3 = ir/(ne12*ne11); const int i2 = (ir - i3*ne12*ne11)/ne11; const int i1 = (ir - i3*ne12*ne11 - i2*ne11); ggml_vec_add_f32(nc, (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + offset), (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + offset), (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11)); } } /* Type dispatch for acc: only GGML_TYPE_F32 has a kernel; F16, every listed quantized type and any other src0 type fall through to the aborting assert. */ static void ggml_compute_forward_acc( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_acc_f32(params, src0, src1, dst); } break; case GGML_TYPE_F16: case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_1: case GGML_TYPE_Q8_0: case GGML_TYPE_Q8_1: case GGML_TYPE_Q2_K: case GGML_TYPE_Q3_K: case GGML_TYPE_Q4_K: case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9412, "false"); abort(); } } while (0); } break; } } /* sub for F32: dst = src0 - src1, row by row. No ith/nth partitioning is done here: the calling thread walks all nr rows. NOTE(review): the two leading ((void) (0)) statements look like compiled-out asserts; confirm what they checked against the unpreprocessed source. */ static void ggml_compute_forward_sub_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int nr = ggml_nrows(src0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = 
(src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9435, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9436, "nb00 == sizeof(float)"); abort(); } } while (0); /* fast path: src1 elements are densely packed, so whole rows go through ggml_vec_sub_f32 */ if (nb10 == sizeof(float)) { for (int ir = 0; ir < nr; ++ir) { const int i3 = ir/(ne2*ne1); const int i2 = (ir - i3*ne2*ne1)/ne1; const int i1 = (ir - i3*ne2*ne1 - i2*ne1); # 9453 "ggml.c" ggml_vec_sub_f32(ne0, (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11)); } } else { /* slow path: src1 is read one element at a time with byte stride nb10 */ for (int ir = 0; ir < nr; ++ir) { const int i3 = ir/(ne2*ne1); const int i2 = (ir - i3*ne2*ne1)/ne1; const int i1 = (ir - i3*ne2*ne1 - i2*ne1); float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); float * src0_ptr = (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); for (int i0 = 0; i0 < ne0; i0++) { float * 
src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11 + i0*nb10); dst_ptr[i0] = src0_ptr[i0] - *src1_ptr; } } } } /* Type dispatch for sub: only GGML_TYPE_F32 src0 is supported; any other type aborts. */ static void ggml_compute_forward_sub( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_sub_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9492, "false"); abort(); } } while (0); } break; } } /* mul for F32: dst = src0 * src1, with src1 repeated along rows (asserted via ggml_can_repeat_rows). Rows are interleaved across threads (ir = ith, ith + nth, ...), and the src1 row index wraps modulo ne11/ne12/ne13. */ static void ggml_compute_forward_mul_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9504, "ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int ith = params->ith; const int nth = params->nth; # 9521 "ggml.c" const int64_t nr = ggml_nrows(src0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; 
(void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9525, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9526, "nb00 == sizeof(float)"); abort(); } } while (0); do { if (!(ne00 == ne10)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9527, "ne00 == ne10"); abort(); } } while (0); /* fast path: src1 rows are densely packed floats, so each row goes through ggml_vec_mul_f32 */ if (nb10 == sizeof(float)) { for (int64_t ir = ith; ir < nr; ir += nth) { const int64_t i03 = ir/(ne02*ne01); const int64_t i02 = (ir - i03*ne02*ne01)/ne01; const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); /* wrap the src0 row coordinates into src1's extents */ const int64_t i13 = i03 % ne13; const int64_t i12 = i02 % ne12; const int64_t i11 = i01 % ne11; float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11); ggml_vec_mul_f32(ne00, dst_ptr, src0_ptr, src1_ptr); } } else { /* slow path: src1 elements are fetched individually with byte stride nb10 */ for (int64_t ir = ith; ir < nr; ir += nth) { const int64_t i03 = ir/(ne02*ne01); const int64_t i02 = (ir - i03*ne02*ne01)/ne01; const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01); const int64_t i13 = i03 % ne13; const int64_t i12 = i02 % ne12; const int64_t i11 = i01 % ne11; float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1 ); float * src0_ptr = (float *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01); for (int64_t i0 = 0; i0 < ne00; i0++) { float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i0*nb10); dst_ptr[i0] = src0_ptr[i0] * 
(*src1_ptr); } } } } /* Type dispatch for mul: asserts that src1 is F32, then forwards F32 src0 to the F32 kernel; any other src0 type aborts. */ static void ggml_compute_forward_mul( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(src1->type == GGML_TYPE_F32 && "only f32 src1 supported for now")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9584, "src1->type == GGML_TYPE_F32 && \"only f32 src1 supported for now\""); abort(); } } while (0); switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_mul_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9593, "false"); abort(); } } while (0); } break; } } /* div for F32: dst = src0 / src1, row by row. No ith/nth partitioning is done here: the calling thread walks all nr rows. NOTE(review): the two leading ((void) (0)) statements look like compiled-out asserts; confirm what they checked against the unpreprocessed source. */ static void ggml_compute_forward_div_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int nr = ggml_nrows(src0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t 
nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9616, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9617, "nb00 == sizeof(float)"); abort(); } } while (0); /* fast path: src1 elements are densely packed, so whole rows go through ggml_vec_div_f32 */ if (nb10 == sizeof(float)) { for (int ir = 0; ir < nr; ++ir) { const int i3 = ir/(ne2*ne1); const int i2 = (ir - i3*ne2*ne1)/ne1; const int i1 = (ir - i3*ne2*ne1 - i2*ne1); # 9636 "ggml.c" ggml_vec_div_f32(ne0, (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11)); } } else { /* slow path: src1 is read one element at a time with byte stride nb10 */ for (int ir = 0; ir < nr; ++ir) { const int i3 = ir/(ne2*ne1); const int i2 = (ir - i3*ne2*ne1)/ne1; const int i1 = (ir - i3*ne2*ne1 - i2*ne1); float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); float * src0_ptr = (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); for (int i0 = 0; i0 < ne0; i0++) { float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11 + i0*nb10); dst_ptr[i0] = src0_ptr[i0] / (*src1_ptr); } } } } /* Type dispatch for div: only GGML_TYPE_F32 src0 is supported; any other type aborts. */ static void ggml_compute_forward_div( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_div_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9675, "false"); abort(); } } while (0); } break; } } /* sqr for F32: applies ggml_vec_sqr_f32 to each of the ggml_nrows(src0) rows, addressing row i as data + i*nb[1] for both src0 and dst. NOTE(review): the ((void) (0)) statements look like compiled-out asserts, presumably shape/stride checks; confirm against the unpreprocessed source. */ static void ggml_compute_forward_sqr_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type 
== GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_sqr_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1]))); } } /* Type dispatch for sqr: only GGML_TYPE_F32 src0 is supported; any other type aborts. */ static void ggml_compute_forward_sqr( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_sqr_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9717, "false"); abort(); } } while (0); } break; } } /* sqrt for F32: applies ggml_vec_sqrt_f32 to each of the ggml_nrows(src0) rows of nc = src0->ne[0] elements, addressing row i as data + i*nb[1] for both src0 and dst. INIT and FINALIZE passes return without work. NOTE(review): the ((void) (0)) statements look like compiled-out asserts, presumably shape/stride checks; confirm against the unpreprocessed source. */ static void ggml_compute_forward_sqrt_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_sqrt_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1]))); } } /* Type dispatch for sqrt: only GGML_TYPE_F32 src0 is supported; any other type aborts. */ static void ggml_compute_forward_sqrt( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_sqrt_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9759, "false"); abort(); } } while (0); } break; } } /* log for F32: requires params->ith == 0 and src0/dst of the same shape with float-dense dimension 0, then applies ggml_vec_log_f32 to every row (row i at data + i*nb[1]). */ static void ggml_compute_forward_log_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(params->ith == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9771, "params->ith == 0"); abort(); } } while (0); do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9772, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); if 
(params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; do { if (!(dst->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9781, "dst->nb[0] == sizeof(float)"); abort(); } } while (0); do { if (!(src0->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9782, "src0->nb[0] == sizeof(float)"); abort(); } } while (0); for (int i = 0; i < n; i++) { ggml_vec_log_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1]))); } } /* Type dispatch for log: only GGML_TYPE_F32 src0 is supported; any other type aborts. */ static void ggml_compute_forward_log( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_log_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9802, "false"); abort(); } } while (0); } break; } } /* sum for F32: walks every row of src0, obtains each row sum from ggml_vec_sum_f32_ggf, accumulates the rows into a ggml_float total and stores it in dst->data[0] as float. NOTE(review): the ((void) (0)) statements look like compiled-out asserts; confirm what they checked against the unpreprocessed source. */ static void ggml_compute_forward_sum_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } ((void) (0)); ((void) (0)); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; ggml_float sum = 0; ggml_float row_sum = 0; for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { ggml_vec_sum_f32_ggf(ne00, &row_sum, (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03)); sum += row_sum; } } } ((float *) 
dst->data)[0] = sum; } /* sum for F16: walks every row of src0, obtains each row sum from ggml_vec_sum_f16_ggf into a float, accumulates the rows in a float total and stores it in dst->data[0] as ggml_fp16_t. INIT and FINALIZE passes return without work. NOTE(review): the ((void) (0)) statements look like compiled-out asserts; confirm what they checked against the unpreprocessed source. */ static void ggml_compute_forward_sum_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } ((void) (0)); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; float sum = 0; float row_sum = 0; for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { ggml_vec_sum_f16_ggf(ne00, &row_sum, (ggml_fp16_t *) ((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03)); sum += row_sum; } } } ((ggml_fp16_t *) dst->data)[0] = (sum); } /* Type dispatch for sum: F32 and F16 src0 have kernels; any other type aborts. */ static void ggml_compute_forward_sum( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_sum_f32(params, src0, dst); } break; case GGML_TYPE_F16: { ggml_compute_forward_sum_f16(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9889, "false"); abort(); } } while (0); } break; } } /* sum_rows for F32: requires params->ith == 0, float-dense src0/dst and a dst whose dimension 0 is 1 while dimensions 1..3 match src0; writes the sum of each src0 row into the single element of the matching dst row. */ static void ggml_compute_forward_sum_rows_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(params->ith == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9900, "params->ith == 0"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } do { if (!(src0->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9906, "src0->nb[0] == 
sizeof(float)"); abort(); } } while (0); do { if (!(dst->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9907, "dst->nb[0] == sizeof(float)"); abort(); } } while (0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; /* dst must be shaped [1, ne01, ne02, ne03] */ do { if (!(ne0 == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9911, "ne0 == 1"); abort(); } } while (0); do { if (!(ne1 == ne01)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9912, "ne1 == ne01"); abort(); } } while (0); do { if (!(ne2 == ne02)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9913, "ne2 == ne02"); abort(); } } while (0); do { if (!(ne3 == ne03)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9914, "ne3 == ne03"); abort(); } } while (0); for (int64_t i3 = 0; i3 < ne03; i3++) { for (int64_t i2 = 0; i2 < ne02; i2++) { for (int64_t i1 = 0; i1 < ne01; i1++) { float * src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03); float * dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3); float row_sum = 0; ggml_vec_sum_f32(ne00, &row_sum, src_row); dst_row[0] = row_sum; } } } } /* Type dispatch for sum_rows: only GGML_TYPE_F32 src0 is supported; any other type aborts. */ static void ggml_compute_forward_sum_rows( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch 
(src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_sum_rows_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9940, "false"); abort(); } } while (0); } break; } } /* mean for F32: for every src0 row, ggml_vec_sum_f32 writes the row sum into the dst element at (i01, i02, i03), which is then divided by ne00. INIT and FINALIZE passes return without work. NOTE(review): the ((void) (0)) statements look like compiled-out asserts, presumably shape checks on dst; confirm against the unpreprocessed source. */ static void ggml_compute_forward_mean_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } ((void) (0)); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; ((void) (0)); ((void) (0)); ((void) (0)); ((void) (0)); (void)(ne0); (void)(ne1); (void)(ne2); (void)(ne3); for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { ggml_vec_sum_f32(ne00, (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3), (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03)); *(float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3) /= (float) ne00; } } } } /* Type dispatch for mean: only GGML_TYPE_F32 src0 is supported; any other type aborts. */ static void ggml_compute_forward_mean( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_mean_f32(params, src0, dst); } break; default: 
{ do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 9995, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_argmax_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } ((void) (0)); ((void) (0)); const int64_t ne00 = src0->ne[0]; const int64_t ne01 = src0->ne[1]; const size_t nb01 = src0->nb[1]; const size_t nb0 = dst->nb[0]; for (int64_t i1 = 0; i1 < ne01; i1++) { float * src = (float *) ((char *) src0->data + i1*nb01); int32_t * dst_ = (int32_t *) ((char *) dst->data + i1*nb0); int v = 0; ggml_vec_argmax_f32(ne00, &v, src); dst_[0] = v; } } static void ggml_compute_forward_argmax( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_argmax_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10041, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_repeat_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(params->ith == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10052, "params->ith == 0"); abort(); } } while (0); do { if (!(ggml_can_repeat(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10053, "ggml_can_repeat(src0, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = 
(src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int nr0 = (int)(ne0/ne00); const int nr1 = (int)(ne1/ne01); const int nr2 = (int)(ne2/ne02); const int nr3 = (int)(ne3/ne03); do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10068, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10069, "nb00 == sizeof(float)"); abort(); } } while (0); for (int i3 = 0; i3 < nr3; i3++) { for (int k3 = 0; k3 < ne03; k3++) { for (int i2 = 0; i2 < nr2; i2++) { for (int k2 = 0; k2 < ne02; k2++) { for (int i1 = 0; i1 < nr1; i1++) { for (int k1 = 0; k1 < ne01; k1++) { for (int i0 = 0; i0 < nr0; i0++) { ggml_vec_cpy_f32(ne00, (float *) ((char *) dst->data + (i3*ne03 + k3)*nb3 + (i2*ne02 + k2)*nb2 + (i1*ne01 + k1)*nb1 + (i0*ne00)*nb0), (float *) ((char *) src0->data + ( k3)*nb03 + ( k2)*nb02 + ( k1)*nb01)); } } } } } } } } static void ggml_compute_forward_repeat( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_repeat_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10102, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_repeat_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(params->ith == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10113, "params->ith == 
0"); abort(); } } while (0); do { if (!(ggml_can_repeat(dst, src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10114, "ggml_can_repeat(dst, src0)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int nr0 = (int)(ne00/ne0); const int nr1 = (int)(ne01/ne1); const int nr2 = (int)(ne02/ne2); const int nr3 = (int)(ne03/ne3); do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10129, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10130, "nb00 == sizeof(float)"); abort(); } } while (0); if (ggml_is_contiguous(dst)) { ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0); } else { for (int k3 = 0; k3 < ne3; k3++) { for (int k2 = 0; k2 < ne2; k2++) { for (int k1 = 0; k1 < ne1; k1++) { ggml_vec_set_f32(ne0, (float *) ((char *) dst->data + k1*nb1 + k2*nb2 + k3*nb3), 0); } } } } for (int i3 = 0; i3 < nr3; i3++) { for (int k3 = 0; k3 < ne3; k3++) { for (int i2 = 0; i2 < nr2; i2++) { for (int k2 = 0; k2 < ne2; k2++) { for (int i1 = 0; i1 < nr1; i1++) { for (int k1 = 0; k1 < ne1; k1++) { for (int i0 = 0; i0 < nr0; 
i0++) { ggml_vec_acc_f32(ne0, (float *) ((char *) dst->data + ( k3)*nb3 + ( k2)*nb2 + ( k1)*nb1), (float *) ((char *) src0->data + (i3*ne3 + k3)*nb03 + (i2*ne2 + k2)*nb02 + (i1*ne1 + k1)*nb01 + (i0*ne0)*nb00)); } } } } } } } } static void ggml_compute_forward_repeat_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_repeat_back_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10177, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_concat_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } do { if (!(src0->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10194, "src0->nb[0] == sizeof(float)"); abort(); } } while (0); const int ith = params->ith; const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = 
(dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10201, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10202, "nb00 == sizeof(float)"); abort(); } } while (0); do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10203, "nb10 == sizeof(float)"); abort(); } } while (0); for (int i3 = 0; i3 < ne3; i3++) { for (int i2 = ith; i2 < ne2; i2++) { if (i2 < ne02) { for (int i1 = 0; i1 < ne1; i1++) { for (int i0 = 0; i0 < ne0; i0++) { const float * x = (float *)((char *) src0->data + i0 * nb00 + i1 * nb01 + i2 * nb02 + i3 * nb03); float * y = (float *)((char *)dst->data + i0 * nb0 + i1 * nb1 + i2 * nb2 + i3 * nb3); *y = *x; } } } else { for (int i1 = 0; i1 < ne1; i1++) { for (int i0 = 0; i0 < ne0; i0++) { const float * x = (float *)((char *) src1->data + i0 * nb10 + i1 * nb11 + (i2 - ne02) * nb12 + i3 * nb13); float * y = (float *)((char *)dst->data + i0 * nb0 + i1 * nb1 + i2 * nb2 + i3 * nb3); *y = *x; } } } } } } static void ggml_compute_forward_concat( const struct ggml_compute_params* params, const struct ggml_tensor* src0, const struct ggml_tensor* src1, struct ggml_tensor* dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_concat_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10243, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_abs_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type 
== GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_abs_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1]))); } } static void ggml_compute_forward_abs( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_abs_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10285, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_sgn_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_sgn_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1]))); } } static void ggml_compute_forward_sgn( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_sgn_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10327, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_neg_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_neg_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), 
(float *) ((char *) src0->data + i*(src0->nb[1]))); } } static void ggml_compute_forward_neg( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_neg_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10369, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_step_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_step_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1]))); } } static void ggml_compute_forward_step( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_step_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10411, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_tanh_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_tanh_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1]))); } } static void ggml_compute_forward_tanh( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { 
case GGML_TYPE_F32: { ggml_compute_forward_tanh_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10453, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_elu_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_elu_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1]))); } } static void ggml_compute_forward_elu( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_elu_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10495, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_relu_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { ggml_vec_relu_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1]))); } } static void ggml_compute_forward_relu( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_relu_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10537, "false"); abort(); } } while (0); } break; } } static 
void ggml_compute_forward_gelu_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(ggml_is_contiguous_except_dim_1(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10548, "ggml_is_contiguous_except_dim_1(src0)"); abort(); } } while (0); do { if (!(ggml_is_contiguous_except_dim_1(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10549, "ggml_is_contiguous_except_dim_1(dst)"); abort(); } } while (0); do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10550, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int ith = params->ith; const int nth = params->nth; const int nc = src0->ne[0]; const int nr = ggml_nrows(src0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); for (int i1 = ir0; i1 < ir1; i1++) { ggml_vec_gelu_f32(nc, (float *) ((char *) dst->data + i1*( dst->nb[1])), (float *) ((char *) src0->data + i1*(src0->nb[1]))); # 10582 "ggml.c" } } static void ggml_compute_forward_gelu( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_gelu_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10596, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_gelu_quick_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(ggml_is_contiguous_except_dim_1(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10607, "ggml_is_contiguous_except_dim_1(src0)"); abort(); } } while (0); do { if (!(ggml_is_contiguous_except_dim_1(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10608, 
"ggml_is_contiguous_except_dim_1(dst)"); abort(); } } while (0); do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10609, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int ith = params->ith; const int nth = params->nth; const int nc = src0->ne[0]; const int nr = ggml_nrows(src0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); for (int i1 = ir0; i1 < ir1; i1++) { ggml_vec_gelu_quick_f32(nc, (float *) ((char *) dst->data + i1*( dst->nb[1])), (float *) ((char *) src0->data + i1*(src0->nb[1]))); # 10641 "ggml.c" } } static void ggml_compute_forward_gelu_quick( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_gelu_quick_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10655, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_silu_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(ggml_is_contiguous_except_dim_1(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10666, "ggml_is_contiguous_except_dim_1(src0)"); abort(); } } while (0); do { if (!(ggml_is_contiguous_except_dim_1(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10667, "ggml_is_contiguous_except_dim_1(dst)"); abort(); } } while (0); do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10668, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int ith = params->ith; const int nth = params->nth; const int nc = src0->ne[0]; const int nr = 
ggml_nrows(src0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); for (int i1 = ir0; i1 < ir1; i1++) { ggml_vec_silu_f32(nc, (float *) ((char *) dst->data + i1*( dst->nb[1])), (float *) ((char *) src0->data + i1*(src0->nb[1]))); # 10700 "ggml.c" } } static void ggml_compute_forward_silu( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_silu_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10714, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_silu_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * grad, struct ggml_tensor * dst) { do { if (!(ggml_is_contiguous_except_dim_1(grad))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10726, "ggml_is_contiguous_except_dim_1(grad)"); abort(); } } while (0); do { if (!(ggml_is_contiguous_except_dim_1(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10727, "ggml_is_contiguous_except_dim_1(src0)"); abort(); } } while (0); do { if (!(ggml_is_contiguous_except_dim_1(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10728, "ggml_is_contiguous_except_dim_1(dst)"); abort(); } } while (0); do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10729, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); do { if (!(ggml_are_same_shape(src0, grad))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10730, "ggml_are_same_shape(src0, grad)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int ith = params->ith; const int nth = params->nth; const int nc = src0->ne[0]; const int nr = ggml_nrows(src0); const int dr = (nr + nth - 1)/nth; 
const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); for (int i1 = ir0; i1 < ir1; i1++) { ggml_vec_silu_backward_f32(nc, (float *) ((char *) dst->data + i1*( dst->nb[1])), (float *) ((char *) src0->data + i1*(src0->nb[1])), (float *) ((char *) grad->data + i1*(grad->nb[1]))); # 10763 "ggml.c" } } static void ggml_compute_forward_silu_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * grad, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_silu_back_f32(params, src0, grad, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10778, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_norm_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10789, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } do { if (!(src0->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10795, "src0->nb[0] == sizeof(float)"); abort(); } } while (0); const int ith = params->ith; const int nth = params->nth; const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; 
(void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; float eps; memcpy(&eps, dst->op_params, sizeof(float)); for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = ith; i01 < ne01; i01 += nth) { const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); ggml_float sum = 0.0; for (int64_t i00 = 0; i00 < ne00; i00++) { sum += (ggml_float)x[i00]; } float mean = sum/ne00; float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3); ggml_float sum2 = 0.0; for (int64_t i00 = 0; i00 < ne00; i00++) { float v = x[i00] - mean; y[i00] = v; sum2 += (ggml_float)(v*v); } float variance = sum2/ne00; const float scale = 1.0f/sqrtf(variance + eps); ggml_vec_scale_f32(ne00, y, scale); } } } } static void ggml_compute_forward_norm( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_norm_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10847, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_rms_norm_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10858, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } do { if (!(src0->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10864, "src0->nb[0] == sizeof(float)"); abort(); } } while (0); const int ith = params->ith; const int nth = params->nth; const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = 
(src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; float eps; memcpy(&eps, dst->op_params, sizeof(float)); for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = ith; i01 < ne01; i01 += nth) { const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); ggml_float sum = 0.0; for (int64_t i00 = 0; i00 < ne00; i00++) { sum += (ggml_float)(x[i00] * x[i00]); } const float mean = sum/ne00; float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3); memcpy(y, x, ne00 * sizeof(float)); const float scale = 1.0f/sqrtf(mean + eps); ggml_vec_scale_f32(ne00, y, scale); } } } } static void ggml_compute_forward_rms_norm( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_rms_norm_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10913, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_rms_norm_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10923, 
"ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } do { if (!(src0->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 10929, "src0->nb[0] == sizeof(float)"); abort(); } } while (0); const int ith = params->ith; const int nth = params->nth; const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; float eps; memcpy(&eps, dst->op_params, sizeof(float)); for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = ith; i01 < ne01; i01 += nth) { const int64_t i11 = i01; const int64_t i12 = i02; const int64_t i13 = i03; const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); const float * dz = (float *) ((char *) src1->data + i11*nb11 + i12*nb12 + 
i13*nb13); ggml_float sum_xx = 0.0; ggml_float sum_xdz = 0.0; for (int64_t i00 = 0; i00 < ne00; i00++) { sum_xx += (ggml_float)(x[i00] * x[i00]); sum_xdz += (ggml_float)(x[i00] * dz[i00]); } const float mean_eps = (float)(sum_xx)/ne00 + eps; const float sum_eps = (float)(sum_xx) + eps*ne00; const float rrms = 1.0f / sqrtf(mean_eps); { # 11056 "ggml.c" } float * dx = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3); ggml_vec_cpy_f32 (ne00, dx, x); ggml_vec_scale_f32(ne00, dx, (float)(-sum_xdz)/sum_eps); ggml_vec_acc_f32 (ne00, dx, dz); ggml_vec_scale_f32(ne00, dx, rrms); } } } } static void ggml_compute_forward_rms_norm_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_rms_norm_back_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11087, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_group_norm_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11098, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } do { if (!(src0->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11104, "src0->nb[0] == sizeof(float)"); abort(); } } while (0); const int ith = params->ith; const int nth = params->nth; const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; 
(void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const float eps = 1e-6f; int n_channels = src0->ne[2]; int n_groups = dst->op_params[0]; int n_channels_per_group = (n_channels + n_groups - 1) / n_groups; for (int i = ith; i < n_groups; i+=nth) { int start = i * n_channels_per_group; int end = start + n_channels_per_group; if (end > n_channels) { end = n_channels; } int step = end - start; for (int64_t i03 = 0; i03 < ne03; i03++) { ggml_float sum = 0.0; for (int64_t i02 = start; i02 < end; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03); for (int64_t i00 = 0; i00 < ne00; i00++) { sum += (ggml_float)x[i00]; } } } float mean = sum / (ne00 * ne01 * step); ggml_float sum2 = 0.0; for (int64_t i02 = start; i02 < end; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03); float * y = (float *)((char *) dst->data + i01 * nb1 + i02 * nb2 + i03 * nb3); for (int64_t i00 = 0; i00 < ne00; i00++) { float v = x[i00] - mean; y[i00] = v; sum2 += (ggml_float)(v * v); } } } float variance = sum2 / (ne00 * ne01 * step); const float scale = 1.0f / sqrtf(variance + eps); for (int64_t i02 = start; i02 < end; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { float * y = (float *)((char *) dst->data + i01 * nb1 + i02 * nb2 + i03 * nb3); ggml_vec_scale_f32(ne00, y, scale); } } } } } static void ggml_compute_forward_group_norm( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { 
switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_group_norm_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11177, "false"); abort(); } } while (0); } break; } } # 11212 "ggml.c" static void ggml_compute_forward_mul_mat( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { int64_t t0 = 0; (void)(t0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int ith = params->ith; const int nth = params->nth; const enum ggml_type type = src0->type; const _Bool src1_cont = ggml_is_contiguous(src1); ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot; enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type; ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float; 
do { if (!(ne0 == ne01)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11233, "ne0 == ne01"); abort(); } } while (0); do { if (!(ne1 == ne11)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11234, "ne1 == ne11"); abort(); } } while (0); do { if (!(ne2 == ne12)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11235, "ne2 == ne12"); abort(); } } while (0); do { if (!(ne3 == ne13)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11236, "ne3 == ne13"); abort(); } } while (0); do { if (!(nb00 == ggml_type_size(type))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11239, "nb00 == ggml_type_size(type)"); abort(); } } while (0); do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11240, "nb10 == sizeof(float)"); abort(); } } while (0); do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11243, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb0 <= nb1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11244, "nb0 <= nb1"); abort(); } } while (0); do { if (!(nb1 <= nb2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11245, "nb1 <= nb2"); abort(); } } while (0); do { if (!(nb2 <= nb3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11246, "nb2 <= nb3"); abort(); } } while (0); const int64_t r2 = ne12/ne02; const int64_t r3 = ne13/ne03; # 11322 "ggml.c" if (params->type == GGML_TASK_INIT) { if (src1->type != vec_dot_type) { char * wdata = params->wdata; const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type); for (int64_t i13 = 0; i13 < ne13; ++i13) { for (int64_t i12 = 0; i12 < ne12; ++i12) { for (int64_t i11 = 0; i11 < ne11; ++i11) { from_float_to_vec_dot((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11), (void *) wdata, ne10); wdata += row_size; } } } } return; } if (params->type == GGML_TASK_FINALIZE) { return; } const void * wdata = (src1->type == vec_dot_type) ? 
src1->data : params->wdata; const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type); const int64_t nr0 = ne01; const int64_t nr1 = ne11*ne12*ne13; const int64_t nth0 = nr0 > nr1 ? nth : 1; const int64_t nth1 = nr0 > nr1 ? 1 : nth; const int64_t ith0 = ith % nth0; const int64_t ith1 = ith / nth0; const int64_t dr0 = (nr0 + nth0 - 1)/nth0; const int64_t dr1 = (nr1 + nth1 - 1)/nth1; const int64_t ir010 = dr0*ith0; const int64_t ir011 = ((ir010 + dr0) < (nr0) ? (ir010 + dr0) : (nr0)); const int64_t ir110 = dr1*ith1; const int64_t ir111 = ((ir110 + dr1) < (nr1) ? (ir110 + dr1) : (nr1)); if (ir010 >= ir011 || ir110 >= ir111) { sched_yield(); return; } ((void) (0)); ((void) (0)); const int64_t blck_0 = 16; const int64_t blck_1 = 16; float tmp[16]; for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) { for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) { for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) { const int64_t i13 = (ir1/(ne12*ne11)); const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11; const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11); const int64_t i03 = i13/r3; const int64_t i02 = i12/r2; const int64_t i1 = i11; const int64_t i2 = i12; const int64_t i3 = i13; const char * src0_row = (const char *) src0->data + (0 + i02*nb02 + i03*nb03); const char * src1_col = (const char *) wdata + (src1_cont || src1->type != vec_dot_type ? (i11 + i12*ne11 + i13*ne12*ne11)*row_size : (i11*nb11 + i12*nb12 + i13*nb13)); float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3)); for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) { vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col); } memcpy(&dst_col[iir0], tmp, (((iir0 + blck_0) < (ir011) ? 
(iir0 + blck_0) : (ir011)) - iir0)*sizeof(float)); } } } } static void ggml_compute_forward_out_prod_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { int64_t t0 = 0; (void)(t0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int ith = params->ith; const int nth = params->nth; do { if (!(ne02 == ne12)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11443, "ne02 == ne12"); abort(); } } while (0); do { if (!(ne03 == ne13)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11444, "ne03 == ne13"); abort(); } } while (0); do { if (!(ne2 == ne12)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11445, "ne2 == ne12"); abort(); } } while (0); do { if (!(ne3 == ne13)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11446, "ne3 == ne13"); 
abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11449, "nb00 == sizeof(float)"); abort(); } } while (0); do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11452, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(ne0 == ne00)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11457, "ne0 == ne00"); abort(); } } while (0); do { if (!(ne1 == ne10)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11458, "ne1 == ne10"); abort(); } } while (0); do { if (!(ne2 == ne02)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11459, "ne2 == ne02"); abort(); } } while (0); do { if (!(ne3 == ne03)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11460, "ne3 == ne03"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0); return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int64_t nr = ne1*ne2*ne3; const int64_t dr = (nr + nth - 1)/nth; const int64_t ir0 = dr*ith; const int64_t ir1 = ((ir0 + dr) < (nr) ? 
(ir0 + dr) : (nr)); # 11496 "ggml.c" for (int64_t ir = ir0; ir < ir1; ++ir) { const int64_t i3 = ir/(ne2*ne1); const int64_t i2 = (ir - i3*ne2*ne1)/ne1; const int64_t i1 = (ir - i3*ne2*ne1 - i2*ne1); const int64_t i02 = i2; const int64_t i03 = i3; const int64_t i12 = i2; const int64_t i13 = i3; for (int64_t i01 = 0; i01 < ne01; ++i01) { const int64_t i11 = i01; float * s0 = (float *) ((char *) src0->data + ( i01*nb01 + i02*nb02 + i03*nb03)); float * s1 = (float *) ((char *) src1->data + (i1*nb10 + i11*nb11 + i12*nb12 + i13*nb13)); float * d = (float *) ((char *) dst->data + ( i1*nb1 + i2*nb2 + i3*nb3)); ggml_vec_mad_f32(ne0, d, s0, *s1); } } # 11535 "ggml.c" } static void ggml_compute_forward_out_prod( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_1: case GGML_TYPE_Q8_0: case GGML_TYPE_Q8_1: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11550, "false"); abort(); } } while (0); } break; case GGML_TYPE_F16: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11555, "false"); abort(); } } while (0); } break; case GGML_TYPE_F32: { ggml_compute_forward_out_prod_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11564, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_scale_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(ggml_is_contiguous(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11576, "ggml_is_contiguous(src0)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11577, "ggml_is_contiguous(dst)"); abort(); } } while (0); do { if 
(!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11578, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); do { if (!(ggml_is_scalar(src1))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11579, "ggml_is_scalar(src1)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const float v = *(float *) src1->data; const int ith = params->ith; const int nth = params->nth; const int nc = src0->ne[0]; const int nr = ggml_nrows(src0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); const size_t nb01 = src0->nb[1]; const size_t nb1 = dst->nb[1]; for (int i1 = ir0; i1 < ir1; i1++) { if (dst->data != src0->data) { memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float)); } ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), v); } } static void ggml_compute_forward_scale( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_scale_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11627, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_set_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11639, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(dst) && ggml_is_contiguous(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11640, "ggml_is_contiguous(dst) && ggml_is_contiguous(src0)"); abort(); } } while (0); size_t nb1 = ((int32_t *) dst->op_params)[0]; size_t nb2 = 
((int32_t *) dst->op_params)[1]; size_t nb3 = ((int32_t *) dst->op_params)[2]; size_t offset = ((int32_t *) dst->op_params)[3]; _Bool inplace = (_Bool) ((int32_t *) dst->op_params)[4]; if (!inplace && (params->type == GGML_TASK_INIT)) { memcpy( ((char *) dst->data), ((char *) src0->data), ggml_nbytes(dst)); } if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int ith = params->ith; const int nth = params->nth; const int nr = ggml_nrows(src1); const int nc = src1->ne[0]; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const size_t nb0 = ggml_element_size(src0); const int im0 = (ne10 == 0 ? 0 : ne10-1); const int im1 = (ne11 == 0 ? 0 : ne11-1); const int im2 = (ne12 == 0 ? 0 : ne12-1); const int im3 = (ne13 == 0 ? 0 : ne13-1); do { if (!(offset + im0*nb0 + im1*nb1 + im2*nb2 + im3*nb3 <= ggml_nbytes(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11680, "offset + im0*nb0 + im1*nb1 + im2*nb2 + im3*nb3 <= ggml_nbytes(dst)"); abort(); } } while (0); do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11682, "nb10 == sizeof(float)"); abort(); } } while (0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? 
(ir0 + dr) : (nr)); for (int ir = ir0; ir < ir1; ++ir) { const int i3 = ir/(ne12*ne11); const int i2 = (ir - i3*ne12*ne11)/ne11; const int i1 = (ir - i3*ne12*ne11 - i2*ne11); ggml_vec_cpy_f32(nc, (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + offset), (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11)); } } static void ggml_compute_forward_set( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_set_f32(params, src0, src1, dst); } break; case GGML_TYPE_F16: case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_1: case GGML_TYPE_Q8_0: case GGML_TYPE_Q8_1: case GGML_TYPE_Q2_K: case GGML_TYPE_Q3_K: case GGML_TYPE_Q4_K: case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11729, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_cpy( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ggml_compute_forward_dup(params, src0, dst); } static void ggml_compute_forward_cont( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ggml_compute_forward_dup(params, src0, dst); } static void ggml_compute_forward_reshape( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { (void)(params); (void)(src0); (void)(dst); } static void ggml_compute_forward_view( const struct ggml_compute_params * params, const struct ggml_tensor * src0) { (void)(params); (void)(src0); } static void ggml_compute_forward_permute( const struct ggml_compute_params * params, const struct ggml_tensor * src0) { (void)(params); (void)(src0); } static void ggml_compute_forward_transpose( const struct ggml_compute_params * params, const struct ggml_tensor * src0) { 
(void)(params); (void)(src0); } static void ggml_compute_forward_get_rows_q( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int nc = src0->ne[0]; const int nr = ggml_nelements(src1); const enum ggml_type type = src0->type; ggml_to_float_t const dequantize_row_q = type_traits[type].to_float; ((void) (0)); ((void) (0)); ((void) (0)); for (int i = 0; i < nr; ++i) { const int r = ((int32_t *) src1->data)[i]; dequantize_row_q( (const void *) ((char *) src0->data + r*src0->nb[1]), (float *) ((char *) dst->data + i*dst->nb[1]), nc); } } static void ggml_compute_forward_get_rows_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int nc = src0->ne[0]; const int nr = ggml_nelements(src1); ((void) (0)); ((void) (0)); ((void) (0)); for (int i = 0; i < nr; ++i) { const int r = ((int32_t *) src1->data)[i]; for (int j = 0; j < nc; ++j) { ggml_fp16_t v = ((ggml_fp16_t *) ((char *) src0->data + r*src0->nb[1]))[j]; ((float *) ((char *) dst->data + i*dst->nb[1]))[j] = ((float) (v)); } } } static void ggml_compute_forward_get_rows_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int nc = src0->ne[0]; const int nr = ggml_nelements(src1); ((void) (0)); ((void) (0)); ((void) (0)); for (int i = 0; i < nr; ++i) { const int r = ((int32_t *) src1->data)[i]; ggml_vec_cpy_f32(nc, (float *) ((char *) dst->data + i*dst->nb[1]), (float *) ((char *) src0->data + r*src0->nb[1])); } } static void 
ggml_compute_forward_get_rows( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_1: case GGML_TYPE_Q8_0: case GGML_TYPE_Q8_1: case GGML_TYPE_Q2_K: case GGML_TYPE_Q3_K: case GGML_TYPE_Q4_K: case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: { ggml_compute_forward_get_rows_q(params, src0, src1, dst); } break; case GGML_TYPE_F16: { ggml_compute_forward_get_rows_f16(params, src0, src1, dst); } break; case GGML_TYPE_F32: { ggml_compute_forward_get_rows_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11910, "false"); abort(); } } while (0); } break; } # 11931 "ggml.c" } static void ggml_compute_forward_get_rows_back_f32_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * opt0, struct ggml_tensor * dst) { do { if (!(params->ith == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11941, "params->ith == 0"); abort(); } } while (0); do { if (!(ggml_are_same_shape(opt0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11942, "ggml_are_same_shape(opt0, dst)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(opt0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11943, "ggml_is_contiguous(opt0)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11944, "ggml_is_contiguous(dst)"); abort(); } } while (0); ggml_compute_forward_dup_same_cont(params, opt0, dst); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int nc = src0->ne[0]; const int nr = ggml_nelements(src1); do { if (!(dst->ne[0] == nc)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11955, "dst->ne[0] == nc"); abort(); } } while 
(0); do { if (!(src0->nb[0] == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11956, "src0->nb[0] == sizeof(ggml_fp16_t)"); abort(); } } while (0); for (int i = 0; i < nr; ++i) { const int r = ((int32_t *) src1->data)[i]; for (int j = 0; j < nc; ++j) { ggml_fp16_t v = ((ggml_fp16_t *) ((char *) src0->data + i*src0->nb[1]))[j]; ((float *) ((char *) dst->data + r*dst->nb[1]))[j] += ((float) (v)); } } } static void ggml_compute_forward_get_rows_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * opt0, struct ggml_tensor * dst) { do { if (!(params->ith == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11974, "params->ith == 0"); abort(); } } while (0); do { if (!(ggml_are_same_shape(opt0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11975, "ggml_are_same_shape(opt0, dst)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(opt0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11976, "ggml_is_contiguous(opt0)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11977, "ggml_is_contiguous(dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { memset(dst->data, 0, ggml_nbytes(dst)); } if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int nc = src0->ne[0]; const int nr = ggml_nelements(src1); do { if (!(dst->ne[0] == nc)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11992, "dst->ne[0] == nc"); abort(); } } while (0); do { if (!(src0->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 11993, "src0->nb[0] == sizeof(float)"); abort(); } } while (0); for (int i = 0; i < nr; ++i) { const int r = ((int32_t *) src1->data)[i]; ggml_vec_add_f32(nc, (float *) ((char *) dst->data + r*dst->nb[1]), (float *) ((char *) dst->data + r*dst->nb[1]), (float *) 
((char *) src0->data + i*src0->nb[1])); } } static void ggml_compute_forward_get_rows_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * opt0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { ggml_compute_forward_get_rows_back_f32_f16(params, src0, src1, opt0, dst); } break; case GGML_TYPE_F32: { ggml_compute_forward_get_rows_back_f32(params, src0, src1, opt0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12023, "false"); abort(); } } while (0); } break; } # 12044 "ggml.c" } static void ggml_compute_forward_diag_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(params->ith == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12052, "params->ith == 0"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; do { if (!(ne00 == ne0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12062, "ne00 == ne0"); abort(); } } while (0); do { if (!(ne00 == ne1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 
12063, "ne00 == ne1"); abort(); } } while (0); do { if (!(ne01 == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12064, "ne01 == 1"); abort(); } } while (0); do { if (!(ne02 == ne2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12065, "ne02 == ne2"); abort(); } } while (0); do { if (!(ne03 == ne3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12066, "ne03 == ne3"); abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12068, "nb00 == sizeof(float)"); abort(); } } while (0); do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12069, "nb0 == sizeof(float)"); abort(); } } while (0); for (int i3 = 0; i3 < ne3; i3++) { for (int i2 = 0; i2 < ne2; i2++) { for (int i1 = 0; i1 < ne1; i1++) { float * d = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); float * s = (float *)((char *) src0->data + i3*nb03 + i2*nb02); for (int i0 = 0; i0 < i1; i0++) { d[i0] = 0; } d[i1] = s[i1]; for (int i0 = i1+1; i0 < ne0; i0++) { d[i0] = 0; } } } } } static void ggml_compute_forward_diag( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_diag_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12099, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_diag_mask_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst, const float value) { const int ith = params->ith; const int nth = params->nth; const int n_past = ((int32_t *) dst->op_params)[0]; const _Bool inplace = src0->data == dst->data; do { if (!(n_past >= 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12118, "n_past >= 0"); abort(); } } while (0); if (!inplace && (params->type == GGML_TASK_INIT)) { do { if 
(!(ggml_nelements(dst) == ggml_nelements(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12123, "ggml_nelements(dst) == ggml_nelements(src0)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(dst) && ggml_is_contiguous(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12124, "ggml_is_contiguous(dst) && ggml_is_contiguous(src0)"); abort(); } } while (0); memcpy( ((char *) dst->data), ((char *) src0->data), ggml_nbytes(dst)); } if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; const int nr = src0->ne[1]; const int nz = n/nr; do { if (!(dst->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12142, "dst->nb[0] == sizeof(float)"); abort(); } } while (0); do { if (!(src0->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12143, "src0->nb[0] == sizeof(float)"); abort(); } } while (0); for (int k = 0; k < nz; k++) { for (int j = ith; j < nr; j += nth) { for (int i = n_past; i < nc; i++) { if (i > n_past + j) { *(float *)((char *) dst->data + k*dst->nb[2] + j*dst->nb[1] + i*dst->nb[0]) = value; } } } } } static void ggml_compute_forward_diag_mask_inf( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_diag_mask_f32(params, src0, dst, -__builtin_inff()); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12167, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_diag_mask_zero( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_diag_mask_f32(params, src0, dst, 0); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12183, "false"); abort(); } } 
while (0); } break; } } static void ggml_compute_forward_soft_max_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { do { if (!(ggml_is_contiguous(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12194, "ggml_is_contiguous(src0)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12195, "ggml_is_contiguous(dst)"); abort(); } } while (0); do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12196, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int ith = params->ith; const int nth = params->nth; const int nc = src0->ne[0]; const int nr = ggml_nrows(src0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); for (int i1 = ir0; i1 < ir1; i1++) { float *sp = (float *)((char *) src0->data + i1*src0->nb[1]); float *dp = (float *)((char *) dst->data + i1*dst->nb[1]); # 12228 "ggml.c" float max = -__builtin_inff(); ggml_vec_max_f32(nc, &max, sp); ggml_float sum = 0.0; uint16_t scvt; for (int i = 0; i < nc; i++) { if (sp[i] == -__builtin_inff()) { dp[i] = 0.0f; } else { ggml_fp16_t s = (sp[i] - max); memcpy(&scvt, &s, sizeof(scvt)); const float val = ((float) (table_exp_f16[scvt])); sum += (ggml_float)val; dp[i] = val; } } ((void) (0)); sum = 1.0/sum; ggml_vec_scale_f32(nc, dp, sum); } } static void ggml_compute_forward_soft_max( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_soft_max_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12272, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_soft_max_back_f32( const 
struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(ggml_is_contiguous(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12284, "ggml_is_contiguous(src0)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(src1))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12285, "ggml_is_contiguous(src1)"); abort(); } } while (0); do { if (!(ggml_is_contiguous(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12286, "ggml_is_contiguous(dst)"); abort(); } } while (0); do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12287, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); do { if (!(ggml_are_same_shape(src1, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12288, "ggml_are_same_shape(src1, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int ith = params->ith; const int nth = params->nth; const int nc = src0->ne[0]; const int nr = ggml_nrows(src0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? 
(ir0 + dr) : (nr)); for (int i1 = ir0; i1 < ir1; i1++) { float *dy = (float *)((char *) src0->data + i1*src0->nb[1]); float *y = (float *)((char *) src1->data + i1*src1->nb[1]); float *dx = (float *)((char *) dst->data + i1*dst->nb[1]); # 12341 "ggml.c" float dot_y_dy = 0; ggml_vec_dot_f32 (nc, &dot_y_dy, y, dy); ggml_vec_cpy_f32 (nc, dx, dy); ggml_vec_acc1_f32(nc, dx, -dot_y_dy); ggml_vec_mul_f32 (nc, dx, dx, y); } } static void ggml_compute_forward_soft_max_back( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_soft_max_back_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12368, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_alibi_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n_past = ((int32_t *) dst->op_params)[0]; const int n_head = ((int32_t *) dst->op_params)[1]; float max_bias; memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float)); ((void) (0)); const int ne0 = src0->ne[0]; const int ne1 = src0->ne[1]; const int ne2 = src0->ne[2]; const int n = ggml_nrows(src0); const int ne2_ne3 = n/ne1; const int nb0 = src0->nb[0]; const int nb1 = src0->nb[1]; const int nb2 = src0->nb[2]; do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12405, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(ne1 + n_past == ne0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12406, "ne1 + n_past == ne0"); abort(); } } while (0); do { if (!(n_head == ne2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12407, "n_head == ne2"); abort(); } } while (0); const int n_heads_log2_floor = 1 << 
(int) floor(log2(n_head)); const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor); const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor); for (int i = 0; i < ne0; i++) { for (int j = 0; j < ne1; j++) { for (int k = 0; k < ne2_ne3; k++) { float * const src = (float *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2); float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2); float m_k; if (k < n_heads_log2_floor) { m_k = powf(m0, k + 1); } else { m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1); } pdst[0] = i * m_k + src[0]; } } } } static void ggml_compute_forward_alibi_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n_past = ((int32_t *) dst->op_params)[0]; const int n_head = ((int32_t *) dst->op_params)[1]; float max_bias; memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float)); ((void) (0)); const int ne0 = src0->ne[0]; const int ne1 = src0->ne[1]; const int ne2 = src0->ne[2]; const int n = ggml_nrows(src0); const int ne2_ne3 = n/ne1; const int nb0 = src0->nb[0]; const int nb1 = src0->nb[1]; const int nb2 = src0->nb[2]; do { if (!(nb0 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12468, "nb0 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(ne1 + n_past == ne0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12469, "ne1 + n_past == ne0"); abort(); } } while (0); (void) n_past; do { if (!(n_head == ne2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12470, "n_head == ne2"); abort(); } } while (0); const int n_heads_log2_floor = 1 << (int) floor(log2(n_head)); const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor); const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor); for (int i = 0; i < ne0; i++) { for (int j = 0; j < ne1; j++) { for (int k = 0; k < ne2_ne3; k++) { 
ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i*nb0 + j*nb1 + k*nb2); float * pdst = (float *)((char *) dst->data + i*nb0 + j*nb1 + k*nb2); float m_k; if (k < n_heads_log2_floor) { m_k = powf(m0, k + 1); } else { m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1); } pdst[0] = i * m_k + ((float) (src[0])); } } } } static void ggml_compute_forward_alibi( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { ggml_compute_forward_alibi_f16(params, src0, dst); } break; case GGML_TYPE_F32: { ggml_compute_forward_alibi_f32(params, src0, dst); } break; case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_1: case GGML_TYPE_Q8_0: case GGML_TYPE_Q8_1: case GGML_TYPE_Q2_K: case GGML_TYPE_Q3_K: case GGML_TYPE_Q4_K: case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: case GGML_TYPE_Q8_K: case GGML_TYPE_I8: case GGML_TYPE_I16: case GGML_TYPE_I32: case GGML_TYPE_COUNT: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12531, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_clamp_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } float min; float max; memcpy(&min, (float *) dst->op_params + 0, sizeof(float)); memcpy(&max, (float *) dst->op_params + 1, sizeof(float)); const int ith = params->ith; const int nth = params->nth; const int n = ggml_nrows(src0); const int nc = src0->ne[0]; const size_t nb00 = src0->nb[0]; const size_t nb01 = src0->nb[1]; const size_t nb0 = dst->nb[0]; const size_t nb1 = dst->nb[1]; do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12565, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12566, 
"nb00 == sizeof(float)"); abort(); } } while (0); for (int j = ith; j < n; j += nth) { float * dst_ptr = (float *) ((char *) dst->data + j*nb1); float * src0_ptr = (float *) ((char *) src0->data + j*nb01); for (int i = 0; i < nc; i++) { dst_ptr[i] = ((((src0_ptr[i]) < (max) ? (src0_ptr[i]) : (max))) > (min) ? (((src0_ptr[i]) < (max) ? (src0_ptr[i]) : (max))) : (min)); } } } static void ggml_compute_forward_clamp( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_clamp_f32(params, src0, dst); } break; case GGML_TYPE_F16: case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: case GGML_TYPE_Q5_1: case GGML_TYPE_Q8_0: case GGML_TYPE_Q8_1: case GGML_TYPE_Q2_K: case GGML_TYPE_Q3_K: case GGML_TYPE_Q4_K: case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: case GGML_TYPE_Q8_K: case GGML_TYPE_I8: case GGML_TYPE_I16: case GGML_TYPE_I32: case GGML_TYPE_COUNT: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12605, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_rope_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } float freq_base; float freq_scale; float xpos_base; _Bool xpos_down; const int n_past = ((int32_t *) dst->op_params)[0]; const int n_dims = ((int32_t *) dst->op_params)[1]; const int mode = ((int32_t *) dst->op_params)[2]; const int n_ctx = ((int32_t *) dst->op_params)[3]; memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float)); memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float)); memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float)); memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(_Bool)); ((void) (0)); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); 
const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12644, "nb00 == sizeof(float)"); abort(); } } while (0); const int ith = params->ith; const int nth = params->nth; const int nr = ggml_nrows(dst); do { if (!(n_dims <= ne0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12651, "n_dims <= ne0"); abort(); } } while (0); do { if (!(n_dims % 2 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12652, "n_dims % 2 == 0"); abort(); } } while (0); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); int ir = 0; const float theta_scale = powf(freq_base, -2.0f/n_dims); const _Bool is_neox = mode & 2; const _Bool is_glm = mode & 4; for (int64_t i3 = 0; i3 < ne3; i3++) { for (int64_t i2 = ((mode & 1) == 0 ? 0 : n_past); i2 < ne2; i2++) { const int64_t p = ((mode & 1) == 0 ? n_past + i2 : i2); for (int64_t i1 = 0; i1 < ne1; i1++) { if (ir++ < ir0) continue; if (ir > ir1) break; float theta = freq_scale * (float)p; if (is_glm) { theta = ((p) < (n_ctx - 2) ? (p) : (n_ctx - 2)); float block_theta = ((p - (n_ctx - 2)) > (0) ? 
(p - (n_ctx - 2)) : (0)); for (int64_t i0 = 0; i0 < ne0 / 4; i0++) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); const float cos_block_theta = cosf(block_theta); const float sin_block_theta = sinf(block_theta); theta *= theta_scale; block_theta *= theta_scale; const float * const src = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); const float x0 = src[0]; const float x1 = src[n_dims/2]; const float x2 = src[n_dims]; const float x3 = src[n_dims/2*3]; dst_data[0] = x0*cos_theta - x1*sin_theta; dst_data[n_dims/2] = x0*sin_theta + x1*cos_theta; dst_data[n_dims] = x2*cos_block_theta - x3*sin_block_theta; dst_data[n_dims/2*3] = x2*sin_block_theta + x3*cos_block_theta; } } else if (!is_neox) { for (int64_t i0 = 0; i0 < ne0; i0 += 2) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), (n_past + i2) / xpos_base) : 1.0f; if (xpos_down) zeta = 1.0f / zeta; theta *= theta_scale; const float * const src = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); const float x0 = src[0]; const float x1 = src[1]; dst_data[0] = x0*cos_theta*zeta - x1*sin_theta*zeta; dst_data[1] = x0*sin_theta*zeta + x1*cos_theta*zeta; } } else { for (int64_t ib = 0; ib < ne0/n_dims; ++ib) { for (int64_t ic = 0; ic < n_dims; ic += 2) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); theta *= theta_scale; const int64_t i0 = ib*n_dims + ic/2; const float * const src = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); const float x0 = src[0]; const float x1 = src[n_dims/2]; dst_data[0] = x0*cos_theta - x1*sin_theta; dst_data[n_dims/2] = 
x0*sin_theta + x1*cos_theta; } } } } } } } static void ggml_compute_forward_rope_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } float freq_base; float freq_scale; const int n_past = ((int32_t *) dst->op_params)[0]; const int n_dims = ((int32_t *) dst->op_params)[1]; const int mode = ((int32_t *) dst->op_params)[2]; const int n_ctx = ((int32_t *) dst->op_params)[3]; memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float)); memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float)); ((void) (0)); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; do { if (!(nb0 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12776, "nb0 == sizeof(ggml_fp16_t)"); abort(); } } while (0); const int ith = params->ith; const int nth = params->nth; const int nr = ggml_nrows(dst); do { if (!(n_dims <= ne0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12783, "n_dims <= ne0"); abort(); } } while (0); do { if (!(n_dims % 2 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12784, "n_dims % 2 == 0"); abort(); } } while (0); const int dr = (nr + nth - 1)/nth; const int 
ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); int ir = 0; const float theta_scale = powf(freq_base, -2.0f/n_dims); const _Bool is_neox = mode & 2; const _Bool is_glm = mode & 4; for (int64_t i3 = 0; i3 < ne3; i3++) { for (int64_t i2 = ((mode & 1) == 0 ? 0 : n_past); i2 < ne2; i2++) { const int64_t p = ((mode & 1) == 0 ? n_past + i2 : i2); for (int64_t i1 = 0; i1 < ne1; i1++) { if (ir++ < ir0) continue; if (ir > ir1) break; float theta = freq_scale * (float)p; if (is_glm) { theta = ((p) < (n_ctx - 2) ? (p) : (n_ctx - 2)); float block_theta = ((p - (n_ctx - 2)) > (0) ? (p - (n_ctx - 2)) : (0)); for (int64_t i0 = 0; i0 < ne0 / 4; i0++) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); const float cos_block_theta = cosf(block_theta); const float sin_block_theta = sinf(block_theta); theta *= theta_scale; block_theta *= theta_scale; const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); const float x0 = ((float) (src[0])); const float x1 = ((float) (src[n_dims/2])); const float x2 = ((float) (src[n_dims])); const float x3 = ((float) (src[n_dims/2*3])); dst_data[0] = (x0*cos_theta - x1*sin_theta); dst_data[n_dims/2] = (x0*sin_theta + x1*cos_theta); dst_data[n_dims] = (x2*cos_block_theta - x3*sin_block_theta); dst_data[n_dims/2*3] = (x2*sin_block_theta + x3*cos_block_theta); } } if (!is_neox) { for (int64_t i0 = 0; i0 < ne0; i0 += 2) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); theta *= theta_scale; const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); const float x0 = ((float) (src[0])); const float x1 = ((float) (src[1])); dst_data[0] = (x0*cos_theta - x1*sin_theta); dst_data[1] = 
(x0*sin_theta + x1*cos_theta); } } else { for (int64_t ib = 0; ib < ne0/n_dims; ++ib) { for (int64_t ic = 0; ic < n_dims; ic += 2) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); theta *= theta_scale; const int64_t i0 = ib*n_dims + ic/2; const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); const float x0 = ((float) (src[0])); const float x1 = ((float) (src[n_dims/2])); dst_data[0] = (x0*cos_theta - x1*sin_theta); dst_data[n_dims/2] = (x0*sin_theta + x1*cos_theta); } } } } } } } static void ggml_compute_forward_rope( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { ggml_compute_forward_rope_f16(params, src0, dst); } break; case GGML_TYPE_F32: { ggml_compute_forward_rope_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 12894, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_rope_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } float freq_base; float freq_scale; float xpos_base; _Bool xpos_down; const int n_past = ((int32_t *) dst->op_params)[0]; const int n_dims = ((int32_t *) dst->op_params)[1]; const int mode = ((int32_t *) dst->op_params)[2]; const int n_ctx = ((int32_t *) dst->op_params)[3]; (void)(n_ctx); memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float)); memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float)); memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float)); memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(_Bool)); ((void) (0)); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const 
int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; ((void) (0)); const int ith = params->ith; const int nth = params->nth; const int nr = ggml_nrows(dst); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); int ir = 0; const float theta_scale = powf(freq_base, -2.0f/n_dims); const _Bool is_neox = mode & 2; for (int64_t i3 = 0; i3 < ne3; i3++) { for (int64_t i2 = ((mode & 1) == 0 ? 0 : n_past); i2 < ne2; i2++) { const int64_t p = ((mode & 1) == 0 ? n_past + i2 : i2); for (int64_t i1 = 0; i1 < ne1; i1++) { if (ir++ < ir0) continue; if (ir > ir1) break; float theta = freq_scale * (float)p; if (!is_neox) { for (int64_t i0 = 0; i0 < ne0; i0 += 2) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); float zeta = xpos_base != 0.0f ? 
powf((i0 + 0.4f * ne0) / (1.4f * ne0), (n_past + i2) / xpos_base) : 1.0f; if (xpos_down) zeta = 1.0f / zeta; theta *= theta_scale; const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); const float dy0 = dy[0]; const float dy1 = dy[1]; dx[0] = dy0*cos_theta*zeta + dy1*sin_theta*zeta; dx[1] = - dy0*sin_theta*zeta + dy1*cos_theta*zeta; } } else { for (int64_t ib = 0; ib < ne0/n_dims; ++ib) { for (int64_t ic = 0; ic < n_dims; ic += 2) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); theta *= theta_scale; const int64_t i0 = ib*n_dims + ic/2; const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); const float dy0 = dy[0]; const float dy1 = dy[n_dims/2]; dx[0] = dy0*cos_theta + dy1*sin_theta; dx[n_dims/2] = - dy0*sin_theta + dy1*cos_theta; } } } } } } } static void ggml_compute_forward_rope_back_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n_past = ((int32_t *) dst->op_params)[0]; const int n_dims = ((int32_t *) dst->op_params)[1]; const int mode = ((int32_t *) dst->op_params)[2]; ((void) (0)); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = 
(dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; ((void) (0)); const int ith = params->ith; const int nth = params->nth; const int nr = ggml_nrows(dst); const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); int ir = 0; const float theta_scale = powf(10000.0, -2.0f/n_dims); const _Bool is_neox = mode & 2; for (int64_t i3 = 0; i3 < ne3; i3++) { for (int64_t i2 = ((mode & 1) == 0 ? 0 : n_past); i2 < ne2; i2++) { const int64_t p = ((mode & 1) == 0 ? n_past + i2 : i2); for (int64_t i1 = 0; i1 < ne1; i1++) { if (ir++ < ir0) continue; if (ir > ir1) break; float theta = (float)p; if (!is_neox) { for (int64_t i0 = 0; i0 < ne0; i0 += 2) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); theta *= theta_scale; const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); const float dy0 = ((float) (dy[0])); const float dy1 = ((float) (dy[1])); dx[0] = (dy0*cos_theta + dy1*sin_theta); dx[1] = (-dy0*sin_theta + dy1*cos_theta); } } else { for (int64_t ib = 0; ib < ne0/n_dims; ++ib) { for (int64_t ic = 0; ic < n_dims; ic += 2) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); theta *= theta_scale; const int64_t i0 = ib*n_dims + ic/2; const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); const float dy0 = ((float) (dy[0])); const float dy1 = ((float) (dy[n_dims/2])); dx[0] = (dy0*cos_theta + dy1*sin_theta); dx[n_dims/2] = (-dy0*sin_theta + dy1*cos_theta); } } } } } } } static void ggml_compute_forward_rope_back( const struct 
ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { ggml_compute_forward_rope_back_f16(params, src0, dst); } break; case GGML_TYPE_F32: { ggml_compute_forward_rope_back_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13123, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_conv_1d_s1_ph_f16_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(src0->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13135, "src0->type == GGML_TYPE_F16"); abort(); } } while (0); do { if (!(src1->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13136, "src1->type == GGML_TYPE_F32"); abort(); } } while (0); do { if (!(dst->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13137, "dst->type == GGML_TYPE_F32"); abort(); } } while (0); int64_t t0 = 0; (void)(t0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; 
(void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int ith = params->ith; const int nth = params->nth; const int nk = ne00; const int nh = nk/2; const int ew0 = ggml_up32(ne01); do { if (!(ne00 % 2 == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13152, "ne00 % 2 == 1"); abort(); } } while (0); do { if (!(nb00 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13153, "nb00 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13154, "nb10 == sizeof(float)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { memset(params->wdata, 0, params->wsize); { ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0; for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01); ggml_fp16_t * dst_data = wdata + i02*ew0*ne00; for (int64_t i00 = 0; i00 < ne00; i00++) { dst_data[i00*ew0 + i01] = src[i00]; } } } } { ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + ne02*ew0*ne00; for (int64_t i11 = 0; i11 < ne11; i11++) { const float * const src = (float *)((char *) src1->data + i11*nb11); ggml_fp16_t * dst_data = wdata; for (int64_t i10 = 0; i10 < ne10; i10++) { dst_data[(i10 + nh)*ew0 + i11] = (src[i10]); } } } return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int nr = ne02; const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? 
(ir0 + dr) : (nr)); for (int i1 = ir0; i1 < ir1; i1++) { float * dst_data = (float *)((char *) dst->data + i1*nb1); for (int64_t i0 = 0; i0 < ne10; ++i0) { dst_data[i0] = 0; for (int k = -nh; k <= nh; k++) { float v = 0.0f; ggml_vec_dot_f16(ew0, &v, (ggml_fp16_t *) params->wdata + i1*ew0*ne00 + (nh + k)*ew0, (ggml_fp16_t *) params->wdata + ne02*ew0*ne00 + (i0 + nh + k)*ew0); dst_data[i0] += v; } } } } static void ggml_compute_forward_conv_1d_s1_ph_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(src0->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13226, "src0->type == GGML_TYPE_F32"); abort(); } } while (0); do { if (!(src1->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13227, "src1->type == GGML_TYPE_F32"); abort(); } } while (0); do { if (!(dst->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13228, "dst->type == GGML_TYPE_F32"); abort(); } } while (0); int64_t t0 = 0; (void)(t0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const 
int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int ith = params->ith; const int nth = params->nth; const int nk = ne00; const int nh = nk/2; const int ew0 = ggml_up32(ne01); do { if (!(ne00 % 2 == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13243, "ne00 % 2 == 1"); abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13244, "nb00 == sizeof(float)"); abort(); } } while (0); do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13245, "nb10 == sizeof(float)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { memset(params->wdata, 0, params->wsize); { float * const wdata = (float *) params->wdata + 0; for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { const float * const src = (float *)((char *) src0->data + i02*nb02 + i01*nb01); float * dst_data = wdata + i02*ew0*ne00; for (int64_t i00 = 0; i00 < ne00; i00++) { dst_data[i00*ew0 + i01] = src[i00]; } } } } { float * const wdata = (float *) params->wdata + ne02*ew0*ne00; for (int64_t i11 = 0; i11 < ne11; i11++) { const float * const src = (float *)((char *) src1->data + i11*nb11); float * dst_data = wdata; for (int64_t i10 = 0; i10 < ne10; i10++) { dst_data[(i10 + nh)*ew0 + i11] = src[i10]; } } } return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int nr = ne02; const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? 
(ir0 + dr) : (nr)); for (int i1 = ir0; i1 < ir1; i1++) { float * dst_data = (float *)((char *) dst->data + i1*nb1); for (int64_t i0 = 0; i0 < ne10; ++i0) { dst_data[i0] = 0; for (int k = -nh; k <= nh; k++) { float v = 0.0f; ggml_vec_dot_f32(ew0, &v, (float *) params->wdata + i1*ew0*ne00 + (nh + k)*ew0, (float *) params->wdata + ne02*ew0*ne00 + (i0 + nh + k)*ew0); dst_data[i0] += v; } } } } static void ggml_compute_forward_conv_1d_s1_ph( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { ggml_compute_forward_conv_1d_s1_ph_f16_f32(params, src0, src1, dst); } break; case GGML_TYPE_F32: { ggml_compute_forward_conv_1d_s1_ph_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13328, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_conv_1d_s2_ph_f16_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(src0->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13338, "src0->type == GGML_TYPE_F16"); abort(); } } while (0); do { if (!(src1->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13339, "src1->type == GGML_TYPE_F32"); abort(); } } while (0); do { if (!(dst->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13340, "dst->type == GGML_TYPE_F32"); abort(); } } while (0); int64_t t0 = 0; (void)(t0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = 
(src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int ith = params->ith; const int nth = params->nth; const int nk = ne00; const int nh = nk/2; const int ew0 = ggml_up32(ne01); do { if (!(ne00 % 2 == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13355, "ne00 % 2 == 1"); abort(); } } while (0); do { if (!(nb00 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13356, "nb00 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13357, "nb10 == sizeof(float)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { memset(params->wdata, 0, params->wsize); { ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0; for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01); ggml_fp16_t * dst_data = wdata + i02*ew0*ne00; for (int64_t i00 = 0; i00 < ne00; i00++) { dst_data[i00*ew0 + i01] = src[i00]; } } } } { ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + ne02*ew0*ne00; for (int64_t i11 = 0; i11 < ne11; i11++) { const float * const src = (float *)((char *) 
src1->data + i11*nb11); ggml_fp16_t * dst_data = wdata; for (int64_t i10 = 0; i10 < ne10; i10++) { dst_data[(i10 + nh)*ew0 + i11] = (src[i10]); } } } return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int nr = ne02; const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); for (int i1 = ir0; i1 < ir1; i1++) { float * dst_data = (float *)((char *) dst->data + i1*nb1); for (int64_t i0 = 0; i0 < ne10; i0 += 2) { dst_data[i0/2] = 0; for (int k = -nh; k <= nh; k++) { float v = 0.0f; ggml_vec_dot_f16(ew0, &v, (ggml_fp16_t *) params->wdata + i1*ew0*ne00 + (nh + k)*ew0, (ggml_fp16_t *) params->wdata + ne02*ew0*ne00 + (i0 + nh + k)*ew0); dst_data[i0/2] += v; } } } } static void ggml_compute_forward_conv_1d_s2_ph_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(src0->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13429, "src0->type == GGML_TYPE_F32"); abort(); } } while (0); do { if (!(src1->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13430, "src1->type == GGML_TYPE_F32"); abort(); } } while (0); do { if (!(dst->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13431, "dst->type == GGML_TYPE_F32"); abort(); } } while (0); int64_t t0 = 0; (void)(t0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t 
ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int ith = params->ith; const int nth = params->nth; const int nk = ne00; const int nh = nk/2; const int ew0 = ggml_up32(ne01); do { if (!(ne00 % 2 == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13446, "ne00 % 2 == 1"); abort(); } } while (0); do { if (!(nb00 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13447, "nb00 == sizeof(float)"); abort(); } } while (0); do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13448, "nb10 == sizeof(float)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { memset(params->wdata, 0, params->wsize); { float * const wdata = (float *) params->wdata + 0; for (int64_t i02 = 0; i02 < ne02; i02++) { for (int64_t i01 = 0; i01 < ne01; i01++) { const float * const src = (float *)((char *) src0->data + i02*nb02 + i01*nb01); float * dst_data = wdata + i02*ew0*ne00; for (int64_t i00 = 0; i00 < ne00; i00++) { dst_data[i00*ew0 + i01] = src[i00]; } } } } { float * const wdata = (float *) params->wdata + ne02*ew0*ne00; for (int64_t i11 = 0; i11 < ne11; i11++) { const float * const src = (float *)((char *) src1->data + i11*nb11); float * dst_data = wdata; for (int64_t i10 = 0; i10 < ne10; i10++) { dst_data[(i10 + nh)*ew0 + i11] = src[i10]; } } } return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int nr = ne02; const int dr = (nr + nth 
- 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); for (int i1 = ir0; i1 < ir1; i1++) { float * dst_data = (float *)((char *) dst->data + i1*nb1); for (int64_t i0 = 0; i0 < ne10; i0 += 2) { dst_data[i0/2] = 0; for (int k = -nh; k <= nh; k++) { float v = 0.0f; ggml_vec_dot_f32(ew0, &v, (float *) params->wdata + i1*ew0*ne00 + (nh + k)*ew0, (float *) params->wdata + ne02*ew0*ne00 + (i0 + nh + k)*ew0); dst_data[i0/2] += v; } } } } static void ggml_compute_forward_conv_1d_s2_ph( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { ggml_compute_forward_conv_1d_s2_ph_f16_f32(params, src0, src1, dst); } break; case GGML_TYPE_F32: { ggml_compute_forward_conv_1d_s2_ph_f32(params, src0, src1, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13531, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_conv_1d( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { const int32_t s0 = ((const int32_t*)(dst->op_params))[0]; const int32_t p0 = ((const int32_t*)(dst->op_params))[1]; const int32_t d0 = ((const int32_t*)(dst->op_params))[2]; do { if (!(d0 == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13546, "d0 == 1"); abort(); } } while (0); do { if (!(p0 == src0->ne[0]/2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13547, "p0 == src0->ne[0]/2"); abort(); } } while (0); if (s0 == 1) { ggml_compute_forward_conv_1d_s1_ph(params, src0, src1, dst); } else if (s0 == 2) { ggml_compute_forward_conv_1d_s2_ph(params, src0, src1, dst); } else { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13553, "false"); abort(); } } while (0); }; } static void ggml_compute_forward_conv_2d_f16_f32( const struct ggml_compute_params * 
params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(src0->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13564, "src0->type == GGML_TYPE_F16"); abort(); } } while (0); do { if (!(src1->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13565, "src1->type == GGML_TYPE_F32"); abort(); } } while (0); do { if (!(dst->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13566, "dst->type == GGML_TYPE_F32"); abort(); } } while (0); int64_t t0 = 0; (void)(t0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int ith = params->ith; const int nth = params->nth; const int nk0 = ne00; const int nk1 = ne01; const int ew0 = nk0*nk1*ne02; const int32_t s0 = ((const int32_t*)(dst->op_params))[0]; const int32_t s1 = ((const 
int32_t*)(dst->op_params))[1]; const int32_t p0 = ((const int32_t*)(dst->op_params))[2]; const int32_t p1 = ((const int32_t*)(dst->op_params))[3]; const int32_t d0 = ((const int32_t*)(dst->op_params))[4]; const int32_t d1 = ((const int32_t*)(dst->op_params))[5]; do { if (!(nb00 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13589, "nb00 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13590, "nb10 == sizeof(float)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { memset(params->wdata, 0, params->wsize); { ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0; for (int i12 = 0; i12 < ne12; i12++) { const float * const src = (float *)((char *) src1->data + i12*nb12); ggml_fp16_t * dst_data = wdata; for (int i1 = 0; i1 < ne1; i1++) { for (int i0 = 0; i0 < ne0; i0++) { for (int ik1 = 0; ik1 < nk1; ik1++) { for (int ik0 = 0; ik0 < nk0; ik0++) { const int idx0 = i0*s0 + ik0*d0 - p0; const int idx1 = i1*s1 + ik1*d1 - p1; if (!(idx1 < 0 || idx1 >= ne11 || idx0 < 0 || idx0 >= ne10)) { dst_data[(i1*ne0 + i0)*ew0 + i12*(nk0*nk1) + ik1*nk0 + ik0] = (src[idx1*ne10 + idx0]); } } } } } } } return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int np = ne2; const int dp = (np + nth - 1)/nth; const int ip0 = dp*ith; const int ip1 = ((ip0 + dp) < (np) ? 
(ip0 + dp) : (np)); ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0; for (int i3 = 0; i3 < ne3; i3++) { for (int i2 = ip0; i2 < ip1; i2++) { float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2); for (int i1 = 0; i1 < ne1; ++i1) { for (int i0 = 0; i0 < ne0; ++i0) { ggml_vec_dot_f16(ew0, dst_data + i1*ne0 + i0, (ggml_fp16_t *) ((char *) src0->data + i2*nb03), (ggml_fp16_t *) wdata + i3*nb3 + (i1*ne0 + i0)*ew0); } } } } } static void ggml_compute_forward_conv_2d( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { ggml_compute_forward_conv_2d_f16_f32(params, src0, src1, dst); } break; case GGML_TYPE_F32: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13668, "false"); abort(); } } while (0); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13672, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_conv_transpose_2d( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) { do { if (!(src0->type == GGML_TYPE_F16)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13684, "src0->type == GGML_TYPE_F16"); abort(); } } while (0); do { if (!(src1->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13685, "src1->type == GGML_TYPE_F32"); abort(); } } while (0); do { if (!(dst->type == GGML_TYPE_F32)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13686, "dst->type == GGML_TYPE_F32"); abort(); } } while (0); int64_t t0 = 0; (void)(t0); const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = 
(src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne10 = (src1)->ne[0]; (void)(ne10); const int64_t ne11 = (src1)->ne[1]; (void)(ne11); const int64_t ne12 = (src1)->ne[2]; (void)(ne12); const int64_t ne13 = (src1)->ne[3]; (void)(ne13);; const size_t nb10 = (src1)->nb[0]; (void)(nb10); const size_t nb11 = (src1)->nb[1]; (void)(nb11); const size_t nb12 = (src1)->nb[2]; (void)(nb12); const size_t nb13 = (src1)->nb[3]; (void)(nb13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int ith = params->ith; const int nth = params->nth; const int nk = ne00*ne01*ne02*ne03; do { if (!(nb00 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13698, "nb00 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13699, "nb10 == sizeof(float)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { memset(params->wdata, 0, params->wsize); { ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0; for (int64_t i03 = 0; i03 < ne03; i03++) { for (int64_t i02 = 0; i02 < ne02; i02++) { const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i03*nb03 + i02*nb02); ggml_fp16_t * dst_data = wdata + i02*ne01*ne00*ne03; for (int64_t i01 = 0; i01 < ne01; i01++) { for (int64_t i00 = 0; i00 < ne00; i00++) { dst_data[i01*ne00*ne03 + i00*ne03 + i03] = src[i01 * ne00 + i00]; } } } } } { ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + nk; for (int i12 = 0; i12 < ne12; i12++) { for (int i11 = 0; i11 < ne11; i11++) { const float * 
const src = (float *)((char *) src1->data + i12*nb12 + i11*nb11); ggml_fp16_t * dst_data = wdata + i11*ne10*ne12; for (int i10 = 0; i10 < ne10; i10++) { dst_data[i10*ne12 + i12] = (src[i10]); } } } } return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int32_t stride = ggml_get_op_params_i32(dst, 0); const int np = ne2; const int dp = (np + nth - 1)/nth; const int ip0 = dp*ith; const int ip1 = ((ip0 + dp) < (np) ? (ip0 + dp) : (np)); ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0; ggml_fp16_t * const wdata_src = wdata + nk; for (int i2 = ip0; i2 < ip1; i2++) { float * dst_data = (float *)((char *) dst->data + i2*nb2); ggml_fp16_t * wdata_kernel = wdata + i2*ne01*ne00*ne03; for (int i11 = 0; i11 < ne11; i11++) { for (int i10 = 0; i10 < ne10; i10++) { const int i1n = i11*ne10*ne12 + i10*ne12; for (int i01 = 0; i01 < ne01; i01++) { for (int i00 = 0; i00 < ne00; i00++) { float v = 0; ggml_vec_dot_f16(ne03, &v, wdata_src + i1n, wdata_kernel + i01*ne00*ne03 + i00*ne03); dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v; } } } } } } static void ggml_compute_forward_pool_1d_sk_p0( const struct ggml_compute_params * params, const enum ggml_op_pool op, const struct ggml_tensor * src, const int k, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const char * cdata = (const char *)src->data; const char * const data_end = cdata + ggml_nbytes(src); float * drow = (float *)dst->data; const int64_t rs = dst->ne[0]; while (cdata < data_end) { const float * const srow = (const float *)cdata; int j = 0; for (int64_t i = 0; i < rs; ++i) { switch (op) { case GGML_OP_POOL_AVG: drow[i] = 0; break; case GGML_OP_POOL_MAX: drow[i] = -3.40282347e+38F; break; case GGML_OP_POOL_COUNT: do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13807, "false"); abort(); } } while (0); break; } for (int ki = 0; ki < k; ++ki) { switch (op) { case 
GGML_OP_POOL_AVG: drow[i] += srow[j]; break; case GGML_OP_POOL_MAX: if (srow[j] > drow[i]) drow[i] = srow[j]; break; case GGML_OP_POOL_COUNT: do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13813, "false"); abort(); } } while (0); break; } ++j; } switch (op) { case GGML_OP_POOL_AVG: drow[i] /= k; break; case GGML_OP_POOL_MAX: break; case GGML_OP_POOL_COUNT: do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13820, "false"); abort(); } } while (0); break; } } cdata += src->nb[1]; drow += rs; } } static void ggml_compute_forward_pool_1d( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { const int32_t * opts = (const int32_t *)dst->op_params; enum ggml_op_pool op = opts[0]; const int k0 = opts[1]; const int s0 = opts[2]; const int p0 = opts[3]; do { if (!(p0 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13841, "p0 == 0"); abort(); } } while (0); do { if (!(k0 == s0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13842, "k0 == s0"); abort(); } } while (0); ggml_compute_forward_pool_1d_sk_p0(params, op, src0, k0, dst); } static void ggml_compute_forward_pool_2d_sk_p0( const struct ggml_compute_params * params, const enum ggml_op_pool op, const struct ggml_tensor * src, const int k0, const int k1, struct ggml_tensor * dst) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const char * cdata = (const char*)src->data; const char * const data_end = cdata + ggml_nbytes(src); const int64_t px = dst->ne[0]; const int64_t py = dst->ne[1]; const int64_t pa = px * py; float * dplane = (float *)dst->data; const int ka = k0 * k1; while (cdata < data_end) { for (int oy = 0; oy < py; ++oy) { float * const drow = dplane + oy * px; for (int ox = 0; ox < px; ++ox) { float * const out = drow + ox; switch (op) { case GGML_OP_POOL_AVG: *out = 0; break; case GGML_OP_POOL_MAX: *out = -3.40282347e+38F; 
break; case GGML_OP_POOL_COUNT: do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13882, "false"); abort(); } } while (0); break; } const int ix = ox * k0; const int iy = oy * k1; for (int ky = 0; ky < k1; ++ky) { const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky)); for (int kx = 0; kx < k0; ++kx) { int j = ix + kx; switch (op) { case GGML_OP_POOL_AVG: *out += srow[j]; break; case GGML_OP_POOL_MAX: if (srow[j] > *out) *out = srow[j]; break; case GGML_OP_POOL_COUNT: do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13895, "false"); abort(); } } while (0); break; } } } switch (op) { case GGML_OP_POOL_AVG: *out /= ka; break; case GGML_OP_POOL_MAX: break; case GGML_OP_POOL_COUNT: do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13902, "false"); abort(); } } while (0); break; } } } cdata += src->nb[2]; dplane += pa; } } static void ggml_compute_forward_pool_2d( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { const int32_t * opts = (const int32_t *)dst->op_params; enum ggml_op_pool op = opts[0]; const int k0 = opts[1]; const int k1 = opts[2]; const int s0 = opts[3]; const int s1 = opts[4]; const int p0 = opts[5]; const int p1 = opts[6]; do { if (!(p0 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13927, "p0 == 0"); abort(); } } while (0); do { if (!(p1 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13928, "p1 == 0"); abort(); } } while (0); do { if (!(k0 == s0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13929, "k0 == s0"); abort(); } } while (0); do { if (!(k1 == s1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13930, "k1 == s1"); abort(); } } while (0); ggml_compute_forward_pool_2d_sk_p0(params, op, src0, k0, k1, dst); } static void ggml_compute_forward_upscale_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { if 
(params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } do { if (!(src0->nb[0] == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13946, "src0->nb[0] == sizeof(float)"); abort(); } } while (0); const int ith = params->ith; const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int scale_factor = dst->op_params[0]; for (int i03 = 0; i03 < ne03; i03++) { for (int i02 = ith; i02 < ne02; i02++) { for (int m = 0; m < dst->ne[1]; m++) { int i01 = m / scale_factor; for (int n = 0; n < dst->ne[0]; n++) { int i00 = n / scale_factor; const float * x = (float *)((char *) src0->data + i00 * nb00 +i01 * nb01 + i02 * nb02 + i03 * nb03); float * y = (float *)((char *) dst->data + n * dst->nb[0] + m * dst->nb[1] + i02 * dst->nb[2] + i03 * dst->nb[3]); *y = *x; } } } } } static void ggml_compute_forward_upscale( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_upscale_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 13985, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_flash_attn_f32( const struct 
ggml_compute_params * params, const struct ggml_tensor * q, const struct ggml_tensor * k, const struct ggml_tensor * v, const _Bool masked, struct ggml_tensor * dst) { int64_t t0 = 0; (void)(t0); const int64_t neq0 = (q)->ne[0]; (void)(neq0); const int64_t neq1 = (q)->ne[1]; (void)(neq1); const int64_t neq2 = (q)->ne[2]; (void)(neq2); const int64_t neq3 = (q)->ne[3]; (void)(neq3);; const size_t nbq0 = (q)->nb[0]; (void)(nbq0); const size_t nbq1 = (q)->nb[1]; (void)(nbq1); const size_t nbq2 = (q)->nb[2]; (void)(nbq2); const size_t nbq3 = (q)->nb[3]; (void)(nbq3);; const int64_t nek0 = (k)->ne[0]; (void)(nek0); const int64_t nek1 = (k)->ne[1]; (void)(nek1); const int64_t nek2 = (k)->ne[2]; (void)(nek2); const int64_t nek3 = (k)->ne[3]; (void)(nek3);; const size_t nbk0 = (k)->nb[0]; (void)(nbk0); const size_t nbk1 = (k)->nb[1]; (void)(nbk1); const size_t nbk2 = (k)->nb[2]; (void)(nbk2); const size_t nbk3 = (k)->nb[3]; (void)(nbk3);; const int64_t nev0 = (v)->ne[0]; (void)(nev0); const int64_t nev1 = (v)->ne[1]; (void)(nev1); const int64_t nev2 = (v)->ne[2]; (void)(nev2); const int64_t nev3 = (v)->ne[3]; (void)(nev3);; const size_t nbv0 = (v)->nb[0]; (void)(nbv0); const size_t nbv1 = (v)->nb[1]; (void)(nbv1); const size_t nbv2 = (v)->nb[2]; (void)(nbv2); const size_t nbv3 = (v)->nb[3]; (void)(nbv3);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);; const int ith = params->ith; const int nth = params->nth; const int64_t D = neq0; const int64_t N = neq1; const int64_t P = nek1 - N; const int64_t M = P + N; const int Mup = ggml_up(M, 4); do { if (!(ne0 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14021, "ne0 == D"); abort(); } } while 
(0); do { if (!(ne1 == N)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14022, "ne1 == N"); abort(); } } while (0); do { if (!(P >= 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14023, "P >= 0"); abort(); } } while (0); do { if (!(nbq0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14025, "nbq0 == sizeof(float)"); abort(); } } while (0); do { if (!(nbk0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14026, "nbk0 == sizeof(float)"); abort(); } } while (0); do { if (!(nbv0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14027, "nbv0 == sizeof(float)"); abort(); } } while (0); do { if (!(neq0 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14029, "neq0 == D"); abort(); } } while (0); do { if (!(nek0 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14030, "nek0 == D"); abort(); } } while (0); do { if (!(nev1 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14031, "nev1 == D"); abort(); } } while (0); do { if (!(neq1 == N)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14033, "neq1 == N"); abort(); } } while (0); do { if (!(nek1 == N + P)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14034, "nek1 == N + P"); abort(); } } while (0); do { if (!(nev1 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14035, "nev1 == D"); abort(); } } while (0); do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14038, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb0 <= nb1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14039, "nb0 <= nb1"); abort(); } } while (0); do { if (!(nb1 <= nb2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14040, "nb1 <= nb2"); abort(); } } while (0); do { if (!(nb2 <= nb3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14041, "nb2 <= nb3"); abort(); } } while (0); if (params->type == 
GGML_TASK_INIT) { return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int nr = neq1*neq2*neq3; const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); const float scale = 1.0f/sqrtf(D); for (int ir = ir0; ir < ir1; ++ir) { const int iq3 = ir/(neq2*neq1); const int iq2 = (ir - iq3*neq2*neq1)/neq1; const int iq1 = (ir - iq3*neq2*neq1 - iq2*neq1); float * S = (float *) params->wdata + ith*(Mup + CACHE_LINE_SIZE_F32); for (int i = M; i < Mup; ++i) { S[i] = -__builtin_inff(); } for (int64_t ic = 0; ic < nek1; ++ic) { const int ik3 = iq3; const int ik2 = iq2; const int ik1 = ic; const int i1 = ik1; ggml_vec_dot_f32(neq0, S + i1, (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); } ggml_vec_scale_f32(nek1, S, scale); if (masked) { for (int64_t i = P; i < M; i++) { if (i > P + iq1) { S[i] = -__builtin_inff(); } } } { float max = -__builtin_inff(); ggml_vec_max_f32(M, &max, S); ggml_float sum = 0.0; { uint16_t scvt[4]; (void)(scvt); ggml_float sump[4] = { 0.0 }; for (int i = 0; i < Mup; i += 4) { float * SS = S + i; for (int j = 0; j < 4; ++j) { if (SS[j] == -__builtin_inff()) { SS[j] = 0.0f; } else { const float val = expf(SS[j] - max); sump[j] += (ggml_float)val; SS[j] = val; } } } for (int i = 0; i < 4; i++) { sum += sump[i]; } } ((void) (0)); sum = 1.0/sum; ggml_vec_scale_f32(M, S, sum); } for (int64_t ic = 0; ic < nev1; ++ic) { const int i1 = iq1; const int i2 = iq2; const int i3 = iq3; ggml_vec_dot_f32(nek1, (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), (float *) ((char *) v->data + ( ic*nbv1 + i2*nbv2 + i3*nbv3)), S); } } } static void ggml_compute_forward_flash_attn_f16( const struct ggml_compute_params * params, const struct ggml_tensor * q, const struct ggml_tensor * k, const struct ggml_tensor * v, const _Bool masked, struct ggml_tensor * dst) { int64_t t0 = 0; (void)(t0); const int64_t 
neq0 = (q)->ne[0]; (void)(neq0); const int64_t neq1 = (q)->ne[1]; (void)(neq1); const int64_t neq2 = (q)->ne[2]; (void)(neq2); const int64_t neq3 = (q)->ne[3]; (void)(neq3);; const size_t nbq0 = (q)->nb[0]; (void)(nbq0); const size_t nbq1 = (q)->nb[1]; (void)(nbq1); const size_t nbq2 = (q)->nb[2]; (void)(nbq2); const size_t nbq3 = (q)->nb[3]; (void)(nbq3);; const int64_t nek0 = (k)->ne[0]; (void)(nek0); const int64_t nek1 = (k)->ne[1]; (void)(nek1); const int64_t nek2 = (k)->ne[2]; (void)(nek2); const int64_t nek3 = (k)->ne[3]; (void)(nek3);; const size_t nbk0 = (k)->nb[0]; (void)(nbk0); const size_t nbk1 = (k)->nb[1]; (void)(nbk1); const size_t nbk2 = (k)->nb[2]; (void)(nbk2); const size_t nbk3 = (k)->nb[3]; (void)(nbk3);; const int64_t nev0 = (v)->ne[0]; (void)(nev0); const int64_t nev1 = (v)->ne[1]; (void)(nev1); const int64_t nev2 = (v)->ne[2]; (void)(nev2); const int64_t nev3 = (v)->ne[3]; (void)(nev3);; const size_t nbv0 = (v)->nb[0]; (void)(nbv0); const size_t nbv1 = (v)->nb[1]; (void)(nbv1); const size_t nbv2 = (v)->nb[2]; (void)(nbv2); const size_t nbv3 = (v)->nb[3]; (void)(nbv3);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);; const int ith = params->ith; const int nth = params->nth; const int64_t D = neq0; const int64_t N = neq1; const int64_t P = nek1 - N; const int64_t M = P + N; const int Mup = ggml_up(M, 4); do { if (!(ne0 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14203, "ne0 == D"); abort(); } } while (0); do { if (!(ne1 == N)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14204, "ne1 == N"); abort(); } } while (0); do { if (!(P >= 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14205, 
"P >= 0"); abort(); } } while (0); do { if (!(nbq0 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14207, "nbq0 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nbk0 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14208, "nbk0 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nbv0 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14209, "nbv0 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(neq0 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14211, "neq0 == D"); abort(); } } while (0); do { if (!(nek0 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14212, "nek0 == D"); abort(); } } while (0); do { if (!(nev1 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14213, "nev1 == D"); abort(); } } while (0); do { if (!(neq1 == N)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14215, "neq1 == N"); abort(); } } while (0); do { if (!(nek1 == N + P)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14216, "nek1 == N + P"); abort(); } } while (0); do { if (!(nev1 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14217, "nev1 == D"); abort(); } } while (0); do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14220, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb0 <= nb1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14221, "nb0 <= nb1"); abort(); } } while (0); do { if (!(nb1 <= nb2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14222, "nb1 <= nb2"); abort(); } } while (0); do { if (!(nb2 <= nb3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14223, "nb2 <= nb3"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int nr = neq1*neq2*neq3; const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = 
((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); const float scale = 1.0f/sqrtf(D); for (int ir = ir0; ir < ir1; ++ir) { const int iq3 = ir/(neq2*neq1); const int iq2 = (ir - iq3*neq2*neq1)/neq1; const int iq1 = (ir - iq3*neq2*neq1 - iq2*neq1); float * S = (float *) params->wdata + ith*(2*Mup + CACHE_LINE_SIZE_F32); for (int i = M; i < Mup; ++i) { S[i] = -__builtin_inff(); } if (2 > 2 || nek1 % 2 != 0) { for (int64_t ic = 0; ic < nek1; ++ic) { const int ik3 = iq3; const int ik2 = iq2; const int ik1 = ic; const int i1 = ik1; ggml_vec_dot_f16(neq0, S + i1, (ggml_fp16_t *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), (ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); } } else { for (int64_t ic = 0; ic < nek1; ic += 2) { const int ik3 = iq3; const int ik2 = iq2; const int ik1 = ic; const int i1 = ik1; ggml_vec_dot_f16_unroll(neq0, nbk1, S + i1, ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), (ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); } } ggml_vec_scale_f32(nek1, S, scale); if (masked) { for (int64_t i = P; i < M; i++) { if (i > P + iq1) { S[i] = -__builtin_inff(); } } } { float max = -__builtin_inff(); ggml_vec_max_f32(M, &max, S); ggml_float sum = 0.0; { uint16_t scvt[4]; ggml_float sump[4] = { 0.0 }; for (int i = 0; i < Mup; i += 4) { float * SS = S + i; for (int j = 0; j < 4; ++j) { if (SS[j] == -__builtin_inff()) { SS[j] = 0.0f; } else { ggml_fp16_t s = (SS[j] - max); memcpy(&scvt[j], &s, sizeof(uint16_t)); const float val = ((float) (table_exp_f16[scvt[j]])); sump[j] += (ggml_float)val; SS[j] = val; } } } for (int i = 0; i < 4; i++) { sum += sump[i]; } } ((void) (0)); sum = 1.0/sum; ggml_vec_scale_f32(M, S, sum); } ggml_fp16_t * S16 = (ggml_fp16_t *) ((float *) params->wdata + ith*(2*Mup + CACHE_LINE_SIZE_F32) + Mup); for (int64_t i = 0; i < M; i++) { S16[i] = (S[i]); } if (2 == 1 || (nev1 % 2 != 0)) { for (int64_t ic = 0; ic < nev1; ++ic) { const int i1 = iq1; const int i2 = iq2; const int i3 = iq3; 
ggml_vec_dot_f16(nek1, (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), (ggml_fp16_t *) ((char *) v->data + ( ic*nbv1 + i2*nbv2 + i3*nbv3)), S16); } } else { for (int64_t ic = 0; ic < nev1; ic += 2) { const int i1 = iq1; const int i2 = iq2; const int i3 = iq3; ggml_vec_dot_f16_unroll(nek1, nbv1, (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), ((char *) v->data + ( ic*nbv1 + i2*nbv2 + i3*nbv3)), S16); } } } } static void ggml_compute_forward_flash_attn( const struct ggml_compute_params * params, const struct ggml_tensor * q, const struct ggml_tensor * k, const struct ggml_tensor * v, const _Bool masked, struct ggml_tensor * dst) { switch (q->type) { case GGML_TYPE_F16: { ggml_compute_forward_flash_attn_f16(params, q, k, v, masked, dst); } break; case GGML_TYPE_F32: { ggml_compute_forward_flash_attn_f32(params, q, k, v, masked, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14407, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_flash_ff_f16( const struct ggml_compute_params * params, const struct ggml_tensor * a, const struct ggml_tensor * b0, const struct ggml_tensor * b1, const struct ggml_tensor * c0, const struct ggml_tensor * c1, struct ggml_tensor * dst) { int64_t t0 = 0; (void)(t0); const int64_t nea0 = (a)->ne[0]; (void)(nea0); const int64_t nea1 = (a)->ne[1]; (void)(nea1); const int64_t nea2 = (a)->ne[2]; (void)(nea2); const int64_t nea3 = (a)->ne[3]; (void)(nea3);; const size_t nba0 = (a)->nb[0]; (void)(nba0); const size_t nba1 = (a)->nb[1]; (void)(nba1); const size_t nba2 = (a)->nb[2]; (void)(nba2); const size_t nba3 = (a)->nb[3]; (void)(nba3);; const int64_t neb00 = (b0)->ne[0]; (void)(neb00); const int64_t neb01 = (b0)->ne[1]; (void)(neb01); const int64_t neb02 = (b0)->ne[2]; (void)(neb02); const int64_t neb03 = (b0)->ne[3]; (void)(neb03);; const size_t nbb00 = (b0)->nb[0]; (void)(nbb00); const size_t nbb01 = (b0)->nb[1]; 
(void)(nbb01); const size_t nbb02 = (b0)->nb[2]; (void)(nbb02); const size_t nbb03 = (b0)->nb[3]; (void)(nbb03);; const int64_t neb10 = (b1)->ne[0]; (void)(neb10); const int64_t neb11 = (b1)->ne[1]; (void)(neb11); const int64_t neb12 = (b1)->ne[2]; (void)(neb12); const int64_t neb13 = (b1)->ne[3]; (void)(neb13);; const size_t nbb10 = (b1)->nb[0]; (void)(nbb10); const size_t nbb11 = (b1)->nb[1]; (void)(nbb11); const size_t nbb12 = (b1)->nb[2]; (void)(nbb12); const size_t nbb13 = (b1)->nb[3]; (void)(nbb13);; const int64_t nec00 = (c0)->ne[0]; (void)(nec00); const int64_t nec01 = (c0)->ne[1]; (void)(nec01); const int64_t nec02 = (c0)->ne[2]; (void)(nec02); const int64_t nec03 = (c0)->ne[3]; (void)(nec03);; const size_t nbc00 = (c0)->nb[0]; (void)(nbc00); const size_t nbc01 = (c0)->nb[1]; (void)(nbc01); const size_t nbc02 = (c0)->nb[2]; (void)(nbc02); const size_t nbc03 = (c0)->nb[3]; (void)(nbc03);; const int64_t nec10 = (c1)->ne[0]; (void)(nec10); const int64_t nec11 = (c1)->ne[1]; (void)(nec11); const int64_t nec12 = (c1)->ne[2]; (void)(nec12); const int64_t nec13 = (c1)->ne[3]; (void)(nec13);; const size_t nbc10 = (c1)->nb[0]; (void)(nbc10); const size_t nbc11 = (c1)->nb[1]; (void)(nbc11); const size_t nbc12 = (c1)->nb[2]; (void)(nbc12); const size_t nbc13 = (c1)->nb[3]; (void)(nbc13);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);; const int ith = params->ith; const int nth = params->nth; const int64_t D = nea0; const int64_t M = neb01; do { if (!(ne0 == nea0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14445, "ne0 == nea0"); abort(); } } while (0); do { if (!(ne1 == nea1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 
14446, "ne1 == nea1"); abort(); } } while (0); do { if (!(ne2 == nea2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14447, "ne2 == nea2"); abort(); } } while (0); do { if (!(nba0 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14449, "nba0 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nbb00 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14450, "nbb00 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nbb10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14451, "nbb10 == sizeof(float)"); abort(); } } while (0); do { if (!(nbc00 == sizeof(ggml_fp16_t))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14452, "nbc00 == sizeof(ggml_fp16_t)"); abort(); } } while (0); do { if (!(nbc10 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14453, "nbc10 == sizeof(float)"); abort(); } } while (0); do { if (!(neb00 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14455, "neb00 == D"); abort(); } } while (0); do { if (!(neb01 == M)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14456, "neb01 == M"); abort(); } } while (0); do { if (!(neb10 == M)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14457, "neb10 == M"); abort(); } } while (0); do { if (!(neb11 == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14458, "neb11 == 1"); abort(); } } while (0); do { if (!(nec00 == M)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14460, "nec00 == M"); abort(); } } while (0); do { if (!(nec01 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14461, "nec01 == D"); abort(); } } while (0); do { if (!(nec10 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14462, "nec10 == D"); abort(); } } while (0); do { if (!(nec11 == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14463, "nec11 == 1"); abort(); } } while (0); do { if (!(nb0 == 
sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14466, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb0 <= nb1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14467, "nb0 <= nb1"); abort(); } } while (0); do { if (!(nb1 <= nb2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14468, "nb1 <= nb2"); abort(); } } while (0); do { if (!(nb2 <= nb3)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14469, "nb2 <= nb3"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int nr = nea1*nea2*nea3; const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); for (int ir = ir0; ir < ir1; ++ir) { const int ia3 = ir/(nea2*nea1); const int ia2 = (ir - ia3*nea2*nea1)/nea1; const int ia1 = (ir - ia3*nea2*nea1 - ia2*nea1); float * S = (float *) params->wdata + ith*(2*M + CACHE_LINE_SIZE_F32); for (int64_t ic = 0; ic < neb01; ++ic) { const int ib03 = ia3; const int ib02 = ia2; const int ib01 = ic; const int i1 = ib01; ggml_vec_dot_f16(nea0, S + i1, (ggml_fp16_t *) ((char *) b0->data + (ib01*nbb01 + ib02*nbb02 + ib03*nbb03)), (ggml_fp16_t *) ((char *) a->data + ( ia1*nba1 + ia2*nba2 + ia3*nba3))); } ggml_vec_add_f32(neb01, S, S, (float *) b1->data); ggml_fp16_t * S16 = (ggml_fp16_t *) ((float *) params->wdata + ith*(2*M + CACHE_LINE_SIZE_F32) + M); for (int64_t i = 0; i < M; i++) { S16[i] = (S[i]); } ggml_vec_gelu_f16(neb01, S16, S16); { const int i1 = ia1; const int i2 = ia2; const int i3 = ia3; for (int64_t ic = 0; ic < nec01; ++ic) { ggml_vec_dot_f16(neb01, (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)), (ggml_fp16_t *) ((char *) c0->data + ( ic*nbc01 + i2*nbc02 + i3*nbc03)), S16); } ggml_vec_add_f32(nec01, (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3)), (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3)), (float *) c1->data); } } } static void 
ggml_compute_forward_flash_ff( const struct ggml_compute_params * params, const struct ggml_tensor * a, const struct ggml_tensor * b0, const struct ggml_tensor * b1, const struct ggml_tensor * c0, const struct ggml_tensor * c1, struct ggml_tensor * dst) { switch (b0->type) { case GGML_TYPE_F16: { ggml_compute_forward_flash_ff_f16(params, a, b0, b1, c0, c1, dst); } break; case GGML_TYPE_F32: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14562, "false"); abort(); } } while (0); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14566, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_flash_attn_back_f32( const struct ggml_compute_params * params, const struct ggml_tensor * q, const struct ggml_tensor * k, const struct ggml_tensor * v, const struct ggml_tensor * d, const _Bool masked, struct ggml_tensor * dst) { int64_t t0 = 0; (void)(t0); const int64_t neq0 = (q)->ne[0]; (void)(neq0); const int64_t neq1 = (q)->ne[1]; (void)(neq1); const int64_t neq2 = (q)->ne[2]; (void)(neq2); const int64_t neq3 = (q)->ne[3]; (void)(neq3);; const size_t nbq0 = (q)->nb[0]; (void)(nbq0); const size_t nbq1 = (q)->nb[1]; (void)(nbq1); const size_t nbq2 = (q)->nb[2]; (void)(nbq2); const size_t nbq3 = (q)->nb[3]; (void)(nbq3);; const int64_t nek0 = (k)->ne[0]; (void)(nek0); const int64_t nek1 = (k)->ne[1]; (void)(nek1); const int64_t nek2 = (k)->ne[2]; (void)(nek2); const int64_t nek3 = (k)->ne[3]; (void)(nek3);; const size_t nbk0 = (k)->nb[0]; (void)(nbk0); const size_t nbk1 = (k)->nb[1]; (void)(nbk1); const size_t nbk2 = (k)->nb[2]; (void)(nbk2); const size_t nbk3 = (k)->nb[3]; (void)(nbk3);; const int64_t nev0 = (v)->ne[0]; (void)(nev0); const int64_t nev1 = (v)->ne[1]; (void)(nev1); const int64_t nev2 = (v)->ne[2]; (void)(nev2); const int64_t nev3 = (v)->ne[3]; (void)(nev3);; const size_t nbv0 = (v)->nb[0]; (void)(nbv0); const size_t nbv1 = (v)->nb[1]; (void)(nbv1); const size_t nbv2 = 
(v)->nb[2]; (void)(nbv2); const size_t nbv3 = (v)->nb[3]; (void)(nbv3);; const int64_t ned0 = (d)->ne[0]; (void)(ned0); const int64_t ned1 = (d)->ne[1]; (void)(ned1); const int64_t ned2 = (d)->ne[2]; (void)(ned2); const int64_t ned3 = (d)->ne[3]; (void)(ned3);; const size_t nbd0 = (d)->nb[0]; (void)(nbd0); const size_t nbd1 = (d)->nb[1]; (void)(nbd1); const size_t nbd2 = (d)->nb[2]; (void)(nbd2); const size_t nbd3 = (d)->nb[3]; (void)(nbd3);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);; const int ith = params->ith; const int nth = params->nth; const int64_t D = neq0; const int64_t N = neq1; const int64_t P = nek1 - N; const int64_t M = P + N; const int Mup = ggml_up(M, 4); const int mxDM = ((D) > (Mup) ? 
(D) : (Mup)); do { if (!(P >= 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14608, "P >= 0"); abort(); } } while (0); do { if (!(nbq0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14610, "nbq0 == sizeof(float)"); abort(); } } while (0); do { if (!(nbk0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14611, "nbk0 == sizeof(float)"); abort(); } } while (0); do { if (!(nbv0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14612, "nbv0 == sizeof(float)"); abort(); } } while (0); do { if (!(neq0 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14614, "neq0 == D"); abort(); } } while (0); do { if (!(nek0 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14615, "nek0 == D"); abort(); } } while (0); do { if (!(nev1 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14616, "nev1 == D"); abort(); } } while (0); do { if (!(ned0 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14617, "ned0 == D"); abort(); } } while (0); do { if (!(neq1 == N)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14619, "neq1 == N"); abort(); } } while (0); do { if (!(nek1 == N + P)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14620, "nek1 == N + P"); abort(); } } while (0); do { if (!(nev1 == D)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14621, "nev1 == D"); abort(); } } while (0); do { if (!(ned1 == N)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14622, "ned1 == N"); abort(); } } while (0); do { if (!(nb0 == sizeof(float))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14625, "nb0 == sizeof(float)"); abort(); } } while (0); do { if (!(nb0 <= nb1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14626, "nb0 <= nb1"); abort(); } } while (0); do { if (!(nb1 <= nb2)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14627, "nb1 <= nb2"); abort(); } } while (0); do { if (!(nb2 <= nb3)) { 
fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14628, "nb2 <= nb3"); abort(); } } while (0); if (params->type == GGML_TASK_INIT) { if (ith == 0) { memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3); } return; } if (params->type == GGML_TASK_FINALIZE) { return; } const int nr = neq2*neq3; const int dr = (nr + nth - 1)/nth; const int ir0 = dr*ith; const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr)); const float scale = 1.0f/sqrtf(D); for (int ir = ir0; ir < ir1; ++ir) { const int iq3 = ir/(neq2); const int iq2 = ir - iq3*neq2; for ( int iq1 = 0; iq1 < neq1; ++iq1) { float * S = (float *) params->wdata + ith*2*(mxDM + CACHE_LINE_SIZE_F32) + 0*(mxDM+CACHE_LINE_SIZE_F32); float * SM = (float *) params->wdata + ith*2*(mxDM + CACHE_LINE_SIZE_F32) + 1*(mxDM+CACHE_LINE_SIZE_F32); for (int i = M; i < Mup; ++i) { S[i] = -__builtin_inff(); } for (int64_t ic = 0; ic < nek1; ++ic) { const int ik3 = iq3; const int ik2 = iq2; const int ik1 = ic; const int i1 = ik1; ggml_vec_dot_f32(neq0, S + i1, (float *) ((char *) k->data + (ik1*nbk1 + ik2*nbk2 + ik3*nbk3)), (float *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3))); } ggml_vec_scale_f32(nek1, S, scale); if (masked) { for (int64_t i = P; i < M; i++) { if (i > P + iq1) { S[i] = -__builtin_inff(); } } } { float max = -__builtin_inff(); ggml_vec_max_f32(M, &max, S); ggml_float sum = 0.0; { uint16_t scvt[4]; (void)(scvt); ggml_float sump[4] = { 0.0 }; for (int i = 0; i < Mup; i += 4) { float * SR = S + i; float * SW = SM + i; for (int j = 0; j < 4; ++j) { if (SR[j] == -__builtin_inff()) { SW[j] = 0.0f; } else { const float val = expf(SR[j] - max); sump[j] += (ggml_float)val; SW[j] = val; } } } for (int i = 0; i < 4; i++) { sum += sump[i]; } } ((void) (0)); sum = 1.0/sum; ggml_vec_scale_f32(M, SM, sum); } { # 14808 "ggml.c" } ggml_vec_set_f32(M, S, 0); for (int64_t ic = 0; ic < D; ++ic) { const int i1 = iq1; const int i2 = iq2; const int i3 = iq3; ggml_vec_mad_f32(M, S, (float *) ((char *) v->data + ( ic*nbv1 + i2*nbv2 + 
i3*nbv3)), *(float *) ((char *) d->data + (ic*nbd0 + i1*nbd1 + i2*nbd2 + i3*nbd3))); } float dot_SM_gradSM = 0; ggml_vec_dot_f32 (M, &dot_SM_gradSM, SM, S); ggml_vec_acc1_f32(M, S, -dot_SM_gradSM); ggml_vec_mul_f32 (M, S, S, SM); if (masked) { for (int64_t i = P; i < M; i++) { if (i > P + iq1) { S[i] = 0; } } } ggml_vec_scale_f32(M, S, scale); void * grad_q = (char *) dst->data; void * grad_k = (char *) dst->data + nb0*D*N*neq2*neq3; void * grad_v = (char *) dst->data + nb0*D*N*neq2*neq3 + nb0*D*M*neq2*neq3; const size_t nbgq1 = nb0*neq0; const size_t nbgq2 = nb0*neq0*neq1; const size_t nbgq3 = nb0*neq0*neq1*neq2; const size_t nbgk1 = nb0*nek0; const size_t nbgk2 = nb0*nek0*nek1; const size_t nbgk3 = nb0*nek0*nek1*neq2; const size_t nbgv1 = nb0*nev0; const size_t nbgv2 = nb0*nev0*nev1; const size_t nbgv3 = nb0*nev0*nev1*neq2; # 14873 "ggml.c" for (int64_t ic = 0; ic < M; ++ic) { const int i1 = iq1; const int i2 = iq2; const int i3 = iq3; ggml_vec_mad_f32(D, (float *) ((char *) grad_q + (i1*nbgq1 + i2*nbgq2 + i3*nbgq3)), (float *) ((char *) k->data + (ic*nbk1 + i2*nbk2 + i3*nbk3)), S[ic]); } for (int64_t ic = 0; ic < M; ++ic) { const int i1 = iq1; const int i2 = iq2; const int i3 = iq3; ggml_vec_mad_f32(D, (float *) ((char *) grad_k + (ic*nbgk1 + i2*nbgk2 + i3*nbgk3)), (float *) ((char *) q->data + (i1*nbq1 + i2*nbq2 + i3*nbq3)), S[ic]); } for (int64_t ic = 0; ic < D; ++ic) { const int i1 = iq1; const int i2 = iq2; const int i3 = iq3; ggml_vec_mad_f32(M, (float *) ((char *) grad_v + ( ic*nbgv1 + i2*nbgv2 + i3*nbgv3)), SM, *(float *) ((char *) d->data + (ic*nbd0 + i1*nbd1 + i2*nbd2 + i3*nbd3))); } } } } static void ggml_compute_forward_flash_attn_back( const struct ggml_compute_params * params, const struct ggml_tensor * q, const struct ggml_tensor * k, const struct ggml_tensor * v, const struct ggml_tensor * d, const _Bool masked, struct ggml_tensor * dst) { switch (q->type) { case GGML_TYPE_F32: { ggml_compute_forward_flash_attn_back_f32(params, q, k, v, d, masked, 
dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 14939, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_win_part_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const int32_t nep0 = ((const int32_t *)(dst->op_params))[0]; const int32_t nep1 = ((const int32_t *)(dst->op_params))[1]; const int32_t w = ((const int32_t *)(dst->op_params))[2]; ((void) (0)); ((void) (0)); for (int py = 0; py < nep1; ++py) { for (int px = 0; px < nep0; ++px) { const int64_t i3 = py*nep0 + px; for (int64_t i2 = 0; i2 < ne2; ++i2) { for (int64_t i1 = 0; i1 < ne1; ++i1) { for (int64_t i0 = 0; i0 < ne0; ++i0) { const int64_t i02 = py*w + i2; const int64_t i01 = px*w + i1; const int64_t i00 = i0; const int64_t i = i3*ne2*ne1*ne0 + i2*ne1*ne0 + i1*ne0 + i0; const int64_t j = i02*ne01*ne00 + i01*ne00 + i00; if (py*w + i2 >= ne02 || px*w + i1 >= ne01) { ((float *) dst->data)[i] = 0.0f; } else { ((float *) dst->data)[i] = ((float *) src0->data)[j]; } } } } } } } static void ggml_compute_forward_win_part( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_win_part_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15001, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_win_unpart_f32( 
const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const int32_t w = ((const int32_t *)(dst->op_params))[0]; const int px = (w - ne1%w)%w; const int npx = (px + ne1)/w; ((void) (0)); for (int64_t i2 = 0; i2 < ne2; ++i2) { for (int64_t i1 = 0; i1 < ne1; ++i1) { for (int64_t i0 = 0; i0 < ne0; ++i0) { const int ip2 = i2/w; const int ip1 = i1/w; const int64_t i02 = i2%w; const int64_t i01 = i1%w; const int64_t i00 = i0; const int64_t i = (ip2*npx + ip1)*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00 + i00; const int64_t j = i2*ne1*ne0 + i1*ne0 + i0; ((float *) dst->data)[j] = ((float *) src0->data)[i]; } } } } static void ggml_compute_forward_win_unpart( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_win_unpart_f32(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15061, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_unary( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { const enum ggml_unary_op op = ggml_get_unary_op(dst); switch (op) { case GGML_UNARY_OP_ABS: { ggml_compute_forward_abs(params, src0, dst); } break; case GGML_UNARY_OP_SGN: { ggml_compute_forward_sgn(params, src0, dst); } break; case GGML_UNARY_OP_NEG: { ggml_compute_forward_neg(params, src0, dst); } break; case 
GGML_UNARY_OP_STEP: { ggml_compute_forward_step(params, src0, dst); } break; case GGML_UNARY_OP_TANH: { ggml_compute_forward_tanh(params, src0, dst); } break; case GGML_UNARY_OP_ELU: { ggml_compute_forward_elu(params, src0, dst); } break; case GGML_UNARY_OP_RELU: { ggml_compute_forward_relu(params, src0, dst); } break; case GGML_UNARY_OP_GELU: { ggml_compute_forward_gelu(params, src0, dst); } break; case GGML_UNARY_OP_GELU_QUICK: { ggml_compute_forward_gelu_quick(params, src0, dst); } break; case GGML_UNARY_OP_SILU: { ggml_compute_forward_silu(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15117, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_get_rel_pos_f16( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int64_t ne00 = (src0)->ne[0]; (void)(ne00); const int64_t ne01 = (src0)->ne[1]; (void)(ne01); const int64_t ne02 = (src0)->ne[2]; (void)(ne02); const int64_t ne03 = (src0)->ne[3]; (void)(ne03);; const size_t nb00 = (src0)->nb[0]; (void)(nb00); const size_t nb01 = (src0)->nb[1]; (void)(nb01); const size_t nb02 = (src0)->nb[2]; (void)(nb02); const size_t nb03 = (src0)->nb[3]; (void)(nb03);; const int64_t ne0 = (dst)->ne[0]; (void)(ne0); const int64_t ne1 = (dst)->ne[1]; (void)(ne1); const int64_t ne2 = (dst)->ne[2]; (void)(ne2); const int64_t ne3 = (dst)->ne[3]; (void)(ne3);; const size_t nb0 = (dst)->nb[0]; (void)(nb0); const size_t nb1 = (dst)->nb[1]; (void)(nb1); const size_t nb2 = (dst)->nb[2]; (void)(nb2); const size_t nb3 = (dst)->nb[3]; (void)(nb3);;; const int64_t w = ne1; ggml_fp16_t * src0_data = (ggml_fp16_t *) src0->data; ggml_fp16_t * dst_data = (ggml_fp16_t *) dst->data; for (int64_t i2 = 0; i2 < ne2; ++i2) { for (int64_t i1 = 0; i1 < ne1; ++i1) { const int64_t pos = (w - i1 - 1) + i2; for (int64_t i0 = 0; i0 < 
ne0; ++i0) { dst_data[i2*ne1*ne0 + i1*ne0 + i0] = src0_data[pos*ne00 + i0]; } } } } static void ggml_compute_forward_get_rel_pos( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F16: { ggml_compute_forward_get_rel_pos_f16(params, src0, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15162, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_add_rel_pos_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * src2, struct ggml_tensor * dst) { const _Bool inplace = (_Bool) ((int32_t *) dst->op_params)[0]; if (!inplace && params->type == GGML_TASK_INIT) { memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst)); return; } if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } int64_t t0 = 0; (void)(t0); float * src1_data = (float *) src1->data; float * src2_data = (float *) src2->data; float * dst_data = (float *) dst->data; const int64_t ne10 = src1->ne[0]; const int64_t ne11 = src1->ne[1]; const int64_t ne12 = src1->ne[2]; const int64_t ne13 = src1->ne[3]; const int ith = params->ith; const int nth = params->nth; const int np = ne13; const int dp = (np + nth - 1)/nth; const int ip0 = dp*ith; const int ip1 = ((ip0 + dp) < (np) ? 
(ip0 + dp) : (np)); for (int64_t i13 = ip0; i13 < ip1; ++i13) { for (int64_t i12 = 0; i12 < ne12; ++i12) { for (int64_t i11 = 0; i11 < ne11; ++i11) { const int64_t jp1 = i13*ne12*ne11*ne10 + i12*ne11*ne10 + i11*ne10; for (int64_t i10 = 0; i10 < ne10; ++i10) { const int64_t jp0 = jp1 + i10; const float src1_e = src1_data[jp0]; const float src2_e = src2_data[jp0]; const int64_t jdh = jp0 * ne10; const int64_t jdw = jdh - (ne10 - 1) * i10; for (int64_t j = 0; j < ne10; ++j) { dst_data[jdh + j ] += src2_e; dst_data[jdw + j*ne10] += src1_e; } } } } } } static void ggml_compute_forward_add_rel_pos( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, const struct ggml_tensor * src2, struct ggml_tensor * dst) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_add_rel_pos_f32(params, src0, src1, src2, dst); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15248, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_map_unary_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst, const ggml_unary_op_f32_t fun) { do { if (!(ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15260, "ggml_are_same_shape(src0, dst)"); abort(); } } while (0); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { fun(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1]))); } } static void ggml_compute_forward_map_unary( const struct ggml_compute_params * params, const struct ggml_tensor * src0, struct ggml_tensor * dst, const ggml_unary_op_f32_t fun) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_map_unary_f32(params, src0, dst, fun); } break; default: { 
do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15292, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_map_binary_f32( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, const ggml_binary_op_f32_t fun) { ((void) (0)); ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } const int n = ggml_nrows(src0); const int nc = src0->ne[0]; ((void) (0)); ((void) (0)); ((void) (0)); for (int i = 0; i < n; i++) { fun(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), (float *) ((char *) src0->data + i*(src0->nb[1])), (float *) ((char *) src1->data + i*(src1->nb[1]))); } } static void ggml_compute_forward_map_binary( const struct ggml_compute_params * params, const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, const ggml_binary_op_f32_t fun) { switch (src0->type) { case GGML_TYPE_F32: { ggml_compute_forward_map_binary_f32(params, src0, src1, dst, fun); } break; default: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15341, "false"); abort(); } } while (0); } break; } } static void ggml_compute_forward_map_custom1_f32( const struct ggml_compute_params * params, const struct ggml_tensor * a, struct ggml_tensor * dst, const ggml_custom1_op_f32_t fun) { ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } fun(dst, a); } static void ggml_compute_forward_map_custom2_f32( const struct ggml_compute_params * params, const struct ggml_tensor * a, const struct ggml_tensor * b, struct ggml_tensor * dst, const ggml_custom2_op_f32_t fun) { ((void) (0)); if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) { return; } fun(dst, a, b); } static void ggml_compute_forward_map_custom3_f32( const struct ggml_compute_params * params, const struct ggml_tensor * a, const struct 
ggml_tensor * b,
        const struct ggml_tensor * c,
        struct ggml_tensor * dst,
        const ggml_custom3_op_f32_t fun) {
    ((void) (0));

    // Only the compute phase does work; INIT and FINALIZE return immediately.
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }

    fun(dst, a, b, c);
}

// Runs the custom one-input op stored in dst->op_params.
// The callback receives dst, the input tensor, the calling thread's index and
// the thread count from params, and the userdata pointer stored next to it.
// INIT and FINALIZE phases return immediately.
static void ggml_compute_forward_map_custom1(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * a,
        struct ggml_tensor * dst) {
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }

    // op_params is reinterpreted as the custom-op parameter struct
    struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;

    p->fun(dst, a, params->ith, params->nth, p->userdata);
}

// Two-input variant of the custom op dispatcher above: same contract, with the
// callback additionally receiving `b`.
static void ggml_compute_forward_map_custom2(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * a,
        const struct ggml_tensor * b,
        struct ggml_tensor * dst) {
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }

    struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;

    p->fun(dst, a, b, params->ith, params->nth, p->userdata);
}

// Three-input variant of the custom op dispatcher: same contract, with the
// callback additionally receiving `b` and `c`.
static void ggml_compute_forward_map_custom3(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * a,
        const struct ggml_tensor * b,
        const struct ggml_tensor * c,
        struct ggml_tensor * dst) {
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }

    struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;

    p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
}

// F32 cross-entropy loss of src0 against src1, reduced to the scalar dst.
//
// Preconditions (checked, abort on failure): src0 and src1 are contiguous and
// of the same shape, dst is a scalar, and the work buffer holds at least
// nth + nth*nc floats.
//
// Work buffer layout (params->wdata, as floats):
//   [0, nth)                      one partial sum per thread (`sums`)
//   [nth + ith*nc, nth + (ith+1)*nc)   per-thread scratch row (`st`)
//
// Phases:
//   INIT     - thread 0 zeroes the whole work buffer.
//   COMPUTE  - each thread processes its slice of rows and adds the row
//              results to sums[ith].
//   FINALIZE - thread 0 reduces the nth partial sums into dst (via
//              ggml_vec_sum_f32) and multiplies the result by -1/nr.
static void ggml_compute_forward_cross_entropy_loss_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
    do { if (!(ggml_is_contiguous(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15453, "ggml_is_contiguous(src0)"); abort(); } } while (0);
    do { if (!(ggml_is_contiguous(src1))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15454, "ggml_is_contiguous(src1)"); abort(); } } while (0);
    do { if (!(ggml_is_scalar(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15455, "ggml_is_scalar(dst)"); abort(); } } while (0);
    do { if (!(ggml_are_same_shape(src0, src1))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15456, "ggml_are_same_shape(src0, src1)"); abort(); } } while (0);

    const int ith = params->ith;
    const int nth = params->nth;

    float * sums = (float *) params->wdata;

    // nc: elements per row, nr: number of rows
    const int nc = src0->ne[0];
    const int nr = ggml_nrows(src0);

    do { if (!(params->wsize >= sizeof(float) * (nth + nth * nc))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15467, "params->wsize >= sizeof(float) * (nth + nth * nc)"); abort(); } } while (0);

    if (params->type == GGML_TASK_INIT) {
        if (ith == 0) {
            memset(sums, 0, sizeof(float) * (nth + nth * nc));
        }
        return;
    }

    if (params->type == GGML_TASK_FINALIZE) {
        if (ith == 0) {
            float * dp = (float *) dst->data;
            ggml_vec_sum_f32(nth, dp, sums);
            // negate and average over the number of rows
            dp[0] *= -1.0f / (float) nr;
        }
        return;
    }

    const double eps = 1e-9;

    // rows per thread (rounded up)
    const int dr = (nr + nth - 1)/nth;

    // row range [ir0, ir1) handled by this thread
    const int ir0 = dr*ith;
    const int ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr));

    for (int i1 = ir0; i1 < ir1; i1++) {
        float * s0 = (float *)((char *) src0->data + i1*src0->nb[1]);
        float * s1 = (float *)((char *) src1->data + i1*src1->nb[1]);
        float * st = ((float *) params->wdata) + nth + ith*nc;
# 15507 "ggml.c"
        // st[i] = exp(s0[i] - max) with -inf entries mapped to 0; `sum`
        // accumulates the exponentials in ggml_float precision.
        ggml_float sum = 0.0;
        {
            float max = -__builtin_inff();
            ggml_vec_max_f32(nc, &max, s0);

            // scvt is unused in this expansion (only silenced with a void cast)
            uint16_t scvt; (void)(scvt);
            for (int i = 0; i < nc; i++) {
                if (s0[i] == -__builtin_inff()) {
                    st[i] = 0.0f;
                } else {
                    const float s = s0[i] - max;
                    const float val = expf(s);
                    sum += (ggml_float)val;
                    st[i] = val;
                }
            }

            ((void) (0));
        }

        // Going by the helper names: normalise so the row sums to (1 - eps),
        // add eps to every entry (keeps the log finite), take the log, weight
        // by the target row s1 and reduce to a single value.
        sum = (1.0 - eps) / sum;
        ggml_vec_scale_f32(nc, st, sum);
        ggml_vec_add1_f32(nc, st, st, eps);
        ggml_vec_log_f32(nc, st, st);
        ggml_vec_mul_f32(nc, st, st, s1);

        float st_sum = 0;
        ggml_vec_sum_f32(nc, &st_sum, st);
        sums[ith] += st_sum;
    }
}

// Type dispatcher for the cross-entropy loss: F32 only, anything else aborts.
static void ggml_compute_forward_cross_entropy_loss(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        struct ggml_tensor * dst) {
    switch (src0->type) {
        case GGML_TYPE_F32:
            {
                ggml_compute_forward_cross_entropy_loss_f32(params, src0, src1, dst);
            } break;
        default:
            {
                do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15566, "false"); abort(); } } while (0);
            } break;
    }
}

// F32 backward kernel for the cross-entropy loss.
//
// For every row it writes into dst: the exponentials of (s0 - max) normalised
// by (1 - eps)/sum, plus eps, minus the src1 row, all scaled by d[0]/nr, where
// d is the first element of opt0 and nr the row count (helper semantics taken
// from their names).
//
// Preconditions (checked, abort on failure): dst, src0, src1 and opt0 are
// contiguous; src0, src1 and dst share one shape.
// INIT and FINALIZE phases return immediately; rows are split across threads.
static void ggml_compute_forward_cross_entropy_loss_back_f32(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        const struct ggml_tensor * opt0,
        struct ggml_tensor * dst) {
    do { if (!(ggml_is_contiguous(dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15579, "ggml_is_contiguous(dst)"); abort(); } } while (0);
    do { if (!(ggml_is_contiguous(src0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15580, "ggml_is_contiguous(src0)"); abort(); } } while (0);
    do { if (!(ggml_is_contiguous(src1))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15581, "ggml_is_contiguous(src1)"); abort(); } } while (0);
    do { if (!(ggml_is_contiguous(opt0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15582, "ggml_is_contiguous(opt0)"); abort(); } } while (0);
    do { if (!(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15583, "ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)"); abort(); } } while (0);

    const int64_t ith = params->ith;
    const int64_t nth = params->nth;

    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }

    const double eps = 1e-9;

    // nc: elements per row, nr: number of rows
    const int64_t nc = src0->ne[0];
    const int64_t nr = ggml_nrows(src0);

    // rows per thread (rounded up)
    const int64_t dr = (nr + nth - 1)/nth;

    // row range [ir0, ir1) handled by this thread
    const int64_t ir0 = dr*ith;
    const int64_t ir1 = ((ir0 + dr) < (nr) ? (ir0 + dr) : (nr));

    // incoming gradient; only d[0] is read below
    float * d = (float *) opt0->data;

    for (int64_t i1 = ir0; i1 < ir1; i1++) {
        float * ds0 = (float *)((char *) dst->data + i1*dst->nb[1]);
        float * s0 = (float *)((char *) src0->data + i1*src0->nb[1]);
        float * s1 = (float *)((char *) src1->data + i1*src1->nb[1]);
# 15621 "ggml.c"
        // ds0[i] = exp(s0[i] - max) with -inf entries mapped to 0; afterwards
        // `sum` holds the normalisation factor (1 - eps)/sum.
        ggml_float sum = 0.0;
        {
            float max = -__builtin_inff();
            ggml_vec_max_f32(nc, &max, s0);

            // scvt is unused in this expansion (only silenced with a void cast)
            uint16_t scvt; (void)(scvt);
            for (int i = 0; i < nc; i++) {
                if (s0[i] == -__builtin_inff()) {
                    ds0[i] = 0.0f;
                } else {
                    const float s = s0[i] - max;
                    const float val = expf(s);
                    sum += (ggml_float)val;
                    ds0[i] = val;
                }
            }

            ((void) (0));
            sum = (1.0 - eps)/sum;
        }

        ggml_vec_scale_f32(nc, ds0, sum);
        ggml_vec_add1_f32(nc, ds0, ds0, eps);
        ggml_vec_sub_f32(nc, ds0, ds0, s1);
        ggml_vec_scale_f32(nc, ds0, d[0] / (float) nr);
# 15661 "ggml.c"
    }
}

// Type dispatcher for the cross-entropy backward kernel: F32 only, anything
// else aborts.
static void ggml_compute_forward_cross_entropy_loss_back(
        const struct ggml_compute_params * params,
        const struct ggml_tensor * src0,
        const struct ggml_tensor * src1,
        const struct ggml_tensor * opt0,
        struct ggml_tensor * dst) {
    switch (src0->type) {
        case GGML_TYPE_F32:
            {
                ggml_compute_forward_cross_entropy_loss_back_f32(params, src0, src1, opt0, dst);
            } break;
        default:
            {
                do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15677, "false"); abort(); } } while
(0); } break; } } static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) { do { if (!(params)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15686, "params"); abort(); } } while (0); # 15697 "ggml.c" switch (tensor->op) { case GGML_OP_DUP: { ggml_compute_forward_dup(params, tensor->src[0], tensor); } break; case GGML_OP_ADD: { ggml_compute_forward_add(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_ADD1: { ggml_compute_forward_add1(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_ACC: { ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_SUB: { ggml_compute_forward_sub(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_MUL: { ggml_compute_forward_mul(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_DIV: { ggml_compute_forward_div(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_SQR: { ggml_compute_forward_sqr(params, tensor->src[0], tensor); } break; case GGML_OP_SQRT: { ggml_compute_forward_sqrt(params, tensor->src[0], tensor); } break; case GGML_OP_LOG: { ggml_compute_forward_log(params, tensor->src[0], tensor); } break; case GGML_OP_SUM: { ggml_compute_forward_sum(params, tensor->src[0], tensor); } break; case GGML_OP_SUM_ROWS: { ggml_compute_forward_sum_rows(params, tensor->src[0], tensor); } break; case GGML_OP_MEAN: { ggml_compute_forward_mean(params, tensor->src[0], tensor); } break; case GGML_OP_ARGMAX: { ggml_compute_forward_argmax(params, tensor->src[0], tensor); } break; case GGML_OP_REPEAT: { ggml_compute_forward_repeat(params, tensor->src[0], tensor); } break; case GGML_OP_REPEAT_BACK: { ggml_compute_forward_repeat_back(params, tensor->src[0], tensor); } break; case GGML_OP_CONCAT: { ggml_compute_forward_concat(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_SILU_BACK: { ggml_compute_forward_silu_back(params, tensor->src[0], 
tensor->src[1], tensor); } break; case GGML_OP_NORM: { ggml_compute_forward_norm(params, tensor->src[0], tensor); } break; case GGML_OP_RMS_NORM: { ggml_compute_forward_rms_norm(params, tensor->src[0], tensor); } break; case GGML_OP_RMS_NORM_BACK: { ggml_compute_forward_rms_norm_back(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_GROUP_NORM: { ggml_compute_forward_group_norm(params, tensor->src[0], tensor); } break; case GGML_OP_MUL_MAT: { ggml_compute_forward_mul_mat(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_OUT_PROD: { ggml_compute_forward_out_prod(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_SCALE: { ggml_compute_forward_scale(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_SET: { ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_CPY: { ggml_compute_forward_cpy(params, tensor->src[0], tensor); } break; case GGML_OP_CONT: { ggml_compute_forward_cont(params, tensor->src[0], tensor); } break; case GGML_OP_RESHAPE: { ggml_compute_forward_reshape(params, tensor->src[0], tensor); } break; case GGML_OP_VIEW: { ggml_compute_forward_view(params, tensor->src[0]); } break; case GGML_OP_PERMUTE: { ggml_compute_forward_permute(params, tensor->src[0]); } break; case GGML_OP_TRANSPOSE: { ggml_compute_forward_transpose(params, tensor->src[0]); } break; case GGML_OP_GET_ROWS: { ggml_compute_forward_get_rows(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_GET_ROWS_BACK: { ggml_compute_forward_get_rows_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor); } break; case GGML_OP_DIAG: { ggml_compute_forward_diag(params, tensor->src[0], tensor); } break; case GGML_OP_DIAG_MASK_INF: { ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor); } break; case GGML_OP_DIAG_MASK_ZERO: { ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor); } break; case GGML_OP_SOFT_MAX: { 
ggml_compute_forward_soft_max(params, tensor->src[0], tensor); } break; case GGML_OP_SOFT_MAX_BACK: { ggml_compute_forward_soft_max_back(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_ROPE: { ggml_compute_forward_rope(params, tensor->src[0], tensor); } break; case GGML_OP_ROPE_BACK: { ggml_compute_forward_rope_back(params, tensor->src[0], tensor); } break; case GGML_OP_ALIBI: { ggml_compute_forward_alibi(params, tensor->src[0], tensor); } break; case GGML_OP_CLAMP: { ggml_compute_forward_clamp(params, tensor->src[0], tensor); } break; case GGML_OP_CONV_1D: { ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_CONV_2D: { ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_CONV_TRANSPOSE_2D: { ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_POOL_1D: { ggml_compute_forward_pool_1d(params, tensor->src[0], tensor); } break; case GGML_OP_POOL_2D: { ggml_compute_forward_pool_2d(params, tensor->src[0], tensor); } break; case GGML_OP_UPSCALE: { ggml_compute_forward_upscale(params, tensor->src[0], tensor); } break; case GGML_OP_FLASH_ATTN: { const int32_t t = ggml_get_op_params_i32(tensor, 0); do { if (!(t == 0 || t == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15897, "t == 0 || t == 1"); abort(); } } while (0); const _Bool masked = t != 0; ggml_compute_forward_flash_attn(params, tensor->src[0], tensor->src[1], tensor->src[2], masked, tensor); } break; case GGML_OP_FLASH_FF: { ggml_compute_forward_flash_ff(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], tensor->src[4], tensor); } break; case GGML_OP_FLASH_ATTN_BACK: { int32_t t = ggml_get_op_params_i32(tensor, 0); do { if (!(t == 0 || t == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15908, "t == 0 || t == 1"); abort(); } } while (0); _Bool masked = t != 0; 
ggml_compute_forward_flash_attn_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], masked, tensor); } break; case GGML_OP_WIN_PART: { ggml_compute_forward_win_part(params, tensor->src[0], tensor); } break; case GGML_OP_WIN_UNPART: { ggml_compute_forward_win_unpart(params, tensor->src[0], tensor); } break; case GGML_OP_UNARY: { ggml_compute_forward_unary(params, tensor->src[0], tensor); } break; case GGML_OP_GET_REL_POS: { ggml_compute_forward_get_rel_pos(params, tensor->src[0], tensor); } break; case GGML_OP_ADD_REL_POS: { ggml_compute_forward_add_rel_pos(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor); } break; case GGML_OP_MAP_UNARY: { ggml_unary_op_f32_t fun; memcpy(&fun, tensor->op_params, sizeof(fun)); ggml_compute_forward_map_unary(params, tensor->src[0], tensor, fun); } break; case GGML_OP_MAP_BINARY: { ggml_binary_op_f32_t fun; memcpy(&fun, tensor->op_params, sizeof(fun)); ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun); } break; case GGML_OP_MAP_CUSTOM1_F32: { ggml_custom1_op_f32_t fun; memcpy(&fun, tensor->op_params, sizeof(fun)); ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun); } break; case GGML_OP_MAP_CUSTOM2_F32: { ggml_custom2_op_f32_t fun; memcpy(&fun, tensor->op_params, sizeof(fun)); ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun); } break; case GGML_OP_MAP_CUSTOM3_F32: { ggml_custom3_op_f32_t fun; memcpy(&fun, tensor->op_params, sizeof(fun)); ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun); } break; case GGML_OP_MAP_CUSTOM1: { ggml_compute_forward_map_custom1(params, tensor->src[0], tensor); } break; case GGML_OP_MAP_CUSTOM2: { ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_MAP_CUSTOM3: { ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor); } 
break; case GGML_OP_CROSS_ENTROPY_LOSS: { ggml_compute_forward_cross_entropy_loss(params, tensor->src[0], tensor->src[1], tensor); } break; case GGML_OP_CROSS_ENTROPY_LOSS_BACK: { ggml_compute_forward_cross_entropy_loss_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor); } break; case GGML_OP_NONE: { } break; case GGML_OP_COUNT: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 15998, "false"); abort(); } } while (0); } break; } } static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, _Bool inplace) { struct ggml_tensor * src0 = tensor->src[0]; struct ggml_tensor * src1 = tensor->src[1]; switch (tensor->op) { case GGML_OP_DUP: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); } } break; case GGML_OP_ADD: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); } if (src1->grad) { src1->grad = ggml_add_impl(ctx, src1->grad, tensor->grad, inplace); } } break; case GGML_OP_ADD1: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); } if (src1->grad) { src1->grad = ggml_add_impl(ctx, src1->grad, ggml_mean(ctx, tensor->grad), inplace); } } break; case GGML_OP_ACC: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); } if (src1->grad) { const size_t nb1 = ((int32_t *) tensor->op_params)[0]; const size_t nb2 = ((int32_t *) tensor->op_params)[1]; const size_t nb3 = ((int32_t *) tensor->op_params)[2]; const size_t offset = ((int32_t *) tensor->op_params)[3]; struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx, tensor->grad, src1->grad->ne[0], src1->grad->ne[1], src1->grad->ne[2], src1->grad->ne[3], nb1, nb2, nb3, offset); src1->grad = ggml_add_impl(ctx, src1->grad, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1->grad), inplace); } } break; case GGML_OP_SUB: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace); } if 
(src1->grad) { src1->grad = ggml_sub_impl(ctx, src1->grad, tensor->grad, inplace); } } break; case GGML_OP_MUL: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_mul(ctx, src1, tensor->grad), inplace); } if (src1->grad) { src1->grad = ggml_add_impl(ctx, src1->grad, ggml_mul(ctx, src0, tensor->grad), inplace); } } break; case GGML_OP_DIV: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_div(ctx, tensor->grad, src1), inplace); } if (src1->grad) { src1->grad = ggml_sub_impl(ctx, src1->grad, ggml_mul(ctx, tensor->grad, ggml_div(ctx, tensor, src1)), inplace); } } break; case GGML_OP_SQR: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_scale(ctx, ggml_mul(ctx, src0, tensor->grad), ggml_new_f32(ctx, 2.0f)), inplace); } } break; case GGML_OP_SQRT: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_scale(ctx, ggml_div(ctx, tensor->grad, tensor), ggml_new_f32(ctx, 0.5f)), inplace); } } break; case GGML_OP_LOG: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_div(ctx, tensor->grad, src0), inplace); } } break; case GGML_OP_SUM: { if (src0->grad) { src0->grad = ggml_add1_impl(ctx, src0->grad, tensor->grad, inplace); } } break; case GGML_OP_SUM_ROWS: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_repeat(ctx, tensor->grad, src0->grad), inplace); } } break; case GGML_OP_MEAN: case GGML_OP_ARGMAX: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16173, "false"); abort(); } } while (0); } break; case GGML_OP_REPEAT: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_repeat_back(ctx, tensor->grad, src0->grad), inplace); } } break; case GGML_OP_REPEAT_BACK: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_repeat(ctx, tensor->grad, src0->grad), inplace); } } break; case GGML_OP_CONCAT: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16197, "false"); abort(); } } while (0); } break; 
case GGML_OP_SILU_BACK:
            {
                // backward pass not implemented for this op
                do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16201, "false"); abort(); } } while (0);
            } break;
        case GGML_OP_NORM:
            {
                // backward pass not implemented for this op
                do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16205, "false"); abort(); } } while (0);
            } break;
        case GGML_OP_RMS_NORM:
            {
                if (src0->grad) {
                    // eps is stored as the first float of op_params
                    float eps;
                    memcpy(&eps, tensor->op_params, sizeof(float));

                    src0->grad = ggml_add_impl(ctx, src0->grad, ggml_rms_norm_back(ctx, src0, tensor->grad, eps), inplace);
                }
            } break;
        case GGML_OP_RMS_NORM_BACK:
            {
                // backward pass not implemented for this op
                do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16222, "false"); abort(); } } while (0);
            } break;
        case GGML_OP_GROUP_NORM:
            {
                // backward pass not implemented for this op
                do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16226, "false"); abort(); } } while (0);
            } break;
        case GGML_OP_MUL_MAT:
            {
# 16246 "ggml.c"
                if (src0->grad) {
                    src0->grad = ggml_add_impl(ctx, src0->grad, ggml_out_prod(ctx, src1, tensor->grad), inplace);
                }
                if (src1->grad) {
                    src1->grad = ggml_add_impl(ctx, src1->grad,
# 16267 "ggml.c"
                            ggml_out_prod(ctx, src0, ggml_transpose(ctx, tensor->grad)), inplace);
                }
            } break;
        case GGML_OP_OUT_PROD:
            {
                // backward pass not implemented for this op
                do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16276, "false"); abort(); } } while (0);
            } break;
        case GGML_OP_SCALE:
            {
                if (src0->grad) {
                    src0->grad = ggml_add_impl(ctx, src0->grad, ggml_scale_impl(ctx, tensor->grad, src1, 0), inplace);
                }
                if (src1->grad) {
                    src1->grad = ggml_add_impl(ctx, src1->grad, ggml_sum(ctx, ggml_mul_impl(ctx, tensor->grad, src0, 0)), inplace);
                }
            } break;
        case GGML_OP_SET:
            {
                // strides and byte offset of the region that was overwritten,
                // stored as four int32 values in op_params
                const size_t nb1 = ((int32_t *) tensor->op_params)[0];
                const size_t nb2 = ((int32_t *) tensor->op_params)[1];
                const size_t nb3 = ((int32_t *) tensor->op_params)[2];
                const size_t offset = ((int32_t *) tensor->op_params)[3];

                struct ggml_tensor * tensor_grad_view = ((void*)0);

                if (src0->grad || src1->grad) {
                    do { if (!(src0->type == tensor->type)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16306, "src0->type == tensor->type"); abort(); } } while (0);
                    do { if (!(tensor->grad->type == tensor->type)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16307, "tensor->grad->type == tensor->type"); abort(); } } while (0);
                    // src1 may have no gradient while src0 does, so the type
                    // check on src1->grad must be conditional.
                    do { if (!(!src1->grad || tensor->grad->type == src1->grad->type)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16308, "!src1->grad || tensor->grad->type == src1->grad->type"); abort(); } } while (0);

                    // Shape the view from src1 itself rather than src1->grad:
                    // src1->grad can be NULL on this path (only src0 tracked).
                    // NOTE(review): assumes a gradient tensor always has the
                    // same ne[] as its tensor - confirm against where grads
                    // are allocated.
                    tensor_grad_view = ggml_view_4d(ctx, tensor->grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], nb1, nb2, nb3, offset);
                }

                if (src0->grad) {
                    // src0 receives the incoming gradient with the overwritten
                    // region cancelled out (the negated view is accumulated
                    // back at the same strides/offset)
                    src0->grad = ggml_add_impl(ctx, src0->grad, ggml_acc_impl(ctx, tensor->grad, ggml_neg(ctx, tensor_grad_view), nb1, nb2, nb3, offset, 0), inplace);
                }

                if (src1->grad) {
                    // src1 receives exactly the overwritten region
                    src1->grad = ggml_add_impl(ctx, src1->grad, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1->grad), inplace);
                }
            } break;
        case GGML_OP_CPY:
            {
                if (src0->grad) {
                    src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
                }
                if (src1->grad) {
                    // intentionally empty: nothing is propagated to src1
                }
            } break;
        case GGML_OP_CONT:
            {
                if (src0->grad) {
                    do { if (!(ggml_is_contiguous(src0->grad))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16357, "ggml_is_contiguous(src0->grad)"); abort(); } } while (0);
                    do { if (!(ggml_is_contiguous(tensor->grad))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16358, "ggml_is_contiguous(tensor->grad)"); abort(); } } while (0);
                    src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
                }
            } break;
        case GGML_OP_RESHAPE:
            {
                if (src0->grad) {
                    src0->grad = ggml_add_impl(ctx, src0->grad, ggml_reshape(ctx, tensor->grad, src0->grad), inplace);
                }
            } break;
        case GGML_OP_VIEW:
            {
                if (src0->grad) {
                    // byte offset of the view, stored at the start of op_params
                    size_t offset;

                    memcpy(&offset, tensor->op_params, sizeof(offset));

                    size_t nb1 = tensor->nb[1];
                    size_t nb2 = tensor->nb[2];
                    size_t nb3 = tensor->nb[3];

                    if (src0->type != src0->grad->type) {
                        // gradient element size differs from the tensor's, so
                        // offsets and strides are rescaled below
                        size_t ng = ggml_element_size(src0->grad);
                        size_t n0 = ggml_element_size(src0);
                        do { if (!(offset % n0 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n",
"ggml.c", 16388, "offset % n0 == 0"); abort(); } } while (0); do { if (!(nb1 % n0 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16389, "nb1 % n0 == 0"); abort(); } } while (0); do { if (!(nb2 % n0 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16390, "nb2 % n0 == 0"); abort(); } } while (0); do { if (!(nb3 % n0 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16391, "nb3 % n0 == 0"); abort(); } } while (0); offset = (offset / n0) * ng; nb1 = (nb1 / n0) * ng; nb2 = (nb2 / n0) * ng; nb3 = (nb3 / n0) * ng; } src0->grad = ggml_acc_impl(ctx, src0->grad, tensor->grad, nb1, nb2, nb3, offset, inplace); } } break; case GGML_OP_PERMUTE: { if (src0->grad) { int32_t * axes = (int32_t *) tensor->op_params; int axis0 = axes[0] & 0x3; int axis1 = axes[1] & 0x3; int axis2 = axes[2] & 0x3; int axis3 = axes[3] & 0x3; int axes_backward[4] = {0,0,0,0}; axes_backward[axis0] = 0; axes_backward[axis1] = 1; axes_backward[axis2] = 2; axes_backward[axis3] = 3; src0->grad = ggml_add_impl(ctx, src0->grad, ggml_permute(ctx, tensor->grad, axes_backward[0], axes_backward[1], axes_backward[2], axes_backward[3]), inplace); } } break; case GGML_OP_TRANSPOSE: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_transpose(ctx, tensor->grad), inplace); } } break; case GGML_OP_GET_ROWS: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_get_rows_back(ctx, tensor->grad, src1, src0->grad), inplace); } if (src1->grad) { } } break; case GGML_OP_GET_ROWS_BACK: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16451, "false"); abort(); } } while (0); } break; case GGML_OP_DIAG: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16455, "false"); abort(); } } while (0); } break; case GGML_OP_DIAG_MASK_INF: { if (src0->grad) { const int n_past = ((int32_t *) tensor->op_params)[0]; src0->grad = ggml_add_impl(ctx, src0->grad, ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, 0), 
inplace); } } break; case GGML_OP_DIAG_MASK_ZERO: { if (src0->grad) { const int n_past = ((int32_t *) tensor->op_params)[0]; src0->grad = ggml_add_impl(ctx, src0->grad, ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, 0), inplace); } } break; case GGML_OP_SOFT_MAX: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_soft_max_back(ctx, tensor->grad, tensor), inplace); } } break; case GGML_OP_SOFT_MAX_BACK: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16492, "false"); abort(); } } while (0); } break; case GGML_OP_ROPE: { if (src0->grad) { const int n_past = ((int32_t *) tensor->op_params)[0]; const int n_dims = ((int32_t *) tensor->op_params)[1]; const int mode = ((int32_t *) tensor->op_params)[2]; const int n_ctx = ((int32_t *) tensor->op_params)[3]; float freq_base; float freq_scale; float xpos_base; _Bool xpos_down; memcpy(&freq_base, (int32_t *) tensor->op_params + 4, sizeof(float)); memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float)); memcpy(&xpos_base, (int32_t *) tensor->op_params + 6, sizeof(float)); memcpy(&xpos_down, (int32_t *) tensor->op_params + 7, sizeof(_Bool)); src0->grad = ggml_add_impl(ctx, src0->grad, ggml_rope_back(ctx, tensor->grad, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, xpos_base, xpos_down), inplace); } } break; case GGML_OP_ROPE_BACK: { if (src0->grad) { const int n_past = ((int32_t *) tensor->op_params)[0]; const int n_dims = ((int32_t *) tensor->op_params)[1]; const int mode = ((int32_t *) tensor->op_params)[2]; const int n_ctx = ((int32_t *) tensor->op_params)[3]; float freq_base; float freq_scale; float xpos_base; _Bool xpos_down; memcpy(&freq_base, (int32_t *) tensor->op_params + 4, sizeof(float)); memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float)); memcpy(&xpos_base, (int32_t *) tensor->op_params + 6, sizeof(float)); memcpy(&xpos_down, (int32_t *) tensor->op_params + 7, sizeof(_Bool)); src0->grad = ggml_add_impl(ctx, src0->grad, 
ggml_rope_impl(ctx, tensor->grad, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, xpos_base, xpos_down, 0), inplace); } } break; case GGML_OP_ALIBI: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16560, "false"); abort(); } } while (0); } break; case GGML_OP_CLAMP: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16564, "false"); abort(); } } while (0); } break; case GGML_OP_CONV_1D: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16568, "false"); abort(); } } while (0); } break; case GGML_OP_CONV_2D: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16572, "false"); abort(); } } while (0); } break; case GGML_OP_CONV_TRANSPOSE_2D: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16576, "false"); abort(); } } while (0); } break; case GGML_OP_POOL_1D: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16580, "false"); abort(); } } while (0); } break; case GGML_OP_POOL_2D: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16584, "false"); abort(); } } while (0); } break; case GGML_OP_UPSCALE: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16588, "false"); abort(); } } while (0); } break; case GGML_OP_FLASH_ATTN: { struct ggml_tensor * flash_grad = ((void*)0); if (src0->grad || src1->grad || tensor->src[2]->grad) { int32_t t = ggml_get_op_params_i32(tensor, 0); do { if (!(t == 0 || t == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16595, "t == 0 || t == 1"); abort(); } } while (0); _Bool masked = t != 0; flash_grad = ggml_flash_attn_back(ctx, src0, src1, tensor->src[2], tensor->grad, masked); } if (src0->grad) { struct ggml_tensor * grad_q = ((void*)0); const size_t nb0 = flash_grad->nb[0]; const size_t offset = 0; switch(src0->n_dims) { case 2: { grad_q = ggml_view_2d(ctx, flash_grad, src0->ne[0], src0->ne[1], nb0*src0->ne[0], offset); } break; case 3: { 
grad_q = ggml_view_3d(ctx, flash_grad, src0->ne[0], src0->ne[1], src0->ne[2], nb0*src0->ne[0], nb0*src0->ne[0]*src0->ne[1], offset); } break; case 4: { grad_q = ggml_view_4d(ctx, flash_grad, src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], nb0*src0->ne[0], nb0*src0->ne[0]*src0->ne[1], nb0*src0->ne[0]*src0->ne[1]*src0->ne[2], offset); } break; } src0->grad = ggml_add_impl(ctx, src0->grad, grad_q, inplace); } if (src1->grad) { struct ggml_tensor * grad_k = ((void*)0); const size_t nb0 = flash_grad->nb[0]; const size_t offset = nb0*src0->ne[0]*src0->ne[1]*src0->ne[2]*src0->ne[3]; switch(src1->n_dims) { case 2: { grad_k = ggml_view_2d(ctx, flash_grad, src1->ne[0], src1->ne[1], nb0*src1->ne[0], offset); } break; case 3: { grad_k = ggml_view_3d(ctx, flash_grad, src1->ne[0], src1->ne[1], src1->ne[2], nb0*src1->ne[0], nb0*src1->ne[0]*src1->ne[1], offset); } break; case 4: { grad_k = ggml_view_4d(ctx, flash_grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3], nb0*src1->ne[0], nb0*src1->ne[0]*src1->ne[1], nb0*src1->ne[0]*src1->ne[1]*src1->ne[2], offset); } break; } src1->grad = ggml_add_impl(ctx, src1->grad, grad_k, inplace); } struct ggml_tensor * opt0 = tensor->src[2]; if (opt0->grad) { struct ggml_tensor * grad_v = ((void*)0); const size_t nb0 = flash_grad->nb[0]; const size_t offset = nb0*src0->ne[0]*src0->ne[1]*src0->ne[2]*src0->ne[3] + nb0*src1->ne[0]*src1->ne[1]*src1->ne[2]*src1->ne[3]; switch(opt0->n_dims) { case 2: { grad_v = ggml_view_2d(ctx, flash_grad, opt0->ne[0], opt0->ne[1], nb0*opt0->ne[0], offset); } break; case 3: { grad_v = ggml_view_3d(ctx, flash_grad, opt0->ne[0], opt0->ne[1], opt0->ne[2], nb0*opt0->ne[0], nb0*opt0->ne[0]*opt0->ne[1], offset); } break; case 4: { grad_v = ggml_view_4d(ctx, flash_grad, opt0->ne[0], opt0->ne[1], opt0->ne[2], opt0->ne[3], nb0*opt0->ne[0], nb0*opt0->ne[0]*opt0->ne[1], nb0*opt0->ne[0]*opt0->ne[1]*opt0->ne[2], offset); } break; } opt0->grad = ggml_add_impl(ctx, opt0->grad, grad_v, inplace); } } break; case 
GGML_OP_FLASH_FF: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16749, "false"); abort(); } } while (0); } break; case GGML_OP_FLASH_ATTN_BACK: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16753, "false"); abort(); } } while (0); } break; case GGML_OP_WIN_PART: case GGML_OP_WIN_UNPART: case GGML_OP_UNARY: { switch (ggml_get_unary_op(tensor)) { case GGML_UNARY_OP_ABS: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_mul(ctx, ggml_sgn(ctx, src0), tensor->grad), inplace); } } break; case GGML_UNARY_OP_SGN: { if (src0->grad) { } } break; case GGML_UNARY_OP_NEG: { if (src0->grad) { src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace); } } break; case GGML_UNARY_OP_STEP: { if (src0->grad) { } } break; case GGML_UNARY_OP_TANH: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16792, "false"); abort(); } } while (0); } break; case GGML_UNARY_OP_ELU: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16796, "false"); abort(); } } while (0); } break; case GGML_UNARY_OP_RELU: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_mul(ctx, ggml_step(ctx, src0), tensor->grad), inplace); } } break; case GGML_UNARY_OP_GELU: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16811, "false"); abort(); } } while (0); } break; case GGML_UNARY_OP_GELU_QUICK: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16815, "false"); abort(); } } while (0); } break; case GGML_UNARY_OP_SILU: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_silu_back(ctx, src0, tensor->grad), inplace); } } break; default: do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16828, "false"); abort(); } } while (0); } } break; case GGML_OP_GET_REL_POS: case GGML_OP_ADD_REL_POS: case GGML_OP_MAP_UNARY: case GGML_OP_MAP_BINARY: case GGML_OP_MAP_CUSTOM1_F32: case GGML_OP_MAP_CUSTOM2_F32: 
case GGML_OP_MAP_CUSTOM3_F32: case GGML_OP_MAP_CUSTOM1: case GGML_OP_MAP_CUSTOM2: case GGML_OP_MAP_CUSTOM3: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16842, "false"); abort(); } } while (0); } break; case GGML_OP_CROSS_ENTROPY_LOSS: { if (src0->grad) { src0->grad = ggml_add_impl(ctx, src0->grad, ggml_cross_entropy_loss_back(ctx, src0, src1, tensor->grad), inplace); } } break; case GGML_OP_CROSS_ENTROPY_LOSS_BACK: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16858, "false"); abort(); } } while (0); } break; case GGML_OP_NONE: { } break; case GGML_OP_COUNT: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16866, "false"); abort(); } } while (0); } break; } }

/* The visited-set table (8273 slots) must be larger than twice the 4096 node limit. */
_Static_assert(8273 > 4096 * 2, "GGML_GRAPH_HT_SIZE is too small");

/* Reduces a pointer value to a slot index in [0, 8273). */
static size_t hash(void * p) {
    return (size_t)p % 8273;
}

/*
 * Adds p to a linear-probing pointer set of 8273 slots.
 *
 * Returns 1 when p is already stored, 0 when this call stored it.
 * Aborts if probing wraps around to the starting slot without finding
 * either p or an empty slot.
 */
static _Bool hash_insert(void * hash_table[], void * p) {
    const size_t home = hash(p);
    size_t slot = home;

    for (;;) {
        void * const occupant = hash_table[slot];

        if (occupant == p) {
            return 1;
        }
        if (occupant == ((void*)0)) {
            hash_table[slot] = p;
            return 0;
        }

        slot = (slot + 1) % 8273;
        if (slot == home) {
            /* every slot is taken by some other pointer */
            do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16886, "false"); abort(); } } while (0);
        }
    }
}

/*
 * Depth-first walk over node and its sources (src[0..5]).
 *
 * A tensor is recorded after all of its sources have been recorded.
 * Tensors with op == GGML_OP_NONE and no gradient go to cgraph->leafs,
 * everything else goes to cgraph->nodes / cgraph->grads. Tensors already in
 * cgraph->visited_hash_table are ignored. Unnamed tensors are given a
 * "leaf_%d" / "node_%d" name built from their index.
 */
static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
    if (hash_insert(cgraph->visited_hash_table, node)) {
        return; /* seen before */
    }

    for (int k = 0; k < 6; ++k) {
        struct ggml_tensor * parent = node->src[k];
        if (parent) {
            ggml_visit_parents(cgraph, parent);
        }
    }

    const _Bool is_leaf = (node->op == GGML_OP_NONE) && (node->grad == ((void*)0));
    const _Bool unnamed = (strlen(node->name) == 0);

    if (is_leaf) {
        do { if (!(cgraph->n_leafs < 4096)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16921, "cgraph->n_leafs < GGML_MAX_NODES"); abort(); } } while (0);

        if (unnamed) {
            ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
        }

        cgraph->leafs[cgraph->n_leafs] = node;
        cgraph->n_leafs++;
        return;
    }

    do { if (!(cgraph->n_nodes < 4096)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16930, "cgraph->n_nodes < GGML_MAX_NODES"); abort(); } } while (0);

    if (unnamed) {
        ggml_format_name(node, "node_%d", cgraph->n_nodes);
    }

    cgraph->nodes[cgraph->n_nodes] = node;
    cgraph->grads[cgraph->n_nodes] = node->grad;
    cgraph->n_nodes++;
}

/*
 * Adds tensor and everything it depends on to cgraph.
 *
 * When expand is 0 the node and leaf counters are reset first; when it is
 * non-zero the graph is extended. If any node was added, the last one must be
 * tensor itself, otherwise the function aborts.
 */
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, _Bool expand) {
    if (!expand) {
        cgraph->n_nodes = 0;
        cgraph->n_leafs = 0;
    }

    const int nodes_before = cgraph->n_nodes;

    ggml_visit_parents(cgraph, tensor);

    const int nodes_added = cgraph->n_nodes - nodes_before;
    if (nodes_added > 0) {
        do { if (!(cgraph->nodes[cgraph->n_nodes - 1] == tensor)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16958, "cgraph->nodes[cgraph->n_nodes - 1] == tensor"); abort(); } } while (0);
    }
}

/* Extends an existing graph with tensor and its dependencies. */
void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
    ggml_build_forward_impl(cgraph, tensor, 1);
}

/* Builds a fresh graph (returned by value) that ends in tensor. */
struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
    struct ggml_cgraph result = {
        0,
        0,
        { ((void*)0) },
        { ((void*)0) },
        { ((void*)0) },
        { ((void*)0) },
        0,
        0,
        0,
    };

    ggml_build_forward_impl(&result, tensor, 0);

    return result;
}

/*
 * Generates the backward pass for forward graph gf and appends it to gb.
 *
 *  1. If keep is set, every node of gf that has a gradient gets a new gradient
 *     tensor (ggml_dup_tensor of the node), also stored in gf->grads.
 *  2. Nodes of gf are visited last-to-first and ggml_compute_backward is
 *     called on each one that has a gradient.
 *  3. For every node flagged is_param, its gradient is expanded into gb.
 *
 * Aborts when gf has no nodes.
 */
void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, _Bool keep) {
    do { if (!(gf->n_nodes > 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 16985, "gf->n_nodes > 0"); abort(); } } while (0);

    if (keep) {
        for (int i = 0; i < gf->n_nodes; i++) {
            struct ggml_tensor * t = gf->nodes[i];
            if (!t->grad) {
                continue;
            }
            t->grad = ggml_dup_tensor(ctx, t);
            gf->grads[i] = t->grad;
        }
    }

    int idx = gf->n_nodes;
    while (idx-- > 0) {
        struct ggml_tensor * t = gf->nodes[idx];
        if (t->grad) {
            ggml_compute_backward(ctx, t, keep);
        }
    }

    for (int i = 0; i < gf->n_nodes; i++) {
        struct ggml_tensor * t = gf->nodes[i];
        if (!t->is_param) {
            continue;
        }
        ggml_build_forward_expand(gb, t->grad);
    }
}

struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx,
struct ggml_cgraph * gf, _Bool keep) { struct ggml_cgraph result = *gf; ggml_build_backward_expand(ctx, gf, &result, keep); return result; } struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) { struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE); struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs); *cgraph = (struct ggml_cgraph) { 0, 0, { ((void*)0) }, { ((void*)0) }, { ((void*)0) }, { ((void*)0) }, 0, 0, 0, }; return cgraph; } struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) { struct ggml_cgraph * cgraph = ggml_new_graph(ctx); ggml_build_forward_impl(cgraph, tensor, 0); return cgraph; } size_t ggml_graph_overhead(void) { return GGML_OBJECT_SIZE + (((GGML_GRAPH_SIZE) + (4) - 1) & ~((4) - 1)); } # 17096 "ggml.c" typedef int ggml_lock_t; # 17109 "ggml.c" typedef pthread_t ggml_thread_t; # 17167 "ggml.c" static void set_numa_thread_affinity(int thread_n, int n_threads) { (void)(thread_n); (void)(n_threads); } static void clear_numa_thread_affinity(void) {} struct ggml_compute_state_shared { const struct ggml_cgraph * cgraph; const struct ggml_cplan * cplan; int64_t perf_node_start_cycles; int64_t perf_node_start_time_us; const int n_threads; atomic_int n_active; atomic_int node_n; _Bool (*abort_callback)(void * data); void * abort_callback_data; }; struct ggml_compute_state { ggml_thread_t thrd; int ith; struct ggml_compute_state_shared * shared; }; static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) { int64_t cycles_cur = 0 - st->perf_node_start_cycles; int64_t time_us_cur = 0 - st->perf_node_start_time_us; node->perf_runs++; node->perf_cycles += cycles_cur; node->perf_time_us += time_us_cur; } static thread_ret_t ggml_graph_compute_thread(void * data) { struct ggml_compute_state * state = (struct ggml_compute_state *) data; const struct ggml_cgraph * cgraph = 
state->shared->cgraph; const struct ggml_cplan * cplan = state->shared->cplan; const int * n_tasks_arr = cplan->n_tasks; const int n_threads = state->shared->n_threads; set_numa_thread_affinity(state->ith, n_threads); int node_n = -1; while (1) { if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) { state->shared->node_n += 1; return (thread_ret_t) 1; } if (__c11_atomic_fetch_sub(&state->shared->n_active, 1, memory_order_seq_cst) == 1) { struct ggml_compute_params params = { GGML_TASK_FINALIZE, 0, 0, cplan->work_size, cplan->work_data, }; if (node_n != -1) { struct ggml_tensor * node = state->shared->cgraph->nodes[node_n]; if (GGML_OP_HAS_FINALIZE[node->op]) { params.nth = n_tasks_arr[node_n]; ggml_compute_forward(¶ms, node); } ggml_graph_compute_perf_stats_node(node, state->shared); } while (++node_n < cgraph->n_nodes) { ; struct ggml_tensor * node = cgraph->nodes[node_n]; const int n_tasks = n_tasks_arr[node_n]; state->shared->perf_node_start_cycles = 0; state->shared->perf_node_start_time_us = 0; params.nth = n_tasks; if (GGML_OP_HAS_INIT[node->op]) { params.type = GGML_TASK_INIT; ggml_compute_forward(¶ms, node); } if (n_tasks == 1) { params.type = GGML_TASK_COMPUTE; ggml_compute_forward(¶ms, node); if (GGML_OP_HAS_FINALIZE[node->op]) { params.type = GGML_TASK_FINALIZE; ggml_compute_forward(¶ms, node); } ggml_graph_compute_perf_stats_node(node, state->shared); } else { break; } if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) { break; } } __c11_atomic_store(&state->shared->n_active, n_threads, memory_order_seq_cst); __c11_atomic_store(&state->shared->node_n, node_n, memory_order_seq_cst); } else { const int last = node_n; do { node_n = __c11_atomic_load(&state->shared->node_n, memory_order_seq_cst); } while (node_n == last); } if (node_n >= cgraph->n_nodes) break; struct ggml_tensor * node = cgraph->nodes[node_n]; const int n_tasks = n_tasks_arr[node_n]; struct ggml_compute_params params = { 
GGML_TASK_COMPUTE, state->ith, n_tasks, cplan->work_size, cplan->work_data, }; if (state->ith < n_tasks) { ggml_compute_forward(¶ms, node); } } return 0; } struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) { if (n_threads <= 0) { n_threads = 4; } size_t work_size = 0; struct ggml_cplan cplan; memset(&cplan, 0, sizeof(struct ggml_cplan)); for (int i = 0; i < cgraph->n_nodes; i++) { int n_tasks = 1; struct ggml_tensor * node = cgraph->nodes[i]; switch (node->op) { case GGML_OP_CPY: case GGML_OP_DUP: { n_tasks = n_threads; size_t cur = 0; if (ggml_is_quantized(node->type)) { cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks; } work_size = ((work_size) > (cur) ? (work_size) : (cur)); } break; case GGML_OP_ADD: case GGML_OP_ADD1: { n_tasks = n_threads; size_t cur = 0; if (ggml_is_quantized(node->src[0]->type)) { cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks; } work_size = ((work_size) > (cur) ? (work_size) : (cur)); } break; case GGML_OP_ACC: { n_tasks = n_threads; size_t cur = 0; if (ggml_is_quantized(node->src[0]->type)) { cur = ggml_type_size(GGML_TYPE_F32) * node->src[1]->ne[0] * n_tasks; } work_size = ((work_size) > (cur) ? 
(work_size) : (cur)); } break; case GGML_OP_SUB: case GGML_OP_DIV: case GGML_OP_SQR: case GGML_OP_SQRT: case GGML_OP_LOG: case GGML_OP_SUM: case GGML_OP_SUM_ROWS: case GGML_OP_MEAN: case GGML_OP_ARGMAX: case GGML_OP_REPEAT: case GGML_OP_REPEAT_BACK: { n_tasks = 1; } break; case GGML_OP_UNARY: { switch (ggml_get_unary_op(node)) { case GGML_UNARY_OP_ABS: case GGML_UNARY_OP_SGN: case GGML_UNARY_OP_NEG: case GGML_UNARY_OP_STEP: case GGML_UNARY_OP_TANH: case GGML_UNARY_OP_ELU: case GGML_UNARY_OP_RELU: { n_tasks = 1; } break; case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_SILU: { n_tasks = n_threads; } break; } } break; case GGML_OP_SILU_BACK: case GGML_OP_MUL: case GGML_OP_NORM: case GGML_OP_RMS_NORM: case GGML_OP_RMS_NORM_BACK: case GGML_OP_GROUP_NORM: { n_tasks = n_threads; } break; case GGML_OP_CONCAT: case GGML_OP_MUL_MAT: case GGML_OP_OUT_PROD: { n_tasks = n_threads; # 17428 "ggml.c" size_t cur = 0; const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type; # 17453 "ggml.c" if (node->src[1]->type != vec_dot_type) { cur = ggml_type_size(vec_dot_type)*ggml_nelements(node->src[1])/ggml_blck_size(vec_dot_type); } else { cur = 0; } work_size = ((work_size) > (cur) ? 
(work_size) : (cur)); } break; case GGML_OP_SCALE: { n_tasks = 1; } break; case GGML_OP_SET: case GGML_OP_CONT: case GGML_OP_RESHAPE: case GGML_OP_VIEW: case GGML_OP_PERMUTE: case GGML_OP_TRANSPOSE: case GGML_OP_GET_ROWS: case GGML_OP_GET_ROWS_BACK: case GGML_OP_DIAG: { n_tasks = 1; } break; case GGML_OP_DIAG_MASK_ZERO: case GGML_OP_DIAG_MASK_INF: case GGML_OP_SOFT_MAX: case GGML_OP_SOFT_MAX_BACK: case GGML_OP_ROPE: case GGML_OP_ROPE_BACK: case GGML_OP_ADD_REL_POS: { n_tasks = n_threads; } break; case GGML_OP_ALIBI: { n_tasks = 1; } break; case GGML_OP_CLAMP: { n_tasks = 1; } break; case GGML_OP_CONV_1D: { n_tasks = n_threads; do { if (!(node->src[0]->ne[3] == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17499, "node->src[0]->ne[3] == 1"); abort(); } } while (0); do { if (!(node->src[1]->ne[2] == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17500, "node->src[1]->ne[2] == 1"); abort(); } } while (0); do { if (!(node->src[1]->ne[3] == 1)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17501, "node->src[1]->ne[3] == 1"); abort(); } } while (0); size_t cur = 0; const int nk = node->src[0]->ne[0]; if (node->src[0]->type == GGML_TYPE_F16 && node->src[1]->type == GGML_TYPE_F32) { cur = sizeof(ggml_fp16_t)*( nk*ggml_up32(node->src[0]->ne[1])*node->src[0]->ne[2] + ( 2*(nk/2) + node->src[1]->ne[0])*node->src[1]->ne[1] ); } else if (node->src[0]->type == GGML_TYPE_F32 && node->src[1]->type == GGML_TYPE_F32) { cur = sizeof(float)*( nk*ggml_up32(node->src[0]->ne[1])*node->src[0]->ne[2] + ( 2*(nk/2) + node->src[1]->ne[0])*node->src[1]->ne[1] ); } else { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17519, "false"); abort(); } } while (0); } work_size = ((work_size) > (cur) ? 
(work_size) : (cur)); } break; case GGML_OP_CONV_2D: { n_tasks = n_threads; const int64_t ne00 = node->src[0]->ne[0]; const int64_t ne01 = node->src[0]->ne[1]; const int64_t ne02 = node->src[0]->ne[2]; const int64_t ne03 = node->src[0]->ne[3]; const int64_t ne10 = node->src[1]->ne[0]; const int64_t ne11 = node->src[1]->ne[1]; const int64_t ne12 = node->src[1]->ne[2]; const int64_t ne0 = node->ne[0]; const int64_t ne1 = node->ne[1]; const int64_t ne2 = node->ne[2]; const int64_t nk = ne00*ne01; const int64_t ew0 = nk * ne02; (void)(ne03); (void)(ne2); size_t cur = 0; if (node->src[0]->type == GGML_TYPE_F16 && node->src[1]->type == GGML_TYPE_F32) { cur = sizeof(ggml_fp16_t)*(ne0*ne1*ew0); } else if (node->src[0]->type == GGML_TYPE_F32 && node->src[1]->type == GGML_TYPE_F32) { cur = sizeof(float)* (ne10*ne11*ne12); } else { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17555, "false"); abort(); } } while (0); } work_size = ((work_size) > (cur) ? (work_size) : (cur)); } break; case GGML_OP_CONV_TRANSPOSE_2D: { n_tasks = n_threads; const int64_t ne00 = node->src[0]->ne[0]; const int64_t ne01 = node->src[0]->ne[1]; const int64_t ne02 = node->src[0]->ne[2]; const int64_t ne03 = node->src[0]->ne[3]; const int64_t ne10 = node->src[1]->ne[0]; const int64_t ne11 = node->src[1]->ne[1]; const int64_t ne12 = node->src[1]->ne[2]; size_t cur = 0; cur += sizeof(ggml_fp16_t)*ne00*ne01*ne02*ne03; cur += sizeof(ggml_fp16_t)*ne10*ne11*ne12; work_size = ((work_size) > (cur) ? 
(work_size) : (cur)); } break; case GGML_OP_POOL_1D: case GGML_OP_POOL_2D: { n_tasks = 1; } break; case GGML_OP_UPSCALE: { n_tasks = n_threads; } break; case GGML_OP_FLASH_ATTN: { n_tasks = n_threads; size_t cur = 0; const int64_t ne11 = ggml_up(node->src[1]->ne[1], 4); if (node->src[1]->type == GGML_TYPE_F32) { cur = sizeof(float)*ne11*n_tasks; cur += sizeof(float)*ne11*n_tasks; } if (node->src[1]->type == GGML_TYPE_F16) { cur = sizeof(float)*ne11*n_tasks; cur += sizeof(float)*ne11*n_tasks; } work_size = ((work_size) > (cur) ? (work_size) : (cur)); } break; case GGML_OP_FLASH_FF: { n_tasks = n_threads; size_t cur = 0; if (node->src[1]->type == GGML_TYPE_F32) { cur = sizeof(float)*node->src[1]->ne[1]*n_tasks; cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; } if (node->src[1]->type == GGML_TYPE_F16) { cur = sizeof(float)*node->src[1]->ne[1]*n_tasks; cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; } work_size = ((work_size) > (cur) ? (work_size) : (cur)); } break; case GGML_OP_FLASH_ATTN_BACK: { n_tasks = n_threads; size_t cur = 0; const int64_t D = node->src[0]->ne[0]; const int64_t ne11 = ggml_up(node->src[1]->ne[1], 4); const int64_t mxDn = ((D) > (ne11) ? (D) : (ne11)) * 2; if (node->src[1]->type == GGML_TYPE_F32) { cur = sizeof(float)*mxDn*n_tasks; cur += sizeof(float)*mxDn*n_tasks; } if (node->src[1]->type == GGML_TYPE_F16) { cur = sizeof(float)*mxDn*n_tasks; cur += sizeof(float)*mxDn*n_tasks; } work_size = ((work_size) > (cur) ? (work_size) : (cur)); } break; case GGML_OP_WIN_PART: case GGML_OP_WIN_UNPART: case GGML_OP_GET_REL_POS: case GGML_OP_MAP_UNARY: case GGML_OP_MAP_BINARY: case GGML_OP_MAP_CUSTOM1_F32: case GGML_OP_MAP_CUSTOM2_F32: case GGML_OP_MAP_CUSTOM3_F32: { n_tasks = 1; } break; case GGML_OP_MAP_CUSTOM1: { struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params; if (p->n_tasks == -1) { n_tasks = n_threads; } else { n_tasks = ((p->n_tasks) < (n_threads) ? 
(p->n_tasks) : (n_threads)); } } break; case GGML_OP_MAP_CUSTOM2: { struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params; if (p->n_tasks == -1) { n_tasks = n_threads; } else { n_tasks = ((p->n_tasks) < (n_threads) ? (p->n_tasks) : (n_threads)); } } break; case GGML_OP_MAP_CUSTOM3: { struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params; if (p->n_tasks == -1) { n_tasks = n_threads; } else { n_tasks = ((p->n_tasks) < (n_threads) ? (p->n_tasks) : (n_threads)); } } break; case GGML_OP_CROSS_ENTROPY_LOSS: { n_tasks = n_threads; size_t cur = ggml_type_size(node->type)*(n_tasks + node->src[0]->ne[0]*n_tasks); work_size = ((work_size) > (cur) ? (work_size) : (cur)); } break; case GGML_OP_CROSS_ENTROPY_LOSS_BACK: { n_tasks = n_threads; } break; case GGML_OP_NONE: { n_tasks = 1; } break; case GGML_OP_COUNT: { do { if (!(0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17703, "false"); abort(); } } while (0); } break; } cplan.n_tasks[i] = n_tasks; } if (work_size > 0) { work_size += 64*(n_threads - 1); } cplan.n_threads = n_threads; cplan.work_size = work_size; cplan.work_data = ((void*)0); return cplan; } int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) { { do { if (!(cplan)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17723, "cplan"); abort(); } } while (0); do { if (!(cplan->n_threads > 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17724, "cplan->n_threads > 0"); abort(); } } while (0); if (cplan->work_size > 0) { do { if (!(cplan->work_data)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17727, "cplan->work_data"); abort(); } } while (0); } for (int i = 0; i < cgraph->n_nodes; ++i) { if (cgraph->nodes[i]->op != GGML_OP_NONE) { do { if (!(cplan->n_tasks[i] > 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17732, "cplan->n_tasks[i] > 0"); abort(); } } while (0); } } } const int n_threads = cplan->n_threads; 
struct ggml_compute_state_shared state_shared = { cgraph, cplan, 0, 0, n_threads, n_threads, -1, ((void*)0), ((void*)0), }; struct ggml_compute_state * workers = __builtin_alloca(sizeof(struct ggml_compute_state)*n_threads); if (n_threads > 1) { for (int j = 1; j < n_threads; ++j) { workers[j] = (struct ggml_compute_state) { .thrd = 0, .ith = j, .shared = &state_shared, }; const int rc = pthread_create(&workers[j].thrd, ((void*)0), ggml_graph_compute_thread, &workers[j]); do { if (!(rc == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17762, "rc == 0"); abort(); } } while (0); (void)(rc); } } workers[0].ith = 0; workers[0].shared = &state_shared; const int64_t perf_start_cycles = 0; const int64_t perf_start_time_us = 0; int compute_status = (size_t) ggml_graph_compute_thread(&workers[0]); clear_numa_thread_affinity(); if (n_threads > 1) { for (int j = 1; j < n_threads; j++) { const int rc = pthread_join(workers[j].thrd, ((void*)0)); do { if (!(rc == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17783, "rc == 0"); abort(); } } while (0); } } { int64_t perf_cycles_cur = 0 - perf_start_cycles; int64_t perf_time_us_cur = 0 - perf_start_time_us; cgraph->perf_runs++; cgraph->perf_cycles += perf_cycles_cur; cgraph->perf_time_us += perf_time_us_cur; ; } return compute_status; } void ggml_graph_reset(struct ggml_cgraph * cgraph) { for (int i = 0; i < cgraph->n_nodes; i++) { struct ggml_tensor * grad = cgraph->grads[i]; if (grad) { ggml_set_zero(grad); } } } void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) { struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads); struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size); cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs; ggml_graph_compute(cgraph, &cplan); } struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) { for (int i = 0; i < cgraph->n_leafs; i++) { struct 
ggml_tensor * leaf = cgraph->leafs[i]; if (strcmp(leaf->name, name) == 0) { return leaf; } } for (int i = 0; i < cgraph->n_nodes; i++) { struct ggml_tensor * node = cgraph->nodes[i]; if (strcmp(node->name, name) == 0) { return node; } } return ((void*)0); } static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fout) { const int64_t * ne = tensor->ne; const size_t * nb = tensor->nb; fprintf(fout, "%-6s %-12s %8d %" "ll""d" " %" "ll""d" " %" "ll""d" " %" "ll""d" " %16zu %16zu %16zu %16zu %16p %32s\n", ggml_type_name(tensor->type), ggml_op_name (tensor->op), tensor->n_dims, ne[0], ne[1], ne[2], ne[3], nb[0], nb[1], nb[2], nb[3], tensor->data, tensor->name); } static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char * arg, FILE * fout) { const int64_t * ne = tensor->ne; const size_t * nb = tensor->nb; fprintf(fout, "%-6s %-6s %-12s %8d %" "ll""d" " %" "ll""d" " %" "ll""d" " %" "ll""d" " %16zu %16zu %16zu %16zu %16p %32s\n", arg, ggml_type_name(tensor->type), ggml_op_name (tensor->op), tensor->n_dims, ne[0], ne[1], ne[2], ne[3], nb[0], nb[1], nb[2], nb[3], tensor->data, tensor->name); } void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) { uint64_t size_eval = 0; for (int i = 0; i < cgraph->n_nodes; ++i) { size_eval += ggml_nbytes_pad(cgraph->nodes[i]); } { FILE * fout = stdout; fprintf(fout, "\n"); fprintf(fout, "%-16s %8x\n", "magic", 0x67676d6c); fprintf(fout, "%-16s %8d\n", "version", 1); fprintf(fout, "%-16s %8d\n", "leafs", cgraph->n_leafs); fprintf(fout, "%-16s %8d\n", "nodes", cgraph->n_nodes); fprintf(fout, "%-16s %" "ll""u" "\n", "eval", size_eval); fprintf(fout, "\n"); fprintf(fout, "%-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %16s %16s\n", "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "DATA", "NAME"); for (int i = 0; i < cgraph->n_leafs; ++i) { ggml_graph_export_leaf(cgraph->leafs[i], fout); do { if (!(cgraph->leafs[i]->op == GGML_OP_NONE)) { 
fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17904, "cgraph->leafs[i]->op == GGML_OP_NONE"); abort(); } } while (0); do { if (!(cgraph->leafs[i]->src[0] == ((void*)0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17905, "cgraph->leafs[i]->src[0] == NULL"); abort(); } } while (0); do { if (!(cgraph->leafs[i]->src[1] == ((void*)0))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 17906, "cgraph->leafs[i]->src[1] == NULL"); abort(); } } while (0); } fprintf(fout, "\n"); fprintf(fout, "%-6s %-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %8s %16s %16s\n", "ARG", "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "NTASKS", "DATA", "NAME"); for (int i = 0; i < cgraph->n_nodes; ++i) { ggml_graph_export_node(cgraph->nodes[i], "DST", fout); for (int j = 0; j < 6; ++j) { if (cgraph->nodes[i]->src[j]) { ggml_graph_export_node(cgraph->nodes[i]->src[j], "SRC", fout); } } fprintf(fout, "\n"); } fprintf(fout, "\n"); } { FILE * fout = fopen(fname, "wb"); if (!fout) { fprintf(stderr, "%s: failed to open %s\n", __func__, fname); return; } { const uint32_t magic = 0x67676d6c; const uint32_t version = 1; const uint32_t n_leafs = cgraph->n_leafs; const uint32_t nodes = cgraph->n_nodes; fwrite(&magic, sizeof(uint32_t), 1, fout); fwrite(&version, sizeof(uint32_t), 1, fout); fwrite(&n_leafs, sizeof(uint32_t), 1, fout); fwrite(&nodes, sizeof(uint32_t), 1, fout); fwrite(&size_eval, sizeof(uint64_t), 1, fout); } { for (int i = 0; i < cgraph->n_leafs; ++i) { const struct ggml_tensor * tensor = cgraph->leafs[i]; const uint32_t type = tensor->type; const uint32_t op = tensor->op; const uint32_t n_dims = tensor->n_dims; fwrite(&type, sizeof(uint32_t), 1, fout); fwrite(&op, sizeof(uint32_t), 1, fout); fwrite(&n_dims, sizeof(uint32_t), 1, fout); for (int j = 0; j < 4; ++j) { const uint64_t ne = tensor->ne[j]; const uint64_t nb = tensor->nb[j]; fwrite(&ne, sizeof(uint64_t), 1, fout); fwrite(&nb, sizeof(uint64_t), 1, fout); } 
fwrite(tensor->name, sizeof(char), 64, fout); fwrite(tensor->op_params, sizeof(char), 32, fout); { const size_t size = ggml_nbytes(tensor); fwrite(tensor->data, sizeof(char), size, fout); } } } { for (int i = 0; i < cgraph->n_nodes; ++i) { const struct ggml_tensor * tensor = cgraph->nodes[i]; const uint32_t type = tensor->type; const uint32_t op = tensor->op; const uint32_t n_dims = tensor->n_dims; fwrite(&type, sizeof(uint32_t), 1, fout); fwrite(&op, sizeof(uint32_t), 1, fout); fwrite(&n_dims, sizeof(uint32_t), 1, fout); for (int j = 0; j < 4; ++j) { const uint64_t ne = tensor->ne[j]; const uint64_t nb = tensor->nb[j]; fwrite(&ne, sizeof(uint64_t), 1, fout); fwrite(&nb, sizeof(uint64_t), 1, fout); } fwrite(tensor->name, sizeof(char), 64, fout); fwrite(tensor->op_params, sizeof(char), 32, fout); { struct ggml_tensor * args[6] = { ((void*)0) }; for (int j = 0; j < 6; ++j) { args[j] = tensor->src[j]; } for (int j = 0; j < 6; ++j) { if (args[j]) { int32_t idx = -1; { for (int k = 0; k < cgraph->n_leafs; ++k) { if (args[j] == cgraph->leafs[k]) { idx = k; break; } } } if (idx == -1) { for (int k = 0; k < cgraph->n_nodes; ++k) { if (args[j] == cgraph->nodes[k]) { idx = 4096 + k; break; } } } if (idx == -1) { fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i); return; } fwrite(&idx, sizeof(int32_t), 1, fout); } else { const int32_t nul = -1; fwrite(&nul, sizeof(int32_t), 1, fout); } } } } } fclose(fout); } } struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) { ((void) (0)); ((void) (0)); struct ggml_cgraph result = { 0 }; struct ggml_tensor * data = ((void*)0); { FILE * fin = fopen(fname, "rb"); if (!fin) { fprintf(stderr, "%s: failed to open %s\n", __func__, fname); return result; } size_t fsize = 0; fseek(fin, 0, 2); fsize = ftell(fin); fseek(fin, 0, 0); { const size_t overhead = 1*ggml_tensor_overhead(); struct ggml_init_params params = { .mem_size = fsize + 
overhead, .mem_buffer = ((void*)0), .no_alloc = 0, }; *ctx_data = ggml_init(params); if (!*ctx_data) { fprintf(stderr, "%s: failed to create ggml context\n", __func__); fclose(fin); return result; } } data = ggml_new_tensor_1d(*ctx_data, GGML_TYPE_I8, fsize); { const size_t ret = fread(data->data, sizeof(char), fsize, fin); if (ret != fsize) { fprintf(stderr, "%s: failed to read %s\n", __func__, fname); fclose(fin); return result; } } fclose(fin); } { char * ptr = (char *) data->data; const uint32_t magic = *(const uint32_t *) ptr; ptr += sizeof(magic); if (magic != 0x67676d6c) { fprintf(stderr, "%s: invalid magic number, got %08x\n", __func__, magic); return result; } const uint32_t version = *(const uint32_t *) ptr; ptr += sizeof(version); if (version != 1) { fprintf(stderr, "%s: invalid version number\n", __func__); return result; } const uint32_t n_leafs = *(const uint32_t *) ptr; ptr += sizeof(n_leafs); const uint32_t n_nodes = *(const uint32_t *) ptr; ptr += sizeof(n_nodes); const uint64_t size_eval = *(const uint64_t *) ptr; ptr += sizeof(size_eval); result.n_leafs = n_leafs; result.n_nodes = n_nodes; { const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead(); struct ggml_init_params params = { .mem_size = size_eval + overhead, .mem_buffer = ((void*)0), .no_alloc = 1, }; *ctx_eval = ggml_init(params); if (!*ctx_eval) { fprintf(stderr, "%s: failed to create ggml context\n", __func__); return result; } } { uint32_t type; uint32_t op; uint32_t n_dims; for (uint32_t i = 0; i < n_leafs; ++i) { type = *(const uint32_t *) ptr; ptr += sizeof(type); op = *(const uint32_t *) ptr; ptr += sizeof(op); n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims); int64_t ne[4]; size_t nb[4]; for (int j = 0; j < 4; ++j) { uint64_t ne_cur; uint64_t nb_cur; ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur); nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur); ne[j] = ne_cur; nb[j] = nb_cur; } struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum 
ggml_type) type, n_dims, ne); tensor->op = (enum ggml_op) op; memcpy(tensor->name, ptr, 64); ptr += 64; memcpy(tensor->op_params, ptr, 32); ptr += 32; tensor->data = (void *) ptr; for (int j = 0; j < 4; ++j) { tensor->nb[j] = nb[j]; } result.leafs[i] = tensor; ptr += ggml_nbytes(tensor); fprintf(stderr, "%s: loaded leaf %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor)); } } ggml_set_no_alloc(*ctx_eval, 0); { uint32_t type; uint32_t op; uint32_t n_dims; for (uint32_t i = 0; i < n_nodes; ++i) { type = *(const uint32_t *) ptr; ptr += sizeof(type); op = *(const uint32_t *) ptr; ptr += sizeof(op); n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims); enum ggml_op eop = (enum ggml_op) op; int64_t ne[4]; size_t nb[4]; for (int j = 0; j < 4; ++j) { uint64_t ne_cur; uint64_t nb_cur; ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur); nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur); ne[j] = ne_cur; nb[j] = nb_cur; } const char * ptr_name = ptr; ptr += 64; const char * ptr_op_params = ptr; ptr += 32; const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += 6*sizeof(int32_t); struct ggml_tensor * args[6] = { ((void*)0) }; for (int j = 0; j < 6; ++j) { const int32_t arg_idx = ptr_arg_idx[j]; if (arg_idx == -1) { continue; } if (arg_idx < 4096) { args[j] = result.leafs[arg_idx]; } else { args[j] = result.nodes[arg_idx - 4096]; } } struct ggml_tensor * tensor = ((void*)0); switch (eop) { case GGML_OP_RESHAPE: { tensor = ggml_reshape_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3]); } break; case GGML_OP_VIEW: { tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0); size_t offs; memcpy(&offs, ptr_op_params, sizeof(offs)); tensor->data = ((char *) tensor->data) + offs; } break; case GGML_OP_TRANSPOSE: { tensor = ggml_transpose(*ctx_eval, args[0]); } break; case GGML_OP_PERMUTE: { tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0); } break; default: { tensor = 
ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne); tensor->op = eop; } break; } memcpy(tensor->name, ptr_name, 64); memcpy(tensor->op_params, ptr_op_params, 32); for (int j = 0; j < 4; ++j) { tensor->nb[j] = nb[j]; } for (int j = 0; j < 6; ++j) { tensor->src[j] = args[j]; } result.nodes[i] = tensor; fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor)); } } } return result; } void ggml_graph_print(const struct ggml_cgraph * cgraph) { int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0}; printf("=== GRAPH ===\n"); printf("n_nodes = %d\n", cgraph->n_nodes); for (int i = 0; i < cgraph->n_nodes; i++) { struct ggml_tensor * node = cgraph->nodes[i]; perf_total_per_op_us[node->op] += ((1) > (node->perf_time_us) ? (1) : (node->perf_time_us)); printf(" - %3d: [ %5" "ll""d" ", %5" "ll""d" ", %5" "ll""d" "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_name(node->op), node->is_param ? "x" : node->grad ? 
"g" : " ", node->perf_runs, (double) node->perf_cycles / (double) ggml_cycles_per_ms(), (double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs, (double) node->perf_time_us / 1000.0, (double) node->perf_time_us / 1000.0 / node->perf_runs); } printf("n_leafs = %d\n", cgraph->n_leafs); for (int i = 0; i < cgraph->n_leafs; i++) { struct ggml_tensor * node = cgraph->leafs[i]; printf(" - %3d: [ %5" "ll""d" ", %5" "ll""d" "] %8s\n", i, node->ne[0], node->ne[1], ggml_op_name(node->op)); } for (int i = 0; i < GGML_OP_COUNT; i++) { if (perf_total_per_op_us[i] == 0) { continue; } printf("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name(i), (double) perf_total_per_op_us[i] / 1000.0); } printf("========================================\n"); } static _Bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { if (cgraph == ((void*)0)) { return 1; } for (int i = 0; i < cgraph->n_nodes; i++) { if (cgraph->nodes[i] == node) { return 1; } } return 0; } static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) { for (int i = 0; i < cgraph->n_nodes; i++) { struct ggml_tensor * parent = cgraph->nodes[i]; if (parent->grad == node) { return parent; } } return ((void*)0); } static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) { struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node); struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent); fprintf(fp, " \"%p\":%s -> \"%p\":%s [ arrowhead = %s; style = %s; label = \"%s\"; ]\n", gparent0 ? (void *) gparent0 : (void *) parent, gparent0 ? "g" : "x", gparent ? (void *) gparent : (void *) node, gparent ? "g" : "x", gparent ? "empty" : "vee", gparent ? 
"dashed" : "solid", label); } static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label) { fprintf(fp, " \"%p\":%s -> \"%p\":%s [ label = \"%s\"; ]\n", (void *) parent, "x", (void *) node, "x", label); } void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) { char color[16]; FILE * fp = fopen(filename, "w"); do { if (!(fp)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 18408, "fp"); abort(); } } while (0); fprintf(fp, "digraph G {\n"); fprintf(fp, " newrank = true;\n"); fprintf(fp, " rankdir = LR;\n"); for (int i = 0; i < gb->n_nodes; i++) { struct ggml_tensor * node = gb->nodes[i]; if (ggml_graph_get_parent(gb, node) != ((void*)0)) { continue; } if (node->is_param) { snprintf(color, sizeof(color), "yellow"); } else if (node->grad) { if (ggml_graph_find(gf, node)) { snprintf(color, sizeof(color), "green"); } else { snprintf(color, sizeof(color), "lightblue"); } } else { snprintf(color, sizeof(color), "white"); } fprintf(fp, " \"%p\" [ " "style = filled; fillcolor = %s; shape = record; " "label=\"", (void *) node, color); if (strlen(node->name) > 0) { fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type)); } else { fprintf(fp, "(%s)|", ggml_type_name(node->type)); } if (node->n_dims == 2) { fprintf(fp, "%d [%" "ll""d" ", %" "ll""d" "] | %s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op)); } else { fprintf(fp, "%d [%" "ll""d" ", %" "ll""d" ", %" "ll""d" "] | %s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op)); } if (node->grad) { fprintf(fp, " | %s\"; ]\n", ggml_op_symbol(node->grad->op)); } else { fprintf(fp, "\"; ]\n"); } } for (int i = 0; i < gb->n_leafs; i++) { struct ggml_tensor * node = gb->leafs[i]; snprintf(color, sizeof(color), "pink"); fprintf(fp, " \"%p\" [ " "style = filled; fillcolor = %s; shape = record; " "label=\"", (void *) node, color); if (strlen(node->name) > 0) { fprintf(fp, 
"%s (%s)|", node->name, ggml_type_name(node->type)); } else { fprintf(fp, "(%s)|", ggml_type_name(node->type)); } fprintf(fp, "CONST %d [%" "ll""d" ", %" "ll""d" "]", i, node->ne[0], node->ne[1]); if (ggml_nelements(node) < 5) { fprintf(fp, " | ("); for (int j = 0; j < ggml_nelements(node); j++) { if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) { fprintf(fp, "%d", ggml_get_i32_1d(node, j)); } else if (node->type == GGML_TYPE_F32 || node->type == GGML_TYPE_F16) { fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j)); } else { fprintf(fp, "#"); } if (j < ggml_nelements(node) - 1) { fprintf(fp, ", "); } } fprintf(fp, ")"); } fprintf(fp, "\"; ]\n"); } for (int i = 0; i < gb->n_nodes; i++) { struct ggml_tensor * node = gb->nodes[i]; for (int j = 0; j < 6; j++) { if (node->src[j]) { char label[16]; snprintf(label, sizeof(label), "src %d", j); ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label); } } } for (int i = 0; i < gb->n_leafs; i++) { struct ggml_tensor * node = gb->leafs[i]; for (int j = 0; j < 6; j++) { if (node->src[j]) { char label[16]; snprintf(label, sizeof(label), "src %d", j); ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label); } } } fprintf(fp, "}\n"); fclose(fp); printf("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename); } static void ggml_opt_set_params(int np, struct ggml_tensor * const ps[], const float * x) { int i = 0; for (int p = 0; p < np; ++p) { const int64_t ne = ggml_nelements(ps[p]) ; for (int64_t j = 0; j < ne; ++j) { ggml_set_f32_1d(ps[p], j, x[i++]); } } } static void ggml_opt_get_params(int np, struct ggml_tensor * const ps[], float * x) { int i = 0; for (int p = 0; p < np; ++p) { const int64_t ne = ggml_nelements(ps[p]) ; for (int64_t j = 0; j < ne; ++j) { x[i++] = ggml_get_f32_1d(ps[p], j); } } } static void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g) { int i = 0; for (int p = 0; p < np; ++p) { const int64_t 
ne = ggml_nelements(ps[p]) ; for (int64_t j = 0; j < ne; ++j) { g[i++] = ggml_get_f32_1d(ps[p]->grad, j); } } } static enum ggml_opt_result ggml_opt_adam( struct ggml_context * ctx, struct ggml_opt_context * opt, struct ggml_opt_params params, struct ggml_tensor * f, struct ggml_cgraph * gf, struct ggml_cgraph * gb, ggml_opt_callback callback, void * callback_data) { do { if (!(ggml_is_scalar(f))) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 18576, "ggml_is_scalar(f)"); abort(); } } while (0); struct ggml_tensor * ps[256]; int np = 0; int64_t nx = 0; for (int i = 0; i < gf->n_nodes; ++i) { if (gf->nodes[i]->is_param) { ; do { if (!(np < 256)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 18587, "np < GGML_MAX_PARAMS"); abort(); } } while (0); ps[np++] = gf->nodes[i]; nx += ggml_nelements(gf->nodes[i]); } } if ((opt->params.type != params.type) || (opt->nx != nx) || (opt->params.past != params.past)) { int iter = opt->iter; ggml_opt_init(opt->ctx, opt, params, nx); opt->iter = iter; } float sched = params.adam.sched; const float alpha = params.adam.alpha; const float decay = params.adam.decay * alpha; const float beta1 = params.adam.beta1; const float beta2 = params.adam.beta2; const float eps = params.adam.eps; const float gclip = params.adam.gclip; const int decay_min_ndim = params.adam.decay_min_ndim; float * m = opt->adam.m->data; float * v = opt->adam.v->data; float * pf = params.past > 0 ? 
opt->adam.pf->data : ((void*)0); if (callback) { callback(callback_data, &sched); } ggml_graph_reset (gf); ggml_set_f32 (f->grad, 1.0f); struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads); struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size); cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs; ggml_graph_compute(gb, &cplan); opt->adam.fx_prev = ggml_get_f32_1d(f, 0); opt->adam.fx_best = opt->adam.fx_prev; if (pf) { pf[opt->iter % params.past] = opt->adam.fx_prev; } opt->loss_before = opt->adam.fx_prev; opt->loss_after = opt->adam.fx_prev; if (opt->just_initialized) { opt->adam.n_no_improvement = 0; opt->just_initialized = 0; } float * fx_best = &opt->adam.fx_best; float * fx_prev = &opt->adam.fx_prev; int * n_no_improvement = &opt->adam.n_no_improvement; int iter0 = opt->iter; for (int t = 0; t < params.adam.n_iter; ++t) { opt->iter = iter0 + t + 1; ; ; ; ; for (int i = 0; i < np; ++i) { ; } const int64_t t_start_wall = ggml_time_us(); const int64_t t_start_cpu = ggml_cycles(); (void)(t_start_wall); (void)(t_start_cpu); { float gnorm = 1.0f; if (gclip > 0.0f) { ggml_float sum = 0.0; for (int p = 0; p < np; ++p) { const int64_t ne = ggml_nelements(ps[p]); for (int64_t j = 0; j < ne; ++j) { float g = ggml_get_f32_1d(ps[p]->grad, j); sum += (ggml_float)(g*g); } } ggml_float norm = sqrt(sum); if (norm > (ggml_float) gclip) { gnorm = (float) ((ggml_float) gclip / norm); } } const float beta1h = alpha*sched/(1.0f - powf(beta1, opt->iter)); const float beta2h = 1.0f/(1.0f - powf(beta2, opt->iter)); int64_t i = 0; for (int p = 0; p < np; ++p) { const int64_t ne = ggml_nelements(ps[p]); const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? 
decay : 0.0f) * sched; for (int64_t j = 0; j < ne; ++j) { float x = ggml_get_f32_1d(ps[p], j); float g = ggml_get_f32_1d(ps[p]->grad, j)*gnorm; m[i] = m[i]*beta1 + g*(1.0f - beta1); v[i] = v[i]*beta2 + g*g*(1.0f - beta2); float mh = m[i]*beta1h; float vh = v[i]*beta2h; vh = sqrtf(vh) + eps; x = x*(1.0f - p_decay) - mh/vh; ggml_set_f32_1d(ps[p], j, x); ++i; } } } if (callback) { callback(callback_data, &sched); } ggml_graph_reset (gf); ggml_set_f32 (f->grad, 1.0f); ggml_graph_compute(gb, &cplan); const float fx = ggml_get_f32_1d(f, 0); opt->loss_after = fx; if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) { ; return GGML_OPT_OK; } if (pf != ((void*)0)) { if (params.past <= iter0 + t) { const float rate = (pf[(iter0 + t)%params.past] - fx)/fx; if (fabsf(rate) < params.delta) { return GGML_OPT_OK; } } pf[(iter0 + t)%params.past] = fx; } if (params.max_no_improvement > 0) { if (fx_best[0] > fx) { fx_best[0] = fx; n_no_improvement[0] = 0; } else { ++n_no_improvement[0]; if (n_no_improvement[0] >= params.max_no_improvement) { return GGML_OPT_OK; } } } fx_prev[0] = fx; { const int64_t t_end_cpu = ggml_cycles(); ; (void)(t_end_cpu); const int64_t t_end_wall = ggml_time_us(); ; (void)(t_end_wall); } } return GGML_OPT_DID_NOT_CONVERGE; } # 18778 "ggml.c" struct ggml_lbfgs_iteration_data { float alpha; float ys; float * s; float * y; }; static enum ggml_opt_result linesearch_backtracking( const struct ggml_opt_params * params, int nx, float * x, float * fx, float * g, float * d, float * step, const float * xp, struct ggml_tensor * f, struct ggml_cgraph * gf, struct ggml_cgraph * gb, struct ggml_cplan * cplan, const int np, struct ggml_tensor * ps[], ggml_opt_callback callback, void * callback_data) { int count = 0; float width = 0.0f; float dg = 0.0f; float finit = 0.0f; float dginit = 0.0f; float dgtest = 0.0f; const float dec = 0.5f; const float inc = 2.1f; if (*step <= 0.f) { return GGML_LINESEARCH_INVALID_PARAMETERS; } ggml_vec_dot_f32(nx, &dginit, g, d); if (0 < 
dginit) { return GGML_LINESEARCH_FAIL; } finit = *fx; dgtest = params->lbfgs.ftol*dginit; while (1) { if (callback) { float sched = 0; callback(callback_data, &sched); } ggml_vec_cpy_f32(nx, x, xp); ggml_vec_mad_f32(nx, x, d, *step); { ggml_opt_set_params(np, ps, x); ggml_graph_reset (gf); ggml_set_f32 (f->grad, 1.0f); ggml_graph_compute(gb, cplan); ggml_opt_get_grad(np, ps, g); *fx = ggml_get_f32_1d(f, 0); } ++count; if (*fx > finit + (*step)*dgtest) { width = dec; } else { if (params->lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_ARMIJO) { return count; } ggml_vec_dot_f32(nx, &dg, g, d); if (dg < params->lbfgs.wolfe * dginit) { width = inc; } else { if(params->lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE) { return count; } if(dg > -params->lbfgs.wolfe*dginit) { width = dec; } else { return count; } } } if (*step < params->lbfgs.min_step) { return GGML_LINESEARCH_MINIMUM_STEP; } if (*step > params->lbfgs.max_step) { return GGML_LINESEARCH_MAXIMUM_STEP; } if (params->lbfgs.max_linesearch <= count) { return GGML_LINESEARCH_MAXIMUM_ITERATIONS; } (*step) *= width; } return GGML_LINESEARCH_FAIL; } static enum ggml_opt_result ggml_opt_lbfgs( struct ggml_context * ctx, struct ggml_opt_context * opt, struct ggml_opt_params params, struct ggml_tensor * f, struct ggml_cgraph * gf, struct ggml_cgraph * gb, ggml_opt_callback callback, void * callback_data) { if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE || params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) { if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) { return GGML_OPT_INVALID_WOLFE; } } const int m = params.lbfgs.m; struct ggml_tensor * ps[256]; int np = 0; int nx = 0; for (int i = 0; i < gf->n_nodes; ++i) { if (gf->nodes[i]->is_param) { ; do { if (!(np < 256)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 18926, "np < GGML_MAX_PARAMS"); abort(); } } while (0); ps[np++] = gf->nodes[i]; nx += ggml_nelements(gf->nodes[i]); } } if 
((opt->params.type != params.type) || (opt->nx != nx) || (opt->params.past != params.past) || (opt->params.lbfgs.m != params.lbfgs.m)) { int iter = opt->iter; ggml_opt_init(ctx, opt, params, nx); opt->iter = iter; } struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads); struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size); cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs; float * x = opt->lbfgs.x->data; float * xp = opt->lbfgs.xp->data; float * g = opt->lbfgs.g->data; float * gp = opt->lbfgs.gp->data; float * d = opt->lbfgs.d->data; float * pf = params.past > 0 ? opt->lbfgs.pf->data : ((void*)0); float fx = 0.0f; float xnorm = 0.0f; float gnorm = 0.0f; ggml_opt_get_params(np, ps, x); float * lm_alpha = opt->lbfgs.lmal->data; float * lm_ys = opt->lbfgs.lmys->data; float * lm_s = opt->lbfgs.lms->data; float * lm_y = opt->lbfgs.lmy->data; if (callback) { float sched = 0; callback(callback_data, &sched); } { ggml_opt_set_params(np, ps, x); ggml_graph_reset (gf); ggml_set_f32 (f->grad, 1.0f); ggml_graph_compute(gb, &cplan); ggml_opt_get_grad(np, ps, g); fx = ggml_get_f32_1d(f, 0); opt->loss_before = fx; opt->loss_after = fx; } ggml_vec_neg_f32(nx, d, g); ggml_vec_norm_f32(nx, &xnorm, x); ggml_vec_norm_f32(nx, &gnorm, g); if (xnorm < 1.0f) { xnorm = 1.0f; } if (gnorm/xnorm <= params.lbfgs.eps) { return GGML_OPT_OK; } if (opt->just_initialized) { if (pf) { pf[0] = fx; } opt->lbfgs.fx_best = fx; ggml_vec_norm_inv_f32(nx, &opt->lbfgs.step, d); opt->lbfgs.j = 0; opt->lbfgs.k = 1; opt->lbfgs.end = 0; opt->lbfgs.n_no_improvement = 0; opt->just_initialized = 0; } float * fx_best = &opt->lbfgs.fx_best; float * step = &opt->lbfgs.step; int * j = &opt->lbfgs.j; int * k = &opt->lbfgs.k; int * end = &opt->lbfgs.end; int * n_no_improvement = &opt->lbfgs.n_no_improvement; int ls = 0; int bound = 0; float ys = 0.0f; float yy = 0.0f; float beta = 0.0f; int it = 0; while (1) { ggml_vec_cpy_f32(nx, xp, x); ggml_vec_cpy_f32(nx, gp, g); 
ls = linesearch_backtracking(¶ms, nx, x, &fx, g, d, step, xp, f, gf, gb, &cplan, np, ps, callback, callback_data); if (ls < 0) { ggml_vec_cpy_f32(nx, x, xp); ggml_vec_cpy_f32(nx, g, gp); return ls; } opt->loss_after = fx; ggml_vec_norm_f32(nx, &xnorm, x); ggml_vec_norm_f32(nx, &gnorm, g); ; if (xnorm < 1.0f) { xnorm = 1.0f; } if (gnorm/xnorm <= params.lbfgs.eps) { return GGML_OPT_OK; } if (pf != ((void*)0)) { if (params.past <= k[0]) { const float rate = (pf[k[0]%params.past] - fx)/fx; if (fabsf(rate) < params.delta) { return GGML_OPT_OK; } } pf[k[0]%params.past] = fx; } if (params.max_no_improvement > 0) { if (fx < fx_best[0]) { fx_best[0] = fx; n_no_improvement[0] = 0; } else { n_no_improvement[0]++; if (n_no_improvement[0] >= params.max_no_improvement) { return GGML_OPT_OK; } } } if (params.lbfgs.n_iter != 0 && params.lbfgs.n_iter < it + 1) { return GGML_OPT_DID_NOT_CONVERGE; } ggml_vec_sub_f32(nx, &lm_s[end[0]*nx], x, xp); ggml_vec_sub_f32(nx, &lm_y[end[0]*nx], g, gp); ggml_vec_dot_f32(nx, &ys, &lm_y[end[0]*nx], &lm_s[end[0]*nx]); ggml_vec_dot_f32(nx, &yy, &lm_y[end[0]*nx], &lm_y[end[0]*nx]); lm_ys[end[0]] = ys; bound = (m <= k[0]) ? 
m : k[0]; k[0]++; it++; end[0] = (end[0] + 1)%m; ggml_vec_neg_f32(nx, d, g); j[0] = end[0]; for (int i = 0; i < bound; ++i) { j[0] = (j[0] + m - 1) % m; ggml_vec_dot_f32(nx, &lm_alpha[j[0]], &lm_s[j[0]*nx], d); lm_alpha[j[0]] /= lm_ys[j[0]]; ggml_vec_mad_f32(nx, d, &lm_y[j[0]*nx], -lm_alpha[j[0]]); } ggml_vec_scale_f32(nx, d, ys/yy); for (int i = 0; i < bound; ++i) { ggml_vec_dot_f32(nx, &beta, &lm_y[j[0]*nx], d); beta /= lm_ys[j[0]]; ggml_vec_mad_f32(nx, d, &lm_s[j[0]*nx], lm_alpha[j[0]] - beta); j[0] = (j[0] + 1)%m; } step[0] = 1.0; } return GGML_OPT_DID_NOT_CONVERGE; } struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) { struct ggml_opt_params result; switch (type) { case GGML_OPT_ADAM: { result = (struct ggml_opt_params) { .type = GGML_OPT_ADAM, .n_threads = 1, .past = 0, .delta = 1e-5f, .max_no_improvement = 100, .print_forward_graph = 1, .print_backward_graph = 1, .adam = { .n_iter = 10000, .sched = 1.000f, .decay = 0.0f, .decay_min_ndim = 2, .alpha = 0.001f, .beta1 = 0.9f, .beta2 = 0.999f, .eps = 1e-8f, .eps_f = 1e-5f, .eps_g = 1e-3f, .gclip = 0.0f, }, }; } break; case GGML_OPT_LBFGS: { result = (struct ggml_opt_params) { .type = GGML_OPT_LBFGS, .n_threads = 1, .past = 0, .delta = 1e-5f, .max_no_improvement = 0, .print_forward_graph = 1, .print_backward_graph = 1, .lbfgs = { .m = 6, .n_iter = 100, .max_linesearch = 20, .eps = 1e-5f, .ftol = 1e-4f, .wolfe = 0.9f, .min_step = 1e-20f, .max_step = 1e+20f, .linesearch = GGML_LINESEARCH_DEFAULT, }, }; } break; } return result; } void ggml_opt_init( struct ggml_context * ctx, struct ggml_opt_context * opt, struct ggml_opt_params params, int64_t nx) { opt->ctx = ctx; opt->params = params; opt->iter = 0; opt->nx = nx; opt->just_initialized = 1; switch (opt->params.type) { case GGML_OPT_ADAM: { opt->adam.m = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); opt->adam.v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); opt->adam.pf = params.past > 0 ? 
ggml_new_tensor_1d(ctx, GGML_TYPE_F32, params.past) : ((void*)0); ggml_set_zero(opt->adam.m); ggml_set_zero(opt->adam.v); if (opt->adam.pf) { ggml_set_zero(opt->adam.pf); } } break; case GGML_OPT_LBFGS: { opt->lbfgs.x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); opt->lbfgs.xp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); opt->lbfgs.g = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); opt->lbfgs.gp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); opt->lbfgs.d = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx); opt->lbfgs.pf = params.past > 0 ? ggml_new_tensor_1d(ctx, GGML_TYPE_F32, params.past) : ((void*)0); opt->lbfgs.lmal = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, params.lbfgs.m); opt->lbfgs.lmys = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, params.lbfgs.m); opt->lbfgs.lms = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, params.lbfgs.m); opt->lbfgs.lmy = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, params.lbfgs.m); ggml_set_zero(opt->lbfgs.x); ggml_set_zero(opt->lbfgs.xp); ggml_set_zero(opt->lbfgs.g); ggml_set_zero(opt->lbfgs.gp); ggml_set_zero(opt->lbfgs.d); if (opt->lbfgs.pf) { ggml_set_zero(opt->lbfgs.pf); } ggml_set_zero(opt->lbfgs.lmal); ggml_set_zero(opt->lbfgs.lmys); ggml_set_zero(opt->lbfgs.lms); ggml_set_zero(opt->lbfgs.lmy); } break; } } enum ggml_opt_result ggml_opt( struct ggml_context * ctx, struct ggml_opt_params params, struct ggml_tensor * f) { _Bool free_ctx = 0; if (ctx == ((void*)0)) { struct ggml_init_params params_ctx = { .mem_size = 16*1024*1024, .mem_buffer = ((void*)0), .no_alloc = 0, }; ctx = ggml_init(params_ctx); if (ctx == ((void*)0)) { return GGML_OPT_NO_CONTEXT; } free_ctx = 1; } enum ggml_opt_result result = GGML_OPT_OK; struct ggml_opt_context * opt = (struct ggml_opt_context *) __builtin_alloca(sizeof(struct ggml_opt_context)); ggml_opt_init(ctx, opt, params, 0); result = ggml_opt_resume(ctx, opt, f); if (free_ctx) { ggml_free(ctx); } return result; } enum ggml_opt_result ggml_opt_resume( struct ggml_context * ctx, struct ggml_opt_context * opt, struct 
ggml_tensor * f) { struct ggml_tensor * gfbuf = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(struct ggml_cgraph) / ggml_type_size(GGML_TYPE_I32)+ (sizeof(struct ggml_cgraph) % ggml_type_size(GGML_TYPE_I32) ? 1 : 0)); struct ggml_tensor * gbbuf = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(struct ggml_cgraph) / ggml_type_size(GGML_TYPE_I32)+ (sizeof(struct ggml_cgraph) % ggml_type_size(GGML_TYPE_I32) ? 1 : 0)); struct ggml_cgraph * gf = (struct ggml_cgraph *) gfbuf->data; struct ggml_cgraph * gb = (struct ggml_cgraph *) gbbuf->data; *gf = ggml_build_forward (f); *gb = ggml_build_backward(ctx, gf, 1); return ggml_opt_resume_g(ctx, opt, f, gf, gb, ((void*)0), ((void*)0)); } enum ggml_opt_result ggml_opt_resume_g( struct ggml_context * ctx, struct ggml_opt_context * opt, struct ggml_tensor * f, struct ggml_cgraph * gf, struct ggml_cgraph * gb, ggml_opt_callback callback, void * callback_data) { enum ggml_opt_result result = GGML_OPT_OK; switch (opt->params.type) { case GGML_OPT_ADAM: { result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data); } break; case GGML_OPT_LBFGS: { result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data); } break; } if (opt->params.print_forward_graph) { ggml_graph_print (gf); ggml_graph_dump_dot(gf, ((void*)0), "opt-forward.dot"); } if (opt->params.print_backward_graph) { ggml_graph_print (gb); ggml_graph_dump_dot(gb, gf, "opt-backward.dot"); } return result; } size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist) { ((void) (0)); const int nb = k / 32; for (int b = 0; b < n; b += k) { block_q4_0 * restrict y = (block_q4_0 *) dst + b/32; quantize_row_q4_0_reference(src + b, y, k); for (int i = 0; i < nb; i++) { for (int j = 0; j < 32; j += 2) { const uint8_t vi0 = y[i].qs[j/2] & 0x0F; const uint8_t vi1 = y[i].qs[j/2] >> 4; hist[vi0]++; hist[vi1]++; } } } return (n/32*sizeof(block_q4_0)); } size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int 
k, int64_t * hist) { ((void) (0)); const int nb = k / 32; for (int b = 0; b < n; b += k) { block_q4_1 * restrict y = (block_q4_1 *) dst + b/32; quantize_row_q4_1_reference(src + b, y, k); for (int i = 0; i < nb; i++) { for (int j = 0; j < 32; j += 2) { const uint8_t vi0 = y[i].qs[j/2] & 0x0F; const uint8_t vi1 = y[i].qs[j/2] >> 4; hist[vi0]++; hist[vi1]++; } } } return (n/32*sizeof(block_q4_1)); } size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist) { ((void) (0)); const int nb = k / 32; for (int b = 0; b < n; b += k) { block_q5_0 * restrict y = (block_q5_0 *)dst + b/32; quantize_row_q5_0_reference(src + b, y, k); for (int i = 0; i < nb; i++) { uint32_t qh; memcpy(&qh, &y[i].qh, sizeof(qh)); for (int j = 0; j < 32; j += 2) { const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4; const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12)); const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2; const uint8_t vi1 = ((y[i].qs[j/2] >> 4) | vh1) / 2; hist[vi0]++; hist[vi1]++; } } } return (n/32*sizeof(block_q5_0)); } size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist) { ((void) (0)); const int nb = k / 32; for (int b = 0; b < n; b += k) { block_q5_1 * restrict y = (block_q5_1 *)dst + b/32; quantize_row_q5_1_reference(src + b, y, k); for (int i = 0; i < nb; i++) { uint32_t qh; memcpy(&qh, &y[i].qh, sizeof(qh)); for (int j = 0; j < 32; j += 2) { const uint8_t vh0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4; const uint8_t vh1 = ((qh & (1u << (j + 16))) >> (j + 12)); const uint8_t vi0 = ((y[i].qs[j/2] & 0x0F) | vh0) / 2; const uint8_t vi1 = ((y[i].qs[j/2] >> 4) | vh1) / 2; hist[vi0]++; hist[vi1]++; } } } return (n/32*sizeof(block_q5_1)); } size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist) { ((void) (0)); const int nb = k / 32; for (int b = 0; b < n; b += k) { block_q8_0 * restrict y = (block_q8_0 *)dst + b/32; quantize_row_q8_0_reference(src + b, y, k); for (int 
i = 0; i < nb; i++) { for (int j = 0; j < 32; ++j) { const int8_t vi = y[i].qs[j]; hist[vi/16 + 8]++; } } } return (n/32*sizeof(block_q8_0)); } size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist) { size_t result = 0; switch (type) { case GGML_TYPE_Q4_0: { do { if (!(start % 32 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19492, "start % QK4_0 == 0"); abort(); } } while (0); block_q4_0 * block = (block_q4_0*)dst + start / 32; result = ggml_quantize_q4_0(src + start, block, n, n, hist); } break; case GGML_TYPE_Q4_1: { do { if (!(start % 32 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19498, "start % QK4_1 == 0"); abort(); } } while (0); block_q4_1 * block = (block_q4_1*)dst + start / 32; result = ggml_quantize_q4_1(src + start, block, n, n, hist); } break; case GGML_TYPE_Q5_0: { do { if (!(start % 32 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19504, "start % QK5_0 == 0"); abort(); } } while (0); block_q5_0 * block = (block_q5_0*)dst + start / 32; result = ggml_quantize_q5_0(src + start, block, n, n, hist); } break; case GGML_TYPE_Q5_1: { do { if (!(start % 32 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19510, "start % QK5_1 == 0"); abort(); } } while (0); block_q5_1 * block = (block_q5_1*)dst + start / 32; result = ggml_quantize_q5_1(src + start, block, n, n, hist); } break; case GGML_TYPE_Q8_0: { do { if (!(start % 32 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19516, "start % QK8_0 == 0"); abort(); } } while (0); block_q8_0 * block = (block_q8_0*)dst + start / 32; result = ggml_quantize_q8_0(src + start, block, n, n, hist); } break; case GGML_TYPE_Q2_K: { do { if (!(start % 256 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19523, "start % QK_K == 0"); abort(); } } while (0); block_q2_K * block = (block_q2_K*)dst + start / 256; result = ggml_quantize_q2_K(src + start, block, n, n, hist); } break; 
case GGML_TYPE_Q3_K: { do { if (!(start % 256 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19529, "start % QK_K == 0"); abort(); } } while (0); block_q3_K * block = (block_q3_K*)dst + start / 256; result = ggml_quantize_q3_K(src + start, block, n, n, hist); } break; case GGML_TYPE_Q4_K: { do { if (!(start % 256 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19535, "start % QK_K == 0"); abort(); } } while (0); block_q4_K * block = (block_q4_K*)dst + start / 256; result = ggml_quantize_q4_K(src + start, block, n, n, hist); } break; case GGML_TYPE_Q5_K: { do { if (!(start % 256 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19541, "start % QK_K == 0"); abort(); } } while (0); block_q5_K * block = (block_q5_K*)dst + start / 256; result = ggml_quantize_q5_K(src + start, block, n, n, hist); } break; case GGML_TYPE_Q6_K: { do { if (!(start % 256 == 0)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19547, "start % QK_K == 0"); abort(); } } while (0); block_q6_K * block = (block_q6_K*)dst + start / 256; result = ggml_quantize_q6_K(src + start, block, n, n, hist); } break; case GGML_TYPE_F16: { int elemsize = sizeof(ggml_fp16_t); ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n); result = n * elemsize; } break; case GGML_TYPE_F32: { int elemsize = sizeof(float); result = n * elemsize; memcpy((uint8_t *)dst + start * elemsize, src + start, result); } break; default: ((void) (0)); } return result; }

// Length-prefixed string as stored in a GGUF file. The readers below allocate
// `data` with n + 1 zeroed bytes, so it is NUL-terminated after a successful read.
struct gguf_str {
    uint64_t n;
    char * data;
};

// Size in bytes of one value of each GGUF type (0 for arrays, which have no fixed size).
static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
    [GGUF_TYPE_UINT8]   = sizeof(uint8_t),
    [GGUF_TYPE_INT8]    = sizeof(int8_t),
    [GGUF_TYPE_UINT16]  = sizeof(uint16_t),
    [GGUF_TYPE_INT16]   = sizeof(int16_t),
    [GGUF_TYPE_UINT32]  = sizeof(uint32_t),
    [GGUF_TYPE_INT32]   = sizeof(int32_t),
    [GGUF_TYPE_FLOAT32] = sizeof(float),
    [GGUF_TYPE_BOOL]    = sizeof(_Bool),
    [GGUF_TYPE_STRING]  = sizeof(struct gguf_str),
    [GGUF_TYPE_UINT64]  = sizeof(uint64_t),
    [GGUF_TYPE_INT64]   = sizeof(int64_t),
    [GGUF_TYPE_FLOAT64] = sizeof(double),
    [GGUF_TYPE_ARRAY]   = 0,
};
_Static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");

// Short display name of each GGUF type.
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
    [GGUF_TYPE_UINT8]   = "u8",
    [GGUF_TYPE_INT8]    = "i8",
    [GGUF_TYPE_UINT16]  = "u16",
    [GGUF_TYPE_INT16]   = "i16",
    [GGUF_TYPE_UINT32]  = "u32",
    [GGUF_TYPE_INT32]   = "i32",
    [GGUF_TYPE_FLOAT32] = "f32",
    [GGUF_TYPE_BOOL]    = "bool",
    [GGUF_TYPE_STRING]  = "str",
    [GGUF_TYPE_ARRAY]   = "arr",
    [GGUF_TYPE_UINT64]  = "u64",
    [GGUF_TYPE_INT64]   = "i64",
    [GGUF_TYPE_FLOAT64] = "f64",
};
_Static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");

// Storage for the value of one key-value pair; the active member is selected
// by the `type` field of the enclosing struct gguf_kv.
union gguf_value {
    uint8_t  uint8;
    int8_t   int8;
    uint16_t uint16;
    int16_t  int16;
    uint32_t uint32;
    int32_t  int32;
    float    float32;
    uint64_t uint64;
    int64_t  int64;
    double   float64;
    _Bool    bool_;

    struct gguf_str str;

    struct {
        enum gguf_type type; // element type

        uint64_t n;          // number of elements
        void * data;
    } arr;
};

// One metadata key-value pair.
struct gguf_kv {
    struct gguf_str key;

    enum gguf_type type;
    union gguf_value value;
};

// File header fields.
struct gguf_header {
    uint32_t magic;
    uint32_t version;
    uint64_t n_tensors;
    uint64_t n_kv;
};

// Description of one tensor stored in the file.
struct gguf_tensor_info {
    struct gguf_str name;

    uint32_t n_dims;
    uint64_t ne[4];

    enum ggml_type type;

    uint64_t offset;

    const void * data;
    size_t size;
};

// In-memory representation of a GGUF file.
struct gguf_context {
    struct gguf_header header;

    struct gguf_kv * kv;
    struct gguf_tensor_info * infos;

    size_t alignment;
    size_t offset;
    size_t size;

    void * data;
};

// Reads exactly `size` bytes from `file` into `dst` and advances `*offset` by
// the number of bytes actually read. Returns 1 only when all bytes were read.
static _Bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
    const size_t n = fread(dst, 1, size, file);
    *offset += n;
    return n == size;
}

// Allocates a zeroed, NUL-terminated buffer for a string of `len` bytes and
// reads the string body into it. On a length that cannot be allocated, or on
// allocation failure, `p` is left empty (n == 0, data == null) and 0 is
// returned, so nothing is ever read into a buffer that is too small.
static _Bool gguf_fread_str_body(FILE * file, struct gguf_str * p, uint64_t len, size_t * offset) {
    // largest length for which len + 1 is still representable as size_t
    const uint64_t max_len = (uint64_t) ((size_t) -1) - 1;

    p->n    = 0;
    p->data = ((void*)0);

    if (len > max_len) {
        return 0;
    }

    p->data = calloc((size_t) len + 1, 1);
    if (!p->data) {
        return 0;
    }
    p->n = len;

    return gguf_fread_el(file, p->data, (size_t) len, offset);
}

// Reads a string with a 64-bit length prefix (current file format) into `p`.
// Returns 1 on success. On failure `p->data` is either null or a buffer owned
// by `p`.
static _Bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) {
    uint64_t len = 0;

    p->n    = 0;
    p->data = ((void*)0);

    if (!gguf_fread_el(file, &len, sizeof(len), offset)) {
        return 0;
    }

    return gguf_fread_str_body(file, p, len, offset);
}

// Reads a string with a 32-bit length prefix (file format version 1) into `p`.
// Returns 1 on success. On failure `p->data` is either null or a buffer owned
// by `p`.
static _Bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) {
    uint32_t len = 0;

    p->n    = 0;
    p->data = ((void*)0);

    if (!gguf_fread_el(file, &len, sizeof(len), offset)) {
        return 0;
    }

    return gguf_fread_str_body(file, p, len, offset);
}

// Creates an empty GGUF context (version 2, no tensors, no key-value pairs,
// alignment 32). Returns a null pointer when the allocation fails.
struct gguf_context * gguf_init_empty(void) {
    struct gguf_context * ctx = ggml_aligned_malloc(sizeof(struct gguf_context));
    if (!ctx) {
        return ((void*)0);
    }

    ctx->header.magic     = 0x46554747;
    ctx->header.version   = 2;
    ctx->header.n_tensors = 0;
    ctx->header.n_kv      = 0;

    ctx->kv    = ((void*)0);
    ctx->infos = ((void*)0);

    ctx->alignment = 32;
    ctx->offset    = 0;
    ctx->size      = 0;

    ctx->data = ((void*)0);

    return ctx;
}

struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) { FILE * file = fopen(fname, "rb"); if (!file) { return ((void*)0); } size_t offset = 0; uint32_t magic = 0; { gguf_fread_el(file, &magic, sizeof(magic), &offset); if (magic != 0x46554747) { fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic); fclose(file); return ((void*)0); } } _Bool ok = 1; struct gguf_context * ctx = ggml_aligned_malloc(sizeof(struct gguf_context)); { ctx->header.magic = magic; ctx->kv = ((void*)0); ctx->infos = ((void*)0); ctx->data = ((void*)0); ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset); if (ctx->header.version == 1) { uint32_t n_tensors = 0; uint32_t n_kv = 0; ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset); ok = ok && gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset); ctx->header.n_tensors = n_tensors; ctx->header.n_kv = n_kv; } else { ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset); ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset); } if (!ok) { fprintf(stderr, "%s: failed to read header\n", __func__); fclose(file); gguf_free(ctx); return ((void*)0); } } _Bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur; if (ctx->header.version == 1) { gguf_fread_str = gguf_fread_str_v1; } { ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { struct gguf_kv * kv = &ctx->kv[i]; ok = ok && gguf_fread_str(file, &kv->key, &offset); ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset); switch (kv->type) { case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break; case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break; case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break; case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break; case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break; case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break; case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break; case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break; case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break; case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break; case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break; case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break; case GGUF_TYPE_ARRAY: { ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset); if (ctx->header.version == 1) { uint32_t n = 0; ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset); kv->value.arr.n = n; } else { ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset); } switch (kv->value.arr.type) { case GGUF_TYPE_UINT8: case GGUF_TYPE_INT8: case 
GGUF_TYPE_UINT16: case GGUF_TYPE_INT16: case GGUF_TYPE_UINT32: case GGUF_TYPE_INT32: case GGUF_TYPE_FLOAT32: case GGUF_TYPE_UINT64: case GGUF_TYPE_INT64: case GGUF_TYPE_FLOAT64: case GGUF_TYPE_BOOL: { kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]); ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset); } break; case GGUF_TYPE_STRING: { kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str)); for (uint32_t j = 0; j < kv->value.arr.n; ++j) { ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset); } } break; case GGUF_TYPE_ARRAY: case GGUF_TYPE_COUNT: do { if (!(0 && "invalid type")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19858, "false && \"invalid type\""); abort(); } } while (0); break; }; } break; case GGUF_TYPE_COUNT: do { if (!(0 && "invalid type")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 19861, "false && \"invalid type\""); abort(); } } while (0); }; if (!ok) { break; } } if (!ok) { fprintf(stderr, "%s: failed to read key-value pairs\n", __func__); fclose(file); gguf_free(ctx); return ((void*)0); } } { ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info)); for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { struct gguf_tensor_info * info = &ctx->infos[i]; for (int j = 0; j < 4; ++j) { info->ne[j] = 1; } ok = ok && gguf_fread_str(file, &info->name, &offset); ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset); for (uint32_t j = 0; j < info->n_dims; ++j) { if (ctx->header.version == 1) { uint32_t t = 0; ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset); info->ne[j] = t; } else { ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset); } } ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset); ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset); if (!ok) { fprintf(stderr, 
"%s: failed to read tensor info\n", __func__); fclose(file); gguf_free(ctx); return ((void*)0); } } } ctx->alignment = 32; int alignment_idx = gguf_find_key(ctx, "general.alignment"); if (alignment_idx != -1) { ctx->alignment = gguf_get_val_u32(ctx, alignment_idx); } { const size_t offset_pad = offset % ctx->alignment; if (offset_pad != 0) { offset += ctx->alignment - offset_pad; fseek(file, offset, 0); } } ctx->offset = offset; { ctx->size = 0; for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { struct gguf_tensor_info * info = &ctx->infos[i]; const int64_t ne = (int64_t) info->ne[0] * (int64_t) info->ne[1] * (int64_t) info->ne[2] * (int64_t) info->ne[3]; if (ne % ggml_blck_size(info->type) != 0) { fprintf(stderr, "%s: tensor '%s' number of elements (%" "ll""d" ") is not a multiple of block size (%d)\n", __func__, info->name.data, ne, ggml_blck_size(info->type)); fclose(file); gguf_free(ctx); return ((void*)0); } const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type); ctx->size += (((size_cur) + (ctx->alignment) - 1) & ~((ctx->alignment) - 1)); } } if (params.ctx != ((void*)0)) { const size_t mem_size = params.no_alloc ? 
(ctx->header.n_tensors )*ggml_tensor_overhead() : (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size; struct ggml_init_params pdata = { .mem_size = mem_size, .mem_buffer = ((void*)0), .no_alloc = params.no_alloc, }; *params.ctx = ggml_init(pdata); struct ggml_context * ctx_data = *params.ctx; struct ggml_tensor * data = ((void*)0); if (!params.no_alloc) { data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size); ok = ok && data != ((void*)0); ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset); if (!ok) { fprintf(stderr, "%s: failed to read tensor data\n", __func__); fclose(file); ggml_free(ctx_data); gguf_free(ctx); return ((void*)0); } ctx->data = data->data; } ggml_set_no_alloc(ctx_data, 1); for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { const int64_t ne[4] = { ctx->infos[i].ne[0], ctx->infos[i].ne[1], ctx->infos[i].ne[2], ctx->infos[i].ne[3], }; struct ggml_tensor * cur = ggml_new_tensor(ctx_data, ctx->infos[i].type, ctx->infos[i].n_dims, ne); ok = ok && cur != ((void*)0); ggml_set_name(cur, ctx->infos[i].name.data); if (!ok) { break; } if (!params.no_alloc) { cur->data = (char *) data->data + ctx->infos[i].offset; } } if (!ok) { fprintf(stderr, "%s: failed to read the tensor data\n", __func__); fclose(file); ggml_free(ctx_data); gguf_free(ctx); return ((void*)0); } ggml_set_no_alloc(ctx_data, params.no_alloc); } fclose(file); return ctx; } void gguf_free(struct gguf_context * ctx) { if (ctx == ((void*)0)) { return; } if (ctx->kv) { for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { struct gguf_kv * kv = &ctx->kv[i]; if (kv->key.data) { free(kv->key.data); } if (kv->type == GGUF_TYPE_STRING) { if (kv->value.str.data) { free(kv->value.str.data); } } if (kv->type == GGUF_TYPE_ARRAY) { if (kv->value.arr.data) { if (kv->value.arr.type == GGUF_TYPE_STRING) { for (uint32_t j = 0; j < kv->value.arr.n; ++j) { struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j]; if (str->data) { free(str->data); } } } 
free(kv->value.arr.data); } } } free(ctx->kv); } if (ctx->infos) { for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { struct gguf_tensor_info * info = &ctx->infos[i]; if (info->name.data) { free(info->name.data); } } free(ctx->infos); } free(ctx); } const char * gguf_type_name(enum gguf_type type) { return GGUF_TYPE_NAME[type]; } int gguf_get_version(struct gguf_context * ctx) { return ctx->header.version; } size_t gguf_get_alignment(struct gguf_context * ctx) { return ctx->alignment; } size_t gguf_get_data_offset(struct gguf_context * ctx) { return ctx->offset; } void * gguf_get_data(struct gguf_context * ctx) { return ctx->data; } int gguf_get_n_kv(struct gguf_context * ctx) { return ctx->header.n_kv; } int gguf_find_key(struct gguf_context * ctx, const char * key) { int keyfound = -1; const int n_kv = gguf_get_n_kv(ctx); for (int i = 0; i < n_kv; ++i) { if (strcmp(key, gguf_get_key(ctx, i)) == 0) { keyfound = i; break; } } return keyfound; } const char * gguf_get_key(struct gguf_context * ctx, int i) { return ctx->kv[i].key.data; } enum gguf_type gguf_get_kv_type(struct gguf_context * ctx, int i) { return ctx->kv[i].type; } enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i) { return ctx->kv[i].value.arr.type; } const void * gguf_get_arr_data(struct gguf_context * ctx, int i) { return ctx->kv[i].value.arr.data; } const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i) { struct gguf_kv * kv = &ctx->kv[key_id]; struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i]; return str->data; } int gguf_get_arr_n(struct gguf_context * ctx, int i) { return ctx->kv[i].value.arr.n; } uint8_t gguf_get_val_u8(struct gguf_context * ctx, int i) { return ctx->kv[i].value.uint8; } int8_t gguf_get_val_i8(struct gguf_context * ctx, int i) { return ctx->kv[i].value.int8; } uint16_t gguf_get_val_u16(struct gguf_context * ctx, int i) { return ctx->kv[i].value.uint16; } int16_t gguf_get_val_i16(struct gguf_context * ctx, int i) { 
return ctx->kv[i].value.int16; } uint32_t gguf_get_val_u32(struct gguf_context * ctx, int i) { return ctx->kv[i].value.uint32; } int32_t gguf_get_val_i32(struct gguf_context * ctx, int i) { return ctx->kv[i].value.int32; } float gguf_get_val_f32(struct gguf_context * ctx, int i) { return ctx->kv[i].value.float32; } uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) { return ctx->kv[i].value.uint64; } int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) { return ctx->kv[i].value.int64; } double gguf_get_val_f64(struct gguf_context * ctx, int i) { return ctx->kv[i].value.float64; } _Bool gguf_get_val_bool(struct gguf_context * ctx, int i) { return ctx->kv[i].value.bool_; } const char * gguf_get_val_str (struct gguf_context * ctx, int i) { return ctx->kv[i].value.str.data; } int gguf_get_n_tensors(struct gguf_context * ctx) { return ctx->header.n_tensors; } int gguf_find_tensor(struct gguf_context * ctx, const char * name) { int tensorfound = -1; const int n_tensors = gguf_get_n_tensors(ctx); for (int i = 0; i < n_tensors; ++i) { if (strcmp(name, gguf_get_tensor_name(ctx, i)) == 0) { tensorfound = i; break; } } return tensorfound; } size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i) { return ctx->infos[i].offset; } char * gguf_get_tensor_name(struct gguf_context * ctx, int i) { return ctx->infos[i].name.data; } static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) { const int idx = gguf_find_key(ctx, key); if (idx >= 0) { return idx; } const int n_kv = gguf_get_n_kv(ctx); ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv)); ctx->kv[n_kv].key.n = strlen(key); ctx->kv[n_kv].key.data = strdup(key); ctx->header.n_kv++; return n_kv; } void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_UINT8; ctx->kv[idx].value.uint8 = val; } void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) { 
const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_INT8; ctx->kv[idx].value.int8 = val; } void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_UINT16; ctx->kv[idx].value.uint16 = val; } void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_INT16; ctx->kv[idx].value.int16 = val; } void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_UINT32; ctx->kv[idx].value.uint32 = val; } void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_INT32; ctx->kv[idx].value.int32 = val; } void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_FLOAT32; ctx->kv[idx].value.float32 = val; } void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_UINT64; ctx->kv[idx].value.uint64 = val; } void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_INT64; ctx->kv[idx].value.int64 = val; } void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_FLOAT64; ctx->kv[idx].value.float64 = val; } void gguf_set_val_bool(struct gguf_context * ctx, const char * key, _Bool val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_BOOL; ctx->kv[idx].value.bool_ = val; } void gguf_set_val_str(struct gguf_context * ctx, const char * key, 
const char * val) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_STRING; ctx->kv[idx].value.str.n = strlen(val); ctx->kv[idx].value.str.data = strdup(val); } void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_ARRAY; ctx->kv[idx].value.arr.type = type; ctx->kv[idx].value.arr.n = n; ctx->kv[idx].value.arr.data = malloc(n*GGUF_TYPE_SIZE[type]); memcpy(ctx->kv[idx].value.arr.data, data, n*GGUF_TYPE_SIZE[type]); } void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) { const int idx = gguf_get_or_add_key(ctx, key); ctx->kv[idx].type = GGUF_TYPE_ARRAY; ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING; ctx->kv[idx].value.arr.n = n; ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str)); for (int i = 0; i < n; i++) { struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i]; str->n = strlen(data[i]); str->data = strdup(data[i]); } } void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) { for (uint32_t i = 0; i < src->header.n_kv; i++) { switch (src->kv[i].type) { case GGUF_TYPE_UINT8: gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break; case GGUF_TYPE_INT8: gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break; case GGUF_TYPE_UINT16: gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break; case GGUF_TYPE_INT16: gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break; case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break; case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break; case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break; case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, 
src->kv[i].value.uint64); break; case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break; case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break; case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break; case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break; case GGUF_TYPE_ARRAY: { if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) { const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *)); for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) { data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data; } gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n); free(data); } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) { do { if (!(0 && "nested arrays not supported")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 20392, "false && \"nested arrays not supported\""); abort(); } } while (0); } else { gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n); } } break; case GGUF_TYPE_COUNT: do { if (!(0 && "invalid type")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 20397, "false && \"invalid type\""); abort(); } } while (0); break; } } } void gguf_add_tensor( struct gguf_context * ctx, const struct ggml_tensor * tensor) { const int idx = ctx->header.n_tensors; ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info)); ctx->infos[idx].name.n = strlen(tensor->name); ctx->infos[idx].name.data = strdup(tensor->name); for (int i = 0; i < 4; ++i) { ctx->infos[idx].ne[i] = 1; } ctx->infos[idx].n_dims = tensor->n_dims; for (int i = 0; i < tensor->n_dims; i++) { ctx->infos[idx].ne[i] = tensor->ne[i]; } ctx->infos[idx].type = tensor->type; ctx->infos[idx].offset = 0; ctx->infos[idx].data = tensor->data; ctx->infos[idx].size = ggml_nbytes(tensor); if 
(ctx->header.n_tensors > 0) { ctx->infos[idx].offset = ctx->infos[idx - 1].offset + (((ctx->infos[idx - 1].size) + (ctx->alignment) - 1) & ~((ctx->alignment) - 1)); } ctx->header.n_tensors++; } void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) { const int idx = gguf_find_tensor(ctx, name); if (idx < 0) { do { if (!(0 && "tensor not found")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 20435, "false && \"tensor not found\""); abort(); } } while (0); } ctx->infos[idx].type = type; } void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) { const int idx = gguf_find_tensor(ctx, name); if (idx < 0) { do { if (!(0 && "tensor not found")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 20444, "false && \"tensor not found\""); abort(); } } while (0); } ctx->infos[idx].data = data; ctx->infos[idx].size = size; for (uint32_t i = idx + 1; i < ctx->header.n_tensors; ++i) { ctx->infos[i].offset = ctx->infos[i - 1].offset + (((ctx->infos[i - 1].size) + (ctx->alignment) - 1) & ~((ctx->alignment) - 1)); } } # 20465 "ggml.c" struct gguf_buf { void * data; size_t size; size_t offset; }; static struct gguf_buf gguf_buf_init(size_t size) { struct gguf_buf buf = { size == 0 ? 
((void*)0) : malloc(size), size, 0, }; return buf; } static void gguf_buf_free(struct gguf_buf buf) { if (buf.data) { free(buf.data); } } static void gguf_buf_grow(struct gguf_buf * buf, size_t size) { if (buf->offset + size > buf->size) { buf->size = 1.5*(buf->offset + size); if (buf->data) { buf->data = realloc(buf->data, buf->size); } } } static void gguf_bwrite_str(struct gguf_buf * buf, const struct gguf_str * val) { gguf_buf_grow(buf, sizeof(val->n) + val->n); if (buf->data) { memcpy((char *) buf->data + buf->offset, &val->n, sizeof(val->n)); } buf->offset += sizeof(val->n); if (buf->data) { memcpy((char *) buf->data + buf->offset, val->data, val->n); } buf->offset += val->n; } static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_size) { gguf_buf_grow(buf, el_size); if (buf->data) { memcpy((char *) buf->data + buf->offset, val, el_size); } buf->offset += el_size; } static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf, _Bool only_meta) { gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic)); gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version)); gguf_bwrite_el(buf, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors)); gguf_bwrite_el(buf, &ctx->header.n_kv, sizeof(ctx->header.n_kv)); for (uint32_t i = 0; i < ctx->header.n_kv; ++i) { struct gguf_kv * kv = &ctx->kv[i]; gguf_bwrite_str(buf, &kv->key); gguf_bwrite_el (buf, &kv->type, sizeof(kv->type)); switch (kv->type) { case GGUF_TYPE_UINT8: gguf_bwrite_el( buf, &kv->value.uint8, sizeof(kv->value.uint8) ); break; case GGUF_TYPE_INT8: gguf_bwrite_el (buf, &kv->value.int8, sizeof(kv->value.int8) ); break; case GGUF_TYPE_UINT16: gguf_bwrite_el (buf, &kv->value.uint16, sizeof(kv->value.uint16) ); break; case GGUF_TYPE_INT16: gguf_bwrite_el (buf, &kv->value.int16, sizeof(kv->value.int16) ); break; case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break; case GGUF_TYPE_INT32: gguf_bwrite_el (buf, 
&kv->value.int32, sizeof(kv->value.int32) ); break; case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break; case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break; case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break; case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break; case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break; case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break; case GGUF_TYPE_ARRAY: { gguf_bwrite_el(buf, &kv->value.arr.type, sizeof(kv->value.arr.type)); gguf_bwrite_el(buf, &kv->value.arr.n, sizeof(kv->value.arr.n) ); switch (kv->value.arr.type) { case GGUF_TYPE_UINT8: case GGUF_TYPE_INT8: case GGUF_TYPE_UINT16: case GGUF_TYPE_INT16: case GGUF_TYPE_UINT32: case GGUF_TYPE_INT32: case GGUF_TYPE_FLOAT32: case GGUF_TYPE_UINT64: case GGUF_TYPE_INT64: case GGUF_TYPE_FLOAT64: case GGUF_TYPE_BOOL: { gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]); } break; case GGUF_TYPE_STRING: { for (uint32_t j = 0; j < kv->value.arr.n; ++j) { gguf_bwrite_str(buf, &((struct gguf_str *) kv->value.arr.data)[j]); } } break; case GGUF_TYPE_ARRAY: case GGUF_TYPE_COUNT: do { if (!(0 && "invalid type")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 20573, "false && \"invalid type\""); abort(); } } while (0); break; }; } break; case GGUF_TYPE_COUNT: do { if (!(0 && "invalid type")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 20576, "false && \"invalid type\""); abort(); } } while (0); }; } for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { struct gguf_tensor_info * info = &ctx->infos[i]; gguf_bwrite_str(buf, &info->name); gguf_bwrite_el (buf, &info->n_dims, sizeof(info->n_dims)); for (uint32_t j = 0; j < info->n_dims; ++j) { gguf_bwrite_el(buf, &info->ne[j], sizeof(info->ne[j])); } 
gguf_bwrite_el(buf, &info->type, sizeof(info->type)); gguf_bwrite_el(buf, &info->offset, sizeof(info->offset)); } { const size_t offset = buf->offset; const size_t offset_pad = (((offset) + (ctx->alignment) - 1) & ~((ctx->alignment) - 1)); if (offset_pad != offset) { uint8_t pad = 0; for (size_t i = 0; i < offset_pad - offset; ++i) { gguf_bwrite_el(buf, &pad, sizeof(pad)); } } } if (only_meta) { return; } size_t offset = 0; for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) { struct gguf_tensor_info * info = &ctx->infos[i]; const size_t size = info->size; const size_t size_pad = (((size) + (ctx->alignment) - 1) & ~((ctx->alignment) - 1)); gguf_bwrite_el(buf, info->data, size); if (size_pad != size) { uint8_t pad = 0; for (size_t j = 0; j < size_pad - size; ++j) { gguf_bwrite_el(buf, &pad, sizeof(pad)); } } do { if (!(offset == info->offset)) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 20628, "offset == info->offset"); abort(); } } while (0); offset += size_pad; } } void gguf_write_to_file(struct gguf_context * ctx, const char * fname, _Bool only_meta) { FILE * file = fopen(fname, "wb"); if (!file) { do { if (!(0 && "failed to open file for writing")) { fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", "ggml.c", 20637, "false && \"failed to open file for writing\""); abort(); } } while (0); } struct gguf_buf buf = gguf_buf_init(16*1024); gguf_write_to_buf(ctx, &buf, only_meta); fwrite(buf.data, 1, buf.offset, file); gguf_buf_free(buf); fclose(file); } size_t gguf_get_meta_size(struct gguf_context * ctx) { struct gguf_buf buf = gguf_buf_init(0); gguf_write_to_buf(ctx, &buf, 1); return buf.offset; } void gguf_get_meta_data(struct gguf_context * ctx, void * data) { struct gguf_buf buf = gguf_buf_init(16*1024); gguf_write_to_buf(ctx, &buf, 1); memcpy(data, buf.data, buf.offset); gguf_buf_free(buf); } int ggml_cpu_has_avx(void) { return 0; } int ggml_cpu_has_avx2(void) { return 0; } int ggml_cpu_has_avx512(void) { return 0; } int ggml_cpu_has_avx512_vbmi(void) { 
return 0; }

// CPU / backend feature probes. In this preprocessed translation unit every
// probe reduces to a constant: 1 when the feature is reported as available,
// 0 otherwise.

int ggml_cpu_has_avx512_vnni(void) {
    const int available = 0;
    return available;
}

int ggml_cpu_has_fma(void) {
    const int available = 0;
    return available;
}

int ggml_cpu_has_neon(void) {
    const int available = 1;
    return available;
}

int ggml_cpu_has_arm_fma(void) {
    const int available = 1;
    return available;
}

int ggml_cpu_has_f16c(void) {
    const int available = 0;
    return available;
}

int ggml_cpu_has_fp16_va(void) {
    const int available = 0;
    return available;
}

int ggml_cpu_has_wasm_simd(void) {
    const int available = 0;
    return available;
}

int ggml_cpu_has_blas(void) {
    const int available = 0;
    return available;
}

int ggml_cpu_has_cublas(void) {
    const int available = 0;
    return available;
}

int ggml_cpu_has_clblast(void) {
    const int available = 0;
    return available;
}

// Reports 1 when either GPU BLAS backend (cuBLAS or CLBlast) is available.
int ggml_cpu_has_gpublas(void) {
    if (ggml_cpu_has_cublas()) {
        return 1;
    }
    return ggml_cpu_has_clblast() != 0;
}

int ggml_cpu_has_sse3(void) {
    const int available = 0;
    return available;
}

int ggml_cpu_has_ssse3(void) {
    const int available = 0;
    return available;
}

int ggml_cpu_has_vsx(void) {
    const int available = 0;
    return available;
}