metal : bug with ggml_cont #3012

Closed · wants to merge 5 commits
5 changes: 1 addition & 4 deletions ggml-metal.m
@@ -541,10 +541,7 @@ void ggml_metal_graph_find_concurrency(
             int64_t data_start = (int64_t) gf->nodes[i]->data;
             int64_t length = (int64_t) ggml_nbytes(gf->nodes[i]);
             for (int j = n_start; j < i; j++) {
-                if (nodes_unused[j] && gf->nodes[j]->op != GGML_OP_RESHAPE \
-                        && gf->nodes[j]->op != GGML_OP_VIEW \
-                        && gf->nodes[j]->op != GGML_OP_TRANSPOSE \
-                        && gf->nodes[j]->op != GGML_OP_PERMUTE) {
+                if (nodes_unused[j] && gf->nodes[j]->view_src == NULL) {
Comment on lines 543 to +544
@ggerganov (Member, Author) commented on Sep 4, 2023:


Need confirmation if this is correct.

Btw, I see that check_mem is always set to false.
Are there cases where we would still need it?
If it's not needed, should we remove it?

                     if (((int64_t)gf->nodes[j]->data) >= data_start + length || \
                         ((int64_t)gf->nodes[j]->data) + (int64_t) ggml_nbytes(gf->nodes[j]) <= data_start) {
                         continue;
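The question in the review thread above concerns the new condition. A minimal sketch of the reasoning, assuming ggml's convention that view-producing ops (reshape, view, transpose, permute) set view_src on their output while tensors that own their buffer leave it NULL; the helper below is hypothetical and not part of the PR:

```c
#include "ggml.h"

// Hypothetical helper, not part of the PR: a node that merely aliases another
// tensor's buffer has view_src set, so checking view_src == NULL covers
// GGML_OP_RESHAPE/VIEW/TRANSPOSE/PERMUTE (and any future view-type op)
// without enumerating opcodes.
static bool node_owns_its_data(const struct ggml_tensor * t) {
    return t->view_src == NULL;
}
```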
8 changes: 4 additions & 4 deletions ggml-metal.metal
@@ -783,11 +783,11 @@ kernel void kernel_cpy_f16_f16(
     const int64_t i1 = (n - i3*ne2*ne1*ne0 - i2*ne1*ne0) / ne0;
     const int64_t i0 = (n - i3*ne2*ne1*ne0 - i2*ne1*ne0 - i1*ne0);

-    device half * dst_data = (device half *) ((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
-
     for (int64_t i00 = tpitg.x; i00 < ne00; i00 += ntg.x) {
-        device const half * src = (device half *)((device char *) src0 + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00);
-        dst_data[i00] = src[0];
+        device const half * src = (device half *) ((device char *) src0 + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00);
+        device half * dst_data = (device half *) ((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i00*nb0);
+
+        *dst_data = *src;
     }
 }

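The fix above recomputes the destination address inside the loop, following dst's own strides element by element instead of offsetting a single precomputed base pointer. A plain-C sketch of the same indexing, with hypothetical names and a 16-bit integer standing in for half:

```c
#include <stdint.h>

typedef uint16_t half_bits; // stand-in for a 16-bit half payload (assumption)

// CPU sketch of the fixed inner loop (hypothetical helper, not part of the PR):
// when dst is not contiguous, the destination offset must be recomputed from
// the running index i00 using dst's strides (nb0..nb3), rather than indexing
// one base pointer with dst_data[i00].
static void cpy_row_f16_f16_sketch(const char * src0, char * dst, int64_t ne00,
                                   int64_t i01, int64_t i02, int64_t i03,
                                   int64_t i1,  int64_t i2,  int64_t i3,
                                   const int64_t nb_src[4], const int64_t nb_dst[4]) {
    for (int64_t i00 = 0; i00 < ne00; ++i00) {
        const half_bits * src      = (const half_bits *)(src0 + i03*nb_src[3] + i02*nb_src[2] + i01*nb_src[1] + i00*nb_src[0]);
        half_bits       * dst_data = (half_bits       *)(dst  +  i3*nb_dst[3] +  i2*nb_dst[2] +  i1*nb_dst[1] + i00*nb_dst[0]);
        *dst_data = *src; // one element per iteration, at its own dst offset
    }
}
```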
2 changes: 1 addition & 1 deletion ggml.c
@@ -4285,7 +4285,7 @@ int64_t ggml_nrows(const struct ggml_tensor * tensor) {
 }

 size_t ggml_nbytes(const struct ggml_tensor * tensor) {
-    size_t nbytes = tensor->ne[0]*tensor->nb[0]/ggml_blck_size(tensor->type);
+    size_t nbytes = (tensor->ne[0]*tensor->nb[0])/ggml_blck_size(tensor->type);
     for (int i = 1; i < GGML_MAX_DIMS; ++i) {
         nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
     }
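For context, multiplying before dividing is what keeps the first-dimension byte count exact for block-quantized types, where nb[0] (bytes per block) is smaller than the block size. A worked example with illustrative Q4_0-like numbers (32 elements per block, 18 bytes per block; values assumed here, not taken from the PR):

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
    const int64_t ne0  = 4096; // elements in the first dimension
    const int64_t nb0  = 18;   // bytes per block (type size), Q4_0-like
    const int64_t blck = 32;   // elements per block

    // (ne0*nb0)/blck = 73728/32 = 2304 bytes for the row; dividing nb0 by blck
    // first would truncate to 0 in integer arithmetic.
    const int64_t row_bytes = (ne0*nb0)/blck;
    printf("row bytes: %lld\n", (long long) row_bytes);
    return 0;
}
```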
4 changes: 2 additions & 2 deletions llama.cpp
@@ -2418,11 +2418,11 @@ static struct ggml_cgraph * llm_build_llama(

         // split cached V into n_head heads
         struct ggml_tensor * V =
-            ggml_view_3d(ctx0, kv_self.v,
+            ggml_cont(ctx0, ggml_view_3d(ctx0, kv_self.v,
                     n_past + N, n_embd_head, n_head_kv,
                     ggml_element_size(kv_self.v)*n_ctx,
                     ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
-                    ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
+                    ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il));
         offload_func_v(V);
         ggml_set_name(V, "V");

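The llama.cpp change wraps the V view in ggml_cont, which schedules a copy of the strided view into a contiguous tensor and presumably exercises the Metal cpy path fixed above. The same edit written out in two steps, with v_view as a hypothetical intermediate name:

```c
// Two-step equivalent of the change above (v_view is a hypothetical name):
// ggml_view_3d returns a non-contiguous, strided view over the V cache, and
// ggml_cont materializes it into a contiguous tensor before V is used.
struct ggml_tensor * v_view = ggml_view_3d(ctx0, kv_self.v,
        n_past + N, n_embd_head, n_head_kv,
        ggml_element_size(kv_self.v)*n_ctx,
        ggml_element_size(kv_self.v)*n_ctx*n_embd_head,
        ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il);
struct ggml_tensor * V = ggml_cont(ctx0, v_view);
```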