Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix excess array object padding #41287

Merged
merged 1 commit into from
Jun 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,20 +114,23 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
}

int ndimwords = jl_array_ndimwords(ndims);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
if (tot <= ARRAY_INLINE_NBYTES) {
if (isunboxed && elsz >= 4)
tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align data area
// align data area
if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD)
tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT);
else if (isunboxed && elsz >= 4)
tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT);
size_t doffs = tsz;
tsz += tot;
tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align whole object
// jl_array_t is large enough that objects will always be 16-byte aligned
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
assert(((size_t)a & 15) == 0);
// No allocation or safepoint allowed after this
a->flags.how = 0;
data = (char*)a + doffs;
}
else {
tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); // align whole object
data = jl_gc_managed_malloc(tot);
// Allocate the Array **after** allocating the data
// to make sure the array is still young
Expand Down Expand Up @@ -223,7 +226,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype)));

int ndimwords = jl_array_ndimwords(ndims);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
int tsz = sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*);
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
// No allocation or safepoint allowed after this
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
Expand Down Expand Up @@ -304,7 +307,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str)
jl_array_t *a;

int ndimwords = jl_array_ndimwords(1);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*);
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type);
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
a->flags.ndims = 1;
Expand Down Expand Up @@ -351,7 +354,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
"unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);

int ndimwords = jl_array_ndimwords(1);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
// No allocation or safepoint allowed after this
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
Expand Down Expand Up @@ -418,7 +421,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
"unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);

int ndimwords = jl_array_ndimwords(ndims);
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
// No allocation or safepoint allowed after this
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
Expand Down
1 change: 1 addition & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz)
// JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide
#define JL_HEAP_ALIGNMENT JL_SMALL_BYTE_ALIGNMENT
#define GC_MAX_SZCLASS (2032-sizeof(void*))
static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, "");

STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
{
Expand Down
8 changes: 6 additions & 2 deletions src/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@

// object layout options ------------------------------------------------------

// how much space we're willing to waste if an array outgrows its
// original object
// The data for an array this size or below will be allocated within the
// Array object. If the array later outgrows that space, the space is wasted.
#define ARRAY_INLINE_NBYTES (2048*sizeof(void*))

// Arrays at least this size will get larger alignment (JL_CACHE_BYTE_ALIGNMENT).
// Must be bigger than GC_MAX_SZCLASS.
#define ARRAY_CACHE_ALIGN_THRESHOLD 2048

// codegen options ------------------------------------------------------------

// (Experimental) Use MCJIT ELF, even where it's not the native format
Expand Down
45 changes: 29 additions & 16 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -782,14 +782,23 @@ static void jl_write_values(jl_serializer_state *s)
#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
jl_array_t *ar = (jl_array_t*)v;
jl_value_t *et = jl_tparam0(jl_typeof(v));
size_t alen = jl_array_len(ar);
size_t datasize = alen * ar->elsize;
size_t tot = datasize;
int isbitsunion = jl_array_isbitsunion(ar);
if (isbitsunion)
tot += alen;
else if (ar->elsize == 1)
tot += 1;
int ndimwords = jl_array_ndimwords(ar->flags.ndims);
size_t tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
size_t headersize = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
// copy header
ios_write(s->s, (char*)v, tsz);
ios_write(s->s, (char*)v, headersize);
size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT;
if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD)
alignment_amt = JL_CACHE_BYTE_ALIGNMENT;
// make some header modifications in-place
jl_array_t *newa = (jl_array_t*)&s->s->buf[reloc_offset];
size_t alen = jl_array_len(ar);
size_t tot = alen * ar->elsize;
if (newa->flags.ndims == 1)
newa->maxsize = alen;
newa->offset = 0;
Expand All @@ -799,8 +808,7 @@ static void jl_write_values(jl_serializer_state *s)

// write data
if (!ar->flags.ptrarray && !ar->flags.hasptr) {
uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), 16);
// realign stream to max(data-align(array), sizeof(void*))
uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt);
write_padding(s->const_data, data - ios_pos(s->const_data));
// write data and relocations
newa->data = NULL; // relocation offset
Expand All @@ -815,22 +823,27 @@ static void jl_write_values(jl_serializer_state *s)
write_pointer(s->const_data);
}
else {
int isbitsunion = jl_array_isbitsunion(ar);
if (ar->elsize == 1 && !isbitsunion)
tot += 1;
ios_write(s->const_data, (char*)jl_array_data(ar), tot);
if (isbitsunion)
if (isbitsunion) {
ios_write(s->const_data, (char*)jl_array_data(ar), datasize);
ios_write(s->const_data, jl_array_typetagdata(ar), alen);
}
else {
ios_write(s->const_data, (char*)jl_array_data(ar), tot);
}
}
}
else {
newa->data = (void*)tsz; // relocation offset
size_t data = LLT_ALIGN(ios_pos(s->s), alignment_amt);
size_t padding_amt = data - ios_pos(s->s);
write_padding(s->s, padding_amt);
headersize += padding_amt;
newa->data = (void*)headersize; // relocation offset
arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location
arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target
if (ar->flags.hasptr) {
// copy all of the data first
const char *data = (const char*)jl_array_data(ar);
ios_write(s->s, data, tot);
ios_write(s->s, data, datasize);
// then rewrite all of the embedded pointers to null+relocation
uint16_t elsz = ar->elsize;
size_t j, np = ((jl_datatype_t*)et)->layout->npointers;
Expand All @@ -840,12 +853,12 @@ static void jl_write_values(jl_serializer_state *s)
size_t offset = i * elsz + jl_ptr_offset(((jl_datatype_t*)et), j) * sizeof(jl_value_t*);
jl_value_t *fld = *(jl_value_t**)&data[offset];
if (fld != NULL) {
arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + tsz + offset)); // relocation location
arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + headersize + offset)); // relocation location
arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target
memset(&s->s->buf[reloc_offset + tsz + offset], 0, sizeof(fld)); // relocation offset (none)
memset(&s->s->buf[reloc_offset + headersize + offset], 0, sizeof(fld)); // relocation offset (none)
}
else {
assert(*(jl_value_t**)&s->s->buf[reloc_offset + tsz + offset] == NULL);
assert(*(jl_value_t**)&s->s->buf[reloc_offset + headersize + offset] == NULL);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion test/arrayops.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2660,7 +2660,7 @@ let TT = Union{UInt8, Int8}
resize!(b, 1)
@assert pointer(a) == pa
@assert pointer(b) == pb
unsafe_store!(pa, 0x1, 2) # reset a[2] to 1
unsafe_store!(Ptr{UInt8}(pa), 0x1, 2) # reset a[2] to 1
@test length(a) == length(b) == 1
@test a[1] == b[1] == 0x0
@test a == b
Expand Down
12 changes: 10 additions & 2 deletions test/cmdlineargs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,11 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
rm(memfile)
@test popfirst!(got) == " 0 g(x) = x + 123456"
@test popfirst!(got) == " - function f(x)"
@test popfirst!(got) == " 80 []"
if Sys.WORD_SIZE == 64
@test popfirst!(got) == " 48 []"
else
@test popfirst!(got) == " 32 []"
end
if Sys.WORD_SIZE == 64
# P64 pools with 64 bit tags
@test popfirst!(got) == " 16 Base.invokelatest(g, 0)"
Expand All @@ -337,7 +341,11 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
@test popfirst!(got) == " 8 Base.invokelatest(g, 0)"
@test popfirst!(got) == " 32 Base.invokelatest(g, x)"
end
@test popfirst!(got) == " 80 []"
if Sys.WORD_SIZE == 64
@test popfirst!(got) == " 48 []"
else
@test popfirst!(got) == " 32 []"
end
@test popfirst!(got) == " - end"
@test popfirst!(got) == " - f(1.23)"
@test isempty(got) || got
Expand Down