diff --git a/src/array.c b/src/array.c index 47eb5e782a3be..778fb6d1e677c 100644 --- a/src/array.c +++ b/src/array.c @@ -114,20 +114,23 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims, } int ndimwords = jl_array_ndimwords(ndims); - int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT); + int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); if (tot <= ARRAY_INLINE_NBYTES) { - if (isunboxed && elsz >= 4) - tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align data area + // align data area + if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD) + tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); + else if (isunboxed && elsz >= 4) + tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); size_t doffs = tsz; tsz += tot; - tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align whole object + // jl_array_t is large enough that objects will always be aligned 16 a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); + assert(((size_t)a & 15) == 0); // No allocation or safepoint allowed after this a->flags.how = 0; data = (char*)a + doffs; } else { - tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); // align whole object data = jl_gc_managed_malloc(tot); // Allocate the Array **after** allocating the data // to make sure the array is still young @@ -223,7 +226,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data, assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype))); int ndimwords = jl_array_ndimwords(ndims); - int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT); + int tsz = sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*); a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); // No allocation or safepoint allowed after this a->flags.pooled = tsz <= GC_MAX_SZCLASS; @@ -304,7 +307,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str) jl_array_t *a; int ndimwords = jl_array_ndimwords(1); - int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT); + int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*); a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type); a->flags.pooled = tsz <= GC_MAX_SZCLASS; a->flags.ndims = 1; @@ -351,7 +354,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); int ndimwords = jl_array_ndimwords(1); - int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT); + int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); // No allocation or safepoint allowed after this a->flags.pooled = tsz <= GC_MAX_SZCLASS; @@ -418,7 +421,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); int ndimwords = jl_array_ndimwords(ndims); - int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT); + int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t); a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype); // No allocation or safepoint allowed after this a->flags.pooled = tsz <= GC_MAX_SZCLASS; diff --git a/src/julia_internal.h b/src/julia_internal.h index c9ff2716530eb..c22f8846de52a 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -341,6 +341,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) // JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide #define JL_HEAP_ALIGNMENT JL_SMALL_BYTE_ALIGNMENT #define GC_MAX_SZCLASS (2032-sizeof(void*)) +static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) { diff --git a/src/options.h b/src/options.h index 5ea220900b5eb..bb56e0c41c7c0 100644 --- a/src/options.h +++ b/src/options.h @@ -12,10 +12,14 @@ // object layout options ------------------------------------------------------ -// how much space we're willing to waste if an array outgrows its -// original object +// The data for an array this size or below will be allocated within the +// Array object. If the array outgrows that space, it will be wasted. #define ARRAY_INLINE_NBYTES (2048*sizeof(void*)) +// Arrays at least this size will get larger alignment (JL_CACHE_BYTE_ALIGNMENT). +// Must be bigger than GC_MAX_SZCLASS. +#define ARRAY_CACHE_ALIGN_THRESHOLD 2048 + // codegen options ------------------------------------------------------------ // (Experimental) Use MCJIT ELF, even where it's not the native format diff --git a/src/staticdata.c b/src/staticdata.c index 2425e8bc44450..d70e35542de2a 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -782,14 +782,23 @@ static void jl_write_values(jl_serializer_state *s) #define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes) jl_array_t *ar = (jl_array_t*)v; jl_value_t *et = jl_tparam0(jl_typeof(v)); + size_t alen = jl_array_len(ar); + size_t datasize = alen * ar->elsize; + size_t tot = datasize; + int isbitsunion = jl_array_isbitsunion(ar); + if (isbitsunion) + tot += alen; + else if (ar->elsize == 1) + tot += 1; int ndimwords = jl_array_ndimwords(ar->flags.ndims); - size_t tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT); + size_t headersize = sizeof(jl_array_t) + ndimwords*sizeof(size_t); // copy header - ios_write(s->s, (char*)v, tsz); + ios_write(s->s, (char*)v, headersize); + size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT; + if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD) + alignment_amt = JL_CACHE_BYTE_ALIGNMENT; // make some header modifications in-place jl_array_t *newa = (jl_array_t*)&s->s->buf[reloc_offset]; - size_t alen = jl_array_len(ar); - size_t tot = alen * ar->elsize; if (newa->flags.ndims == 1) newa->maxsize = alen; newa->offset = 0; @@ -799,8 +808,7 @@ static void jl_write_values(jl_serializer_state *s) // write data if (!ar->flags.ptrarray && !ar->flags.hasptr) { - uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), 16); - // realign stream to max(data-align(array), sizeof(void*)) + uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt); write_padding(s->const_data, data - ios_pos(s->const_data)); // write data and relocations newa->data = NULL; // relocation offset @@ -815,22 +823,27 @@ static void jl_write_values(jl_serializer_state *s) write_pointer(s->const_data); } else { - int isbitsunion = jl_array_isbitsunion(ar); - if (ar->elsize == 1 && !isbitsunion) - tot += 1; - ios_write(s->const_data, (char*)jl_array_data(ar), tot); - if (isbitsunion) + if (isbitsunion) { + ios_write(s->const_data, (char*)jl_array_data(ar), datasize); ios_write(s->const_data, jl_array_typetagdata(ar), alen); + } + else { + ios_write(s->const_data, (char*)jl_array_data(ar), tot); + } } } else { - newa->data = (void*)tsz; // relocation offset + size_t data = LLT_ALIGN(ios_pos(s->s), alignment_amt); + size_t padding_amt = data - ios_pos(s->s); + write_padding(s->s, padding_amt); + headersize += padding_amt; + newa->data = (void*)headersize; // relocation offset arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target if (ar->flags.hasptr) { // copy all of the data first const char *data = (const char*)jl_array_data(ar); - ios_write(s->s, data, tot); + ios_write(s->s, data, datasize); // the rewrite all of the embedded pointers to null+relocation uint16_t elsz = ar->elsize; size_t j, np = ((jl_datatype_t*)et)->layout->npointers; @@ -840,12 +853,12 @@ static void jl_write_values(jl_serializer_state *s) size_t offset = i * elsz + jl_ptr_offset(((jl_datatype_t*)et), j) * sizeof(jl_value_t*); jl_value_t *fld = *(jl_value_t**)&data[offset]; if (fld != NULL) { - arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + tsz + offset)); // relocation location + arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + headersize + offset)); // relocation location arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target - memset(&s->s->buf[reloc_offset + tsz + offset], 0, sizeof(fld)); // relocation offset (none) + memset(&s->s->buf[reloc_offset + headersize + offset], 0, sizeof(fld)); // relocation offset (none) } else { - assert(*(jl_value_t**)&s->s->buf[reloc_offset + tsz + offset] == NULL); + assert(*(jl_value_t**)&s->s->buf[reloc_offset + headersize + offset] == NULL); } } } diff --git a/test/arrayops.jl b/test/arrayops.jl index 27e366f1ce3cc..7823fce7a6175 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -2660,7 +2660,7 @@ let TT = Union{UInt8, Int8} resize!(b, 1) @assert pointer(a) == pa @assert pointer(b) == pb - unsafe_store!(pa, 0x1, 2) # reset a[2] to 1 + unsafe_store!(Ptr{UInt8}(pa), 0x1, 2) # reset a[2] to 1 @test length(a) == length(b) == 1 @test a[1] == b[1] == 0x0 @test a == b diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl index 13e7251ba3a95..fbf60acdbc848 100644 --- a/test/cmdlineargs.jl +++ b/test/cmdlineargs.jl @@ -324,7 +324,11 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` rm(memfile) @test popfirst!(got) == " 0 g(x) = x + 123456" @test popfirst!(got) == " - function f(x)" - @test popfirst!(got) == " 80 []" + if Sys.WORD_SIZE == 64 + @test popfirst!(got) == " 48 []" + else + @test popfirst!(got) == " 32 []" + end if Sys.WORD_SIZE == 64 # P64 pools with 64 bit tags @test popfirst!(got) == " 16 Base.invokelatest(g, 0)" @@ -337,7 +341,11 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no` @test popfirst!(got) == " 8 Base.invokelatest(g, 0)" @test popfirst!(got) == " 32 Base.invokelatest(g, x)" end - @test popfirst!(got) == " 80 []" + if Sys.WORD_SIZE == 64 + @test popfirst!(got) == " 48 []" + else + @test popfirst!(got) == " 32 []" + end @test popfirst!(got) == " - end" @test popfirst!(got) == " - f(1.23)" @test isempty(got) || got