Skip to content

Commit

Permalink
Merge pull request #24014 from JuliaLang/yyc/align
Browse files Browse the repository at this point in the history
Some alignment fix in the runtime
  • Loading branch information
yuyichao authored Oct 7, 2017
2 parents b9a0d1d + 4ab04c5 commit c6899ec
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 67 deletions.
25 changes: 13 additions & 12 deletions src/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ static int bits_equal(void *a, void *b, int sz)
{
switch (sz) {
case 1: return *(int8_t*)a == *(int8_t*)b;
case 2: return *(int16_t*)a == *(int16_t*)b;
case 4: return *(int32_t*)a == *(int32_t*)b;
case 8: return *(int64_t*)a == *(int64_t*)b;
default: return memcmp(a, b, sz)==0;
// Let the compiler constant-fold the following.
case 2: return memcmp(a, b, 2) == 0;
case 4: return memcmp(a, b, 4) == 0;
case 8: return memcmp(a, b, 8) == 0;
default: return memcmp(a, b, sz) == 0;
}
}

Expand Down Expand Up @@ -198,22 +199,22 @@ JL_DLLEXPORT int jl_egal(jl_value_t *a, jl_value_t *b)

// object_id ------------------------------------------------------------------

static uintptr_t bits_hash(void *b, size_t sz)
static uintptr_t bits_hash(const void *b, size_t sz)
{
switch (sz) {
case 1: return int32hash(*(int8_t*)b);
case 2: return int32hash(*(int16_t*)b);
case 4: return int32hash(*(int32_t*)b);
case 1: return int32hash(*(const int8_t*)b);
case 2: return int32hash(jl_load_unaligned_i16(b));
case 4: return int32hash(jl_load_unaligned_i32(b));
#ifdef _P64
case 8: return int64hash(*(int64_t*)b);
case 8: return int64hash(jl_load_unaligned_i64(b));
#else
case 8: return int64to32hash(*(int64_t*)b);
case 8: return int64to32hash(jl_load_unaligned_i64(b));
#endif
default:
#ifdef _P64
return memhash((char*)b, sz);
return memhash((const char*)b, sz);
#else
return memhash32((char*)b, sz);
return memhash32((const char*)b, sz);
#endif
}
}
Expand Down
40 changes: 11 additions & 29 deletions src/crc32c.c
Original file line number Diff line number Diff line change
Expand Up @@ -229,24 +229,6 @@ CRC_TARGET static inline uint32_t crc32cb(uint32_t crc, uint32_t val)
asm("crc32cb %w0, %w1, %w2" : "=r"(res) : "r"(crc), "r"(val));
return res;
}
// Read a 64-bit value from `ptr`, which may sit at any byte address.
// The bytes are copied with memcpy (host byte order) instead of dereferencing
// a casted pointer, so no alignment is required of `ptr`.
static inline uint64_t unaligned_i64(const char *ptr)
{
    uint64_t word = 0;
    memcpy(&word, ptr, sizeof(word));
    return word;
}
// Read a 32-bit value from `ptr`, which may sit at any byte address.
// The bytes are copied with memcpy (host byte order) instead of dereferencing
// a casted pointer, so no alignment is required of `ptr`.
static inline uint32_t unaligned_i32(const char *ptr)
{
    uint32_t word = 0;
    memcpy(&word, ptr, sizeof(word));
    return word;
}
// Read a 16-bit value from `ptr`, which may sit at any byte address.
// The bytes are copied with memcpy (host byte order) instead of dereferencing
// a casted pointer, so no alignment is required of `ptr`.
static inline uint16_t unaligned_i16(const char *ptr)
{
    uint16_t word = 0;
    memcpy(&word, ptr, sizeof(word));
    return word;
}

// Modified from the SSE4.2 version.
CRC_TARGET static uint32_t crc32c_armv8(uint32_t crc, const char *buf, size_t len)
Expand All @@ -270,11 +252,11 @@ CRC_TARGET static uint32_t crc32c_armv8(uint32_t crc, const char *buf, size_t le
const char *buf2 = end;
const char *buf3 = end + LONG;
do {
crc = crc32cx(crc, unaligned_i64(buf));
crc = crc32cx(crc, jl_load_unaligned_i64(buf));
buf += 8;
crc1 = crc32cx(crc1, unaligned_i64(buf2));
crc1 = crc32cx(crc1, jl_load_unaligned_i64(buf2));
buf2 += 8;
crc2 = crc32cx(crc2, unaligned_i64(buf3));
crc2 = crc32cx(crc2, jl_load_unaligned_i64(buf3));
buf3 += 8;
} while (buf < end);
crc = crc32c_shift(crc32c_long, crc) ^ crc1;
Expand All @@ -292,11 +274,11 @@ CRC_TARGET static uint32_t crc32c_armv8(uint32_t crc, const char *buf, size_t le
const char *buf2 = end;
const char *buf3 = end + SHORT;
do {
crc = crc32cx(crc, unaligned_i64(buf));
crc = crc32cx(crc, jl_load_unaligned_i64(buf));
buf += 8;
crc1 = crc32cx(crc1, unaligned_i64(buf2));
crc1 = crc32cx(crc1, jl_load_unaligned_i64(buf2));
buf2 += 8;
crc2 = crc32cx(crc2, unaligned_i64(buf3));
crc2 = crc32cx(crc2, jl_load_unaligned_i64(buf3));
buf3 += 8;
} while (buf < end);
crc = crc32c_shift(crc32c_short, crc) ^ crc1;
Expand All @@ -310,9 +292,9 @@ CRC_TARGET static uint32_t crc32c_armv8(uint32_t crc, const char *buf, size_t le
const char *end = buf + SHORT;
const char *buf2 = end;
do {
crc = crc32cx(crc, unaligned_i64(buf));
crc = crc32cx(crc, jl_load_unaligned_i64(buf));
buf += 8;
crc1 = crc32cx(crc1, unaligned_i64(buf2));
crc1 = crc32cx(crc1, jl_load_unaligned_i64(buf2));
buf2 += 8;
} while (buf < end);
crc = crc32c_shift(crc32c_short, crc) ^ crc1;
Expand All @@ -324,15 +306,15 @@ CRC_TARGET static uint32_t crc32c_armv8(uint32_t crc, const char *buf, size_t le
block */
const char *end = buf + len - 8;
while (buf <= end) {
crc = crc32cx(crc, unaligned_i64(buf));
crc = crc32cx(crc, jl_load_unaligned_i64(buf));
buf += 8;
}
if (len & 4) {
crc = crc32cw(crc, unaligned_i32(buf));
crc = crc32cw(crc, jl_load_unaligned_i32(buf));
buf += 4;
}
if (len & 2) {
crc = crc32ch(crc, unaligned_i16(buf));
crc = crc32ch(crc, jl_load_unaligned_i16(buf));
buf += 2;
}
if (len & 1)
Expand Down
48 changes: 26 additions & 22 deletions src/datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -485,33 +485,34 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *

// bits constructors ----------------------------------------------------------

typedef struct {
int64_t a;
int64_t b;
} bits128_t;

// TODO: do we care that this has invalid alignment assumptions?
JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data)
{
// data may not have the alignment required by the data type.
jl_ptls_t ptls = jl_get_ptls_states();
assert(jl_is_datatype(dt));
jl_datatype_t *bt = (jl_datatype_t*)dt;
size_t nb = jl_datatype_size(bt);
if (nb == 0) return jl_new_struct_uninit(bt); // returns bt->instance
if (bt == jl_uint8_type) return jl_box_uint8(*(uint8_t*)data);
if (bt == jl_int64_type) return jl_box_int64(*(int64_t*)data);
if (bt == jl_int64_type) return jl_box_int64(jl_load_unaligned_i64(data));
if (bt == jl_bool_type) return (*(int8_t*)data) ? jl_true : jl_false;
if (bt == jl_int32_type) return jl_box_int32(*(int32_t*)data);
if (bt == jl_float64_type) return jl_box_float64(*(double*)data);
if (bt == jl_int32_type) return jl_box_int32(jl_load_unaligned_i32(data));
if (bt == jl_float64_type) {
double f;
memcpy(&f, data, 8);
return jl_box_float64(f);
}

jl_value_t *v = jl_gc_alloc(ptls, nb, bt);
switch (nb) {
case 1: *(int8_t*) jl_data_ptr(v) = *(int8_t*)data; break;
case 2: *(int16_t*) jl_data_ptr(v) = *(int16_t*)data; break;
case 4: *(int32_t*) jl_data_ptr(v) = *(int32_t*)data; break;
case 8: *(int64_t*) jl_data_ptr(v) = *(int64_t*)data; break;
case 16: *(bits128_t*)jl_data_ptr(v) = *(bits128_t*)data; break;
default: memcpy(jl_data_ptr(v), data, nb);
case 1: *(uint8_t*) v = *(uint8_t*)data; break;
case 2: *(uint16_t*)v = jl_load_unaligned_i16(data); break;
case 4: *(uint32_t*)v = jl_load_unaligned_i32(data); break;
case 8: *(uint64_t*)v = jl_load_unaligned_i64(data); break;
case 16:
memcpy(jl_assume_aligned(v, 16), data, 16);
break;
default: memcpy(v, data, nb);
}
return v;
}
Expand All @@ -521,21 +522,24 @@ JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_value_t *bt)
{
uint64_t data = 0xffffffffffffffffULL;
jl_value_t *v = jl_gc_alloc(jl_get_ptls_states(), sizeof(size_t), bt);
memcpy(jl_data_ptr(v), &data, sizeof(size_t));
memcpy(v, &data, sizeof(size_t));
return v;
}

void jl_assign_bits(void *dest, jl_value_t *bits)
{
// bits must be a heap box.
size_t nb = jl_datatype_size(jl_typeof(bits));
if (nb == 0) return;
switch (nb) {
case 1: *(int8_t*)dest = *(int8_t*)jl_data_ptr(bits); break;
case 2: *(int16_t*)dest = *(int16_t*)jl_data_ptr(bits); break;
case 4: *(int32_t*)dest = *(int32_t*)jl_data_ptr(bits); break;
case 8: *(int64_t*)dest = *(int64_t*)jl_data_ptr(bits); break;
case 16: *(bits128_t*)dest = *(bits128_t*)jl_data_ptr(bits); break;
default: memcpy(dest, jl_data_ptr(bits), nb);
case 1: *(uint8_t*)dest = *(uint8_t*)bits; break;
case 2: jl_store_unaligned_i16(dest, *(uint16_t*)bits); break;
case 4: jl_store_unaligned_i32(dest, *(uint32_t*)bits); break;
case 8: jl_store_unaligned_i64(dest, *(uint64_t*)bits); break;
case 16:
memcpy(dest, jl_assume_aligned(bits, 16), 16);
break;
default: memcpy(dest, bits, nb);
}
}

Expand Down
58 changes: 58 additions & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,64 @@ void jl_register_fptrs(uint64_t sysimage_base, const char *base, const int32_t *

extern arraylist_t partial_inst;

// Load a 64-bit value from `ptr` without requiring `ptr` to be aligned.
// The bytes are copied with memcpy, so they are interpreted in host order.
STATIC_INLINE uint64_t jl_load_unaligned_i64(const void *ptr)
{
    uint64_t word = 0;
    memcpy(&word, ptr, sizeof(word));
    return word;
}
// Load a 32-bit value from `ptr` without requiring `ptr` to be aligned.
// The bytes are copied with memcpy, so they are interpreted in host order.
STATIC_INLINE uint32_t jl_load_unaligned_i32(const void *ptr)
{
    uint32_t word = 0;
    memcpy(&word, ptr, sizeof(word));
    return word;
}
// Load a 16-bit value from `ptr` without requiring `ptr` to be aligned.
// The bytes are copied with memcpy, so they are interpreted in host order.
STATIC_INLINE uint16_t jl_load_unaligned_i16(const void *ptr)
{
    uint16_t word = 0;
    memcpy(&word, ptr, sizeof(word));
    return word;
}

// Store the 64-bit `val` at `ptr` without requiring `ptr` to be aligned.
// The value's bytes are written with memcpy, i.e. in host order.
STATIC_INLINE void jl_store_unaligned_i64(void *ptr, uint64_t val)
{
    const uint64_t src = val;
    memcpy(ptr, &src, sizeof(src));
}
// Store the 32-bit `val` at `ptr` without requiring `ptr` to be aligned.
// The value's bytes are written with memcpy, i.e. in host order.
STATIC_INLINE void jl_store_unaligned_i32(void *ptr, uint32_t val)
{
    const uint32_t src = val;
    memcpy(ptr, &src, sizeof(src));
}
// Store the 16-bit `val` at `ptr` without requiring `ptr` to be aligned.
// The value's bytes are written with memcpy, i.e. in host order.
STATIC_INLINE void jl_store_unaligned_i16(void *ptr, uint16_t val)
{
    const uint16_t src = val;
    memcpy(ptr, &src, sizeof(src));
}

// jl_assume_aligned(ptr, align): yields `ptr` unchanged while hinting to the
// compiler that the address is a multiple of `align`. This is an optimizer
// hint supplied by the caller, not a runtime check. Which form is used depends
// on what the compiler offers; every variant expands/evaluates `ptr` only once.
// (`jl_has_builtin` and `jl_assume` are defined elsewhere, not in this block.)
#if jl_has_builtin(__builtin_assume_aligned) || defined(_COMPILER_GCC_)
#define jl_assume_aligned(ptr, align) __builtin_assume_aligned(ptr, align)
#elif defined(_COMPILER_INTEL_)
#define jl_assume_aligned(ptr, align) (__extension__ ({         \
                __typeof__(ptr) ptr_ = (ptr);                   \
                __assume_aligned(ptr_, align);                  \
                ptr_;                                           \
            }))
#elif defined(__GNUC__)
// The alignment test uses the saved copy `ptr_` rather than the macro
// argument, so an argument with side effects is not expanded a second time
// and operator precedence inside the argument cannot change the cast.
#define jl_assume_aligned(ptr, align) (__extension__ ({         \
                __typeof__(ptr) ptr_ = (ptr);                   \
                jl_assume(((uintptr_t)ptr_) % (align) == 0);    \
                ptr_;                                           \
            }))
#elif defined(__cplusplus)
// NOTE(review): the `}` under `#ifdef __cplusplus` right after this block
// looks like the end of an extern "C" region; a template cannot have C
// linkage, so this branch may need `extern "C++"` -- confirm on a compiler
// that actually reaches it.
template<typename T>
static inline T
jl_assume_aligned(T ptr, unsigned align)
{
    (void)jl_assume(((uintptr_t)ptr) % align == 0);
    return ptr;
}
#else
// No known way to express the hint: pass the pointer through untouched.
#define jl_assume_aligned(ptr, align) (ptr)
#endif

#ifdef __cplusplus
}
#endif
Expand Down
7 changes: 3 additions & 4 deletions src/runtime_intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ JL_DLLEXPORT jl_value_t *jl_pointerref(jl_value_t *p, jl_value_t *i, jl_value_t
JL_TYPECHK(pointerref, pointer, p);
JL_TYPECHK(pointerref, long, i)
JL_TYPECHK(pointerref, long, align);
// TODO: alignment
jl_value_t *ety = jl_tparam0(jl_typeof(p));
if (ety == (jl_value_t*)jl_any_type) {
jl_value_t **pp = (jl_value_t**)(jl_unbox_long(p) + (jl_unbox_long(i)-1)*sizeof(void*));
Expand All @@ -58,7 +57,6 @@ JL_DLLEXPORT jl_value_t *jl_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t
JL_TYPECHK(pointerset, pointer, p);
JL_TYPECHK(pointerset, long, i);
JL_TYPECHK(pointerref, long, align);
// TODO: alignment
jl_value_t *ety = jl_tparam0(jl_typeof(p));
if (ety == (jl_value_t*)jl_any_type) {
jl_value_t **pp = (jl_value_t**)(jl_unbox_long(p) + (jl_unbox_long(i)-1)*sizeof(void*));
Expand All @@ -67,11 +65,12 @@ JL_DLLEXPORT jl_value_t *jl_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t
else {
if (!jl_is_datatype(ety))
jl_error("pointerset: invalid pointer");
size_t nb = LLT_ALIGN(jl_datatype_size(ety), jl_datatype_align(ety));
size_t elsz = jl_datatype_size(ety);
size_t nb = LLT_ALIGN(elsz, jl_datatype_align(ety));
char *pp = (char*)jl_unbox_long(p) + (jl_unbox_long(i)-1)*nb;
if (jl_typeof(x) != ety)
jl_error("pointerset: type mismatch in assign");
jl_assign_bits(pp, x);
memcpy(pp, x, elsz);
}
return p;
}
Expand Down

0 comments on commit c6899ec

Please sign in to comment.