diff --git a/src/RandomX/aes_cuda.hpp b/src/RandomX/aes_cuda.hpp index 65b54e2..af0f8ec 100644 --- a/src/RandomX/aes_cuda.hpp +++ b/src/RandomX/aes_cuda.hpp @@ -793,12 +793,11 @@ __global__ void hashAes1Rx4(const void* input, void* hash, uint32_t batch_size) #define ITER(m) \ { \ - uint32_t k[4], y[4]; \ - *(uint4*)(k) = p[m * 4]; \ - y[0] = t0[get_byte(x[0], 0)] ^ t1[get_byte(x[1], s1)] ^ t2[get_byte(x[2], 16)] ^ t3[get_byte(x[3], s3)] ^ k[0]; \ - y[1] = t0[get_byte(x[1], 0)] ^ t1[get_byte(x[2], s1)] ^ t2[get_byte(x[3], 16)] ^ t3[get_byte(x[0], s3)] ^ k[1]; \ - y[2] = t0[get_byte(x[2], 0)] ^ t1[get_byte(x[3], s1)] ^ t2[get_byte(x[0], 16)] ^ t3[get_byte(x[1], s3)] ^ k[2]; \ - y[3] = t0[get_byte(x[3], 0)] ^ t1[get_byte(x[0], s1)] ^ t2[get_byte(x[1], 16)] ^ t3[get_byte(x[2], s3)] ^ k[3]; \ + uint32_t y[4]; \ + y[0] = t0[get_byte(x[0], 0)] ^ t1[get_byte(x[1], s1)] ^ t2[get_byte(x[2], 16)] ^ t3[get_byte(x[3], s3)] ^ p[m * 4].x; \ + y[1] = t0[get_byte(x[1], 0)] ^ t1[get_byte(x[2], s1)] ^ t2[get_byte(x[3], 16)] ^ t3[get_byte(x[0], s3)] ^ p[m * 4].y; \ + y[2] = t0[get_byte(x[2], 0)] ^ t1[get_byte(x[3], s1)] ^ t2[get_byte(x[0], 16)] ^ t3[get_byte(x[1], s3)] ^ p[m * 4].z; \ + y[3] = t0[get_byte(x[3], 0)] ^ t1[get_byte(x[0], s1)] ^ t2[get_byte(x[1], 16)] ^ t3[get_byte(x[2], s3)] ^ p[m * 4].w; \ x[0] = y[0]; \ x[1] = y[1]; \ x[2] = y[2]; \