-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathsign.cu
146 lines (120 loc) · 4.95 KB
/
sign.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#include "ed25519.h"
#include "sha512.h"
#include "ge.h"
#include "sc.h"
#include "gpu_common.h"
#include "gpu_ctx.h"
static void __device__ __host__
ed25519_sign_device(unsigned char *signature,
const unsigned char *message,
size_t message_len,
const unsigned char *public_key,
const unsigned char *private_key) {
sha512_context hash;
unsigned char hram[64];
unsigned char r[64];
ge_p3 R;
sha512_init(&hash);
sha512_update(&hash, private_key + 32, 32);
sha512_update(&hash, message, message_len);
sha512_final(&hash, r);
sc_reduce(r);
ge_scalarmult_base(&R, r);
ge_p3_tobytes(signature, &R);
sha512_init(&hash);
sha512_update(&hash, signature, 32);
sha512_update(&hash, public_key, 32);
sha512_update(&hash, message, message_len);
sha512_final(&hash, hram);
sc_reduce(hram);
sc_muladd(signature + 32, hram, private_key, r);
}
void ed25519_sign(unsigned char *signature,
const unsigned char *message,
size_t message_len,
const unsigned char *public_key,
const unsigned char *private_key) {
ed25519_sign_device(signature, message, message_len, public_key, private_key);
}
__global__ void ed25519_sign_kernel(unsigned char* packets,
uint32_t message_size,
uint32_t* public_key_offsets,
uint32_t* private_key_offsets,
uint32_t* message_start_offsets,
uint32_t* message_lens,
size_t num_transactions,
uint8_t* out)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < num_transactions) {
uint32_t message_start_offset = message_start_offsets[i];
uint32_t public_key_offset = public_key_offsets[i];
uint32_t private_key_offset = private_key_offsets[i];
uint32_t message_len = message_lens[i];
ed25519_sign_device(&out[i * SIG_SIZE],
&packets[message_start_offset],
message_len,
&packets[public_key_offset],
&packets[private_key_offset]);
}
}
void ed25519_sign_many(const gpu_Elems* elems,
uint32_t num_elems,
uint32_t message_size,
uint32_t total_packets,
uint32_t total_signatures,
const uint32_t* message_lens,
const uint32_t* public_key_offsets,
const uint32_t* private_key_offsets,
const uint32_t* message_start_offsets,
uint8_t* signatures_out,
uint8_t use_non_default_stream
) {
int num_threads_per_block = 64;
int num_blocks = ROUND_UP_DIV(total_signatures, num_threads_per_block);
size_t sig_out_size = SIG_SIZE * total_signatures;
if (0 == total_packets) {
return;
}
uint32_t total_packets_size = total_packets * message_size;
LOG("signing %d packets sig_size: %zu message_size: %d\n",
total_packets, sig_out_size, message_size);
gpu_ctx_t* gpu_ctx = get_gpu_ctx();
verify_ctx_t* cur_ctx = &gpu_ctx->verify_ctx;
cudaStream_t stream = 0;
if (0 != use_non_default_stream) {
stream = gpu_ctx->stream;
}
setup_gpu_ctx(cur_ctx,
elems,
num_elems,
message_size,
total_packets,
total_packets_size,
total_signatures,
message_lens,
public_key_offsets,
private_key_offsets,
message_start_offsets,
sig_out_size,
stream
);
LOG("signing blocks: %d threads_per_block: %d\n", num_blocks, num_threads_per_block);
ed25519_sign_kernel<<<num_blocks, num_threads_per_block, 0, stream>>>
(cur_ctx->packets,
message_size,
cur_ctx->public_key_offsets,
cur_ctx->signature_offsets,
cur_ctx->message_start_offsets,
cur_ctx->message_lens,
total_signatures,
cur_ctx->out);
cudaError_t err = cudaMemcpyAsync(signatures_out, cur_ctx->out, sig_out_size, cudaMemcpyDeviceToHost, stream);
if (err != cudaSuccess) {
fprintf(stderr, "sign: cudaMemcpy(out) error: out = %p cur_ctx->out = %p size = %zu num: %d elems = %p\n",
signatures_out, cur_ctx->out, sig_out_size, num_elems, elems);
}
CUDA_CHK(err);
CUDA_CHK(cudaStreamSynchronize(stream));
release_gpu_ctx(gpu_ctx);
}