120 lines
5.1 KiB
Plaintext
120 lines
5.1 KiB
Plaintext
#ifndef __EDSIGN_CUH
|
|
#define __EDSIGN_CUH
|
|
#include <ed25519.cuh>
|
|
#ifndef COMPACT_DISABLE_ED25519
|
|
#include <sha512.cuh>
|
|
#include <fprime.cuh>
|
|
#include <cuda_runtime.h>
|
|
#include <cstdio>
|
|
#include <cstring>
|
|
#define EXPANDED_SIZE 64
|
|
#define EDSIGN_SECRET_KEY_SIZE 32
|
|
#define EDSIGN_PUBLIC_KEY_SIZE 32
|
|
#define EDSIGN_SIGNATURE_SIZE 64
|
|
#define SHA512_HASH_SIZE 64
|
|
__device__ __constant__ uint8_t ed25519_order[FPRIME_SIZE] = {
|
|
0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58,
|
|
0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14,
|
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
|
|
};
|
|
__device__ __forceinline__ void expand_key(uint8_t* expanded, const uint8_t* secret) {
|
|
struct sha512_state s;
|
|
sha512_init(&s);
|
|
sha512_final(&s, secret, EDSIGN_SECRET_KEY_SIZE);
|
|
sha512_get(&s, expanded, 0, EXPANDED_SIZE);
|
|
ed25519_prepare(expanded);
|
|
}
|
|
__device__ __forceinline__ uint8_t upp(struct ed25519_pt* p, const uint8_t* packed) {
|
|
uint8_t x[F25519_SIZE], y[F25519_SIZE];
|
|
uint8_t ok = ed25519_try_unpack(x, y, packed);
|
|
ed25519_project(p, x, y);
|
|
return ok;
|
|
}
|
|
__device__ __forceinline__ void pp(uint8_t* packed, const struct ed25519_pt* p) {
|
|
uint8_t x[F25519_SIZE], y[F25519_SIZE];
|
|
ed25519_unproject(x, y, p);
|
|
ed25519_pack(packed, x, y);
|
|
}
|
|
__device__ __forceinline__ void sm_pack(uint8_t* r, const uint8_t* k) {
|
|
struct ed25519_pt p;
|
|
ed25519_smult(&p, &ed25519_base, k);
|
|
pp(r, &p);
|
|
}
|
|
__device__ __forceinline__ void edsign_sec_to_pub(uint8_t* pub, const uint8_t* secret) {
|
|
uint8_t expanded[EXPANDED_SIZE];
|
|
expand_key(expanded, secret);
|
|
sm_pack(pub, expanded);
|
|
}
|
|
__device__ __forceinline__ void hash_with_prefix(uint8_t* out_fp, uint8_t* init_block, unsigned int prefix_size, const uint8_t* message, size_t len) {
|
|
struct sha512_state s;
|
|
sha512_init(&s);
|
|
if (len < SHA512_BLOCK_SIZE && len + prefix_size < SHA512_BLOCK_SIZE) {
|
|
memcpy(init_block + prefix_size, message, len);
|
|
sha512_final(&s, init_block, len + prefix_size);
|
|
} else {
|
|
size_t i;
|
|
memcpy(init_block + prefix_size, message, SHA512_BLOCK_SIZE - prefix_size);
|
|
sha512_block(&s, init_block);
|
|
for (i = SHA512_BLOCK_SIZE - prefix_size; i + SHA512_BLOCK_SIZE <= len; i += SHA512_BLOCK_SIZE) {
|
|
sha512_block(&s, message + i);
|
|
}
|
|
sha512_final(&s, message + i, len - i + prefix_size);
|
|
}
|
|
sha512_get(&s, init_block, 0, SHA512_HASH_SIZE);
|
|
fprime_from_bytes(out_fp, init_block, SHA512_HASH_SIZE, ed25519_order);
|
|
}
|
|
__device__ __forceinline__ void generate_k(uint8_t* k, const uint8_t* kgen_key, const uint8_t* message, size_t len) {
|
|
uint8_t block[SHA512_BLOCK_SIZE];
|
|
memcpy(block, kgen_key, 32);
|
|
hash_with_prefix(k, block, 32, message, len);
|
|
}
|
|
__device__ __forceinline__ void hash_message(uint8_t* z, const uint8_t* r, const uint8_t* a, const uint8_t* m, size_t len) {
|
|
uint8_t block[SHA512_BLOCK_SIZE];
|
|
memcpy(block, r, 32);
|
|
memcpy(block + 32, a, 32);
|
|
hash_with_prefix(z, block, 64, m, len);
|
|
}
|
|
__device__ void edsign_sign(uint8_t* signature, const uint8_t* pub, const uint8_t* secret, const uint8_t* message, size_t len) {
|
|
uint8_t expanded[EXPANDED_SIZE];
|
|
uint8_t e[FPRIME_SIZE], s[FPRIME_SIZE], k[FPRIME_SIZE], z[FPRIME_SIZE];
|
|
expand_key(expanded, secret);
|
|
generate_k(k, expanded + 32, message, len);
|
|
sm_pack(signature, k);
|
|
hash_message(z, signature, pub, message, len);
|
|
fprime_from_bytes(e, expanded, 32, ed25519_order);
|
|
fprime_mul(s, z, e, ed25519_order);
|
|
fprime_add(s, k, ed25519_order);
|
|
memcpy(signature + 32, s, 32);
|
|
}
|
|
__device__ uint8_t edsign_verify(const uint8_t* signature, const uint8_t* pub, const uint8_t* message, size_t len) {
|
|
struct ed25519_pt p, q;
|
|
uint8_t lhs[F25519_SIZE], rhs[F25519_SIZE], z[FPRIME_SIZE];
|
|
uint8_t ok = 1;
|
|
hash_message(z, signature, pub, message, len);
|
|
sm_pack(lhs, signature + 32);
|
|
ok &= upp(&p, pub);
|
|
ed25519_smult(&p, &p, z);
|
|
ok &= upp(&q, signature);
|
|
ed25519_add(&p, &p, &q);
|
|
pp(rhs, &p);
|
|
return ok & f25519_eq(lhs, rhs);
|
|
}
|
|
__global__ void sign_kernel(uint8_t* d_signatures, const uint8_t* d_pubs, const uint8_t* d_secrets, const uint8_t* d_messages, const size_t* d_message_lens, int num_messages) {
|
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
if (idx >= num_messages) return;
|
|
uint8_t* signature = d_signatures + idx * EDSIGN_SIGNATURE_SIZE;
|
|
const uint8_t* pub = d_pubs + idx * EDSIGN_PUBLIC_KEY_SIZE;
|
|
const uint8_t* secret = d_secrets + idx * EDSIGN_SECRET_KEY_SIZE;
|
|
const uint8_t* message = d_messages;
|
|
size_t len = d_message_lens[idx];
|
|
edsign_sign(signature, pub, secret, message, len);
|
|
}
|
|
void launch_sign_kernel(uint8_t* d_signatures, const uint8_t* d_pubs, const uint8_t* d_secrets, const uint8_t* d_messages, const size_t* d_message_lens, int num_messages) {
|
|
int threadsPerBlock = 256;
|
|
int blocksPerGrid = (num_messages + threadsPerBlock - 1) / threadsPerBlock;
|
|
sign_kernel << <blocksPerGrid, threadsPerBlock >> > (d_signatures, d_pubs, d_secrets, d_messages, d_message_lens, num_messages);
|
|
cudaDeviceSynchronize();
|
|
}
|
|
#endif
|
|
#endif |