yggm/libs/edsign.cuh
2025-03-13 04:09:27 +05:00

120 lines
5.1 KiB
Plaintext

#ifndef __EDSIGN_CUH
#define __EDSIGN_CUH
#include <ed25519.cuh>
#ifndef COMPACT_DISABLE_ED25519
#include <sha512.cuh>
#include <fprime.cuh>
#include <cuda_runtime.h>
#include <cstdio>
#include <cstring>
#define EXPANDED_SIZE 64
#define EDSIGN_SECRET_KEY_SIZE 32
#define EDSIGN_PUBLIC_KEY_SIZE 32
#define EDSIGN_SIGNATURE_SIZE 64
#define SHA512_HASH_SIZE 64
__device__ __constant__ uint8_t ed25519_order[FPRIME_SIZE] = {
0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58,
0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
};
__device__ __forceinline__ void expand_key(uint8_t* expanded, const uint8_t* secret) {
struct sha512_state s;
sha512_init(&s);
sha512_final(&s, secret, EDSIGN_SECRET_KEY_SIZE);
sha512_get(&s, expanded, 0, EXPANDED_SIZE);
ed25519_prepare(expanded);
}
__device__ __forceinline__ uint8_t upp(struct ed25519_pt* p, const uint8_t* packed) {
uint8_t x[F25519_SIZE], y[F25519_SIZE];
uint8_t ok = ed25519_try_unpack(x, y, packed);
ed25519_project(p, x, y);
return ok;
}
__device__ __forceinline__ void pp(uint8_t* packed, const struct ed25519_pt* p) {
uint8_t x[F25519_SIZE], y[F25519_SIZE];
ed25519_unproject(x, y, p);
ed25519_pack(packed, x, y);
}
__device__ __forceinline__ void sm_pack(uint8_t* r, const uint8_t* k) {
struct ed25519_pt p;
ed25519_smult(&p, &ed25519_base, k);
pp(r, &p);
}
__device__ __forceinline__ void edsign_sec_to_pub(uint8_t* pub, const uint8_t* secret) {
uint8_t expanded[EXPANDED_SIZE];
expand_key(expanded, secret);
sm_pack(pub, expanded);
}
__device__ __forceinline__ void hash_with_prefix(uint8_t* out_fp, uint8_t* init_block, unsigned int prefix_size, const uint8_t* message, size_t len) {
struct sha512_state s;
sha512_init(&s);
if (len < SHA512_BLOCK_SIZE && len + prefix_size < SHA512_BLOCK_SIZE) {
memcpy(init_block + prefix_size, message, len);
sha512_final(&s, init_block, len + prefix_size);
} else {
size_t i;
memcpy(init_block + prefix_size, message, SHA512_BLOCK_SIZE - prefix_size);
sha512_block(&s, init_block);
for (i = SHA512_BLOCK_SIZE - prefix_size; i + SHA512_BLOCK_SIZE <= len; i += SHA512_BLOCK_SIZE) {
sha512_block(&s, message + i);
}
sha512_final(&s, message + i, len - i + prefix_size);
}
sha512_get(&s, init_block, 0, SHA512_HASH_SIZE);
fprime_from_bytes(out_fp, init_block, SHA512_HASH_SIZE, ed25519_order);
}
__device__ __forceinline__ void generate_k(uint8_t* k, const uint8_t* kgen_key, const uint8_t* message, size_t len) {
uint8_t block[SHA512_BLOCK_SIZE];
memcpy(block, kgen_key, 32);
hash_with_prefix(k, block, 32, message, len);
}
__device__ __forceinline__ void hash_message(uint8_t* z, const uint8_t* r, const uint8_t* a, const uint8_t* m, size_t len) {
uint8_t block[SHA512_BLOCK_SIZE];
memcpy(block, r, 32);
memcpy(block + 32, a, 32);
hash_with_prefix(z, block, 64, m, len);
}
__device__ void edsign_sign(uint8_t* signature, const uint8_t* pub, const uint8_t* secret, const uint8_t* message, size_t len) {
uint8_t expanded[EXPANDED_SIZE];
uint8_t e[FPRIME_SIZE], s[FPRIME_SIZE], k[FPRIME_SIZE], z[FPRIME_SIZE];
expand_key(expanded, secret);
generate_k(k, expanded + 32, message, len);
sm_pack(signature, k);
hash_message(z, signature, pub, message, len);
fprime_from_bytes(e, expanded, 32, ed25519_order);
fprime_mul(s, z, e, ed25519_order);
fprime_add(s, k, ed25519_order);
memcpy(signature + 32, s, 32);
}
__device__ uint8_t edsign_verify(const uint8_t* signature, const uint8_t* pub, const uint8_t* message, size_t len) {
struct ed25519_pt p, q;
uint8_t lhs[F25519_SIZE], rhs[F25519_SIZE], z[FPRIME_SIZE];
uint8_t ok = 1;
hash_message(z, signature, pub, message, len);
sm_pack(lhs, signature + 32);
ok &= upp(&p, pub);
ed25519_smult(&p, &p, z);
ok &= upp(&q, signature);
ed25519_add(&p, &p, &q);
pp(rhs, &p);
return ok & f25519_eq(lhs, rhs);
}
__global__ void sign_kernel(uint8_t* d_signatures, const uint8_t* d_pubs, const uint8_t* d_secrets, const uint8_t* d_messages, const size_t* d_message_lens, int num_messages) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx >= num_messages) return;
uint8_t* signature = d_signatures + idx * EDSIGN_SIGNATURE_SIZE;
const uint8_t* pub = d_pubs + idx * EDSIGN_PUBLIC_KEY_SIZE;
const uint8_t* secret = d_secrets + idx * EDSIGN_SECRET_KEY_SIZE;
const uint8_t* message = d_messages;
size_t len = d_message_lens[idx];
edsign_sign(signature, pub, secret, message, len);
}
void launch_sign_kernel(uint8_t* d_signatures, const uint8_t* d_pubs, const uint8_t* d_secrets, const uint8_t* d_messages, const size_t* d_message_lens, int num_messages) {
int threadsPerBlock = 256;
int blocksPerGrid = (num_messages + threadsPerBlock - 1) / threadsPerBlock;
sign_kernel << <blocksPerGrid, threadsPerBlock >> > (d_signatures, d_pubs, d_secrets, d_messages, d_message_lens, num_messages);
cudaDeviceSynchronize();
}
#endif
#endif