#ifndef __EDSIGN_CUH
#define __EDSIGN_CUH

/*
 * Ed25519 signing and verification as CUDA device code, plus a batch-signing
 * kernel and its host-side launcher.
 *
 * NOTE(review): the header names on the #include lines were missing from the
 * copy of this file under review. The standard and CUDA runtime headers below
 * are the ones this file visibly needs (uint8_t, size_t, memcpy, fprintf,
 * cudaDeviceSynchronize). The three project headers are a best-guess
 * reconstruction of whatever declares sha512_*, ed25519_*, f25519_* and
 * fprime_* -- confirm their names and paths against the repository.
 */
#include <stdint.h>

#ifndef COMPACT_DISABLE_ED25519

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#include <cuda_runtime.h>

#include <ed25519.cuh> /* NOTE(review): reconstructed name -- confirm */
#include <fprime.cuh>  /* NOTE(review): reconstructed name -- confirm */
#include <sha512.cuh>  /* NOTE(review): reconstructed name -- confirm */

#define EXPANDED_SIZE 64
#define EDSIGN_SECRET_KEY_SIZE 32
#define EDSIGN_PUBLIC_KEY_SIZE 32
#define EDSIGN_SIGNATURE_SIZE 64
#define SHA512_HASH_SIZE 64

/* Modulus used for all fprime_* scalar arithmetic in this file: the order of
 * the Ed25519 base point, stored least-significant byte first. */
__device__ __constant__ uint8_t ed25519_order[FPRIME_SIZE] = {
    0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58,
    0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
};

/*
 * Expand a 32-byte secret key into EXPANDED_SIZE bytes.
 *
 * expanded: output buffer of EXPANDED_SIZE bytes; receives SHA-512(secret),
 *           then is passed through ed25519_prepare().
 * secret:   EDSIGN_SECRET_KEY_SIZE input bytes.
 *
 * edsign_sign() uses the first 32 bytes as the signing scalar and the last
 * 32 bytes as the nonce-derivation key.
 */
__device__ __forceinline__ void expand_key(uint8_t* expanded, const uint8_t* secret)
{
    struct sha512_state s;

    sha512_init(&s);
    sha512_final(&s, secret, EDSIGN_SECRET_KEY_SIZE);
    sha512_get(&s, expanded, 0, EXPANDED_SIZE);
    ed25519_prepare(expanded);
}

/*
 * Unpack a packed point into projective form.
 *
 * Returns the status from ed25519_try_unpack(). The point is projected
 * regardless of that status, so callers must check the return value.
 */
__device__ __forceinline__ uint8_t upp(struct ed25519_pt* p, const uint8_t* packed)
{
    uint8_t x[F25519_SIZE];
    uint8_t y[F25519_SIZE];
    uint8_t ok = ed25519_try_unpack(x, y, packed);

    ed25519_project(p, x, y);
    return ok;
}

/* Pack a projective point: unproject to (x, y), then pack into `packed`. */
__device__ __forceinline__ void pp(uint8_t* packed, const struct ed25519_pt* p)
{
    uint8_t x[F25519_SIZE];
    uint8_t y[F25519_SIZE];

    ed25519_unproject(x, y, p);
    ed25519_pack(packed, x, y);
}

/* Multiply the base point by scalar `k` and write the packed result to `r`. */
__device__ __forceinline__ void sm_pack(uint8_t* r, const uint8_t* k)
{
    struct ed25519_pt p;

    ed25519_smult(&p, &ed25519_base, k);
    pp(r, &p);
}

/*
 * Derive the public key for a secret key.
 *
 * pub:    output, EDSIGN_PUBLIC_KEY_SIZE bytes.
 * secret: input, EDSIGN_SECRET_KEY_SIZE bytes.
 */
__device__ __forceinline__ void edsign_sec_to_pub(uint8_t* pub, const uint8_t* secret)
{
    uint8_t expanded[EXPANDED_SIZE];

    expand_key(expanded, secret);
    sm_pack(pub, expanded);
}

/*
 * Compute SHA-512(prefix || message) and reduce it modulo ed25519_order.
 *
 * out_fp:      output scalar (FPRIME_SIZE bytes).
 * init_block:  scratch buffer of SHA512_BLOCK_SIZE bytes whose first
 *              `prefix_size` bytes already hold the prefix. It is overwritten:
 *              first with the leading block of the hash input, then with the
 *              SHA512_HASH_SIZE-byte digest.
 * prefix_size: number of prefix bytes in init_block (callers here use 32, 64).
 * message/len: the message bytes appended after the prefix.
 */
__device__ __forceinline__ void hash_with_prefix(uint8_t* out_fp,
                                                 uint8_t* init_block,
                                                 unsigned int prefix_size,
                                                 const uint8_t* message,
                                                 size_t len)
{
    struct sha512_state s;

    sha512_init(&s);

    if (len < SHA512_BLOCK_SIZE && len + prefix_size < SHA512_BLOCK_SIZE) {
        /* Prefix and message fit in one partial block. */
        memcpy(init_block + prefix_size, message, len);
        sha512_final(&s, init_block, len + prefix_size);
    } else {
        size_t i;

        /* First full block: the prefix followed by the head of the message. */
        memcpy(init_block + prefix_size, message, SHA512_BLOCK_SIZE - prefix_size);
        sha512_block(&s, init_block);

        /* Remaining full blocks are hashed straight from the message. */
        for (i = SHA512_BLOCK_SIZE - prefix_size;
             i + SHA512_BLOCK_SIZE <= len;
             i += SHA512_BLOCK_SIZE) {
            sha512_block(&s, message + i);
        }

        /*
         * Finish with the TOTAL number of bytes hashed, matching the other two
         * sha512_final() call sites in this file (assumes sha512_final derives
         * the tail length and the length padding from that total -- confirm
         * against its definition). The previous argument,
         * `len - i + prefix_size`, is neither the total nor the number of
         * bytes remaining; e.g. prefix 64 with len 64 made it 64 and pulled
         * 64 bytes from beyond the end of the message.
         */
        sha512_final(&s, message + i, len + prefix_size);
    }

    sha512_get(&s, init_block, 0, SHA512_HASH_SIZE);
    fprime_from_bytes(out_fp, init_block, SHA512_HASH_SIZE, ed25519_order);
}

/* Derive the per-signature scalar k = H(kgen_key || message) mod order. */
__device__ __forceinline__ void generate_k(uint8_t* k,
                                           const uint8_t* kgen_key,
                                           const uint8_t* message,
                                           size_t len)
{
    uint8_t block[SHA512_BLOCK_SIZE];

    memcpy(block, kgen_key, 32);
    hash_with_prefix(k, block, 32, message, len);
}

/* Compute z = H(r || a || m) mod order, with r and a each 32 bytes. */
__device__ __forceinline__ void hash_message(uint8_t* z,
                                             const uint8_t* r,
                                             const uint8_t* a,
                                             const uint8_t* m,
                                             size_t len)
{
    uint8_t block[SHA512_BLOCK_SIZE];

    memcpy(block, r, 32);
    memcpy(block + 32, a, 32);
    hash_with_prefix(z, block, 64, m, len);
}

/*
 * Produce a signature over `message`.
 *
 * signature: output, EDSIGN_SIGNATURE_SIZE bytes (R in the first 32 bytes,
 *            s in the last 32).
 * pub:       the public key matching `secret` (EDSIGN_PUBLIC_KEY_SIZE bytes);
 *            it is hashed into the signature, not re-derived here.
 * secret:    EDSIGN_SECRET_KEY_SIZE bytes.
 * message:   `len` bytes to sign.
 */
__device__ void edsign_sign(uint8_t* signature,
                            const uint8_t* pub,
                            const uint8_t* secret,
                            const uint8_t* message,
                            size_t len)
{
    uint8_t expanded[EXPANDED_SIZE];
    uint8_t e[FPRIME_SIZE];
    uint8_t s[FPRIME_SIZE];
    uint8_t k[FPRIME_SIZE];
    uint8_t z[FPRIME_SIZE];

    expand_key(expanded, secret);

    /* k from the upper half of the expanded key; R = k * base. */
    generate_k(k, expanded + 32, message, len);
    sm_pack(signature, k);

    /* z = H(R || pub || message). */
    hash_message(z, signature, pub, message, len);

    /* e = lower half of the expanded key, reduced mod order. */
    fprime_from_bytes(e, expanded, 32, ed25519_order);

    /* s = z * e + k (mod order). */
    fprime_mul(s, z, e, ed25519_order);
    fprime_add(s, k, ed25519_order);
    memcpy(signature + 32, s, 32);
}

/*
 * Verify a signature.
 *
 * Returns the AND of both unpack statuses and f25519_eq(lhs, rhs), where
 * lhs = s * base and rhs = z * A + R (all packed). Nonzero is expected to
 * mean "valid" -- this relies on the helpers' return conventions, which are
 * defined outside this file.
 */
__device__ uint8_t edsign_verify(const uint8_t* signature,
                                 const uint8_t* pub,
                                 const uint8_t* message,
                                 size_t len)
{
    struct ed25519_pt p;
    struct ed25519_pt q;
    uint8_t lhs[F25519_SIZE];
    uint8_t rhs[F25519_SIZE];
    uint8_t z[FPRIME_SIZE];
    uint8_t ok = 1;

    /* z = H(R || A || message). */
    hash_message(z, signature, pub, message, len);

    /* lhs = s * base. */
    sm_pack(lhs, signature + 32);

    /* rhs = z * A + R. */
    ok &= upp(&p, pub);
    ed25519_smult(&p, &p, z);
    ok &= upp(&q, signature);
    ed25519_add(&p, &p, &q);
    pp(rhs, &p);

    return ok & f25519_eq(lhs, rhs);
}

/*
 * Batch signing kernel: thread `idx` produces signature `idx`.
 *
 * Launch layout: 1-D grid of 1-D blocks with at least `num_messages` threads
 * in total; surplus threads exit immediately. No shared memory is used.
 *
 * d_signatures:   num_messages * EDSIGN_SIGNATURE_SIZE output bytes.
 * d_pubs:         num_messages * EDSIGN_PUBLIC_KEY_SIZE bytes.
 * d_secrets:      num_messages * EDSIGN_SECRET_KEY_SIZE bytes.
 * d_messages:     message bytes (see the NOTE in the body).
 * d_message_lens: num_messages lengths, one per thread.
 */
__global__ void sign_kernel(uint8_t* d_signatures,
                            const uint8_t* d_pubs,
                            const uint8_t* d_secrets,
                            const uint8_t* d_messages,
                            const size_t* d_message_lens,
                            int num_messages)
{
    /* Index and offsets are computed in size_t so that neither the flat
     * thread index nor idx * 64 can overflow a 32-bit int on large batches. */
    const size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

    if (num_messages <= 0 || idx >= (size_t)num_messages)
        return;

    uint8_t* signature = d_signatures + idx * EDSIGN_SIGNATURE_SIZE;
    const uint8_t* pub = d_pubs + idx * EDSIGN_PUBLIC_KEY_SIZE;
    const uint8_t* secret = d_secrets + idx * EDSIGN_SECRET_KEY_SIZE;

    /*
     * NOTE(review): every thread reads its message from the start of
     * d_messages while using its own length, so thread idx signs the first
     * d_message_lens[idx] bytes of one shared buffer. If the buffer is meant
     * to hold distinct messages, a per-message offset is required -- confirm
     * the intended layout with the caller.
     */
    const uint8_t* message = d_messages;
    const size_t len = d_message_lens[idx];

    edsign_sign(signature, pub, secret, message, len);
}

/*
 * Host launcher for sign_kernel: signs `num_messages` messages and blocks
 * until the device has finished.
 *
 * All pointer arguments are forwarded unchanged to sign_kernel and must be
 * device-accessible. Launch and execution failures are reported on stderr;
 * the launch error is only peeked at, so a caller's own
 * cudaGetLastError() check still observes it.
 */
void launch_sign_kernel(uint8_t* d_signatures,
                        const uint8_t* d_pubs,
                        const uint8_t* d_secrets,
                        const uint8_t* d_messages,
                        const size_t* d_message_lens,
                        int num_messages)
{
    const int threadsPerBlock = 256;
    cudaError_t err;

    /* A batch of zero (or fewer) messages would request a zero-block grid,
     * which is an invalid launch configuration, so skip the launch. */
    if (num_messages > 0) {
        /* Ceil-division written so it cannot overflow near INT_MAX. */
        const int blocksPerGrid = (num_messages - 1) / threadsPerBlock + 1;

        sign_kernel<<<blocksPerGrid, threadsPerBlock>>>(
            d_signatures, d_pubs, d_secrets, d_messages, d_message_lens, num_messages);

        err = cudaPeekAtLastError();
        if (err != cudaSuccess) {
            fprintf(stderr, "launch_sign_kernel: kernel launch failed: %s\n",
                    cudaGetErrorString(err));
        }
    }

    /* Synchronize unconditionally, as before, so the device is idle on return. */
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "launch_sign_kernel: device synchronize failed: %s\n",
                cudaGetErrorString(err));
    }
}

#endif /* COMPACT_DISABLE_ED25519 */

#endif /* __EDSIGN_CUH */