// yggm/libs/edsign.cuh

#ifndef __EDSIGN_CUH
#define __EDSIGN_CUH
#include <ed25519.cuh>
#ifndef COMPACT_DISABLE_ED25519
#include <sha512.cuh>
#include <fprime.cuh>
#include <cuda_runtime.h>
#include <cstdio>
#include <cstring>
// Byte sizes used throughout this header.
// EXPANDED_SIZE: length of the buffer expand_key() fills from the SHA-512 state.
#define EXPANDED_SIZE         64
// EDSIGN_SECRET_KEY_SIZE: number of secret-key bytes hashed by expand_key().
#define EDSIGN_SECRET_KEY_SIZE 32
// EDSIGN_PUBLIC_KEY_SIZE: per-entry stride of the public-key array in sign_kernel().
#define EDSIGN_PUBLIC_KEY_SIZE 32
// EDSIGN_SIGNATURE_SIZE: per-entry stride of the signature array in sign_kernel().
#define EDSIGN_SIGNATURE_SIZE  64
// SHA512_HASH_SIZE: number of digest bytes read back in hash_with_prefix().
#define SHA512_HASH_SIZE       64
// Modulus handed to every fprime_* call in this file (fprime_from_bytes,
// fprime_mul, fprime_add).
// Read least-significant byte first, the bytes spell
// 2^252 + 0x14def9dea2f79cd65812631a5cf5d3ed, which is the Ed25519 group
// order given in RFC 8032 (assumes fprime uses little-endian byte order --
// TODO confirm against fprime.cuh).
__device__ __constant__ uint8_t ed25519_order[FPRIME_SIZE] = {
    0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58,
    0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
};
// Expand a secret key into EXPANDED_SIZE bytes of key material.
//   expanded: output buffer, EXPANDED_SIZE bytes.
//   secret:   input, EDSIGN_SECRET_KEY_SIZE bytes.
// The secret is hashed with the sha512_* helpers and the digest is then
// passed through ed25519_prepare() in place.
__device__ __forceinline__ void expand_key(uint8_t* expanded, const uint8_t* secret) {
    struct sha512_state hasher;

    sha512_init(&hasher);
    // No full blocks are fed beforehand; the secret goes straight to the
    // finalisation call.
    sha512_final(&hasher, secret, EDSIGN_SECRET_KEY_SIZE);

    // Copy the digest out starting at offset 0.
    sha512_get(&hasher, expanded, 0, EXPANDED_SIZE);

    ed25519_prepare(expanded);
}
// Decode `packed` into x/y coordinates and load them into `*p`.
// Returns the status byte produced by ed25519_try_unpack(). The projection
// into `*p` happens regardless of that status, so callers must inspect the
// return value before trusting the point.
__device__ __forceinline__ uint8_t upp(struct ed25519_pt* p, const uint8_t* packed) {
    uint8_t coord_x[F25519_SIZE];
    uint8_t coord_y[F25519_SIZE];

    const uint8_t status = ed25519_try_unpack(coord_x, coord_y, packed);
    ed25519_project(p, coord_x, coord_y);

    return status;
}
// Inverse of upp(): extract x/y coordinates from `*p` with
// ed25519_unproject() and encode them into `packed` with ed25519_pack().
__device__ __forceinline__ void pp(uint8_t* packed, const struct ed25519_pt* p) {
    uint8_t coord_x[F25519_SIZE];
    uint8_t coord_y[F25519_SIZE];

    ed25519_unproject(coord_x, coord_y, p);
    ed25519_pack(packed, coord_x, coord_y);
}
// Multiply ed25519_base by the scalar `k` and write the packed result to `r`.
__device__ __forceinline__ void sm_pack(uint8_t* r, const uint8_t* k) {
    struct ed25519_pt product;

    ed25519_smult(&product, &ed25519_base, k);
    pp(r, &product);
}
// Derive the packed public key for `secret`.
//   pub:    output buffer for the packed point.
//   secret: input, EDSIGN_SECRET_KEY_SIZE bytes.
__device__ __forceinline__ void edsign_sec_to_pub(uint8_t* pub, const uint8_t* secret) {
    uint8_t key_material[EXPANDED_SIZE];
    struct ed25519_pt pub_point;

    expand_key(key_material, secret);

    // Base-point multiplication by the expanded key, then pack the result;
    // this is the same sequence sm_pack() performs.
    ed25519_smult(&pub_point, &ed25519_base, key_material);
    pp(pub, &pub_point);
}
// Hash (prefix || message) with the sha512_* helpers and reduce the digest
// modulo ed25519_order.
//   out_fp:      receives the reduced value written by fprime_from_bytes().
//   init_block:  scratch buffer of SHA512_BLOCK_SIZE bytes whose first
//                `prefix_size` bytes already hold the prefix. It is
//                overwritten: first with message bytes, then with the digest.
//   prefix_size: number of prefix bytes already in init_block.
//   message/len: the message to append after the prefix.
__device__ __forceinline__ void hash_with_prefix(uint8_t* out_fp, uint8_t* init_block, unsigned int prefix_size, const uint8_t* message, size_t len) {
    struct sha512_state s;
    sha512_init(&s);
    if (len < SHA512_BLOCK_SIZE && len + prefix_size < SHA512_BLOCK_SIZE) {
        // Prefix and message together fit in a single partial block.
        memcpy(init_block + prefix_size, message, len);
        sha512_final(&s, init_block, len + prefix_size);
    } else {
        size_t i;
        // Top up the first block with the leading message bytes and absorb it.
        memcpy(init_block + prefix_size, message, SHA512_BLOCK_SIZE - prefix_size);
        sha512_block(&s, init_block);
        // Absorb every further complete block directly from the message.
        for (i = SHA512_BLOCK_SIZE - prefix_size; i + SHA512_BLOCK_SIZE <= len; i += SHA512_BLOCK_SIZE) {
            sha512_block(&s, message + i);
        }
        // sha512_final receives the byte count of the ENTIRE stream, exactly
        // as in the short-message branch above. The previous value,
        // len - i + prefix_size, was neither the total nor the remaining byte
        // count: it shifted the tail size by prefix_size and encoded the
        // wrong length.
        // NOTE(review): this follows the upstream c25519 sha512_final
        // contract (total_size); confirm sha512.cuh keeps that contract.
        sha512_final(&s, message + i, len + prefix_size);
    }
    // Reuse init_block to hold the digest before reducing it.
    sha512_get(&s, init_block, 0, SHA512_HASH_SIZE);
    fprime_from_bytes(out_fp, init_block, SHA512_HASH_SIZE, ed25519_order);
}
// Derive the value `k` by hashing (kgen_key || message) via
// hash_with_prefix().
//   k:        output, written by hash_with_prefix().
//   kgen_key: 32 bytes placed in front of the message.
__device__ __forceinline__ void generate_k(uint8_t* k, const uint8_t* kgen_key, const uint8_t* message, size_t len) {
    uint8_t first_block[SHA512_BLOCK_SIZE];

    // Only the 32-byte prefix is initialised here; hash_with_prefix() fills
    // in the bytes after it as needed.
    memcpy(first_block, kgen_key, 32);

    hash_with_prefix(k, first_block, 32, message, len);
}
// Compute `z` by hashing (r || a || m) via hash_with_prefix().
//   z:   output, written by hash_with_prefix().
//   r,a: two 32-byte values that together form the 64-byte prefix.
//   m:   message of `len` bytes.
__device__ __forceinline__ void hash_message(uint8_t* z, const uint8_t* r, const uint8_t* a, const uint8_t* m, size_t len) {
    uint8_t first_block[SHA512_BLOCK_SIZE];
    uint8_t* cursor = first_block;

    memcpy(cursor, r, 32);
    cursor += 32;
    memcpy(cursor, a, 32);

    hash_with_prefix(z, first_block, 64, m, len);
}
// Sign `message` and write the result to `signature`.
//   signature: output; bytes [0,32) receive the packed point written by
//              sm_pack(), bytes [32,64) receive the scalar computed below.
//   pub:       packed public key, fed into the message hash.
//   secret:    secret key, expanded with expand_key().
//   message:   `len` bytes to sign.
__device__ void edsign_sign(uint8_t* signature, const uint8_t* pub, const uint8_t* secret, const uint8_t* message, size_t len) {
    uint8_t key_material[EXPANDED_SIZE];
    uint8_t nonce[FPRIME_SIZE];
    uint8_t challenge[FPRIME_SIZE];
    uint8_t key_scalar[FPRIME_SIZE];
    uint8_t response[FPRIME_SIZE];

    expand_key(key_material, secret);

    // The nonce is derived from the upper 32 bytes of the expanded key and
    // the message; the base point multiplied by it is packed into the first
    // half of the signature.
    generate_k(nonce, key_material + 32, message, len);
    sm_pack(signature, nonce);

    // challenge = hash(first signature half || pub || message), reduced.
    hash_message(challenge, signature, pub, message, len);

    // The lower 32 bytes of the expanded key, reduced mod ed25519_order.
    fprime_from_bytes(key_scalar, key_material, 32, ed25519_order);

    // response = challenge * key_scalar + nonce (mod ed25519_order).
    fprime_mul(response, challenge, key_scalar, ed25519_order);
    fprime_add(response, nonce, ed25519_order);

    memcpy(signature + 32, response, 32);
}
// Check `signature` over `message` against the packed public key `pub`.
// The return value is the AND of both unpack statuses and the result of
// f25519_eq() on the two packed points computed below.
__device__ uint8_t edsign_verify(const uint8_t* signature, const uint8_t* pub, const uint8_t* message, size_t len) {
    struct ed25519_pt accum;
    struct ed25519_pt sig_point;
    uint8_t left[F25519_SIZE];
    uint8_t right[F25519_SIZE];
    uint8_t challenge[FPRIME_SIZE];
    uint8_t valid = 1;

    // challenge = hash(first signature half || pub || message), reduced.
    hash_message(challenge, signature, pub, message, len);

    // Left side: base point times the second half of the signature, packed.
    sm_pack(left, signature + 32);

    // Right side: (public-key point * challenge) + signature point, packed.
    // Both points are processed even if unpacking reported failure; the
    // failure is carried in `valid`.
    valid &= upp(&accum, pub);
    ed25519_smult(&accum, &accum, challenge);
    valid &= upp(&sig_point, signature);
    ed25519_add(&accum, &accum, &sig_point);
    pp(right, &accum);

    return valid & f25519_eq(left, right);
}
// Batch signing kernel: one thread per signature.
// Launch layout: 1-D grid of 1-D blocks; threads whose global index is
// >= num_messages exit immediately. No shared memory is used.
//   d_signatures:   output, EDSIGN_SIGNATURE_SIZE bytes per entry.
//   d_pubs:         EDSIGN_PUBLIC_KEY_SIZE bytes per entry.
//   d_secrets:      EDSIGN_SECRET_KEY_SIZE bytes per entry.
//   d_messages:     message bytes (see the NOTE below).
//   d_message_lens: one length per entry.
//   num_messages:   number of entries to process.
__global__ void sign_kernel(uint8_t* d_signatures, const uint8_t* d_pubs, const uint8_t* d_secrets, const uint8_t* d_messages, const size_t* d_message_lens, int num_messages) {
    // Compute the index in size_t: the 32-bit product can exceed INT_MAX for
    // large batches, and a wrapped (negative) int index would slip past the
    // bounds guard. The per-entry byte offsets below would likewise overflow
    // int once idx * 64 passes 2^31.
    const size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    if (num_messages <= 0 || idx >= (size_t)num_messages) return;

    uint8_t* signature = d_signatures + idx * EDSIGN_SIGNATURE_SIZE;
    const uint8_t* pub = d_pubs + idx * EDSIGN_PUBLIC_KEY_SIZE;
    const uint8_t* secret = d_secrets + idx * EDSIGN_SECRET_KEY_SIZE;

    // NOTE(review): every thread reads its message from the START of
    // d_messages while using a per-entry length. That is only consistent if
    // all entries sign a prefix of the same buffer; if d_messages is meant to
    // hold distinct messages, a per-entry offset is missing here. The buffer
    // layout is not visible from this file -- confirm with the caller.
    const uint8_t* message = d_messages;
    size_t len = d_message_lens[idx];

    edsign_sign(signature, pub, secret, message, len);
}
// Host-side wrapper: launches sign_kernel on the default stream with 256
// threads per block and blocks until the device has finished.
// All pointer arguments are forwarded unchanged to the kernel.
// Errors from the launch or from kernel execution are reported on stderr;
// the function still returns void, so callers that need a status must query
// the CUDA runtime themselves.
void launch_sign_kernel(uint8_t* d_signatures, const uint8_t* d_pubs, const uint8_t* d_secrets, const uint8_t* d_messages, const size_t* d_message_lens, int num_messages) {
    // An empty batch would produce a zero-block grid, which is an invalid
    // launch configuration; there is nothing to do in that case.
    if (num_messages <= 0) return;

    const unsigned int threadsPerBlock = 256;
    // Ceil-div in unsigned arithmetic so num_messages close to INT_MAX does
    // not overflow.
    const unsigned int blocksPerGrid = ((unsigned int)num_messages + threadsPerBlock - 1) / threadsPerBlock;

    sign_kernel<<<blocksPerGrid, threadsPerBlock>>>(d_signatures, d_pubs, d_secrets, d_messages, d_message_lens, num_messages);

    // Launch-configuration errors are only visible through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "launch_sign_kernel: kernel launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // Faults raised while the kernel runs surface at the next synchronising call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "launch_sign_kernel: kernel execution failed: %s\n", cudaGetErrorString(err));
    }
}
#endif
#endif