#pragma once
// NOTE(review): the header name of the original #include directive is missing
// from this copy of the file. <string.h> is assumed because the code below
// calls memset/memcpy -- confirm against the upstream file.
#include <string.h>

// All numbers in this file are 32-byte arrays in little-endian byte order:
// index 0 holds the least significant byte (carries/borrows propagate from
// index 0 upward, and the most significant bit is searched from index 31 down).

// x += p over 32 bytes. The carry out of byte 31 is discarded.
__device__ void raw_add(unsigned char* x, const unsigned char* p)
{
    unsigned short c = 0;

    for (int i = 0; i < 32; i++) {
        c += ((unsigned short)x[i]) + ((unsigned short)p[i]);
        x[i] = (unsigned char)c;
        c >>= 8;  // keep only the carry for the next byte
    }
}

// Branch-free select: dst = condition ? one : zero, byte by byte.
// condition must be 0 or 1; it is expanded to a 0x00 / 0xFF mask.
// dst may alias zero or one (each byte is read before it is written).
__device__ void fprime_select(unsigned char* dst, const unsigned char* zero,
                              const unsigned char* one, unsigned char condition)
{
    const unsigned char mask = -condition;

    for (int i = 0; i < 32; i++)
        dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
}

// If x >= p, replace x with x - p; otherwise leave x unchanged.
// The subtraction is always computed; the final borrow picks the result.
__device__ void raw_try_sub(unsigned char* x, const unsigned char* p)
{
    unsigned char minusp[32];
    unsigned short c = 0;

    for (int i = 0; i < 32; i++) {
        c = ((unsigned short)x[i]) - ((unsigned short)p[i]) - c;
        minusp[i] = (unsigned char)c;
        c = (c >> 8) & 1;  // borrow out of this byte
    }

    // Borrow set means x < p: keep x. Otherwise take x - p.
    fprime_select(x, minusp, x, c);
}

// Returns the zero-based index of the most significant set bit of p,
// or -1 when all 32 bytes are zero.
// The running time depends on the value of p.
__device__ int prime_msb(const unsigned char* p)
{
    int i;
    unsigned char x;

    for (i = 32 - 1; i >= 0; i--) {
        if (p[i])
            break;
    }

    // No byte is set: return before indexing p[-1] and shifting a negative i.
    if (i < 0)
        return -1;

    x = p[i];
    i <<= 3;  // bit index of the lowest bit of the top non-zero byte

    while (x) {
        x >>= 1;
        i++;
    }

    return i - 1;
}

// x <<= n over 32 bytes; bits shifted out of byte 31 are discarded.
// Callers in this file pass n in the range 1..7.
__device__ void shift_n_bits(unsigned char* x, int n)
{
    unsigned short c = 0;

    for (int i = 0; i < 32; i++) {
        c |= ((unsigned short)x[i]) << n;
        x[i] = (unsigned char)c;
        c >>= 8;  // bits that spill into the next byte
    }
}

// Returns the smaller of a and b.
__device__ inline int min_int(int a, int b)
{
    return a < b ? a : b;
}

// Reduces the len-byte little-endian integer x modulo `modulus` and stores the
// 32-byte result in n.
//
// The top bits of x (as many as are guaranteed to stay below the modulus) are
// copied into n directly; the remaining bits are then shifted in one at a
// time from most to least significant, with a conditional subtraction of the
// modulus after each bit.
//
// (len << 3) is converted to int, so len is assumed to be small enough for
// len * 8 to fit in an int.
__device__ void fprime_from_bytes(unsigned char* n, const unsigned char* x,
                                  unsigned long len, const unsigned char* modulus)
{
    int preload_total = min_int(prime_msb(modulus) - 1, (int)(len << 3));

    // A modulus of 0 or 1 yields a negative preload count, which would make
    // preload_bytes negative and read x[len]. Preload nothing in that case.
    if (preload_total < 0)
        preload_total = 0;

    const int preload_bytes = preload_total >> 3;
    const int preload_bits = preload_total & 7;
    const int rbits = (int)(len << 3) - preload_total;

    memset(n, 0, 32);

    // Whole bytes taken from the most significant end of x.
    for (int i = 0; i < preload_bytes; i++)
        n[i] = x[len - preload_bytes + i];

    // Remaining preload bits: the top preload_bits bits of the next lower byte.
    if (preload_bits) {
        shift_n_bits(n, preload_bits);
        n[0] |= x[len - preload_bytes - 1] >> (8 - preload_bits);
    }

    // Shift in the remaining bits, reducing after each one.
    for (int i = rbits - 1; i >= 0; i--) {
        const unsigned char bit = (x[i >> 3] >> (i & 7)) & 1;

        shift_n_bits(n, 1);
        n[0] |= bit;
        raw_try_sub(n, modulus);
    }
}

// r = r + a, followed by one conditional subtraction of modulus.
// The result is fully reduced only when r + a < 2 * modulus on entry.
__device__ void fprime_add(unsigned char* r, const unsigned char* a,
                           const unsigned char* modulus)
{
    raw_add(r, a);
    raw_try_sub(r, modulus);
}

// Copies the 32-byte value a into x.
__device__ inline void fprime_copy(unsigned char* x, const unsigned char* a)
{
    memcpy(x, a, 32);
}

// r = a * b mod modulus, computed by double-and-add over the bits of b from
// bit prime_msb(modulus) down to bit 0.
// r is zeroed before a and b are read, so r must not alias a or b.
// If modulus is all zero, the loop does not run and r is left as zero.
__device__ void fprime_mul(unsigned char* r, const unsigned char* a,
                           const unsigned char* b, const unsigned char* modulus)
{
    memset(r, 0, 32);

    for (int i = prime_msb(modulus); i >= 0; i--) {
        const unsigned char bit = (b[i >> 3] >> (i & 7)) & 1;
        unsigned char plusa[32];

        // r = 2r mod modulus
        shift_n_bits(r, 1);
        raw_try_sub(r, modulus);

        // plusa = r + a mod modulus, computed regardless of the bit value
        fprime_copy(plusa, r);
        fprime_add(plusa, a, modulus);

        // Keep r when the bit is 0, take r + a when it is 1.
        fprime_select(r, r, plusa, bit);
    }
}