yggm/libs/fprime.cuh
2025-03-13 19:43:54 +05:00

88 lines
2.9 KiB
Plaintext

#pragma once
#include <string.h>
// In-place addition of two 32-byte little-endian integers: x += p.
// The carry ripples from byte 0 up to byte 31; a carry out of the top byte
// is dropped, so the result is the sum truncated to 256 bits.
__device__ void raw_add(unsigned char* x, const unsigned char* p) {
    unsigned short carry = 0;
    int idx = 0;
    while (idx < 32) {
        const unsigned short lhs = (unsigned short)x[idx];
        const unsigned short rhs = (unsigned short)p[idx];
        carry += lhs + rhs;
        x[idx] = (unsigned char)carry;  // low 8 bits are this byte of the sum
        carry >>= 8;                    // what remains carries into the next byte
        ++idx;
    }
}
// Branch-free 32-byte select: dst receives `one` when condition is 1 and
// `zero` when condition is 0.
// The condition is negated into a byte mask (1 -> 0xFF, 0 -> 0x00) which gates
// the XOR difference between the two inputs. Each source byte is read before
// the matching dst byte is written, so dst may be the same buffer as either input.
__device__ void fprime_select(unsigned char* dst, const unsigned char* zero, const unsigned char* one, unsigned char condition) {
    const unsigned char mask = (unsigned char)(0 - condition);
    int k = 0;
    while (k < 32) {
        const unsigned char z = zero[k];
        const unsigned char delta = one[k] ^ z;
        dst[k] = z ^ (mask & delta);
        ++k;
    }
}
// Conditional in-place subtraction of 32-byte little-endian integers.
// Computes x - p into a scratch buffer while tracking the borrow. If the
// subtraction did not borrow (x >= p) the difference replaces x; otherwise x
// is left unchanged. The final choice goes through fprime_select rather than
// a branch.
__device__ void raw_try_sub(unsigned char* x, const unsigned char* p) {
    unsigned char diff[32];
    unsigned short borrow = 0;
    for (int idx = 0; idx != 32; ++idx) {
        // A negative intermediate wraps modulo 2^16, which sets bit 8.
        const unsigned short t = (unsigned short)(x[idx] - p[idx] - borrow);
        diff[idx] = (unsigned char)t;
        borrow = (t >> 8) & 1;  // bit 8 set means this byte borrowed
    }
    // borrow == 0 -> take diff; borrow == 1 -> keep the original x.
    fprime_select(x, diff, x, borrow);
}
// Returns the zero-based index of the most significant set bit of the 32-byte
// little-endian integer p (0..255), or -1 when every byte of p is zero.
// Note: the amount of work depends on the value of p (early exit from the
// byte scan, data-dependent bit loop).
__device__ int prime_msb(const unsigned char* p) {
    int i;
    unsigned char x;
    // Find the highest non-zero byte.
    for (i = 32 - 1; i >= 0; i--) {
        if (p[i]) break;
    }
    // No non-zero byte: the scan finished with i == -1. Return before p[i]
    // would be read one byte in front of the buffer.
    if (i < 0) {
        return -1;
    }
    x = p[i];
    i <<= 3;  // bit index of the lowest bit in that byte
    // Count the significant bits within the byte.
    while (x) {
        x >>= 1;
        i++;
    }
    return i - 1;
}
// Shifts the 32-byte little-endian integer x left by n bits, in place.
// Bits pushed past byte 31 are discarded. The running value lives in a 16-bit
// accumulator, so shift counts above 8 would lose bits; callers in this file
// pass values from 1 to 7.
__device__ void shift_n_bits(unsigned char* x, int n) {
    unsigned short acc = 0;
    unsigned char* const end = x + 32;
    for (unsigned char* cur = x; cur != end; ++cur) {
        acc |= ((unsigned short)*cur) << n;
        *cur = (unsigned char)acc;  // low byte is the shifted output byte
        acc >>= 8;                  // overflow bits feed the next byte
    }
}
// Returns the smaller of two ints (b when they are equal).
__device__ inline int min_int(int a, int b) {
    if (a < b) {
        return a;
    }
    return b;
}
// Loads the len-byte little-endian integer x into the 32-byte value n,
// reducing it by `modulus` along the way.
//
// Phase 1 (preload): the topmost bits of x are copied straight into n. The
// number of bits taken is prime_msb(modulus) - 1, capped at len * 8, so the
// preloaded value has fewer bits than the modulus.
// Phase 2: the remaining low bits of x are fed in one at a time, most
// significant first. Each step doubles n, ORs in the bit, then does one trial
// subtraction of the modulus.
//
// n      - output, 32 bytes
// x      - input bytes, little-endian, len bytes long
// len    - length of x in bytes
// modulus- 32-byte little-endian modulus
__device__ void fprime_from_bytes(unsigned char* n, const unsigned char* x, unsigned long len, const unsigned char* modulus) {
    const int head_bits = min_int(prime_msb(modulus) - 1, (int)(len << 3));
    const int head_bytes = head_bits >> 3;        // whole bytes in the preload
    const int head_rem = head_bits & 7;           // leftover bits after whole bytes
    const int tail_bits = (len << 3) - head_bits; // bits still to be shifted in

    memset(n, 0, 32);

    // Copy the top head_bytes bytes of x into the bottom of n.
    int k = 0;
    while (k < head_bytes) {
        n[k] = x[len - head_bytes + k];
        ++k;
    }

    // Make room for the leftover bits and take them from the top of the next
    // lower byte of x.
    if (head_rem != 0) {
        shift_n_bits(n, head_rem);
        n[0] |= x[len - head_bytes - 1] >> (8 - head_rem);
    }

    // Shift in the remaining bits from high to low, reducing after each one.
    for (int pos = tail_bits - 1; pos >= 0; --pos) {
        const unsigned char bit = (x[pos >> 3] >> (pos & 7)) & 1;
        shift_n_bits(n, 1);
        n[0] |= bit;
        raw_try_sub(n, modulus);
    }
}
// Modular addition in place: r = r + a, followed by a single trial
// subtraction of `modulus`.
// Only one subtraction is attempted, so the result is below the modulus only
// when the raw sum is less than twice the modulus; callers are expected to
// pass operands that are already reduced.
__device__ void fprime_add(unsigned char* r, const unsigned char* a, const unsigned char* modulus) {
    // 256-bit add, carry-out discarded.
    raw_add(r, a);

    // Subtract the modulus once if the sum reached it.
    raw_try_sub(r, modulus);
}
// Copies the 32-byte value a into x.
__device__ inline void fprime_copy(unsigned char* x, const unsigned char* a) {
    const size_t value_bytes = 32;
    memcpy(x, a, value_bytes);
}
// Modular multiplication: r = a * b reduced by `modulus`, all values 32-byte
// little-endian.
//
// Walks the bits of b from bit prime_msb(modulus) down to bit 0. Each step
// doubles r (shift by one, one trial subtraction), computes r + a into a
// scratch buffer, and keeps the scratch value when the current bit of b is 1.
// The choice is made with fprime_select, not a branch on the bit.
//
// Bits of b above prime_msb(modulus) are never examined.
// r is cleared before a and b are read, so r must not be the same buffer as
// a or b.
__device__ void fprime_mul(unsigned char* r, const unsigned char* a, const unsigned char* b, const unsigned char* modulus) {
    unsigned char sum[32];  // holds r + a for the current step

    memset(r, 0, 32);

    int pos = prime_msb(modulus);
    while (pos >= 0) {
        const unsigned char bit = (b[pos >> 3] >> (pos & 7)) & 1;

        // r = 2 * r (reduced)
        shift_n_bits(r, 1);
        raw_try_sub(r, modulus);

        // sum = r + a (reduced)
        fprime_copy(sum, r);
        fprime_add(sum, a, modulus);

        // r = bit ? sum : r
        fprime_select(r, r, sum, bit);

        --pos;
    }
}