yggm/libs/f25519.cuh

198 lines
6.3 KiB
Plaintext
Raw Normal View History

2025-03-13 19:43:54 +05:00
#pragma once
2025-03-13 04:09:27 +05:00
#define F25519_SIZE 32
2025-03-13 19:43:54 +05:00
__device__ __constant__ unsigned char f25519_zero[F25519_SIZE] = { 0 };
__device__ __constant__ unsigned char f25519_one[F25519_SIZE] = { 1 };
__device__ __forceinline__ void f25519_load(unsigned char* __restrict__ x, unsigned int c) {
2025-03-13 04:09:27 +05:00
#pragma unroll
for (unsigned int i = 0; i < sizeof(c); i++) {
x[i] = c & 0xFF;
c >>= 8;
}
#pragma unroll
for (unsigned int i = sizeof(c); i < F25519_SIZE; i++) {
x[i] = 0;
}
}
2025-03-13 19:43:54 +05:00
__device__ __forceinline__ void f25519_copy(unsigned char* __restrict__ x, const unsigned char* __restrict__ a) {
2025-03-13 04:09:27 +05:00
#pragma unroll
for (int i = 0; i < F25519_SIZE; i++) {
x[i] = a[i];
}
}
2025-03-13 19:43:54 +05:00
__device__ __forceinline__ void f25519_select(unsigned char* __restrict__ dst, const unsigned char* __restrict__ zero, const unsigned char* __restrict__ one, unsigned char cond) {
const unsigned char mask = 0 - cond;
2025-03-13 04:09:27 +05:00
#pragma unroll
for (int i = 0; i < F25519_SIZE; i++) {
dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
}
}
2025-03-13 19:43:54 +05:00
__device__ __forceinline__ void f25519_normalize(unsigned char* __restrict__ x) {
unsigned char minusp[F25519_SIZE];
unsigned short c = (x[31] >> 7) * 19;
2025-03-13 04:09:27 +05:00
x[31] &= 127;
#pragma unroll
for (int i = 0; i < F25519_SIZE; i++) {
c += x[i];
2025-03-13 19:43:54 +05:00
x[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
c >>= 8;
}
c = 19;
#pragma unroll
for (int i = 0; i + 1 < F25519_SIZE; i++) {
c += x[i];
2025-03-13 19:43:54 +05:00
minusp[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
c >>= 8;
}
c += x[F25519_SIZE - 1] - 128;
2025-03-13 19:43:54 +05:00
minusp[F25519_SIZE - 1] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
f25519_select(x, minusp, x, (c >> 15) & 1);
}
2025-03-13 19:43:54 +05:00
__device__ __forceinline__ unsigned char f25519_eq(const unsigned char* __restrict__ x, const unsigned char* __restrict__ y) {
unsigned char s = 0;
2025-03-13 04:09:27 +05:00
#pragma unroll
for (int i = 0; i < F25519_SIZE; i++)
s |= x[i] ^ y[i];
s |= s >> 4;
s |= s >> 2;
s |= s >> 1;
return (s ^ 1) & 1;
}
2025-03-13 19:43:54 +05:00
__device__ __forceinline__ void f25519_add(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
unsigned short c = 0;
2025-03-13 04:09:27 +05:00
#pragma unroll
for (int i = 0; i < F25519_SIZE; i++) {
2025-03-13 19:43:54 +05:00
c = (c >> 8) + ((unsigned short)a[i]) + ((unsigned short)b[i]);
r[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
}
r[F25519_SIZE - 1] &= 127;
c = (c >> 7) * 19;
#pragma unroll
for (int i = 0; i < F25519_SIZE; i++) {
c += r[i];
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
c >>= 8;
}
}
2025-03-13 19:43:54 +05:00
__device__ __forceinline__ void f25519_sub(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
2025-03-13 04:09:27 +05:00
uint32_t c = 218;
int i = 0;
#pragma unroll
for (i = 0; i + 1 < F25519_SIZE; i++) {
c += 65280 + ((uint32_t)a[i]) - ((uint32_t)b[i]);
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
c >>= 8;
}
c += ((uint32_t)a[i]) - ((uint32_t)b[i]);
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)(c & 127);
2025-03-13 04:09:27 +05:00
c = (c >> 7) * 19;
#pragma unroll
for (i = 0; i < F25519_SIZE; i++) {
c += r[i];
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
c >>= 8;
}
}
2025-03-13 19:43:54 +05:00
__device__ __forceinline__ void f25519_neg(unsigned char* __restrict__ r, const unsigned char* __restrict__ a) {
2025-03-13 04:09:27 +05:00
uint32_t c = 218;
int i = 0;
#pragma unroll
for (i = 0; i + 1 < F25519_SIZE; i++) {
c += 65280 - ((uint32_t)a[i]);
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
c >>= 8;
}
c -= ((uint32_t)a[i]);
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)(c & 127);
2025-03-13 04:09:27 +05:00
c = (c >> 7) * 19;
#pragma unroll
for (i = 0; i < F25519_SIZE; i++) {
c += r[i];
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
c >>= 8;
}
}
2025-03-13 19:43:54 +05:00
__device__ __forceinline__ void f25519_mul__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
2025-03-13 04:09:27 +05:00
uint32_t c = 0;
#pragma unroll
for (int i = 0; i < F25519_SIZE; i++) {
c >>= 8;
for (int j = 0; j <= i; j++) {
c += ((uint32_t)a[j]) * ((uint32_t)b[i - j]);
}
for (int j = i + 1; j < F25519_SIZE; j++) {
c += ((uint32_t)a[j]) * ((uint32_t)b[F25519_SIZE + i - j]) * 38;
}
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
}
r[F25519_SIZE - 1] &= 127;
c = (c >> 7) * 19;
#pragma unroll
for (int i = 0; i < F25519_SIZE; i++) {
c += r[i];
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
c >>= 8;
}
}
2025-03-13 19:43:54 +05:00
/*
__device__ __forceinline__ void f25519_mul_c(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, uint32_t b) {
2025-03-13 04:09:27 +05:00
uint32_t c = 0;
#pragma unroll
for (int i = 0; i < F25519_SIZE; i++) {
c = (c >> 8) + b * ((uint32_t)a[i]);
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
}
r[F25519_SIZE - 1] &= 127;
c = (c >> 7) * 19;
#pragma unroll
for (int i = 0; i < F25519_SIZE; i++) {
c += r[i];
2025-03-13 19:43:54 +05:00
r[i] = (unsigned char)c;
2025-03-13 04:09:27 +05:00
c >>= 8;
}
}
2025-03-13 19:43:54 +05:00
*/
__device__ __forceinline__ void f25519_inv__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ x) {
unsigned char s[F25519_SIZE];
2025-03-13 04:09:27 +05:00
f25519_mul__distinct(s, x, x);
f25519_mul__distinct(r, s, x);
#pragma unroll
for (int i = 0; i < 248; i++) {
f25519_mul__distinct(s, r, r);
f25519_mul__distinct(r, s, x);
}
f25519_mul__distinct(s, r, r);
f25519_mul__distinct(r, s, s);
f25519_mul__distinct(s, r, x);
f25519_mul__distinct(r, s, s);
f25519_mul__distinct(s, r, r);
f25519_mul__distinct(r, s, x);
f25519_mul__distinct(s, r, r);
f25519_mul__distinct(r, s, x);
}
2025-03-13 19:43:54 +05:00
/*
__device__ __forceinline__ void exp2523(unsigned char* __restrict__ r, const unsigned char* __restrict__ x, unsigned char* __restrict__ s) {
2025-03-13 04:09:27 +05:00
int i;
f25519_mul__distinct(r, x, x);
f25519_mul__distinct(s, r, x);
#pragma unroll
for (i = 0; i < 248; i++) {
f25519_mul__distinct(r, s, s);
f25519_mul__distinct(s, r, x);
}
f25519_mul__distinct(r, s, s);
f25519_mul__distinct(s, r, r);
f25519_mul__distinct(r, s, x);
}
2025-03-13 19:43:54 +05:00
__device__ __forceinline__ void f25519_sqrt(unsigned char* __restrict__ r, const unsigned char* __restrict__ a) {
unsigned char v[F25519_SIZE], i_val[F25519_SIZE], x[F25519_SIZE], y[F25519_SIZE];
2025-03-13 04:09:27 +05:00
f25519_mul_c(x, a, 2);
exp2523(v, x, y);
f25519_mul__distinct(y, v, v);
f25519_mul__distinct(i_val, x, y);
2025-03-13 19:43:54 +05:00
unsigned char one[F25519_SIZE];
2025-03-13 04:09:27 +05:00
f25519_load(one, 1);
f25519_sub(i_val, i_val, one);
f25519_mul__distinct(x, v, a);
f25519_mul__distinct(r, x, i_val);
}
2025-03-13 19:43:54 +05:00
*/