129 lines
3.8 KiB
Plaintext
129 lines
3.8 KiB
Plaintext
#include <f25519.cuh>
|
|
__device__ void f25519_copy(unsigned char* __restrict__ x, const unsigned char* __restrict__ a) {
|
|
#pragma unroll
|
|
for (int i = 0; i < 32; i++) {
|
|
x[i] = a[i];
|
|
}
|
|
}
|
|
__device__ void f25519_select(unsigned char* __restrict__ dst, const unsigned char* __restrict__ zero, const unsigned char* __restrict__ one, unsigned char cond) {
|
|
const unsigned char mask = -cond;
|
|
#pragma unroll
|
|
for (int i = 0; i < 32; i++) {
|
|
dst[i] = (zero[i] & ~mask) | (one[i] & mask);
|
|
}
|
|
}
|
|
__device__ void f25519_normalize(unsigned char* __restrict__ x) {
|
|
unsigned char minusp[32];
|
|
unsigned short c = (x[31] >> 7) * 19;
|
|
x[31] &= 127;
|
|
#pragma unroll 32
|
|
for (int i = 0; i < 32; i++) {
|
|
c += x[i];
|
|
x[i] = (unsigned char)c;
|
|
c >>= 8;
|
|
}
|
|
c = 19;
|
|
#pragma unroll
|
|
for (int i = 0; i + 1 < 32; i++) {
|
|
c += x[i];
|
|
minusp[i] = (unsigned char)c;
|
|
c >>= 8;
|
|
}
|
|
c += x[32 - 1] - 128;
|
|
minusp[32 - 1] = (unsigned char)c;
|
|
f25519_select(x, minusp, x, (c >> 15) & 1);
|
|
}
|
|
__device__ void f25519_add(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
|
unsigned short c = 0;
|
|
#pragma unroll
|
|
for (int i = 0; i < 32; i++) {
|
|
c = (c >> 8) + ((unsigned short)a[i]) + ((unsigned short)b[i]);
|
|
r[i] = (unsigned char)c;
|
|
}
|
|
r[32 - 1] &= 127;
|
|
c = (c >> 7) * 19;
|
|
#pragma unroll
|
|
for (int i = 0; i < 32; i++) {
|
|
c += r[i];
|
|
r[i] = (unsigned char)c;
|
|
c >>= 8;
|
|
}
|
|
}
|
|
__device__ void f25519_sub(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
|
unsigned c = 218;
|
|
int i = 0;
|
|
#pragma unroll
|
|
for (i = 0; i + 1 < 32; i++) {
|
|
c += 65280 + ((unsigned)a[i]) - ((unsigned)b[i]);
|
|
r[i] = (unsigned char)c;
|
|
c >>= 8;
|
|
}
|
|
c += ((unsigned)a[i]) - ((unsigned)b[i]);
|
|
r[i] = (unsigned char)(c & 127);
|
|
c = (c >> 7) * 19;
|
|
#pragma unroll
|
|
for (i = 0; i < 32; i++) {
|
|
c += r[i];
|
|
r[i] = (unsigned char)c;
|
|
c >>= 8;
|
|
}
|
|
}
|
|
__device__ void f25519_neg(unsigned char* __restrict__ r, const unsigned char* __restrict__ a) {
|
|
unsigned c = 218;
|
|
int i = 0;
|
|
#pragma unroll
|
|
for (i = 0; i + 1 < 32; i++) {
|
|
c += 65280 - ((unsigned)a[i]);
|
|
r[i] = (unsigned char)c;
|
|
c >>= 8;
|
|
}
|
|
c -= ((unsigned)a[i]);
|
|
r[i] = (unsigned char)(c & 127);
|
|
c = (c >> 7) * 19;
|
|
#pragma unroll
|
|
for (i = 0; i < 32; i++) {
|
|
c += r[i];
|
|
r[i] = (unsigned char)c;
|
|
c >>= 8;
|
|
}
|
|
}
|
|
__device__ void f25519_mul__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ a, const unsigned char* __restrict__ b) {
|
|
unsigned c = 0;
|
|
#pragma unroll
|
|
for (int i = 0; i < 32; i++) {
|
|
c >>= 8;
|
|
for (int j = 0; j <= i; j++) {
|
|
c += ((unsigned)a[j]) * ((unsigned)b[i - j]);
|
|
}
|
|
for (int j = i + 1; j < 32; j++) {
|
|
c += ((unsigned)a[j]) * ((unsigned)b[32 + i - j]) * 38;
|
|
}
|
|
r[i] = (unsigned char)c;
|
|
}
|
|
r[32 - 1] &= 127;
|
|
c = (c >> 7) * 19;
|
|
#pragma unroll
|
|
for (int i = 0; i < 32; i++) {
|
|
c += r[i];
|
|
r[i] = (unsigned char)c;
|
|
c >>= 8;
|
|
}
|
|
}
|
|
__device__ void f25519_inv__distinct(unsigned char* __restrict__ r, const unsigned char* __restrict__ x) {
|
|
unsigned char s[32];
|
|
f25519_mul__distinct(s, x, x);
|
|
f25519_mul__distinct(r, s, x);
|
|
#pragma unroll
|
|
for (int i = 0; i < 248; i++) {
|
|
f25519_mul__distinct(s, r, r);
|
|
f25519_mul__distinct(r, s, x);
|
|
}
|
|
f25519_mul__distinct(s, r, r);
|
|
f25519_mul__distinct(r, s, s);
|
|
f25519_mul__distinct(s, r, x);
|
|
f25519_mul__distinct(r, s, s);
|
|
f25519_mul__distinct(s, r, r);
|
|
f25519_mul__distinct(r, s, x);
|
|
f25519_mul__distinct(s, r, r);
|
|
f25519_mul__distinct(r, s, x);
|
|
} |