156 lines
4.5 KiB
Plaintext
156 lines
4.5 KiB
Plaintext
#include <ed25519.cuh>
|
|
#include <f25519.cuh>
|
|
#include <precomp_data.h>
|
|
void __host__ __device__ ge_madd(ge_p1p1& __restrict__ r, const ge_p3& __restrict__ p, const precomp_data& __restrict__ q) {
|
|
fe t0;
|
|
fe_add(r.X, p.Y, p.X);
|
|
fe_sub(r.Y, p.Y, p.X);
|
|
fe_mul(r.Z, r.X, q.yplusx);
|
|
fe_mul(r.Y, r.Y, q.yminusx);
|
|
fe_mul(r.T, q.xy2d, p.T);
|
|
fe_add(t0, p.Z, p.Z);
|
|
fe_sub(r.X, r.Z, r.Y);
|
|
fe_add(r.Y, r.Z, r.Y);
|
|
fe_add(r.Z, t0, r.T);
|
|
fe_sub(r.T, t0, r.T);
|
|
}
|
|
// r.XYZ = p.XYZ * p.TZT
|
|
void __host__ __device__ ge_p1p1_to_p2(ge_p2& __restrict__ r, const ge_p1p1& __restrict__ p) {
|
|
fe_mul(r.X, p.X, p.T);
|
|
fe_mul(r.Y, p.Y, p.Z);
|
|
fe_mul(r.Z, p.Z, p.T);
|
|
}
|
|
void inline __host__ __device__ ge_p1p1_to_p3(ge_p3& __restrict__ r, const ge_p1p1& __restrict__ p) {
|
|
fe_mul(r.X, p.X, p.T);
|
|
fe_mul(r.Y, p.Y, p.Z);
|
|
fe_mul(r.Z, p.Z, p.T);
|
|
fe_mul(r.T, p.X, p.Y);
|
|
}
|
|
void __host__ __device__ ge_p2_dbl(ge_p1p1& __restrict__ r, const ge_p2& __restrict__ p) {
|
|
fe t0;
|
|
fe_mul(r.X, p.X, p.X);
|
|
fe_mul(r.Z, p.Y, p.Y);
|
|
fe_mul2(r.T, p.Z);
|
|
fe_add(r.Y, p.X, p.Y);
|
|
fe_mul(t0, r.Y, r.Y);
|
|
fe_add(r.Y, r.Z, r.X);
|
|
fe_sub(r.Z, r.Z, r.X);
|
|
fe_sub(r.X, t0, r.Y);
|
|
fe_sub(r.T, r.T, r.Z);
|
|
}
|
|
void __host__ __device__ ge_p3_dbl(ge_p1p1& __restrict__ r, const ge_p3& __restrict__ p) {
|
|
ge_p2 q;
|
|
fe_copy(q.X, p.X);
|
|
fe_copy(q.Y, p.Y);
|
|
fe_copy(q.Z, p.Z);
|
|
ge_p2_dbl(r, q);
|
|
}
|
|
void __host__ __device__ ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p3& __restrict__ h) {
|
|
fe recip, x, y;
|
|
fe_invert(recip, h.Z);
|
|
fe_mul(x, h.X, recip);
|
|
fe_mul(y, h.Y, recip);
|
|
fe_tobytes(s, y);
|
|
s[31] ^= fe_isnegative(x) << 7;
|
|
}
|
|
static constexpr unsigned char __host__ __device__ equal(const signed char b, const signed char c) {
|
|
unsigned long x = (b ^ c) - 1;
|
|
return static_cast<unsigned char>(x >>= 63);
|
|
}
|
|
static void __host__ __device__ cmov(precomp_data& __restrict__ t, const precomp_data& __restrict__ u, unsigned char b) {
|
|
fe_cmov(t.yplusx, u.yplusx, b);
|
|
fe_cmov(t.yminusx, u.yminusx, b);
|
|
fe_cmov(t.xy2d, u.xy2d, b);
|
|
}
|
|
static void __host__ __device__ select(precomp_data& __restrict__ t, signed int pos, signed char b) {
|
|
precomp_data minust;
|
|
unsigned long x = b;
|
|
x >>= 63;
|
|
unsigned char bnegative = static_cast<unsigned char>(x);
|
|
unsigned char babs = b - (((-bnegative) & b) << 1);
|
|
t.yplusx[0] = 1;
|
|
t.yminusx[0] = 1;
|
|
t.yplusx[1] = 0;
|
|
t.yminusx[1] = 0;
|
|
t.yplusx[2] = 0;
|
|
t.yminusx[2] = 0;
|
|
t.yplusx[3] = 0;
|
|
t.yminusx[3] = 0;
|
|
t.yplusx[4] = 0;
|
|
t.yminusx[4] = 0;
|
|
t.yplusx[5] = 0;
|
|
t.yminusx[5] = 0;
|
|
t.yplusx[6] = 0;
|
|
t.yminusx[6] = 0;
|
|
t.yplusx[7] = 0;
|
|
t.yminusx[7] = 0;
|
|
t.yplusx[8] = 0;
|
|
t.yminusx[8] = 0;
|
|
t.yplusx[9] = 0;
|
|
t.yminusx[9] = 0;
|
|
t.xy2d[0] = 0;
|
|
t.xy2d[1] = 0;
|
|
t.xy2d[2] = 0;
|
|
t.xy2d[3] = 0;
|
|
t.xy2d[4] = 0;
|
|
t.xy2d[5] = 0;
|
|
t.xy2d[6] = 0;
|
|
t.xy2d[7] = 0;
|
|
t.xy2d[8] = 0;
|
|
t.xy2d[9] = 0;
|
|
cmov(t, base[pos][0], equal(babs, 1));
|
|
cmov(t, base[pos][1], equal(babs, 2));
|
|
cmov(t, base[pos][2], equal(babs, 3));
|
|
cmov(t, base[pos][3], equal(babs, 4));
|
|
cmov(t, base[pos][4], equal(babs, 5));
|
|
cmov(t, base[pos][5], equal(babs, 6));
|
|
cmov(t, base[pos][6], equal(babs, 7));
|
|
cmov(t, base[pos][7], equal(babs, 8));
|
|
fe_copy(minust.yplusx, t.yminusx);
|
|
fe_copy(minust.yminusx, t.yplusx);
|
|
fe_neg(minust.xy2d, t.xy2d);
|
|
cmov(t, minust, bnegative);
|
|
}
|
|
void __device__ __host__ ge_scalarmult_base(ge_p3& __restrict__ h, const unsigned char* __restrict__ a) {
|
|
signed char e[64], carry;
|
|
signed int x;
|
|
ge_p1p1 r;
|
|
ge_p2 s;
|
|
precomp_data t;
|
|
#pragma unroll 32
|
|
for (signed int i = 0; i < 32; i++) {
|
|
e[2 * i] = a[i] & 15;
|
|
e[2 * i + 1] = a[i] >> 4;
|
|
}
|
|
#pragma unroll 63
|
|
for (x = 0, carry = 0; x < 63; x++) {
|
|
e[x] += carry;
|
|
carry = (e[x] + 8) >> 4;
|
|
e[x] -= carry << 4;
|
|
}
|
|
e[63] += carry;
|
|
fe_0(h.X);
|
|
fe_1(h.Y);
|
|
fe_1(h.Z);
|
|
fe_0(h.T);
|
|
#pragma unroll 32
|
|
for (int i = 1; i < 64; i += 2) {
|
|
select(t, i >> 1, e[i]);
|
|
ge_madd(r, h, t);
|
|
ge_p1p1_to_p3(h, r);
|
|
}
|
|
ge_p3_dbl(r, h);
|
|
ge_p1p1_to_p2(s, r);
|
|
ge_p2_dbl(r, s);
|
|
ge_p1p1_to_p2(s, r);
|
|
ge_p2_dbl(r, s);
|
|
ge_p1p1_to_p2(s, r);
|
|
ge_p2_dbl(r, s);
|
|
ge_p1p1_to_p3(h, r);
|
|
#pragma unroll 32
|
|
for (int i = 0; i < 64; i += 2) {
|
|
select(t, i >> 1, e[i]);
|
|
ge_madd(r, h, t);
|
|
ge_p1p1_to_p3(h, r);
|
|
}
|
|
} |