129 lines
4.0 KiB
Plaintext
129 lines
4.0 KiB
Plaintext
#include <ge.cuh>
|
|
#include <precomp_data.h>
|
|
void __host__ __device__ ge_madd(ge_p1p1* __restrict__ r, const ge_p3* __restrict__ p, const ge_precomp* __restrict__ q) {
|
|
fe t0;
|
|
fe_add(r->X, p->Y, p->X);
|
|
fe_sub(r->Y, p->Y, p->X);
|
|
fe_mul(r->Z, r->X, q->yplusx);
|
|
fe_mul(r->Y, r->Y, q->yminusx);
|
|
fe_mul(r->T, q->xy2d, p->T);
|
|
fe_add(t0, p->Z, p->Z);
|
|
fe_sub(r->X, r->Z, r->Y);
|
|
fe_add(r->Y, r->Z, r->Y);
|
|
fe_add(r->Z, t0, r->T);
|
|
fe_sub(r->T, t0, r->T);
|
|
}
|
|
void __host__ __device__ ge_p1p1_to_p2(ge_p2* __restrict__ r, const ge_p1p1* __restrict__ p) {
|
|
fe_mul(r->X, p->X, p->T);
|
|
fe_mul(r->Y, p->Y, p->Z);
|
|
fe_mul(r->Z, p->Z, p->T);
|
|
}
|
|
void __host__ __device__ ge_p1p1_to_p3(ge_p3* __restrict__ r, const ge_p1p1* __restrict__ p) {
|
|
fe_mul(r->X, p->X, p->T);
|
|
fe_mul(r->Y, p->Y, p->Z);
|
|
fe_mul(r->Z, p->Z, p->T);
|
|
fe_mul(r->T, p->X, p->Y);
|
|
}
|
|
void __host__ __device__ ge_p2_dbl(ge_p1p1* __restrict__ r, const ge_p2* __restrict__ p) {
|
|
fe t0;
|
|
fe_sq(r->X, p->X);
|
|
fe_sq(r->Z, p->Y);
|
|
fe_sq2(r->T, p->Z);
|
|
fe_add(r->Y, p->X, p->Y);
|
|
fe_sq(t0, r->Y);
|
|
fe_add(r->Y, r->Z, r->X);
|
|
fe_sub(r->Z, r->Z, r->X);
|
|
fe_sub(r->X, t0, r->Y);
|
|
fe_sub(r->T, r->T, r->Z);
|
|
}
|
|
void __host__ __device__ ge_p3_dbl(ge_p1p1* __restrict__ r, const ge_p3* __restrict__ p) {
|
|
ge_p2 q;
|
|
fe_copy(q.X, p->X);
|
|
fe_copy(q.Y, p->Y);
|
|
fe_copy(q.Z, p->Z);
|
|
ge_p2_dbl(r, &q);
|
|
}
|
|
void ge_p3_tobytes(unsigned char* __restrict__ s, const ge_p3* __restrict__ h) {
|
|
fe recip, x, y;
|
|
fe_invert(recip, h->Z);
|
|
fe_mul(x, h->X, recip);
|
|
fe_mul(y, h->Y, recip);
|
|
fe_tobytes(s, y);
|
|
s[31] ^= fe_isnegative(x) << 7;
|
|
}
|
|
static unsigned char __host__ __device__ equal(signed char b, signed char c) {
|
|
unsigned long x = b ^ c;
|
|
x -= 1;
|
|
x >>= 63;
|
|
return (unsigned char)x;
|
|
}
|
|
static void __host__ __device__ cmov(ge_precomp* __restrict__ t, const ge_precomp* __restrict__ u, unsigned char b) {
|
|
fe_cmov(t->yplusx, u->yplusx, b);
|
|
fe_cmov(t->yminusx, u->yminusx, b);
|
|
fe_cmov(t->xy2d, u->xy2d, b);
|
|
}
|
|
static void __host__ __device__ select(ge_precomp* t, int pos, signed char b) {
|
|
ge_precomp minust;
|
|
unsigned long x = b;
|
|
x >>= 63;
|
|
unsigned char bnegative = static_cast<unsigned char>(x);
|
|
unsigned char babs = b - (((-bnegative) & b) << 1);
|
|
fe_1(t->yplusx);
|
|
fe_1(t->yminusx);
|
|
#pragma unroll 10
|
|
for (int i = 0; i < 10; i++) t->xy2d[i] = 0;
|
|
cmov(t, &base[pos][0], equal(babs, 1));
|
|
cmov(t, &base[pos][1], equal(babs, 2));
|
|
cmov(t, &base[pos][2], equal(babs, 3));
|
|
cmov(t, &base[pos][3], equal(babs, 4));
|
|
cmov(t, &base[pos][4], equal(babs, 5));
|
|
cmov(t, &base[pos][5], equal(babs, 6));
|
|
cmov(t, &base[pos][6], equal(babs, 7));
|
|
cmov(t, &base[pos][7], equal(babs, 8));
|
|
fe_copy(minust.yplusx, t->yminusx);
|
|
fe_copy(minust.yminusx, t->yplusx);
|
|
fe_neg(minust.xy2d, t->xy2d);
|
|
cmov(t, &minust, bnegative);
|
|
}
|
|
void __device__ __host__ ge_scalarmult_base(ge_p3* __restrict__ h, const unsigned char* __restrict__ a) {
|
|
signed char e[64], carry;
|
|
ge_p1p1 r;
|
|
ge_p2 s;
|
|
ge_precomp t;
|
|
int i;
|
|
#pragma unroll 32
|
|
for (i = 0; i < 32; i++) {
|
|
e[2 * i] = a[i] & 15;
|
|
e[2 * i + 1] = a[i] >> 4;
|
|
}
|
|
#pragma unroll 63
|
|
for (i = 0, carry = 0; i < 63; i++) {
|
|
e[i] += carry;
|
|
carry = (e[i] + 8) >> 4;
|
|
e[i] -= carry << 4;
|
|
}
|
|
e[63] += carry;
|
|
#pragma unroll 10
|
|
for (int i = 0; i < 10; i++) h->X[i] = 0;
|
|
fe_1(h->Y);
|
|
fe_1(h->Z);
|
|
#pragma unroll 10
|
|
for (int i = 0; i < 10; i++) h->T[i] = 0;
|
|
#pragma unroll
|
|
for (i = 1; i < 64; i += 2) {
|
|
select(&t, i >> 1, e[i]);
|
|
ge_madd(&r, h, &t);
|
|
ge_p1p1_to_p3(h, &r);
|
|
}
|
|
ge_p3_dbl(&r, h);
|
|
ge_p1p1_to_p2(&s, &r); ge_p2_dbl(&r, &s);
|
|
ge_p1p1_to_p2(&s, &r); ge_p2_dbl(&r, &s);
|
|
ge_p1p1_to_p2(&s, &r); ge_p2_dbl(&r, &s);
|
|
ge_p1p1_to_p3(h, &r);
|
|
#pragma unroll 32
|
|
for (i = 0; i < 64; i += 2) {
|
|
select(&t, i >> 1, e[i]);
|
|
ge_madd(&r, h, &t);
|
|
ge_p1p1_to_p3(h, &r);
|
|
}
|
|
} |