sosiski
This commit is contained in:
BIN
.cache/clangd/index/defines.h.A8A7C8E390377B59.idx
Normal file
BIN
.cache/clangd/index/defines.h.A8A7C8E390377B59.idx
Normal file
Binary file not shown.
BIN
.cache/clangd/index/main.cpp.0BC15DB6D83B1DFF.idx
Normal file
BIN
.cache/clangd/index/main.cpp.0BC15DB6D83B1DFF.idx
Normal file
Binary file not shown.
@@ -1,11 +1,17 @@
|
||||
cmake_minimum_required(VERSION 3.31)
|
||||
project(yggm)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
file(GLOB ${PROJECT_NAME}SOURCES *.cpp)
|
||||
file(GLOB ${PROJECT_NAME}HEADERS *.h)
|
||||
|
||||
add_executable(${PROJECT_NAME} sources/main.cpp)
|
||||
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/libs)
|
||||
|
||||
if(CMAKE_BUILD_TYPE)
|
||||
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
add_definitions(-DDEBUG)
|
||||
@@ -28,21 +34,23 @@ else()
|
||||
endif()
|
||||
|
||||
set(CXX_ADDITIONAL_FLAGS
|
||||
"-fomit-frame-pointer \
|
||||
-funroll-loops \
|
||||
-ftree-vectorize \
|
||||
-ftree-slp-vectorize \
|
||||
-fdelete-null-pointer-checks \
|
||||
-fno-exceptions \
|
||||
-fno-rtti \
|
||||
-funsafe-math-optimizations \
|
||||
-fstrict-aliasing \
|
||||
-fstrict-overflow \
|
||||
-fno-stack-protector \
|
||||
-fno-math-errno")
|
||||
"-fomit-frame-pointer \
|
||||
-funroll-loops \
|
||||
-ftree-vectorize \
|
||||
-ftree-slp-vectorize \
|
||||
-fdelete-null-pointer-checks \
|
||||
-fno-exceptions \
|
||||
-fno-rtti \
|
||||
-funsafe-math-optimizations \
|
||||
-fstrict-aliasing \
|
||||
-fstrict-overflow \
|
||||
-fno-stack-protector \
|
||||
-fno-math-errno")
|
||||
|
||||
set(CXXFLAGSR "-march=native -O3 -ffast-math -pipe -Wall -Wextra -Wpedantic -Wconversion -Wuninitialized -Wsign-conversion -flto")
|
||||
set(CXXFLAGSD "-O0 -pipe -Wall -Wextra -Wpedantic -Wconversion -Wuninitialized -Wsign-conversion -g")
|
||||
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CXXFLAGSR} ${CXX_ADDITIONAL_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CXXFLAGSD}")
|
||||
|
||||
target_link_libraries(${PROJECT_NAME} pthread sodium)
|
||||
target_link_libraries(${PROJECT_NAME} sodium)
|
||||
@@ -1,5 +1,5 @@
|
||||
NVCC := nvcc
|
||||
NVCC_FLAGS := -rdc=true -O3 -Xptxas -O3 \
|
||||
NVCC_FLAGS := -rdc=true -O3 -Xptxas -O3 \
|
||||
-use_fast_math -ftz=true -prec-div=false -prec-sqrt=false \
|
||||
-gencode arch=compute_75,code=sm_75 \
|
||||
--default-stream per-thread \
|
||||
|
||||
108
libs/f25519.cu
108
libs/f25519.cu
@@ -27,9 +27,9 @@ void __device__ __host__ fe_1(fe& __restrict__ h) {
|
||||
}
|
||||
//h = f + g
|
||||
void __device__ __host__ fe_add(fe& __restrict__ h, const fe& __restrict__ f, const fe& __restrict__ g) {
|
||||
signed int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
signed int g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
||||
signed int h0 = f0 + g0, h1 = f1 + g1, h2 = f2 + g2, h3 = f3 + g3, h4 = f4 + g4, h5 = f5 + g5, h6 = f6 + g6, h7 = f7 + g7, h8 = f8 + g8, h9 = f9 + g9;
|
||||
int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
int g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
||||
int h0 = f0 + g0, h1 = f1 + g1, h2 = f2 + g2, h3 = f3 + g3, h4 = f4 + g4, h5 = f5 + g5, h6 = f6 + g6, h7 = f7 + g7, h8 = f8 + g8, h9 = f9 + g9;
|
||||
h[0] = h0;
|
||||
h[1] = h1;
|
||||
h[2] = h2;
|
||||
@@ -42,10 +42,10 @@ void __device__ __host__ fe_add(fe& __restrict__ h, const fe& __restrict__ f, co
|
||||
h[9] = h9;
|
||||
}
|
||||
void __device__ __host__ fe_cmov(fe& __restrict__ f, const fe& __restrict__ g, const unsigned b) {
|
||||
signed int x = -b;
|
||||
signed int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
signed int g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
||||
signed int x0 = (f0 ^ g0) & x, x1 = (f1 ^ g1) & x, x2 = (f2 ^ g2) & x, x3 = (f3 ^ g3) & x, x4 = (f4 ^ g4) & x, x5 = (f5 ^ g5) & x, x6 = (f6 ^ g6) & x, x7 = (f7 ^ g7) & x, x8 = (f8 ^ g8) & x, x9 = (f9 ^ g9) & x;
|
||||
int x = -b;
|
||||
int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
int g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
||||
int x0 = (f0 ^ g0) & x, x1 = (f1 ^ g1) & x, x2 = (f2 ^ g2) & x, x3 = (f3 ^ g3) & x, x4 = (f4 ^ g4) & x, x5 = (f5 ^ g5) & x, x6 = (f6 ^ g6) & x, x7 = (f7 ^ g7) & x, x8 = (f8 ^ g8) & x, x9 = (f9 ^ g9) & x;
|
||||
f[0] = f0 ^ x0;
|
||||
f[1] = f1 ^ x1;
|
||||
f[2] = f2 ^ x2;
|
||||
@@ -59,7 +59,7 @@ void __device__ __host__ fe_cmov(fe& __restrict__ f, const fe& __restrict__ g, c
|
||||
}
|
||||
//h[0..9] = f[0..9]
|
||||
void __device__ __host__ fe_copy(fe& __restrict__ h, const fe& __restrict__ f) {
|
||||
signed int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
h[0] = f0;
|
||||
h[1] = f1;
|
||||
h[2] = f2;
|
||||
@@ -131,19 +131,19 @@ void __device__ __host__ fe_invert(fe& __restrict__ out, const fe& __restrict__
|
||||
fe_mul(t1, t2, t1);
|
||||
fe_mul(t2, t1, t1);
|
||||
#pragma unroll 49
|
||||
for (signed int i = 0; i < 49; ++i) {
|
||||
for (int i = 0; i < 49; ++i) {
|
||||
fe_mul(t2, t2, t2);
|
||||
}
|
||||
fe_mul(t2, t2, t1);
|
||||
fe_mul(t3, t2, t2);
|
||||
#pragma unroll 99
|
||||
for (signed int i = 0; i < 99; ++i) {
|
||||
for (int i = 0; i < 99; ++i) {
|
||||
fe_mul(t3, t3, t3);
|
||||
}
|
||||
fe_mul(t2, t3, t2);
|
||||
fe_mul(t2, t2, t2);
|
||||
#pragma unroll 49
|
||||
for (signed int i = 0; i < 49; ++i) {
|
||||
for (int i = 0; i < 49; ++i) {
|
||||
fe_mul(t2, t2, t2);
|
||||
}
|
||||
fe_mul(t1, t2, t1);
|
||||
@@ -161,29 +161,29 @@ int __device__ __host__ fe_isnegative(const fe& __restrict__ f) {
|
||||
}
|
||||
//h = f * g
|
||||
void __device__ __host__ fe_mul(fe& __restrict__ h, const fe& __restrict__ f, const fe& __restrict__ g) {
|
||||
signed long f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
signed long g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
||||
signed long f0g0 = f0 * g0, f0g1 = f0 * g1, f0g2 = f0 * g2, f0g3 = f0 * g3, f0g4 = f0 * g4, f0g5 = f0 * g5, f0g6 = f0 * g6, f0g7 = f0 * g7, f0g8 = f0 * g8, f0g9 = f0 * g9;
|
||||
signed long f1g0 = f1 * g0, f1g1_2 = f1 * g1 << 1L, f1g2 = f1 * g2, f1g3_2 = f1 * g3 << 1L, f1g4 = f1 * g4, f1g5_2 = f1 * g5 << 1L, f1g6 = f1 * g6, f1g7_2 = f1 * g7 << 1L, f1g8 = f1 * g8, f1g9_38 = f1 * g9 * 38L;
|
||||
signed long f2g0 = f2 * g0, f2g1 = f2 * g1, f2g2 = f2 * g2, f2g3 = f2 * g3, f2g4 = f2 * g4, f2g5 = f2 * g5, f2g6 = f2 * g6, f2g7 = f2 * g7, f2g8_19 = f2 * g8 * 19L, f2g9_19 = f2 * g9 * 38L >> 1L;
|
||||
signed long f3g0 = f3 * g0, f3g1_2 = f3 * g1 << 1L, f3g2 = f3 * g2, f3g3_2 = f3 * g3 << 1L, f3g4 = f3 * g4, f3g5_2 = f3 * g5 << 1L, f3g6 = f3 * g6, f3g7_38 = f3 * g7 * 38L, f3g8_19 = f3 * g8 * 19L, f3g9_38 = f3 * g9 * 38L;
|
||||
signed long f4g0 = f4 * g0, f4g1 = f4 * g1, f4g2 = f4 * g2, f4g3 = f4 * g3, f4g4 = f4 * g4, f4g5 = f4 * g5, f4g6_19 = f4 * g6 * 19L, f4g7_19 = f4 * g7 * 38L >> 1L, f4g8_19 = f4 * g8 * 19L, f4g9_19 = f4 * g9 * 38L >> 1L;
|
||||
signed long f5g0 = f5 * g0, f5g1_2 = f5 * g1 << 1L, f5g2 = f5 * g2, f5g3_2 = f5 * g3 << 1L, f5g4 = f5 * g4, f5g5_38 = f5 * g5 * 38L, f5g6_19 = f5 * g6 * 19L, f5g7_38 = f5 * g7 * 38L, f5g8_19 = f5 * g8 * 19L, f5g9_38 = f5 * g9 * 38L;
|
||||
signed long f6g0 = f6 * g0, f6g1 = f6 * g1, f6g2 = f6 * g2, f6g3 = f6 * g3, f6g4_19 = f6 * g4 * 19L, f6g5_19 = f6 * g5 * 38L >> 1L, f6g6_19 = f6 * g6 * 19L, f6g7_19 = f6 * g7 * 38L >> 1L, f6g8_19 = f6 * g8 * 19L, f6g9_19 = f6 * g9 * 38L >> 1L;
|
||||
signed long f7g0 = f7 * g0, f7g1_2 = f7 * g1 << 1L, f7g2 = f7 * g2, f7g3_38 = f7 * g3 * 38L, f7g4_19 = f7 * g4 * 19L, f7g5_38 = f7 * g5 * 38L, f7g6_19 = f7 * g6 * 19L, f7g7_38 = f7 * g7 * 38L, f7g8_19 = f7 * g8 * 19L, f7g9_38 = f7 * g9 * 38L;
|
||||
signed long f8g0 = f8 * g0, f8g1 = f8 * g1, f8g2_19 = f8 * g2 * 19L, f8g3_19 = f8 * g3 * 38L >> 1L, f8g4_19 = f8 * g4 * 19L, f8g5_19 = f8 * g5 * 38L >> 1L, f8g6_19 = f8 * g6 * 19L, f8g7_19 = f8 * g7 * 38L >> 1L, f8g8_19 = f8 * g8 * 19L, f8g9_19 = f8 * g9 * 38L >> 1L;
|
||||
signed long f9g0 = f9 * g0, f9g1_38 = f9 * g1 * 38L, f9g2_19 = f9 * g2 * 19L, f9g3_38 = f9 * g3 * 38L, f9g4_19 = f9 * g4 * 19L, f9g5_38 = f9 * g5 * 38L, f9g6_19 = f9 * g6 * 19L, f9g7_38 = f9 * g7 * 38L, f9g8_19 = f9 * g8 * 19L, f9g9_38 = f9 * g9 * 38L;
|
||||
signed long h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
|
||||
signed long h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
|
||||
signed long h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
|
||||
signed long h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
|
||||
signed long h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
|
||||
signed long h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
|
||||
signed long h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
|
||||
signed long h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
|
||||
signed long h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
|
||||
signed long h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0;
|
||||
signed long carry = (h0 + 33554432L) >> 26L; h1 += carry; h0 -= carry << 26L;
|
||||
long f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
long g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
||||
long f0g0 = f0 * g0, f0g1 = f0 * g1, f0g2 = f0 * g2, f0g3 = f0 * g3, f0g4 = f0 * g4, f0g5 = f0 * g5, f0g6 = f0 * g6, f0g7 = f0 * g7, f0g8 = f0 * g8, f0g9 = f0 * g9;
|
||||
long f1g0 = f1 * g0, f1g1_2 = f1 * g1 << 1L, f1g2 = f1 * g2, f1g3_2 = f1 * g3 << 1L, f1g4 = f1 * g4, f1g5_2 = f1 * g5 << 1L, f1g6 = f1 * g6, f1g7_2 = f1 * g7 << 1L, f1g8 = f1 * g8, f1g9_38 = f1 * g9 * 38L;
|
||||
long f2g0 = f2 * g0, f2g1 = f2 * g1, f2g2 = f2 * g2, f2g3 = f2 * g3, f2g4 = f2 * g4, f2g5 = f2 * g5, f2g6 = f2 * g6, f2g7 = f2 * g7, f2g8_19 = f2 * g8 * 19L, f2g9_19 = f2 * g9 * 38L >> 1L;
|
||||
long f3g0 = f3 * g0, f3g1_2 = f3 * g1 << 1L, f3g2 = f3 * g2, f3g3_2 = f3 * g3 << 1L, f3g4 = f3 * g4, f3g5_2 = f3 * g5 << 1L, f3g6 = f3 * g6, f3g7_38 = f3 * g7 * 38L, f3g8_19 = f3 * g8 * 19L, f3g9_38 = f3 * g9 * 38L;
|
||||
long f4g0 = f4 * g0, f4g1 = f4 * g1, f4g2 = f4 * g2, f4g3 = f4 * g3, f4g4 = f4 * g4, f4g5 = f4 * g5, f4g6_19 = f4 * g6 * 19L, f4g7_19 = f4 * g7 * 38L >> 1L, f4g8_19 = f4 * g8 * 19L, f4g9_19 = f4 * g9 * 38L >> 1L;
|
||||
long f5g0 = f5 * g0, f5g1_2 = f5 * g1 << 1L, f5g2 = f5 * g2, f5g3_2 = f5 * g3 << 1L, f5g4 = f5 * g4, f5g5_38 = f5 * g5 * 38L, f5g6_19 = f5 * g6 * 19L, f5g7_38 = f5 * g7 * 38L, f5g8_19 = f5 * g8 * 19L, f5g9_38 = f5 * g9 * 38L;
|
||||
long f6g0 = f6 * g0, f6g1 = f6 * g1, f6g2 = f6 * g2, f6g3 = f6 * g3, f6g4_19 = f6 * g4 * 19L, f6g5_19 = f6 * g5 * 38L >> 1L, f6g6_19 = f6 * g6 * 19L, f6g7_19 = f6 * g7 * 38L >> 1L, f6g8_19 = f6 * g8 * 19L, f6g9_19 = f6 * g9 * 38L >> 1L;
|
||||
long f7g0 = f7 * g0, f7g1_2 = f7 * g1 << 1L, f7g2 = f7 * g2, f7g3_38 = f7 * g3 * 38L, f7g4_19 = f7 * g4 * 19L, f7g5_38 = f7 * g5 * 38L, f7g6_19 = f7 * g6 * 19L, f7g7_38 = f7 * g7 * 38L, f7g8_19 = f7 * g8 * 19L, f7g9_38 = f7 * g9 * 38L;
|
||||
long f8g0 = f8 * g0, f8g1 = f8 * g1, f8g2_19 = f8 * g2 * 19L, f8g3_19 = f8 * g3 * 38L >> 1L, f8g4_19 = f8 * g4 * 19L, f8g5_19 = f8 * g5 * 38L >> 1L, f8g6_19 = f8 * g6 * 19L, f8g7_19 = f8 * g7 * 38L >> 1L, f8g8_19 = f8 * g8 * 19L, f8g9_19 = f8 * g9 * 38L >> 1L;
|
||||
long f9g0 = f9 * g0, f9g1_38 = f9 * g1 * 38L, f9g2_19 = f9 * g2 * 19L, f9g3_38 = f9 * g3 * 38L, f9g4_19 = f9 * g4 * 19L, f9g5_38 = f9 * g5 * 38L, f9g6_19 = f9 * g6 * 19L, f9g7_38 = f9 * g7 * 38L, f9g8_19 = f9 * g8 * 19L, f9g9_38 = f9 * g9 * 38L;
|
||||
long h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
|
||||
long h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
|
||||
long h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
|
||||
long h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
|
||||
long h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
|
||||
long h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
|
||||
long h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
|
||||
long h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
|
||||
long h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
|
||||
long h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0;
|
||||
long carry = (h0 + 33554432L) >> 26L; h1 += carry; h0 -= carry << 26L;
|
||||
carry = (h4 + 33554432L) >> 26L; h5 += carry; h4 -= carry << 26L;
|
||||
carry = (h1 + 16777216L) >> 25L; h2 += carry; h1 -= carry << 25L;
|
||||
carry = (h5 + 16777216L) >> 25L; h6 += carry; h5 -= carry << 25L;
|
||||
@@ -208,8 +208,8 @@ void __device__ __host__ fe_mul(fe& __restrict__ h, const fe& __restrict__ f, co
|
||||
}
|
||||
//h = -f
|
||||
void __host__ __device__ fe_neg(fe& __restrict__ h, const fe& __restrict__ f) {
|
||||
signed int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
signed int h0 = -f0, h1 = -f1, h2 = -f2, h3 = -f3, h4 = -f4, h5 = -f5, h6 = -f6, h7 = -f7, h8 = -f8, h9 = -f9;
|
||||
int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
int h0 = -f0, h1 = -f1, h2 = -f2, h3 = -f3, h4 = -f4, h5 = -f5, h6 = -f6, h7 = -f7, h8 = -f8, h9 = -f9;
|
||||
h[0] = h0;
|
||||
h[1] = h1;
|
||||
h[2] = h2;
|
||||
@@ -222,19 +222,19 @@ void __host__ __device__ fe_neg(fe& __restrict__ h, const fe& __restrict__ f) {
|
||||
h[9] = h9;
|
||||
}
|
||||
void __host__ __device__ fe_mul2(fe& __restrict__ h, const fe& __restrict__ f) {
|
||||
const signed long f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
signed long f0_2 = f0 << 1, f1_2 = f1 << 1, f2_2 = f2 << 1, f3_2 = f3 << 1, f4_2 = f4 << 1, f5_2 = f5 << 1, f6_2 = f6 << 1, f7_2 = f7 << 1;
|
||||
signed int f5_38 = 38 * f5, f6_19 = 19 * f6, f7_38 = 38 * f7, f8_19 = 19 * f8, f9_38 = 38 * f9;
|
||||
signed long f0f0 = f0 * f0, f0f1_2 = f0_2 * f1, f0f2_2 = f0_2 * f2, f0f3_2 = f0_2 * f3, f0f4_2 = f0_2 * f4, f0f5_2 = f0_2 * f5, f0f6_2 = f0_2 * f6, f0f7_2 = f0_2 * f7, f0f8_2 = f0_2 * f8, f0f9_2 = f0_2 * f9;
|
||||
signed long f1f1_2 = f1_2 * f1, f1f2_2 = f1_2 * f2, f1f3_4 = f1_2 * f3_2, f1f4_2 = f1_2 * f4, f1f5_4 = f1_2 * f5_2, f1f6_2 = f1_2 * f6, f1f7_4 = f1_2 * f7_2, f1f8_2 = f1_2 * f8, f1f9_76 = f1_2 * f9_38;
|
||||
signed long f2f2 = f2 * f2, f2f3_2 = f2_2 * f3, f2f4_2 = f2_2 * f4, f2f5_2 = f2_2 * f5, f2f6_2 = f2_2 * f6, f2f7_2 = f2_2 * f7, f2f8_38 = f2_2 * f8_19, f2f9_38 = f2 * f9_38;
|
||||
signed long f3f3_2 = f3_2 * f3, f3f4_2 = f3_2 * f4, f3f5_4 = f3_2 * f5_2, f3f6_2 = f3_2 * f6, f3f7_76 = f3_2 * f7_38, f3f8_38 = f3_2 * f8_19, f3f9_76 = f3_2 * f9_38;
|
||||
signed long f4f4 = f4 * f4, f4f5_2 = f4_2 * f5, f4f6_38 = f4_2 * f6_19, f4f7_38 = f4 * f7_38, f4f8_38 = f4_2 * f8_19, f4f9_38 = f4 * f9_38;
|
||||
signed long f5f5_38 = f5 * f5_38, f5f6_38 = f5_2 * f6_19, f5f7_76 = f5_2 * f7_38, f5f8_38 = f5_2 * f8_19, f5f9_76 = f5_2 * f9_38;
|
||||
signed long f6f6_19 = f6 * f6_19, f6f7_38 = f6 * f7_38, f6f8_38 = f6_2 * f8_19, f6f9_38 = f6 * f9_38;
|
||||
signed long f7f7_38 = f7 * f7_38, f7f8_38 = f7_2 * f8_19, f7f9_76 = f7_2 * f9_38;
|
||||
signed long f8f8_19 = f8 * f8_19, f8f9_38 = f8 * f9_38;
|
||||
signed long f9f9_38 = f9 * f9_38;
|
||||
const long f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
long f0_2 = f0 << 1, f1_2 = f1 << 1, f2_2 = f2 << 1, f3_2 = f3 << 1, f4_2 = f4 << 1, f5_2 = f5 << 1, f6_2 = f6 << 1, f7_2 = f7 << 1;
|
||||
int f5_38 = 38 * f5, f6_19 = 19 * f6, f7_38 = 38 * f7, f8_19 = 19 * f8, f9_38 = 38 * f9;
|
||||
long f0f0 = f0 * f0, f0f1_2 = f0_2 * f1, f0f2_2 = f0_2 * f2, f0f3_2 = f0_2 * f3, f0f4_2 = f0_2 * f4, f0f5_2 = f0_2 * f5, f0f6_2 = f0_2 * f6, f0f7_2 = f0_2 * f7, f0f8_2 = f0_2 * f8, f0f9_2 = f0_2 * f9;
|
||||
long f1f1_2 = f1_2 * f1, f1f2_2 = f1_2 * f2, f1f3_4 = f1_2 * f3_2, f1f4_2 = f1_2 * f4, f1f5_4 = f1_2 * f5_2, f1f6_2 = f1_2 * f6, f1f7_4 = f1_2 * f7_2, f1f8_2 = f1_2 * f8, f1f9_76 = f1_2 * f9_38;
|
||||
long f2f2 = f2 * f2, f2f3_2 = f2_2 * f3, f2f4_2 = f2_2 * f4, f2f5_2 = f2_2 * f5, f2f6_2 = f2_2 * f6, f2f7_2 = f2_2 * f7, f2f8_38 = f2_2 * f8_19, f2f9_38 = f2 * f9_38;
|
||||
long f3f3_2 = f3_2 * f3, f3f4_2 = f3_2 * f4, f3f5_4 = f3_2 * f5_2, f3f6_2 = f3_2 * f6, f3f7_76 = f3_2 * f7_38, f3f8_38 = f3_2 * f8_19, f3f9_76 = f3_2 * f9_38;
|
||||
long f4f4 = f4 * f4, f4f5_2 = f4_2 * f5, f4f6_38 = f4_2 * f6_19, f4f7_38 = f4 * f7_38, f4f8_38 = f4_2 * f8_19, f4f9_38 = f4 * f9_38;
|
||||
long f5f5_38 = f5 * f5_38, f5f6_38 = f5_2 * f6_19, f5f7_76 = f5_2 * f7_38, f5f8_38 = f5_2 * f8_19, f5f9_76 = f5_2 * f9_38;
|
||||
long f6f6_19 = f6 * f6_19, f6f7_38 = f6 * f7_38, f6f8_38 = f6_2 * f8_19, f6f9_38 = f6 * f9_38;
|
||||
long f7f7_38 = f7 * f7_38, f7f8_38 = f7_2 * f8_19, f7f9_76 = f7_2 * f9_38;
|
||||
long f8f8_19 = f8 * f8_19, f8f9_38 = f8 * f9_38;
|
||||
long f9f9_38 = f9 * f9_38;
|
||||
long h0 = (f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38) << 1;
|
||||
long h1 = (f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38) << 1;
|
||||
long h2 = (f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19) << 1;
|
||||
@@ -245,7 +245,7 @@ void __host__ __device__ fe_mul2(fe& __restrict__ h, const fe& __restrict__ f) {
|
||||
long h7 = (f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38) << 1;
|
||||
long h8 = (f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38) << 1;
|
||||
long h9 = (f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2) << 1;
|
||||
signed long carry = (h0 + 33554432L) >> 26L; h1 += carry; h0 -= carry << 26L;
|
||||
long carry = (h0 + 33554432L) >> 26L; h1 += carry; h0 -= carry << 26L;
|
||||
carry = (h4 + 33554432L) >> 26L; h5 += carry; h4 -= carry << 26L;
|
||||
carry = (h1 + 16777216L) >> 25L; h2 += carry; h1 -= carry << 25L;
|
||||
carry = (h5 + 16777216L) >> 25L; h6 += carry; h5 -= carry << 25L;
|
||||
@@ -269,9 +269,9 @@ void __host__ __device__ fe_mul2(fe& __restrict__ h, const fe& __restrict__ f) {
|
||||
h[9] = static_cast<int>(h9);
|
||||
}
|
||||
void __device__ __host__ fe_sub(fe& __restrict__ h, const fe& __restrict__ f, const fe& __restrict__ g) {
|
||||
signed int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
signed int g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
||||
signed int h0 = f0 - g0, h1 = f1 - g1, h2 = f2 - g2, h3 = f3 - g3, h4 = f4 - g4, h5 = f5 - g5, h6 = f6 - g6, h7 = f7 - g7, h8 = f8 - g8, h9 = f9 - g9;
|
||||
int f0 = f[0], f1 = f[1], f2 = f[2], f3 = f[3], f4 = f[4], f5 = f[5], f6 = f[6], f7 = f[7], f8 = f[8], f9 = f[9];
|
||||
int g0 = g[0], g1 = g[1], g2 = g[2], g3 = g[3], g4 = g[4], g5 = g[5], g6 = g[6], g7 = g[7], g8 = g[8], g9 = g[9];
|
||||
int h0 = f0 - g0, h1 = f1 - g1, h2 = f2 - g2, h3 = f3 - g3, h4 = f4 - g4, h5 = f5 - g5, h6 = f6 - g6, h7 = f7 - g7, h8 = f8 - g8, h9 = f9 - g9;
|
||||
h[0] = h0;
|
||||
h[1] = h1;
|
||||
h[2] = h2;
|
||||
@@ -284,7 +284,7 @@ void __device__ __host__ fe_sub(fe& __restrict__ h, const fe& __restrict__ f, co
|
||||
h[9] = h9;
|
||||
}
|
||||
void __device__ __host__ fe_tobytes(unsigned char* __restrict__ s, const fe& __restrict__ h) {
|
||||
signed int h0 = h[0], h1 = h[1], h2 = h[2], h3 = h[3], h4 = h[4], h5 = h[5], h6 = h[6], h7 = h[7], h8 = h[8], h9 = h[9], carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9, q;
|
||||
int h0 = h[0], h1 = h[1], h2 = h[2], h3 = h[3], h4 = h[4], h5 = h[5], h6 = h[6], h7 = h[7], h8 = h[8], h9 = h[9], carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9, q;
|
||||
q = (19 * h9 + (1 << 24)) >> 25;
|
||||
q = (h0 + q) >> 26;
|
||||
q = (h1 + q) >> 25;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "../libs/defines.h"
|
||||
#include <defines.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <atomic>
|
||||
#include <immintrin.h>
|
||||
|
||||
@@ -1,26 +1,27 @@
|
||||
#include <stdio.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <curand_kernel.h>
|
||||
#include <sha512.cuh>
|
||||
#include <defines.h>
|
||||
#include <ed25519.cuh>
|
||||
#include <edsign.cuh>
|
||||
#include <string.cuh>
|
||||
#include <keymanip.cuh>
|
||||
#include <defines.h>
|
||||
#include <sha512.cuh>
|
||||
#include <stdio.h>
|
||||
#include <string.cuh>
|
||||
__device__ unsigned d_high = 0x14;
|
||||
__device__ int parameters(const char* arg) noexcept {
|
||||
if ((cstring_find(arg, "--altitude") == 0 && cstring_length(arg) == 10) ||
|
||||
(cstring_find(arg, "-a") == 0 && cstring_length(arg) == 2)) {
|
||||
if ((cstring_find(arg, "--altitude") == 0 && cstring_length(arg) == 10) || (cstring_find(arg, "-a") == 0 && cstring_length(arg) == 2)) {
|
||||
return 777;
|
||||
}
|
||||
int space_index = cstring_find(arg, " ");
|
||||
if (space_index == -1) return 0;
|
||||
if (space_index == -1)
|
||||
return 0;
|
||||
const int substr_start = space_index + 1;
|
||||
char sub_arg[256];
|
||||
extract_substring(arg, substr_start, sub_arg, 256);
|
||||
if (cstring_find(arg, "--altitude") != -1 || cstring_find(arg, "-a") != -1) {
|
||||
unsigned tmp_high;
|
||||
if (cstring_to_ull(sub_arg, &tmp_high) != 0) return 1;
|
||||
if (cstring_to_ull(sub_arg, &tmp_high) != 0)
|
||||
return 1;
|
||||
d_high = tmp_high;
|
||||
}
|
||||
return 0;
|
||||
@@ -98,7 +99,7 @@ __device__ __forceinline__ void rmbytes(unsigned char* __restrict__ buf, curandS
|
||||
__global__ void KeyGenKernel(curandState* __restrict__ randStates) {
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
curandState localState = randStates[idx];
|
||||
#ifdef DEBUG
|
||||
#ifdef DEBUG
|
||||
unsigned x = 0;
|
||||
#endif
|
||||
while (WHCOND) {
|
||||
@@ -111,23 +112,23 @@ __global__ void KeyGenKernel(curandState* __restrict__ randStates) {
|
||||
Key32 inv;
|
||||
invertKey(keys.PublicKey, inv);
|
||||
getRawAddress(zeros, inv, raw);
|
||||
#ifdef RELEASE
|
||||
#ifdef RELEASE
|
||||
printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
|
||||
#else
|
||||
#else
|
||||
printf("\nIPv6:\t%s\nFK:\t%s%s\n", getAddr(raw).data, ktos(keys.PrivateKey).data, ktos(keys.PublicKey).data);
|
||||
#endif
|
||||
#endif
|
||||
d_high = zeros;
|
||||
}
|
||||
#ifdef DEBUG
|
||||
#ifdef DEBUG
|
||||
if ((++x & 0xFF) == 0) {
|
||||
printf("\rIters: %d", x);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
|
||||
int main(int argc, char* argv[]) {
|
||||
printf("BuildType: %s\n", __BUILDTYPE__);
|
||||
int* d_result, mBpSM, h_high;
|
||||
int *d_result, mBpSM, h_high;
|
||||
char** d_argv;
|
||||
cudaDeviceProp prop;
|
||||
curandState* rst;
|
||||
@@ -165,7 +166,7 @@ int main(int argc, char* argv[]) {
|
||||
free(h_seeds);
|
||||
cudaFree(d_seeds);
|
||||
#endif
|
||||
KeyGenKernel << <THDIVTHPB, THREADSPB >> > (rst);
|
||||
KeyGenKernel<<<THDIVTHPB, THREADSPB>>>(rst);
|
||||
cudaFree(rst);
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user