fixes
This commit is contained in:
parent
b778652ef8
commit
7e1705c720
@ -1,5 +1,5 @@
|
|||||||
NVCC := nvcc
|
NVCC := nvcc
|
||||||
NVCC_FLAGS := -rdc=true -O3 -use_fast_math -Xptxas -O3 \
|
NVCC_FLAGS := -rdc=true -O3 -Xptxas -O3 \
|
||||||
-gencode arch=compute_75,code=sm_75 \
|
-gencode arch=compute_75,code=sm_75 \
|
||||||
--default-stream per-thread \
|
--default-stream per-thread \
|
||||||
-Wno-deprecated-gpu-targets \
|
-Wno-deprecated-gpu-targets \
|
||||||
|
@ -94,6 +94,7 @@ __device__ __forceinline__ void ed25519_copy(struct ed25519_pt* dst, const struc
|
|||||||
}
|
}
|
||||||
__device__ void ed25519_smult(struct ed25519_pt* r_out, const struct ed25519_pt* p, const unsigned char* e) {
|
__device__ void ed25519_smult(struct ed25519_pt* r_out, const struct ed25519_pt* p, const unsigned char* e) {
|
||||||
struct ed25519_pt r = ed25519_neutral;
|
struct ed25519_pt r = ed25519_neutral;
|
||||||
|
#pragma unroll 256
|
||||||
for (int i = 255; i >= 0; i--) {
|
for (int i = 255; i >= 0; i--) {
|
||||||
struct ed25519_pt s;
|
struct ed25519_pt s;
|
||||||
ed25519_double(&r, &r);
|
ed25519_double(&r, &r);
|
||||||
|
@ -66,7 +66,7 @@ __device__ __forceinline__ unsigned long rot64(unsigned long x, int bits) {
|
|||||||
}
|
}
|
||||||
__device__ void sha512_block(sha512_state* s, const unsigned char* blk) {
|
__device__ void sha512_block(sha512_state* s, const unsigned char* blk) {
|
||||||
unsigned long w[16];
|
unsigned long w[16];
|
||||||
#pragma unroll
|
#pragma unroll 16
|
||||||
for (int i = 0; i < 16; i++) {
|
for (int i = 0; i < 16; i++) {
|
||||||
w[i] = load64(blk + i * 8);
|
w[i] = load64(blk + i * 8);
|
||||||
}
|
}
|
||||||
@ -78,7 +78,7 @@ __device__ void sha512_block(sha512_state* s, const unsigned char* blk) {
|
|||||||
unsigned long f = s->h[5];
|
unsigned long f = s->h[5];
|
||||||
unsigned long g = s->h[6];
|
unsigned long g = s->h[6];
|
||||||
unsigned long h = s->h[7];
|
unsigned long h = s->h[7];
|
||||||
#pragma unroll
|
#pragma unroll 80
|
||||||
for (int i = 0; i < 80; i++) {
|
for (int i = 0; i < 80; i++) {
|
||||||
const int idx = i & 15;
|
const int idx = i & 15;
|
||||||
const int idx1 = (i + 1) & 15;
|
const int idx1 = (i + 1) & 15;
|
||||||
|
@ -59,6 +59,7 @@ inline std::string getAddress(const Address& rawAddr) noexcept {
|
|||||||
inline std::string KeyToString(const unsigned char* key) noexcept {
|
inline std::string KeyToString(const unsigned char* key) noexcept {
|
||||||
char result[65];
|
char result[65];
|
||||||
const char* hexDigits = "0123456789abcdef";
|
const char* hexDigits = "0123456789abcdef";
|
||||||
|
#pragma unroll
|
||||||
for (unsigned char i = 0; i < 32; i++) {
|
for (unsigned char i = 0; i < 32; i++) {
|
||||||
result[2 * i] = hexDigits[key[i] >> 4];
|
result[2 * i] = hexDigits[key[i] >> 4];
|
||||||
result[2 * i + 1] = hexDigits[key[i] & 0x0F];
|
result[2 * i + 1] = hexDigits[key[i] & 0x0F];
|
||||||
@ -109,7 +110,7 @@ inline void invertKey(const unsigned char* __restrict key, Key& inverted) noexce
|
|||||||
return static_cast<long long>(state * 2685821657736338717);
|
return static_cast<long long>(state * 2685821657736338717);
|
||||||
}
|
}
|
||||||
inline void rmbytes(unsigned char* __restrict buf, unsigned char size, unsigned long& state) noexcept {
|
inline void rmbytes(unsigned char* __restrict buf, unsigned char size, unsigned long& state) noexcept {
|
||||||
for (unsigned char x = 0; x < size / 32; x++) {
|
for (unsigned char x = 0; x < 32; x++) {
|
||||||
_mm256_store_si256((__m256i*) & buf[x * 32], _mm256_set_epi64x(xorshift64(state), xorshift64(state), xorshift64(state), xorshift64(state)));
|
_mm256_store_si256((__m256i*) & buf[x * 32], _mm256_set_epi64x(xorshift64(state), xorshift64(state), xorshift64(state), xorshift64(state)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -12,7 +12,7 @@ struct KeysBox {
|
|||||||
Key PublicKey;
|
Key PublicKey;
|
||||||
Key PrivateKey;
|
Key PrivateKey;
|
||||||
};
|
};
|
||||||
__device__ unsigned high = 0x10;
|
__device__ unsigned d_high = 0x10;
|
||||||
__device__ int parameters(const char* arg) {
|
__device__ int parameters(const char* arg) {
|
||||||
int space_index = cstring_find(arg, " ");
|
int space_index = cstring_find(arg, " ");
|
||||||
if (space_index != -1) {
|
if (space_index != -1) {
|
||||||
@ -26,7 +26,7 @@ __device__ int parameters(const char* arg) {
|
|||||||
unsigned tmp_high;
|
unsigned tmp_high;
|
||||||
int ret = cstring_to_ull(sub_arg, &tmp_high);
|
int ret = cstring_to_ull(sub_arg, &tmp_high);
|
||||||
if (ret != 0) return 1;
|
if (ret != 0) return 1;
|
||||||
high = tmp_high;
|
d_high = tmp_high;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -63,19 +63,19 @@ struct ds46 {
|
|||||||
/**
 * Render a 32-byte key as a NUL-terminated lowercase hexadecimal string.
 *
 * @param key  Pointer to at least 32 readable bytes.
 * @return     A ds64 whose data[0..63] hold the hex digits (high nibble first
 *             for each byte) and whose data[64] holds the terminator.
 *
 * NOTE(review): ds64 is declared outside this view; this assumes its `data`
 * member has room for at least 65 chars - confirm.
 */
__device__ ds64 ktos(const unsigned char* key) noexcept {
    ds64 str;
    const char* hexDigits = "0123456789abcdef";
#pragma unroll 32
    for (unsigned char i = 0; i < 32; i++) {
        str.data[2 * i] = hexDigits[key[i] >> 4];
        str.data[2 * i + 1] = hexDigits[key[i] & 0x0F];
    }
    // The loop fills indices 0..63, so the terminator belongs at 64. Writing it
    // at 65 would leave data[64] uninitialised and let a "%s" print run past
    // the digits.
    str.data[64] = '\0';
    return str;
}
|
||||||
__device__ ds46 getAddr(const unsigned char rawAddr[16]) noexcept {
|
__device__ ds46 getAddr(const unsigned char rawAddr[16]) noexcept {
|
||||||
ds46 addrStr;
|
ds46 addrStr;
|
||||||
const char* hexDigits = "0123456789abcdef";
|
const char* hexDigits = "0123456789abcdef";
|
||||||
unsigned pos = 0;
|
unsigned pos = 0;
|
||||||
#pragma unroll
|
#pragma unroll 8
|
||||||
for (unsigned char group = 0; group < 8; group++) {
|
for (unsigned char group = 0; group < 8; group++) {
|
||||||
int idx = group * 2;
|
int idx = group * 2;
|
||||||
addrStr.data[pos++] = hexDigits[rawAddr[idx] >> 4];
|
addrStr.data[pos++] = hexDigits[rawAddr[idx] >> 4];
|
||||||
@ -132,14 +132,12 @@ __device__ __forceinline__ unsigned long long xorshift128plus(unsigned long long
|
|||||||
state[1] = x;
|
state[1] = x;
|
||||||
return x + y;
|
return x + y;
|
||||||
}
|
}
|
||||||
/**
 * Fill the first 32 bytes of `buf` with pseudo-random bytes.
 *
 * Each output byte is the low 8 bits of one xorshift128plus(state) call, so
 * the generator state is advanced 32 times per invocation.
 *
 * @param buf    Destination; must have room for 32 bytes.
 * @param state  Generator state passed through to xorshift128plus.
 */
__device__ __forceinline__ void rmbytes(unsigned char* buf, unsigned long long* state) {
    constexpr int kByteCount = 32;
#pragma unroll 32
    for (int offset = 0; offset < kByteCount; ++offset) {
        const unsigned long long word = xorshift128plus(state);
        buf[offset] = static_cast<unsigned char>(word & 0xFF);
    }
}
|
||||||
/**
 * Write the bitwise complement of a 32-byte key into `inverted`.
 *
 * @param key       Source; 32 bytes are read.
 * @param inverted  Destination; 32 bytes are written, each the complement of
 *                  the byte at the same position in `key`.
 */
__device__ __forceinline__ void invertKey(const unsigned char* key, unsigned char* inverted) {
#pragma unroll 32
    for (int pos = 0; pos < 32; ++pos) {
        // Complement and truncate back to one byte; same result as XOR with 0xFF.
        inverted[pos] = static_cast<unsigned char>(~key[pos]);
    }
}
|
||||||
__global__ void KeyGen(curandState* randStates) {
|
__global__ void KeyGen(curandState* randStates) {
|
||||||
@ -148,17 +146,16 @@ __global__ void KeyGen(curandState* randStates) {
|
|||||||
xorshiftState[0] = curand(&localState);
|
xorshiftState[0] = curand(&localState);
|
||||||
xorshiftState[1] = curand(&localState);
|
xorshiftState[1] = curand(&localState);
|
||||||
Key seed;
|
Key seed;
|
||||||
|
KeysBox keys;
|
||||||
while (true) {
|
while (true) {
|
||||||
rmbytes(seed, sizeof(seed), xorshiftState);
|
rmbytes(seed, xorshiftState);
|
||||||
KeysBox keys;
|
|
||||||
ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);
|
ed25519_keygen(keys.PrivateKey, keys.PublicKey, seed);
|
||||||
unsigned zeros = getZeros(keys.PublicKey);
|
if (unsigned zeros = getZeros(keys.PublicKey); zeros > atomicMax((unsigned*)&d_high, zeros)) {
|
||||||
if (zeros > atomicMax((unsigned*)&high, zeros)) {
|
|
||||||
Address raw;
|
Address raw;
|
||||||
Key inv;
|
Key inv;
|
||||||
invertKey(keys.PublicKey, inv);
|
invertKey(keys.PublicKey, inv);
|
||||||
getRawAddress(zeros, inv, raw);
|
getRawAddress(zeros, inv, raw);
|
||||||
printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\nFK:\t%s%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data, ktos(keys.PrivateKey).data, ktos(keys.PublicKey).data);
|
printf("\nIPv6:\t%s\nPK:\t%s\nSK:\t%s\n", getAddr(raw).data, ktos(keys.PublicKey).data, ktos(keys.PrivateKey).data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -176,20 +173,17 @@ int main(int argc, char* argv[]) {
|
|||||||
}
|
}
|
||||||
args<<<1, 1 >>>(d_argv, argc, d_result);
|
args<<<1, 1 >>>(d_argv, argc, d_result);
|
||||||
unsigned h_high;
|
unsigned h_high;
|
||||||
cudaMemcpyFromSymbol(&h_high, high, sizeof(unsigned));
|
cudaMemcpyFromSymbol(&h_high, d_high, sizeof(unsigned));
|
||||||
printf("High addresses (2%02x+)\n", h_high);
|
printf("High addresses (2%02x+)\n", h_high);
|
||||||
const int threadsPerBlock = 256;
|
const int threadsPerBlock = 128;
|
||||||
cudaDeviceProp prop;
|
cudaDeviceProp prop;
|
||||||
cudaGetDeviceProperties(&prop, 0);
|
cudaGetDeviceProperties(&prop, 0);
|
||||||
int mBpSM;
|
int mBpSM;
|
||||||
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGen, threadsPerBlock, 0);
|
cudaOccupancyMaxActiveBlocksPerMultiprocessor(&mBpSM, KeyGen, threadsPerBlock, 0);
|
||||||
int SMs = prop.multiProcessorCount;
|
const int totalThreads = mBpSM * prop.multiProcessorCount * threadsPerBlock;
|
||||||
int maxBlocks = mBpSM * SMs;
|
printf("SMs: %d\n", prop.multiProcessorCount);
|
||||||
const int totalThreads = maxBlocks * threadsPerBlock;
|
|
||||||
printf("SMs: %d\n", SMs);
|
|
||||||
printf("maxBlocks: %d\n", maxBlocks);
|
|
||||||
printf("totalThreads: %d\n", totalThreads);
|
|
||||||
printf("MaxBlocksPerSM: %d\n", mBpSM);
|
printf("MaxBlocksPerSM: %d\n", mBpSM);
|
||||||
|
printf("totalThreads: %d\n", totalThreads);
|
||||||
printf("BlocksThreads: %d:%d\n", totalThreads / threadsPerBlock, threadsPerBlock);
|
printf("BlocksThreads: %d:%d\n", totalThreads / threadsPerBlock, threadsPerBlock);
|
||||||
curandState* rst;
|
curandState* rst;
|
||||||
cudaMalloc(&rst, totalThreads * sizeof(curandState));
|
cudaMalloc(&rst, totalThreads * sizeof(curandState));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user