diff --git a/.gitignore b/.gitignore index 0d69d66d2..07f699fbd 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,4 @@ node_modules/ # vscode .vscode/ /.vs +CMakeSettings.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 2a6dc05f2..2e6b37291 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,9 +13,9 @@ cable_configure_toolchain(DEFAULT cxx11) set(HUNTER_CONFIGURATION_TYPES Release) HunterGate( - URL "https://github.com/ruslo/hunter/archive/v0.20.34.tar.gz" - SHA1 "2f04d1beffdf39db1c40d8347beb8c10bbe9b8ed" - LOCAL + URL "https://github.com/ruslo/hunter/archive/v0.23.197.tar.gz" + SHA1 "f494a08bc9bb489527be1240d223d3ff69ece322" + LOCAL ) project(ethminer) diff --git a/README.md b/README.md index 212d74dc2..be7cf7dc6 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,26 @@ Ethash requires external memory due to the large size of the DAG. However that ## ProgPoW Algorithm Walkthrough -The DAG is generated exactly as in Ethash. All the parameters (epoch length, DAG size, etc) are unchanged. See the original [Ethash](https://github.com/ethereum/wiki/wiki/Ethash) spec for details on generating the DAG. +Up to release 0.9.3 the DAG is generated exactly as in Ethash. All the parameters (epoch length, DAG size, etc) are unchanged. See the original [Ethash](https://github.com/ethereum/wiki/wiki/Ethash) spec for details on generating the DAG. + +Release 0.9.3 has been software and hardware audited: +* [Least Authority — ProgPoW Software Audit PDF](https://leastauthority.com/static/publications/Least%20Authority%20-%20ProgPow%20Algorithm%20Final%20Audit%20Report.pdf) +* [Bob Rao - ProgPoW Hardware Audit PDF](https://github.com/ethereum-cat-herders/progpow-audit/raw/master/Bob%20Rao%20-%20ProgPOW%20Hardware%20Audit%20Report%20Final.pdf) + +Following the suggestion expressed by Least Authority in their findings, new proposed release 0.9.4 introduces a tweak in DAG generation in order to mitigate the possibility of a "Light Evaluation" attack. +This change implies the modification of `ETHASH_DATASET_PARENTS` from a value of 256 to the new value of 512. Due to this the DAG memory file used by ProgPoW is no more compatible with the one used by Ethash (epoch lenght and size increase ratio remain the same though). + +After the audits release a clever finding by [Kik](https://github.com/kik/) disclosed an exploitable condition to [bypass ProgPoW memory hardness](https://github.com/kik/progpow-exploit). Worth to mention the exploit would require the availability of a customized node able to accept modified block headers by the miner. +Purpose of this new spec release is to patch the condition modifying the input state of the last keccak pass so it changes from : +* header (256 bits) + +* seed for mix initiator (64 bits) + +* mix from main loop (256 bits) +* no padding +to +* digest from initial keccak (256 bits) + +* mix from main loop (256 bits) + +* padding +thus widening the constraint to target in keccak [brute force keccak linear searches](https://github.com/kik/progpow-exploit) from 64 to 256 bits. ProgPoW can be tuned using the following parameters. The proposed settings have been tuned for a range of existing, commodity GPUs: @@ -124,21 +143,26 @@ ProgPoW can be tuned using the following parameters. The proposed settings have * `PROGPOW_CNT_CACHE`: The number of cache accesses per loop * `PROGPOW_CNT_MATH`: The number of math operations per loop -The value of these parameters has been tweaked between version 0.9.2 (live on the Gangnam testnet) and 0.9.3 (proposed for Ethereum adoption). See [this medium post](https://medium.com/@ifdefelse/progpow-progress-da5bb31a651b) for details. +The value of these parameters has been tweaked between version 0.9.2 (live on the Gangnam testnet) and 0.9.3 (proposed for [Ethereum adoption](https://github.com/ethereum/EIPs/blob/master/EIPS/eip-1057.md)). See [this medium post](https://medium.com/@ifdefelse/progpow-progress-da5bb31a651b) for details. +Release 0.9.4 keeps the same tunables of 0.9.3 and includes the tweak for DAG generation. -| Parameter | 0.9.2 | 0.9.3 | -|-----------------------|-------|-------| -| `PROGPOW_PERIOD` | `50` | `10` | -| `PROGPOW_LANES` | `16` | `16` | -| `PROGPOW_REGS` | `32` | `32` | -| `PROGPOW_DAG_LOADS` | `4` | `4` | -| `PROGPOW_CACHE_BYTES` | `16x1024` | `16x1024` | -| `PROGPOW_CNT_DAG` | `64` | `64` | -| `PROGPOW_CNT_CACHE` | `12` | `11` | -| `PROGPOW_CNT_MATH` | `20` | `18` | +| Parameter | 0.9.2 | 0.9.3 | 0.9.4 | +|-----------------------|-------|-------|-------| +| `PROGPOW_PERIOD` | `50` | `10` | `10` | +| `PROGPOW_LANES` | `16` | `16` | `16` | +| `PROGPOW_REGS` | `32` | `32` | `32` | +| `PROGPOW_DAG_LOADS` | `4` | `4` | `4` | +| `PROGPOW_CACHE_BYTES` | `16x1024` | `16x1024` | `16x1024` | +| `PROGPOW_CNT_DAG` | `64` | `64` | `64` | +| `PROGPOW_CNT_CACHE` | `12` | `11` | `11` | +| `PROGPOW_CNT_MATH` | `20` | `18` | `18` | +| DAG Parameter | 0.9.2 | 0.9.3 | 0.9.4 | +|--------------------------|-------|-------|-------| +| `ETHASH_DATASET_PARENTS` | `256` | `256` | `512` | -The random program changes every `PROGPOW_PERIOD` blocks (default `50`, roughly 12.5 minutes) to ensure the hardware executing the algorithm is fully programmable. If the program only changed every DAG epoch (roughly 5 days) certain miners could have time to develop hand-optimized versions of the random sequence, giving them an undue advantage. + +The random program changes every `PROGPOW_PERIOD` blocks (default `10`, roughly 2 minutes) to ensure the hardware executing the algorithm is fully programmable. If the program only changed every DAG epoch (roughly 5 days) certain miners could have time to develop hand-optimized versions of the random sequence, giving them an undue advantage. Sample code is written in C++, this should be kept in mind when evaluating the code in the specification. @@ -196,13 +220,14 @@ void fill_mix( { // Use FNV to expand the per-warp seed to per-lane // Use KISS to expand the per-lane seed to fill mix + uint32_t fnv_hash = FNV_OFFSET_BASIS; kiss99_t st; - st.z = fnv1a(FNV_OFFSET_BASIS, seed); - st.w = fnv1a(st.z, seed >> 32); - st.jsr = fnv1a(st.w, lane_id); - st.jcong = fnv1a(st.jsr, lane_id); + st.z = fnv1a(fnv_hash, seed); + st.w = fnv1a(fnv_hash, seed >> 32); + st.jsr = fnv1a(fnv_hash, lane_id); + st.jcong = fnv1a(fnv_hash, lane_id); for (int i = 0; i < PROGPOW_REGS; i++) - mix[i] = kiss99(st); + mix[i] = kiss99(st); } ``` @@ -210,25 +235,9 @@ Like Ethash Keccak is used to seed the sequence per-nonce and to produce the fin As with Ethash the input and output of the keccak function are fixed and relatively small. This means only a single "absorb" and "squeeze" phase are required. For a pseudo-code implementation of the `keccak_f800_round` function see the `Round[b](A,RC)` function in the "Pseudo-code description of the permutations" section of the [official Keccak specs](https://keccak.team/keccak_specs_summary.html). -Test vectors can be found [in the test vectors file](test-vectors.md#keccak_f800_progpow). - ```cpp -hash32_t keccak_f800_progpow(hash32_t header, uint64_t seed, hash32_t digest) +hash32_t keccak_f800_progpow(uint32_t* state) { - uint32_t st[25]; - - // Initialization - for (int i = 0; i < 25; i++) - st[i] = 0; - - // Absorb phase for fixed 18 words of input - for (int i = 0; i < 8; i++) - st[i] = header.uint32s[i]; - st[8] = seed; - st[9] = seed >> 32; - for (int i = 0; i < 8; i++) - st[10+i] = digest.uint32s[i]; - // keccak_f800 call for the single absorb pass for (int r = 0; r < 22; r++) keccak_f800_round(st, r); @@ -244,7 +253,7 @@ hash32_t keccak_f800_progpow(hash32_t header, uint64_t seed, hash32_t digest) The inner loop uses FNV and KISS99 to generate a random sequence from the `prog_seed`. This random sequence determines which mix state is accessed and what random math is performed. -Since the `prog_seed` changes only once per `PROGPOW_PERIOD` (50 blocks or about 12.5 minutes) it is expected that while mining `progPowLoop` will be evaluated on the CPU to generate source code for that period's sequence. The source code will be compiled on the CPU before running on the GPU. You can see an example sequence and generated source code in [kernel.cu](test/kernel.cu). +Since the `prog_seed` changes only once per `PROGPOW_PERIOD` (10 blocks or about 2 minutes) it is expected that while mining `progPowLoop` will be evaluated on the CPU to generate source code for that period's sequence. The source code will be compiled on the CPU before running on the GPU. You can see an example sequence and generated source code in [kernel.cu](test/kernel.cu). Test vectors can be found [in the test vectors file](test-vectors.md#progPowInit). @@ -417,30 +426,52 @@ void progPowLoop( ``` The flow of the overall algorithm is: -* A keccak hash of the header + nonce to create a seed -* Use the seed to generate initial mix data +* A keccak hash of the header + nonce to create a digest of 256 bits from keccak_f800 (padding is consistent with custom one in ethash) +* Use first two words of digest as seed to generate initial mix data * Loop multiple times, each time hashing random loads and random math into the mix data * Hash all the mix data into a single 256-bit value -* A final keccak hash is computed +* A final keccak hash using carry-over digest from initial data + mix_data final 256 bit value (padding is consistent with custom one in ethash) * When mining this final value is compared against a `hash32_t` target ```cpp hash32_t progPowHash( - const uint64_t prog_seed, // value is (block_number/PROGPOW_PERIOD) + const uint64_t prog_seed, // value is (block_number/PROGPOW_PERIOD) const uint64_t nonce, const hash32_t header, - const uint32_t *dag // gigabyte DAG located in framebuffer - the first portion gets cached + const uint32_t *dag // gigabyte DAG located in framebuffer - the first portion gets cached ) { + hash32_t hash_init; + hash32_t hash_final; + uint32_t mix[PROGPOW_LANES][PROGPOW_REGS]; - hash32_t digest; - for (int i = 0; i < 8; i++) - digest.uint32s[i] = 0; - // keccak(header..nonce) - hash32_t seed_256 = keccak_f800_progpow(header, nonce, digest); - // endian swap so byte 0 of the hash is the MSB of the value - uint64_t seed = ((uint64_t)bswap(seed_256.uint32s[0]) << 32) | bswap(seed_256.uint32s[1]); + /* + ======================================== + Absorb phase for initial keccak pass + ======================================== + */ + + { + uint32_t state[25] = {0x0}; + // 1st fill with header data (8 words) + for (int i = 0; i < 8; i++) + state[i] = header.uint32s[i]; + + // 2nd fill with nonce (2 words) + state[8] = nonce; + state[9] = nonce >> 32; + + // 3rd apply padding + state[10] = 0x00000001; + state[18] = 0x80008081; + + // keccak(header..nonce) + hash_init = keccak_f800_progpow(state); + + // get the seed to initialize mix + seed = ((uint64_t)hash_init.uint32s[1] << 32) | hash_init.uint32s[0]); + } // initialize mix for all lanes for (int l = 0; l < PROGPOW_LANES; l++) @@ -458,41 +489,68 @@ hash32_t progPowHash( for (int i = 0; i < PROGPOW_REGS; i++) digest_lane[l] = fnv1a(digest_lane[l], mix[l][i]); } + // Reduce all lanes to a single 256-bit digest for (int i = 0; i < 8; i++) digest.uint32s[i] = FNV_OFFSET_BASIS; for (int l = 0; l < PROGPOW_LANES; l++) digest.uint32s[l%8] = fnv1a(digest.uint32s[l%8], digest_lane[l]); - // keccak(header .. keccak(header..nonce) .. digest); - return keccak_f800_progpow(header, seed, digest); + /* + ======================================== + Absorb phase for final keccak pass + ======================================== + */ + + { + uint32_t state[25] = {0x0}; + + // 1st fill with hash_init (8 words) + for (int i = 0; i < 8; i++) + state[i] = hash_init.uint32s[i]; + + // 2nd fill with digest from main loop + for (int i = 8; i < 16; i++) + state[i] = digest.uint32s[i - 8]; + + // 3rd apply padding + state[17] = 0x00000001; + state[24] = 0x80008081; + + // keccak(header..nonce) + hash_final = keccak_f800_progpow(state); + } + + // Compare hash final to target + [...] + } ``` - ## Example / Testcase -For ProgPoW 0.9.2: +For ProgPoW 0.9.4: -The random sequence generated for block 30,000 (prog_seed 600) can been seen in [kernel.cu](test/kernel.cu). +The random sequence generated for block 30,000 (prog_seed 3,000) can been seen in [kernel.cu](test/kernel.cu). The algorithm run on block 30,000 produces the following digest and result: ``` -header ffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff -nonce 123456789abcdef0 - -digest: 11f19805c58ab46610ff9c719dcf0a5f18fa2f1605798eef770c47219274767d -result: 5b7ccd472dbefdd95b895cac8ece67ff0deb5a6bd2ecc6e162383d00c3728ece +Header : 0xffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff +Nonce : 0x123456789abcdef0 +Hash init : 0xee304846ddd0a47b98179e96b60ec5ceeae2727834367e593de780e3e6d1892f +Mix seed : 0x7ba4d0dd464830ee +Mix hash : 0x493c13e9807440571511b561132834bbd558dddaa3b70c09515080a6a1aff6d0 +Hash final : 0x46b72b75f238bea3fcfd227e0027dc173dceaa1fb71744bd3d5e030ed2fed053 ``` -A full run showing some intermediate values can be seen in [result.log](test/result.log) - Additional test vectors can be found [in the test vectors file](test-vectors.md#progPowHash). ## Change History + +- 0.9.4 (current) - Patch the [bypass memory hardness](https://github.com/ifdefelse/ProgPOW/issues/51) vulnerability. - [0.9.3](https://github.com/ifdefelse/ProgPOW/tree/spec-0.9.3) - Reduce parameters PERIOD, CNT_MATH, and CNT_CACHE. See [this medium post](https://medium.com/@ifdefelse/progpow-progress-da5bb31a651b) for details. - [0.9.2](https://github.com/ifdefelse/ProgPOW/tree/spec-0.9.2) - Unique sources for math() and prevent rotation by 0 in merge(). Suggested by [SChernykh](https://github.com/ifdefelse/ProgPOW/issues/19) - [0.9.1](https://github.com/ifdefelse/ProgPOW/tree/spec-0.9.1) - Shuffle what part of the DAG entry each lane accesses. Suggested by [mbevand](https://github.com/ifdefelse/ProgPOW/pull/13) diff --git a/cmake/Hunter/config.cmake b/cmake/Hunter/config.cmake index 418c3cc35..74167d94c 100644 --- a/cmake/Hunter/config.cmake +++ b/cmake/Hunter/config.cmake @@ -1,2 +1,3 @@ hunter_config(CURL VERSION ${HUNTER_CURL_VERSION} CMAKE_ARGS HTTP_ONLY=ON CMAKE_USE_OPENSSL=OFF CMAKE_USE_LIBSSH2=OFF) -hunter_config(libjson-rpc-cpp VERSION ${HUNTER_libjson-rpc-cpp_VERSION} CMAKE_ARGS TCP_SOCKET_SERVER=ON) \ No newline at end of file +hunter_config(libjson-rpc-cpp VERSION ${HUNTER_libjson-rpc-cpp_VERSION} CMAKE_ARGS TCP_SOCKET_SERVER=ON) +hunter_config(Boost VERSION 1.70.0-p0) \ No newline at end of file diff --git a/libethash-cl/CLMiner_kernel.cl b/libethash-cl/CLMiner_kernel.cl index 9a9b7e8cf..6065c9a1c 100644 --- a/libethash-cl/CLMiner_kernel.cl +++ b/libethash-cl/CLMiner_kernel.cl @@ -17,6 +17,9 @@ #define HASHES_PER_GROUP (GROUP_SIZE / PROGPOW_LANES) +#define FNV_PRIME 0x1000193 +#define FNV_OFFSET_BASIS 0x811c9dc5 + typedef struct { uint32_t uint32s[32 / sizeof(uint32_t)]; @@ -80,31 +83,19 @@ void keccak_f800_round(uint32_t st[25], const int r) // Keccak - implemented as a variant of SHAKE // The width is 800, with a bitrate of 576, a capacity of 224, and no padding // Only need 64 bits of output for mining -uint64_t keccak_f800(__constant hash32_t const* g_header, uint64_t seed, hash32_t digest) +uint64_t keccak_f800(uint32_t* st) { - uint32_t st[25]; - - for (int i = 0; i < 25; i++) - st[i] = 0; - for (int i = 0; i < 8; i++) - st[i] = g_header->uint32s[i]; - st[8] = seed; - st[9] = seed >> 32; - for (int i = 0; i < 8; i++) - st[10+i] = digest.uint32s[i]; - - for (int r = 0; r < 21; r++) { - keccak_f800_round(st, r); - } - // last round can be simplified due to partial output - keccak_f800_round(st, 21); + // Assumes input state has already been filled + // at higher level - // Byte swap so byte 0 of hash is MSB of result - uint64_t res = (uint64_t)st[1] << 32 | st[0]; - return as_ulong(as_uchar8(res).s76543210); + // Complete all 22 rounds as a separate impl to + // evaluate only first 8 words is wasteful of regsters + for (int r = 0; r < 22; r++) { + keccak_f800_round(st, r); + } } -#define fnv1a(h, d) (h = (h ^ d) * 0x1000193) +#define fnv1a(h, d) (h = (h ^ d) * FNV_PRIME) typedef struct { uint32_t z, w, jsr, jcong; @@ -125,14 +116,14 @@ uint32_t kiss99(kiss99_t *st) return ((MWC^st->jcong) + st->jsr); } -void fill_mix(uint64_t seed, uint32_t lane_id, uint32_t mix[PROGPOW_REGS]) +void fill_mix(local uint32_t* seed, uint32_t lane_id, uint32_t* mix) { // Use FNV to expand the per-warp seed to per-lane // Use KISS to expand the per-lane seed to fill mix - uint32_t fnv_hash = 0x811c9dc5; + uint32_t fnv_hash = FNV_OFFSET_BASIS; kiss99_t st; - st.z = fnv1a(fnv_hash, seed); - st.w = fnv1a(fnv_hash, seed >> 32); + st.z = fnv1a(fnv_hash, seed[0]); + st.w = fnv1a(fnv_hash, seed[1]); st.jsr = fnv1a(fnv_hash, lane_id); st.jcong = fnv1a(fnv_hash, lane_id); #pragma unroll @@ -168,35 +159,63 @@ __kernel void ethash_search( const uint32_t group_id = lid / PROGPOW_LANES; // Load the first portion of the DAG into the cache - for (uint32_t word = lid*PROGPOW_DAG_LOADS; word < PROGPOW_CACHE_WORDS; word += GROUP_SIZE*PROGPOW_DAG_LOADS) + for (uint32_t word = lid * PROGPOW_DAG_LOADS; word < PROGPOW_CACHE_WORDS; + word += GROUP_SIZE * PROGPOW_DAG_LOADS) { - dag_t load = g_dag[word/PROGPOW_DAG_LOADS]; - for (int i = 0; iuint32s[i]; + // 2nd fill with nonce (2 words) + state[8] = nonce; + state[9] = nonce >> 32; + + // 3rd apply input constraints + state[10] = keccakf_rndc[0]; + state[18] = keccakf_rndc[6]; + + // Run intial keccak round + keccak_f800(state); + + for (int i = 0; i < 8; i++) + state2[i] = state[i]; + } + + +#pragma unroll 1 for (uint32_t h = 0; h < PROGPOW_LANES; h++) { uint32_t mix[PROGPOW_REGS]; // share the hash's seed across all lanes - //uint64_t hash_seed = __shfl_sync(0xFFFFFFFF, seed, h, PROGPOW_LANES); - if (lane_id == h) - share[group_id].uint64s[0] = seed; + if (lane_id == h) + { + share[group_id].uint32s[0] = state2[0]; + share[group_id].uint32s[1] = state2[1]; + } + barrier(CLK_LOCAL_MEM_FENCE); - uint64_t hash_seed = share[group_id].uint64s[0]; + + //uint64_t hash_seed = share[group_id].uint64s[0]; // initialize mix for all lanes - fill_mix(hash_seed, lane_id, mix); + fill_mix(share[group_id].uint32s, lane_id, mix); // Apparently, no unrolling ("#pragma unroll 1") often results in // miscompiles with AMD OpenCL, so use at least 2 @@ -205,7 +224,7 @@ __kernel void ethash_search( progPowLoop(l, mix, g_dag, c_dag, share[0].uint64s, hack_false); // Reduce mix data to a per-lane 32-bit digest - uint32_t mix_hash = 0x811c9dc5; + uint32_t mix_hash = FNV_OFFSET_BASIS; #pragma unroll for (int i = 0; i < PROGPOW_REGS; i++) fnv1a(mix_hash, mix[i]); @@ -213,7 +232,7 @@ __kernel void ethash_search( // Reduce all lanes to a single 256-bit digest hash32_t digest_temp; for (int i = 0; i < 8; i++) - digest_temp.uint32s[i] = 0x811c9dc5; + digest_temp.uint32s[i] = FNV_OFFSET_BASIS; share[group_id].uint32s[lane_id] = mix_hash; barrier(CLK_LOCAL_MEM_FENCE); #pragma unroll @@ -223,8 +242,33 @@ __kernel void ethash_search( digest = digest_temp; } + uint64_t result; + + { + uint32_t state[25] = { 0x0 }; // Keccak's state initialized to zero + + // Absorb phase for last round of keccak (256 bits) + // 1st initial 8 words of state are kept as carry-over from initial keccak + for (int i = 0; i < 8; i++) + state[i] = state2[i]; + + // 2nd subsequent 8 words are carried from digest/mix + for (int i = 8; i < 16; i++) + state[i] = digest.uint32s[i - 8]; + + // 3rd apply input constraints + state[17] = keccakf_rndc[0]; + state[24] = keccakf_rndc[6]; + + // Run keccak loop + keccak_f800(state); + + int64_t res = (uint64_t)state[1] << 32 | state[0]; + result = as_ulong(as_uchar8(res).s76543210); + } + // keccak(header .. keccak(header..nonce) .. digest); - if (keccak_f800(g_header, seed, digest) < target) + if (result <= target) { uint slot = atomic_inc(&g_output[0]) + 1; if(slot < MAX_OUTPUTS) @@ -376,25 +420,25 @@ static void keccak_f1600_round(uint2* a, uint r) t[20] = ROL2(a[2], 62); t[5] = ROL2(a[3], 28); t[15] = ROL2(a[4], 27); - + t[16] = ROL2(a[5], 36); t[1] = ROL2(a[6], 44); t[11] = ROL2(a[7], 6); t[21] = ROL2(a[8], 55); t[6] = ROL2(a[9], 20); - + t[7] = ROL2(a[10], 3); t[17] = ROL2(a[11], 10); t[2] = ROL2(a[12], 43); t[12] = ROL2(a[13], 25); t[22] = ROL2(a[14], 39); - + t[23] = ROL2(a[15], 41); t[8] = ROL2(a[16], 45); t[18] = ROL2(a[17], 15); t[3] = ROL2(a[18], 21); t[13] = ROL2(a[19], 8); - + t[14] = ROL2(a[20], 18); t[24] = ROL2(a[21], 2); t[9] = ROL2(a[22], 61); @@ -419,7 +463,7 @@ static void keccak_f1600_no_absorb(uint2* a, uint out_size, uint isolate) // better with surrounding code, however I haven't done this // without causing the AMD compiler to blow up the VGPR usage. - + //uint o = 25; for (uint r = 0; r < 24;) { @@ -435,8 +479,8 @@ static void keccak_f1600_no_absorb(uint2* a, uint out_size, uint isolate) keccak_f1600_round(a, r++); //if (r == 23) o = out_size; } - } - + } + // final round optimised for digest size //keccak_f1600_round(a, 23, out_size); diff --git a/libethash-cuda/CUDAMiner.cpp b/libethash-cuda/CUDAMiner.cpp index 59e837bfb..e682ab5b8 100644 --- a/libethash-cuda/CUDAMiner.cpp +++ b/libethash-cuda/CUDAMiner.cpp @@ -74,7 +74,7 @@ bool CUDAMiner::init(int epoch) cuda_init(getNumDevices(), light->light, lightData.data(), lightData.size(), device, (s_dagLoadMode == DAG_LOAD_MODE_SINGLE), s_dagInHostMemory, s_dagCreateDevice); s_dagLoadIndex++; - + if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE) { if (s_dagLoadIndex >= s_numInstances && s_dagInHostMemory) @@ -369,7 +369,7 @@ bool CUDAMiner::cuda_init( cudalog << "CUDA device " << string(device_props.name) << " has insufficient GPU memory." << device_props.totalGlobalMem << " bytes of memory found < " << dagBytes << " bytes of memory required"; return false; } - //We need to reset the device and recreate the dag + //We need to reset the device and recreate the dag cudalog << "Resetting device"; CUDA_SAFE_CALL(cudaDeviceReset()); CUdevice device; @@ -379,23 +379,23 @@ bool CUDAMiner::cuda_init( //We need to reset the light and the Dag for the following code to reallocate //since cudaDeviceReset() frees all previous allocated memory m_light[m_device_num] = nullptr; - m_dag = nullptr; + m_dag = nullptr; } // create buffer for cache hash64_t * dag = m_dag; hash64_t * light = m_light[m_device_num]; - if(!light){ + if(!light){ cudalog << "Allocating light with size: " << _lightBytes; CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast(&light), _lightBytes)); } // copy lightData to device CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast(light), _lightData, _lightBytes, cudaMemcpyHostToDevice)); m_light[m_device_num] = light; - + if(dagElms != m_dag_elms || !dag) // create buffer for dag CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast(&dag), dagBytes)); - + if(dagElms != m_dag_elms || !dag) { // create mining buffers @@ -405,7 +405,7 @@ bool CUDAMiner::cuda_init( CUDA_SAFE_CALL(cudaMallocHost(&m_search_buf[i], sizeof(search_results))); CUDA_SAFE_CALL(cudaStreamCreate(&m_streams[i])); } - + memset(&m_current_header, 0, sizeof(hash32_t)); m_current_target = 0; m_current_nonce = 0; @@ -429,7 +429,7 @@ bool CUDAMiner::cuda_init( } }else{ while(!hostDAG) - this_thread::sleep_for(chrono::milliseconds(100)); + this_thread::sleep_for(chrono::milliseconds(100)); goto cpyDag; } } @@ -441,7 +441,7 @@ bool CUDAMiner::cuda_init( CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast(dag), hdag, dagBytes, cudaMemcpyHostToDevice)); } } - + m_dag = dag; m_dag_elms = dagElms; diff --git a/libethash-cuda/CUDAMiner_kernel.cu b/libethash-cuda/CUDAMiner_kernel.cu index 3f7666d29..d18025c57 100644 --- a/libethash-cuda/CUDAMiner_kernel.cu +++ b/libethash-cuda/CUDAMiner_kernel.cu @@ -2,6 +2,9 @@ #define SEARCH_RESULTS 4 #endif +#define FNV_PRIME 0x1000193 +#define FNV_OFFSET_BASIS 0x811c9dc5 + typedef struct { uint32_t count; struct { @@ -79,31 +82,21 @@ __device__ __forceinline__ uint32_t cuda_swab32(const uint32_t x) // Keccak - implemented as a variant of SHAKE // The width is 800, with a bitrate of 576, a capacity of 224, and no padding -// Only need 64 bits of output for mining -__device__ __noinline__ uint64_t keccak_f800(hash32_t header, uint64_t seed, hash32_t digest) +__device__ __forceinline__ void keccak_f800(uint32_t* st) { - uint32_t st[25]; - for (int i = 0; i < 25; i++) - st[i] = 0; - for (int i = 0; i < 8; i++) - st[i] = header.uint32s[i]; - st[8] = seed; - st[9] = seed >> 32; - for (int i = 0; i < 8; i++) - st[10+i] = digest.uint32s[i]; + // Assumes input state has already been filled + // at higher level - for (int r = 0; r < 21; r++) { + // Complete all 22 rounds as a separate impl to + // evaluate only first 8 words is wasteful of regsters + for (int r = 0; r < 22; r++) { keccak_f800_round(st, r); } - // last round can be simplified due to partial output - keccak_f800_round(st, 21); - // Byte swap so byte 0 of hash is MSB of result - return (uint64_t)cuda_swab32(st[0]) << 32 | cuda_swab32(st[1]); } -#define fnv1a(h, d) (h = (uint32_t(h) ^ uint32_t(d)) * uint32_t(0x1000193)) +#define fnv1a(h, d) (h = (uint32_t(h) ^ uint32_t(d)) * uint32_t(FNV_PRIME)) typedef struct { uint32_t z, w, jsr, jcong; @@ -124,14 +117,14 @@ __device__ __forceinline__ uint32_t kiss99(kiss99_t &st) return ((MWC^st.jcong) + st.jsr); } -__device__ __forceinline__ void fill_mix(uint64_t seed, uint32_t lane_id, uint32_t mix[PROGPOW_REGS]) +__device__ __forceinline__ void fill_mix(uint32_t* hash_seed, uint32_t lane_id, uint32_t* mix) { // Use FNV to expand the per-warp seed to per-lane // Use KISS to expand the per-lane seed to fill mix - uint32_t fnv_hash = 0x811c9dc5; + uint32_t fnv_hash = FNV_OFFSET_BASIS; kiss99_t st; - st.z = fnv1a(fnv_hash, seed); - st.w = fnv1a(fnv_hash, seed >> 32); + st.z = fnv1a(fnv_hash, hash_seed[0]); + st.w = fnv1a(fnv_hash, hash_seed[1]); st.jsr = fnv1a(fnv_hash, lane_id); st.jcong = fnv1a(fnv_hash, lane_id); #pragma unroll @@ -139,7 +132,7 @@ __device__ __forceinline__ void fill_mix(uint64_t seed, uint32_t lane_id, uint32 mix[i] = kiss99(st); } -__global__ void +__global__ void progpow_search( uint64_t start_nonce, const hash32_t header, @@ -155,7 +148,7 @@ progpow_search( const uint32_t lane_id = threadIdx.x & (PROGPOW_LANES - 1); - // Load the first portion of the DAG into the cache + // Load the first portion of the DAG into the shared cache for (uint32_t word = threadIdx.x*PROGPOW_DAG_LOADS; word < PROGPOW_CACHE_WORDS; word += blockDim.x*PROGPOW_DAG_LOADS) { dag_t load = g_dag[word/PROGPOW_DAG_LOADS]; @@ -163,22 +156,48 @@ progpow_search( c_dag[word + i] = load.s[i]; } - hash32_t digest; - for (int i = 0; i < 8; i++) - digest.uint32s[i] = 0; - // keccak(header..nonce) - uint64_t seed = keccak_f800(header, nonce, digest); - + // Force threads to sync and ensure shared mem is in sync __syncthreads(); + //uint32_t state[25]; // Keccak's state + uint32_t hash_seed[2]; // KISS99 initiator + hash32_t digest; // Carry-over from mix output + + uint32_t state2[8]; + + { + // Absorb phase for initial round of keccak + // 1st fill with header data (8 words) + uint32_t state[25] = { 0x0 }; // Keccak's state initialized to zero + for (int i = 0; i < 8; i++) + state[i] = header.uint32s[i]; + // 2nd fill with nonce (2 words) + state[8] = nonce; + state[9] = nonce >> 32; + + // 3rd apply padding + state[10] = keccakf_rndc[0]; + state[18] = keccakf_rndc[6]; + + // Run intial keccak round + keccak_f800(state); + + for (int i = 0; i < 8; i++) + state2[i] = state[i]; + } + + // Main loop #pragma unroll 1 for (uint32_t h = 0; h < PROGPOW_LANES; h++) { uint32_t mix[PROGPOW_REGS]; - // share the hash's seed across all lanes - uint64_t hash_seed = __shfl_sync(0xFFFFFFFF, seed, h, PROGPOW_LANES); - // initialize mix for all lanes + // share the first two words of digest across all lanes + hash_seed[0] = __shfl_sync(0xFFFFFFFF, state2[0], h, PROGPOW_LANES); + hash_seed[1] = __shfl_sync(0xFFFFFFFF, state2[1], h, PROGPOW_LANES); + + // initialize mix for all lanes using first + // two words from header_hash fill_mix(hash_seed, lane_id, mix); #pragma unroll 1 @@ -187,7 +206,7 @@ progpow_search( // Reduce mix data to a per-lane 32-bit digest - uint32_t digest_lane = 0x811c9dc5; + uint32_t digest_lane = FNV_OFFSET_BASIS; #pragma unroll for (int i = 0; i < PROGPOW_REGS; i++) fnv1a(digest_lane, mix[i]); @@ -196,7 +215,7 @@ progpow_search( hash32_t digest_temp; #pragma unroll for (int i = 0; i < 8; i++) - digest_temp.uint32s[i] = 0x811c9dc5; + digest_temp.uint32s[i] = FNV_OFFSET_BASIS; for (int i = 0; i < PROGPOW_LANES; i += 8) #pragma unroll @@ -207,8 +226,33 @@ progpow_search( digest = digest_temp; } + uint64_t result; + + { + uint32_t state[25] = { 0x0 }; // Keccak's state initialized to zero + + // Absorb phase for last round of keccak (256 bits) + // 1st initial 8 words of state are kept as carry-over from initial keccak + for (int i = 0; i < 8; i++) + state[i] = state2[i]; + + // 2nd subsequent 8 words are carried from digest/mix + for (int i = 8; i < 16; i++) + state[i] = digest.uint32s[i - 8]; + + // 3rd apply padding + state[17] = keccakf_rndc[0]; + state[24] = keccakf_rndc[6]; + + // Run keccak loop + keccak_f800(state); + + // Extract result, swap endianness, and compare with target + result = (uint64_t)cuda_swab32(state[0]) << 32 | cuda_swab32(state[1]); + } + // keccak(header .. keccak(header..nonce) .. digest); - if (keccak_f800(header, seed, digest) >= target) + if (result >= target) return; uint32_t index = atomicInc((uint32_t *)&g_output->count, 0xffffffff); diff --git a/libprogpow/ProgPow.cpp b/libprogpow/ProgPow.cpp index 1907c5a30..afea9c4db 100644 --- a/libprogpow/ProgPow.cpp +++ b/libprogpow/ProgPow.cpp @@ -159,7 +159,7 @@ std::string ProgPow::getKern(uint64_t block_number, kernel_t kern) if (i < PROGPOW_CNT_MATH) { // Random Math - // Generate 2 unique sources + // Generate 2 unique sources int src_rnd = rnd() % ((PROGPOW_REGS - 1) * PROGPOW_REGS); int src1 = src_rnd % PROGPOW_REGS; // 0 <= src1 < PROGPOW_REGS int src2 = src_rnd / PROGPOW_REGS; // 0 <= src2 < PROGPOW_REGS - 1 diff --git a/test-vectors.md b/test-vectors.md index 0901134e2..8b9dd92df 100644 --- a/test-vectors.md +++ b/test-vectors.md @@ -24,7 +24,7 @@ For `z`=`362436069`, `w`=`521288629`, `jsr`=`123456789`, and `jcong`=`380116160` ## fill_mix -For `hash_seed`=`0xEE304846DDD0A47B` and `lane_id`=`0` the values stored in the `mix` array will be +For `seed`=`0xEE304846DDD0A47B` and `lane_id`=`0` the values stored in the `mix` array will be > ``` > 0x10C02F0D, 0x99891C9E, 0xC59649A0, 0x43F0394D, @@ -50,26 +50,6 @@ For the same hash and `lane_id`=`13` the value in the `mix` array will be > 0xB4CCB32D, 0x83A09132, 0x27EA8279, 0x3837DDA3 > ``` -## keccak_f800_progpow - -Test case 1: - -| | | -| -------- | ----------------------------------------------------------------------------------------------------------------- | -| header | `0xCCDDEEFF`, `0x8899AABB`, `0x44556677`, `0x00112233`,
`0x33221100`, `0x77665544`, `0xBBAA9988`, `0xFFEEDDCC` | -| seed | `0x123456789ABCDEF0` | -| digest | `0x00000000`, `0x00000000`, `0x00000000`, `0x00000000`,
`0x00000000`, `0x00000000`, `0x00000000`, `0x00000000` | -| _result_ | `0x464830EE`, `0x7BA4D0DD`, `0x969E1798`, `0xCEC50EB6`,
`0x7872E2EA`, `0x597E3634`, `0xE380E73D`, `0x2F89D1E6` | - -Test case 2: - -| | | -| -------- | ----------------------------------------------------------------------------------------------------------------- | -| header | `0xCCDDEEFF`, `0x8899AABB`, `0x44556677`, `0x00112233`,
`0x33221100`, `0x77665544`, `0xBBAA9988`, `0xFFEEDDCC` | -| seed | `0xEE304846DDD0A47B` | -| digest | `0x0598F111`, `0x66B48AC5`, `0x719CFF10`, `0x5F0ACF9D`,
`0x162FFA18`, `0xEF8E7905`, `0x21470C77`, `0x7D767492` | -| _result_ | `0x47CD7C5B`, `0xD9FDBE2D`, `0xAC5C895B`, `0xFF67CE8E`,
`0x6B5AEB0D`, `0xE1C6ECD2`, `0x003D3862`, `0xCE8E72C3` | - ## progPowInit For ProgPow period 600 (block 30,000) the configurations should be @@ -120,338 +100,80 @@ Kiss 99 state: ## progPowLoop For the first loop iteration of block 30,000 the seed to use for `fill_mix` -would be `0xEE304846DDD0A47B`. A two dimensional `mix` array should be created -passing the rows into `fill_mix` witht he column number as the loop argument. +would be `0x7ba4d0dd464830ee`. A two dimensional `mix` array should be created +passing the rows into `fill_mix` with the column number as the loop argument. The state of the mix array after the call to `progPowLoop` for block 30,000, loop 1 are as follows. -`mix[0]` - - -> ``` -> 0x40E09E9C, 0x967A7DF0, 0x8626BB1F, 0x12C2392F, -> 0xA21D8305, 0x44C2702E, 0x94C93945, 0x6B66B158, -> 0x0CF00FAA, 0x26F5E6B5, 0x36EC0134, 0xC89805AF, -> 0x58118540, 0x8617DC4D, 0xC759F486, 0x8A81E396, -> 0x22443D4D, 0x64291E2F, 0x1998AB7F, 0x11C0FBBB, -> 0xBEA9C139, 0x82D1E47E, 0x7ED3E850, 0x2F81531A, -> 0xBBDFBC4E, 0xF58AEE4D, 0x3CA34321, 0x357BD48A, -> 0x2F9C8B5D, 0x2319B193, 0x2856BB38, 0x2E3C33E6 -> ``` - -`mix[1]` - - -> ``` -> 0x4EB8A8F9, 0xD978BF17, 0x7D5074D4, 0x7A092D5D, -> 0x8682D1BE, 0xC3D2941C, 0xF1A1A38B, 0x54BB6D34, -> 0x2F0FB257, 0xB5464B50, 0x40927B67, 0xBB92A7E1, -> 0x1305A517, 0xE06C6765, 0xA75FD647, 0x9F232D6E, -> 0x0D9213ED, 0x8884671D, 0x54352B96, 0x6772E58E, -> 0x1B8120C9, 0x179F3CFB, 0x116FFC82, 0x6D019BCE, -> 0x1C26A750, 0x89716638, 0x02BEB948, 0x2E0AD5CE, -> 0x7FA915B2, 0x93024F2F, 0x2F58032E, 0xF02E550C -> ``` - -`mix[2]` - - -> ``` -> 0x008FF9BD, 0xC41F9802, 0x2E36FDC8, 0x9FBA2A91, -> 0x0A921670, 0x231308E6, 0xEF09A56E, 0x9657A64A, -> 0xF67723FE, 0x963DCD40, 0x354CBFDB, 0x57C07B9A, -> 0x06AF5B40, 0xBA5DE5A6, 0xDA5AAE7B, 0x9F8A5E4B, -> 0x7D6AFC9A, 0xE4783F78, 0x89B24946, 0x5EE94228, -> 0xA209DAAA, 0xDCC27C64, 0x3366FBED, 0x0FEFB673, -> 0x0FC205E3, 0xB61515B2, 0x70A45E9B, 0xBB225E5D, -> 0xB8C38EA0, 0xE01DE9B4, 0x866FAA5B, 0x1A125220 -> ``` - -`mix[3]` - - -> ``` -> 0xE5F9C5CC, 0x6F75CFA2, 0xE0F50924, 0xE7B4F5EF, -> 0x779B903D, 0x5F068253, 0x05FF68E5, 0x39348653, -> 0x654B89E4, 0x0559769E, 0xA3D46B93, 0xD084454D, -> 0xCFC5CF7D, 0x8C11D8E4, 0x795BDB59, 0xD9E03113, -> 0xBAE8C355, 0x12B63814, 0x4046A018, 0xA269A32E, -> 0x54A57C4B, 0x2ED1065B, 0xB69A2C76, 0x4AEF0950, -> 0x6C2D187B, 0x8252FAE7, 0x3E9C0ED2, 0x26E47B15, -> 0xFEFB48E3, 0xDA088C7F, 0xA82B0379, 0xA49C6D86 -> ``` - -`mix[4]` - - -> ``` -> 0xB926334C, 0x686A29AF, 0xD9E2EF15, 0x1C8A2D39, -> 0x307ED4F4, 0x2ABB1DB6, 0xD6F95128, 0xDFCA05F8, -> 0x904D9472, 0xEC09E200, 0x7143F47F, 0xEE488438, -> 0xFCA48DA8, 0xA64C7DD4, 0xC4AE9A30, 0xEBA30BC9, -> 0xB02630BF, 0xD1DF40CC, 0x4DFE8B7B, 0x205C97B3, -> 0xE40376F8, 0x2491117E, 0x34984321, 0xA01546A7, -> 0xB254F2F9, 0xC78A7C25, 0xFFC615E2, 0x5839FC88, -> 0x2A04DF6C, 0xC02A9A8A, 0x39238EAD, 0x7139060C -> ``` - -`mix[5]` - - -> ``` -> 0xC416E54B, 0x64AD1C57, 0xBF7CBA55, 0x176F714E, -> 0xBE733426, 0x995C4132, 0x5F50F779, 0x0F76FDF3, -> 0x526F7870, 0xE56A1A8A, 0xDCEB677E, 0xD471CC19, -> 0xA9ED60E4, 0x145E807F, 0x8D652E92, 0x80E8116F, -> 0xFF1A37EB, 0x1E0C49A1, 0x59D756DA, 0x39A8E761, -> 0x2F0F646F, 0x43F41278, 0x88CC48DA, 0x8FDFF7A4, -> 0x9AEACA2E, 0x59E7808C, 0x7F72E46B, 0xCA572333, -> 0xC6029C88, 0x7736E592, 0xF1338231, 0x262B2C7F -> ``` - -`mix[6]` - - -> ``` -> 0x3C554151, 0x70999423, 0x64BB49A8, 0xF9EBE9E9, -> 0x7D9C28CF, 0x23EE7659, 0xD6504FCF, 0x1C58C2A1, -> 0x62B9C627, 0x680AE248, 0xF196A153, 0x2A3C345A, -> 0x860E6EB2, 0x266D2652, 0x3C9F2420, 0xF790A538, -> 0x710A5523, 0xBEA2603A, 0x1C1CC272, 0xF91D482A, -> 0x1CA19931, 0x7A80ED37, 0x9572513D, 0x376F1CFE, -> 0xE57C1264, 0xE47BF931, 0xC7310E05, 0x7866CC9E, -> 0xC676BBD5, 0x4C167FEB, 0x0FE03D2B, 0x46C6D26C -> ``` - -`mix[7]` - - -> ``` -> 0x3395F65A, 0x7142A5B1, 0x97780661, 0xE5EE45B8, -> 0xCD9FDC42, 0x25BF044C, 0x0350F81B, 0x55D50703, -> 0xA8CB893E, 0xEE795201, 0xC2D6E598, 0xC2AC2D7A, -> 0xD2E81716, 0xAD876790, 0x0F3339C7, 0xEEC31E01, -> 0xA293ABF6, 0x28AE317D, 0x44A7AC05, 0xBEBA1C5E, -> 0x325ED29E, 0x4344131E, 0x921CD8DD, 0x08AB9E0B, -> 0xC18E66A6, 0x87E6BCA3, 0x24CE82AE, 0xC910B4F1, -> 0x9E513EC0, 0xA1B8CB76, 0xF0455815, 0x36BC0DCF -> ``` - -`mix[8]` - - -> ``` -> 0x0117C85F, 0xE018F2C6, 0x416C897D, 0x9D288A0F, -> 0x2AA9EA93, 0x5A6D3CEA, 0xAA99B726, 0x0A42DAB7, -> 0x72F6EA4A, 0x1DB074E6, 0x2E2A606C, 0xAC5D509B, -> 0x53F13E85, 0x1D44B521, 0x24234C42, 0xAD5BAD70, -> 0xAB2DA791, 0x6479546A, 0xD27B3771, 0xBB0A09DD, -> 0x6D3C8056, 0x96572D4B, 0x52DB6535, 0x3D242BC1, -> 0xF37D7C7A, 0xA60F7111, 0x59B59667, 0xF28635B0, -> 0xC2A8F9F5, 0x7CFB9CCB, 0xDF8697AA, 0xA3260D94 -> ``` - -`mix[9]` - - -> ``` -> 0xA387FC4B, 0xC757D3A0, 0xA584E879, 0xB0A1EC29, -> 0x82CB2EC3, 0x6BF33664, 0x41FECC42, 0xF60C2AC5, -> 0xEA250BE5, 0x42BE9F33, 0x9227B0B3, 0x9080A6AB, -> 0xAF193598, 0xC708BC8A, 0x020CDEDB, 0x7FA2F773, -> 0x4338E670, 0x069E0242, 0x5AD87326, 0xD7A87124, -> 0x220D5C46, 0x26D3400D, 0x4899D1EE, 0x90EAD2F6, -> 0xFA3F1F74, 0x9C5A5D58, 0xAE20567C, 0x424B690D, -> 0xC9A4057A, 0x9F2A5CD1, 0xAA33CD5F, 0x18F58C00 -> ``` - -`mix[10]` - - -> ``` -> 0xEAFE893C, 0x1ABB2971, 0x29803BB3, 0x5BC2F71F, -> 0x619DAFAD, 0xD9CFEFB6, 0xB4FEFAB5, 0x5EB249EC, -> 0x1A6E2B3A, 0xFB05DD28, 0xDCB33C2E, 0x630BB8AE, -> 0x43463B39, 0x3BD2F552, 0xFB20C0A2, 0x3383BA34, -> 0x2E9C1A99, 0x60A949B2, 0x861372AB, 0xC149D929, -> 0xA77A0A93, 0xE0CEE0D9, 0x791E7E82, 0x66A8D75A, -> 0x44D1845F, 0xE534DC4A, 0x2C7DD20C, 0xEEDAB329, -> 0x3209FE2A, 0x0C0406BC, 0xD6D4BD2A, 0x5FDB13CC -> ``` - -`mix[11]` - - -> ``` -> 0x2520ABB3, 0xCD942485, 0x9A2929BC, 0x0E10F18C, -> 0xDFB1815E, 0x8BEF05A3, 0x531A8837, 0x668838E4, -> 0xBACCE200, 0x003F85C2, 0x56226F05, 0xC2233173, -> 0x2F39A0D9, 0xF4466D0D, 0x0B9E686C, 0x82C69BDA, -> 0x0C8A8CD6, 0xA93F3001, 0x36A65EC1, 0x40CCFD7A, -> 0x84484E23, 0xF0896D45, 0x06D9F760, 0x6559142C, -> 0x9FFE2E88, 0x9593DC89, 0x89C9E3B9, 0x33285F41, -> 0x16F636C8, 0xA08169C7, 0xA5E1C956, 0xC22CCF52 -> ``` - -`mix[12]` - - -> ``` -> 0xDC3B8CAA, 0xC6941197, 0x9969D596, 0x46453D3E, -> 0x568EAFEA, 0x5B823345, 0xDE606E8E, 0x7523C86D, -> 0x0EDAF441, 0x00C3D848, 0xAE5BAB99, 0xD705B9EE, -> 0x54B49E3D, 0xF364A6A4, 0x42C55975, 0xFE41EED5, -> 0xAD46170F, 0xAABE4868, 0x270379F9, 0xD33D0D7C, -> 0xF39C476C, 0xA449118E, 0x71BCC1E4, 0x5E300E77, -> 0x1CACD489, 0x4D82FABD, 0x090F9F80, 0xB2DB9626, -> 0xE12A973B, 0x1B77460C, 0xD25F89F5, 0x5753612E -> ``` - -`mix[13]` - - -> ``` -> 0x042D951C, 0x38833AA7, 0xBEA9894D, 0x7AE7F381, -> 0x42DB6723, 0x1FB0294F, 0x41452A28, 0xA7A97B9C, -> 0x228AA7EA, 0x781A7420, 0x4589736D, 0xB3C19349, -> 0x685EF9E6, 0xB4987DF6, 0xC9C3B188, 0x2DCA6A03, -> 0xE89A6D3D, 0x50EF7CF5, 0xF6274868, 0x8AA22824, -> 0x980FFDE3, 0xD4A6CB4E, 0x06FF9E1A, 0xBADB6DF5, -> 0xEDE3ADF3, 0xC9CF45F6, 0xFDFA194C, 0xAF076AA8, -> 0x7B876CEA, 0xB0C89575, 0x35A72155, 0x6CFDFC06 -> ``` - -`mix[14]` - - -> ``` -> 0x0E3E28C8, 0xEC329DEC, 0x06D0A1D1, 0xF95ABEF8, -> 0x168DCF28, 0xDD7714C1, 0x769C119E, 0xA5530A7D, -> 0x1EEACB59, 0x30FD21BB, 0x082A3691, 0x1C4C9BCA, -> 0x420F27DE, 0xA8FDA3AE, 0xE182142E, 0x5102F0FF, -> 0x15B82277, 0x120C3217, 0x7BE714ED, 0xA251DCD5, -> 0x6FB4F831, 0xB71D7B32, 0xD5F7A04A, 0x763E1A20, -> 0x38E68B0C, 0xBB5A4121, 0x9340BF06, 0x948B03F8, -> 0xE71BF17B, 0x1BB5F06B, 0x26F2A200, 0x5F28C415 -> ``` - -`mix[15]` - - -> ``` -> 0xC818CD64, 0xBC910343, 0xB18B7776, 0x7182DEBA, -> 0x9DB319EE, 0x9AE7F32F, 0x3CA9F8B5, 0xC63F48ED, -> 0x8321533A, 0x059C96B1, 0x8DCDA60A, 0x75B6C1D1, -> 0xC3406B57, 0x3DFE9E9B, 0xC01E1FD7, 0xC4643218, -> 0x6873F0BA, 0x8ABD36B9, 0xA74D0CBD, 0x8A637118, -> 0x6916416C, 0xB6E3A8DD, 0xB68DD4FA, 0xFBD543EE, -> 0x56F05592, 0x33D6DB82, 0x58D0A7DD, 0x18630C6E, -> 0xB33749CA, 0x5D2E87F7, 0x0F3C39DB, 0x3CAE9895 -> ``` +[TO DO] ## progPowHash -Block 30000: - -- `prog_seed` - 600 -- `nonce` - `123456789abcdef0` -- `header` - `ffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff` -- _digest_ - `11f19805c58ab46610ff9c719dcf0a5f18fa2f1605798eef770c47219274767d` -- _result_ - `5b7ccd472dbefdd95b895cac8ece67ff0deb5a6bd2ecc6e162383d00c3728ece` - Block 0: - -- `prog_seed` - 0 -- `nonce` - `0000000000000000` -- `header` - `0000000000000000000000000000000000000000000000000000000000000000` -- _digest_ - `faeb1be51075b03a4ff44b335067951ead07a3b078539ace76fd56fc410557a3` -- _result_ - `63155f732f2bf556967f906155b510c917e48e99685ead76ea83f4eca03ab12` +``` +Header : 0xffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff +Nonce : 0x123456789abcdef0 +Hash init : 0xee304846ddd0a47b98179e96b60ec5ceeae2727834367e593de780e3e6d1892f +Mix seed : 0x7ba4d0dd464830ee +Mix hash : 0xc2e883b6876ec4cc514b9cea269f343095619faf9f2edcafb3fcf6928fa58141 +Hash final : 0xfa70fbf9979f80ec3db2c3f118a5e683fcf5f54ea7edc41b0b5d336508694cb8 + +Header : 0xffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff +Nonce : 0x0000000000000001 +Hash init : 0xc69d81127f23e360ee0e7ca302fdc1f9e25a772a3728ec48be074a3d398e7677 +Mix seed : 0x60e3237f12819dc6 +Mix hash : 0xab94222a9736b2c93282fbf6e7217f792f87504033a83eb5501beb24f0d235e7 +Hash final : 0x1260d102572f6ab9840556e8766ba670b511bcf768767b0ab05af45ea9fbad8d +``` Block 49: - -- `prog_seed` - 0 -- `nonce` - `0000000006ff2c47` -- `header` - `63155f732f2bf556967f906155b510c917e48e99685ead76ea83f4eca03ab12b` -- _digest_ - `c789c1180f890ec555ff42042913465481e8e6bc512cb981e1c1108dc3f2227d` -- _result_ - `9e7248f20914913a73d80a70174c331b1d34f260535ac3631d770e656b5dd92` - -Block 50: - -- `prog_seed` - 1 -- `nonce` - `00000000076e482e` -- `header` - `9e7248f20914913a73d80a70174c331b1d34f260535ac3631d770e656b5dd922` -- _digest_ - `c7340542c2a06b3a7dc7222635f7cd402abf8b528ae971ddac6bbe2b0c7cb518` -- _result_ - `de37e1824c86d35d154cf65a88de6d9286aec4f7f10c3fc9f0fa1bcc2687188` - -Block 99: - -- `prog_seed` - 1 -- `nonce` - `000000003917afab` -- `header` - `de37e1824c86d35d154cf65a88de6d9286aec4f7f10c3fc9f0fa1bcc2687188d` -- _digest_ - `f5e60b2c5bfddd136167a30cbc3c8dbdbd15a512257dee7964e0bc6daa9f8ba7` -- _result_ - `ac7b55e801511b77e11d52e9599206101550144525b5679f2dab19386f23dcc` - -Block 29,950: - -- `prog_seed` - 599 -- `nonce` - `005d409dbc23a62a` -- `header` - `ac7b55e801511b77e11d52e9599206101550144525b5679f2dab19386f23dcce` -- _digest_ - `07393d15805eb08ee6fc6cb3ad4ad1010533bd0ff92d6006850246829f18fd6e` -- _result_ - `e43d7e0bdc8a4a3f6e291a5ed790b9fa1a0948a2b9e33c844888690847de19f` - -Block 29,999: - -- `prog_seed` - 599 -- `nonce` - `005db5fa4c2a3d03` -- `header` - `e43d7e0bdc8a4a3f6e291a5ed790b9fa1a0948a2b9e33c844888690847de19f5` -- _digest_ - `7551bddf977491da2f6cfc1679299544b23483e8f8ee0931c4c16a796558a0b8` -- _result_ - `d34519f72c97cae8892c277776259db3320820cb5279a299d0ef1e155e5c645` +``` +Header : 0xffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff +Nonce : 0x123456789abcdef0 +Hash init : 0xee304846ddd0a47b98179e96b60ec5ceeae2727834367e593de780e3e6d1892f +Mix seed : 0x7ba4d0dd464830ee +Mix hash : 0xa0e00c15ccff10aefeeef6ca28260807fdd7f2daaff7948b15857e3a65908f09 +Hash final : 0xa66465873e0674e95ac58efba116458342b3252abeb47874adaf139843ef79bb + +Header : 0xffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff +Nonce : 0x0000000000000001 +Hash init : 0xc69d81127f23e360ee0e7ca302fdc1f9e25a772a3728ec48be074a3d398e7677 +Mix seed : 0x60e3237f12819dc6 +Mix hash : 0x1704a993e5a8603615b964990253896681da83ddd10c0e6e8fee2f273fa2a961 +Hash final : 0x528dff2f543825030a8e0943013de7bc6a4b7c203c7398607811176b03ce70f4 +``` + +Block 14999: +``` +Header : 0xffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff +Nonce : 0x123456789abcdef0 +Hash init : 0xee304846ddd0a47b98179e96b60ec5ceeae2727834367e593de780e3e6d1892f +Mix seed : 0x7ba4d0dd464830ee +Mix hash : 0xfbbed3db6316658244eef0a897a901fdb40956de9439cf15a74582427443d3bc +Hash final : 0xcaaa67746a4a26c102580851c4f8542f455cd97c6f2749de216c4425504d53c4 + +Header : 0xffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff +Nonce : 0x0000000000000001 +Hash init : 0xc69d81127f23e360ee0e7ca302fdc1f9e25a772a3728ec48be074a3d398e7677 +Mix seed : 0x60e3237f12819dc6 +Mix hash : 0x0a66b4d37962836650099ad914d2688ffb5dc8688424256cf177c3e7b3f85e88 +Hash final : 0x4bd39ef9155cfd42f0ebb486ee7097d08f793147a9d157027db3d188770ac29d +``` Block 30,000: - -- `prog_seed` - 600 -- `nonce` - `005db8607994ff30` -- `header` - `d34519f72c97cae8892c277776259db3320820cb5279a299d0ef1e155e5c6454` -- _digest_ - `f1c2c7c32266af9635462e6ce1c98ebe4e7e3ecab7a38aaabfbf2e731e0fbff4` -- _result_ - `8b6ce5da0b06d18db7bd8492d9e5717f8b53e7e098d9fef7886d58a6e913ef6` - -Block 30,049: - -- `prog_seed` - 600 -- `nonce` - `005e2e215a8ca2e7` -- `header` - `8b6ce5da0b06d18db7bd8492d9e5717f8b53e7e098d9fef7886d58a6e913ef64` -- _digest_ - `57fe6a9fbf920b4e91deeb66cb0efa971e08229d1a160330e08da54af0689add` -- _result_ - `c2c46173481b9ced61123d2e293b42ede5a1b323210eb2a684df0874ffe0904` - -Block 30,050: - -- `prog_seed` - 601 -- `nonce` - `005e30899481055e` -- `header` - `c2c46173481b9ced61123d2e293b42ede5a1b323210eb2a684df0874ffe09047` -- _digest_ - `ba30c61cc5a2c74a5ecaf505965140a08f24a296d687e78720f0b48baf712f2d` -- _result_ - `ea42197eb2ba79c63cb5e655b8b1f612c5f08aae1a49ff236795a3516d87bc7` - -Block 30,099: - -- `prog_seed` - 601 -- `nonce` - `005ea6aef136f88b` -- `header` - `ea42197eb2ba79c63cb5e655b8b1f612c5f08aae1a49ff236795a3516d87bc71` -- _digest_ - `cfd5e46048cd133d40f261fe8704e51d3f497fc14203ac6a9ef6a0841780b1cd` -- _result_ - `49e15ba4bf501ce8fe8876101c808e24c69a859be15de554bf85dbc095491bd` - -Block 59,950: - -- `prog_seed` - 1,199 -- `nonce` - `02ebe0503bd7b1da` -- `header` - `49e15ba4bf501ce8fe8876101c808e24c69a859be15de554bf85dbc095491bd6` -- _digest_ - `21511fbaa31fb9f5fc4998a754e97b3083a866f4de86fa7500a633346f56d773` -- _result_ - `f5c50ba5c0d6210ddb16250ec3efda178de857b2b1703d8d5403bd0f848e19c` - -Block 59,999: - -- `prog_seed` - 1,199 -- `nonce` - `02edb6275bd221e3` -- `header` - `f5c50ba5c0d6210ddb16250ec3efda178de857b2b1703d8d5403bd0f848e19cf` -- _digest_ - `653eda37d337e39d311d22be9bbd3458d3abee4e643bee4a7280a6d08106ef98` -- _result_ - `341562d10d4afb706ec2c8d5537cb0c810de02b4ebb0a0eea5ae335af6fb2e8` - -Block 10,000,000: - -- `prog_seed` - 200,000 -- `nonce` - `005e30899481055e` -- `header` - `efda178de857b2b1703d8d5403bd0f848e19cff5c50ba5c0d6210ddb16250ec3` -- _digest_ - `b2403f56c426177856eaf0eedd707c86ae78a432b9169c3689a67058fcf2a848` -- _result_ - `206aee640c0fd21473d5cc3654d63c80442d9e2dfa676d2801d3ec1fbab38a6d` - -Block 100,000,000: - -- `prog_seed` - 2,000,000 -- `nonce` - `02abe0589481055e` -- `header` - `49e15ba4bf501ce8fe88765403bd0f848e19cff5c50ba5c0d6210ddb16250ec3` -- _digest_ - `ac452084d6f4e6eacf4282ad58dbd4ce7ef2653fb5e6b5c877f56928c907432a` -- _result_ - `b879f84923e71b812ef5a42ece0b5b9366c31cab218f40afe65f8a2cae448a6f` +``` +Header : 0xffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff +Nonce : 0x123456789abcdef0 +Hash init : 0xee304846ddd0a47b98179e96b60ec5ceeae2727834367e593de780e3e6d1892f +Mix seed : 0x7ba4d0dd464830ee +Mix hash : 0x493c13e9807440571511b561132834bbd558dddaa3b70c09515080a6a1aff6d0 +Hash final : 0x46b72b75f238bea3fcfd227e0027dc173dceaa1fb71744bd3d5e030ed2fed053 + +Header : 0xffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff +Nonce : 0x0000000000000001 +Hash init : 0xc69d81127f23e360ee0e7ca302fdc1f9e25a772a3728ec48be074a3d398e7677 +Mix seed : 0x60e3237f12819dc6 +Mix hash : 0x67ed6cc54b89262d1ac9c3f24ad0d2362cb703c1e23881713d0350ca3035e2ae +Hash final : 0xad8fa791bfb2f474a3487d27075bf339d73e0e69d62ecab14166add94c8d0f92 +``` diff --git a/test/kernel.cu b/test/kernel.cu index 7aa147310..4fe7b83f6 100644 --- a/test/kernel.cu +++ b/test/kernel.cu @@ -1,133 +1,123 @@ -// Inner loop for prog_seed 600 -__device__ __forceinline__ void progPowLoop(const uint32_t loop, +// Inner loop for prog_seed 3,000 +DEV_INLINE void progPowLoop(const uint32_t loop, uint32_t mix[PROGPOW_REGS], - const dag_t *g_dag, + const dag_t* g_dag, const uint32_t c_dag[PROGPOW_CACHE_WORDS], - const bool hack_false) + const bool hack_false, + const uint32_t lane_id) { dag_t data_dag; uint32_t offset, data; - const uint32_t lane_id = threadIdx.x & (PROGPOW_LANES - 1); // global load - offset = __shfl_sync(0xFFFFFFFF, mix[0], loop%PROGPOW_LANES, PROGPOW_LANES); + offset = _SHFL(mix[0], loop & (PROGPOW_LANES - 1), PROGPOW_LANES); offset %= PROGPOW_DAG_ELEMENTS; - offset = offset * PROGPOW_LANES + (lane_id ^ loop) % PROGPOW_LANES; + offset = offset * PROGPOW_LANES + ((lane_id ^ loop) & (PROGPOW_LANES - 1)); data_dag = g_dag[offset]; // hack to prevent compiler from reordering LD and usage if (hack_false) __threadfence_block(); // cache load 0 - offset = mix[26] % PROGPOW_CACHE_WORDS; + offset = mix[12] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[0] = (mix[0] ^ data) * 33; + mix[26] = ROTR32(mix[26], 17) ^ data; // random math 0 - data = mix[10] ^ mix[16]; - mix[4] = ROTL32(mix[4], 27) ^ data; + data = mix[13] ^ mix[3]; + mix[9] = ROTL32(mix[9], 17) ^ data; // cache load 1 - offset = mix[30] % PROGPOW_CACHE_WORDS; + offset = mix[1] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[27] = ROTR32(mix[27], 7) ^ data; + mix[15] = ROTL32(mix[15], 15) ^ data; // random math 1 - data = mix[24] & mix[14]; - mix[26] = (mix[26] * 33) + data; + data = mix[24] ^ mix[10]; + mix[16] = (mix[16] * 33) + data; // cache load 2 - offset = mix[1] % PROGPOW_CACHE_WORDS; + offset = mix[29] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[13] = (mix[13] * 33) + data; + mix[25] = (mix[25] ^ data) * 33; // random math 2 - data = mix[17] & mix[16]; - mix[15] = ROTR32(mix[15], 12) ^ data; + data = ROTL32(mix[4], mix[12]); + mix[12] = ROTR32(mix[12], 13) ^ data; // cache load 3 - offset = mix[19] % PROGPOW_CACHE_WORDS; + offset = mix[6] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[17] = (mix[17] ^ data) * 33; + mix[7] = ROTL32(mix[7], 8) ^ data; // random math 3 - data = mul_hi(mix[31], mix[5]); - mix[7] = (mix[7] ^ data) * 33; + data = mix[8] * mix[24]; + mix[31] = (mix[31] ^ data) * 33; // cache load 4 - offset = mix[11] % PROGPOW_CACHE_WORDS; + offset = mix[11] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[14] = (mix[14] ^ data) * 33; + mix[27] = ROTL32(mix[27], 2) ^ data; // random math 4 - data = mix[23] * mix[19]; - mix[8] = (mix[8] * 33) + data; + data = popcount(mix[28]) + popcount(mix[17]); + mix[5] = (mix[5] * 33) + data; // cache load 5 - offset = mix[21] % PROGPOW_CACHE_WORDS; + offset = mix[18] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[9] = (mix[9] ^ data) * 33; + mix[11] = ROTR32(mix[11], 28) ^ data; // random math 5 - data = clz(mix[30]) + clz(mix[15]); - mix[12] = ROTR32(mix[12], 16) ^ data; + data = mix[31] ^ mix[12]; + mix[17] = (mix[17] ^ data) * 33; // cache load 6 - offset = mix[15] % PROGPOW_CACHE_WORDS; + offset = mix[8] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[3] = ROTR32(mix[3], 27) ^ data; + mix[29] = ROTR32(mix[29], 10) ^ data; // random math 6 - data = clz(mix[12]) + clz(mix[5]); + data = popcount(mix[4]) + popcount(mix[12]); mix[10] = (mix[10] * 33) + data; // cache load 7 - offset = mix[18] % PROGPOW_CACHE_WORDS; + offset = mix[14] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[1] = ROTR32(mix[1], 6) ^ data; + mix[6] = (mix[6] ^ data) * 33; // random math 7 - data = min(mix[4], mix[25]); - mix[11] = ROTR32(mix[11], 27) ^ data; + data = min(mix[10], mix[20]); + mix[24] = (mix[24] * 33) + data; // cache load 8 - offset = mix[3] % PROGPOW_CACHE_WORDS; + offset = mix[17] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[6] = (mix[6] ^ data) * 33; + mix[14] = (mix[14] ^ data) * 33; // random math 8 - data = mul_hi(mix[18], mix[16]); - mix[16] = (mix[16] ^ data) * 33; + data = mix[0] * mix[10]; + mix[19] = ROTR32(mix[19], 23) ^ data; // cache load 9 - offset = mix[17] % PROGPOW_CACHE_WORDS; + offset = mix[9] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[28] = ROTL32(mix[28], 17) ^ data; + mix[23] = (mix[23] * 33) + data; // random math 9 - data = ROTL32(mix[15], mix[23]); - mix[31] = (mix[31] * 33) + data; + data = min(mix[22], mix[28]); + mix[1] = ROTR32(mix[1], 4) ^ data; // cache load 10 - offset = mix[31] % PROGPOW_CACHE_WORDS; + offset = mix[0] & (PROGPOW_CACHE_WORDS - 1); data = c_dag[offset]; - mix[2] = (mix[2] * 33) + data; + mix[18] = (mix[18] ^ data) * 33; // random math 10 - data = mix[11] | mix[17]; - mix[19] = ROTL32(mix[19], 28) ^ data; - // cache load 11 - offset = mix[16] % PROGPOW_CACHE_WORDS; - data = c_dag[offset]; - mix[30] = ROTR32(mix[30], 18) ^ data; + data = ROTL32(mix[22], mix[9]); + mix[21] = ROTR32(mix[21], 5) ^ data; // random math 11 - data = mix[22] * mix[7]; - mix[22] = ROTR32(mix[22], 30) ^ data; + data = min(mix[26], mix[4]); + mix[22] = (mix[22] * 33) + data; // random math 12 - data = mix[27] & mix[16]; - mix[29] = ROTR32(mix[29], 25) ^ data; + data = min(mix[19], mix[30]); + mix[8] = ROTL32(mix[8], 26) ^ data; // random math 13 - data = ROTL32(mix[11], mix[0]); - mix[5] = (mix[5] ^ data) * 33; + data = mix[12] ^ mix[24]; + mix[3] = ROTL32(mix[3], 30) ^ data; // random math 14 - data = ROTR32(mix[15], mix[25]); - mix[24] = ROTL32(mix[24], 13) ^ data; + data = min(mix[8], mix[13]); + mix[28] = ROTL32(mix[28], 31) ^ data; // random math 15 - data = mix[14] & mix[26]; - mix[18] = (mix[18] * 33) + data; + data = ROTL32(mix[12], mix[9]); + mix[30] = ROTL32(mix[30], 31) ^ data; // random math 16 - data = mix[28] * mix[16]; - mix[25] = (mix[25] ^ data) * 33; + data = ROTL32(mix[28], mix[27]); + mix[2] = (mix[2] * 33) + data; // random math 17 - data = mix[11] * mix[0]; - mix[23] = (mix[23] ^ data) * 33; - // random math 18 - data = mix[2] + mix[24]; - mix[21] = ROTR32(mix[21], 20) ^ data; - // random math 19 - data = mix[25] + mix[4]; - mix[20] = ROTL32(mix[20], 22) ^ data; + data = ROTL32(mix[30], mix[28]); + mix[20] = ROTL32(mix[20], 12) ^ data; // consume global load data // hack to prevent compiler from reordering LD and usage if (hack_false) __threadfence_block(); - mix[0] = (mix[0] ^ data_dag.s[0]) * 33; - mix[0] = ROTR32(mix[0], 21) ^ data_dag.s[1]; - mix[4] = (mix[4] * 33) + data_dag.s[2]; - mix[27] = (mix[27] ^ data_dag.s[3]) * 33; -} \ No newline at end of file + mix[0] = (mix[0] * 33) + data_dag.words[0]; + mix[4] = ROTL32(mix[4], 13) ^ data_dag.words[1]; + mix[13] = (mix[13] ^ data_dag.words[2]) * 33; + mix[0] = ROTR32(mix[0], 12) ^ data_dag.words[3]; +} diff --git a/test/result.log b/test/result.log deleted file mode 100644 index 9c9695cd6..000000000 --- a/test/result.log +++ /dev/null @@ -1,138 +0,0 @@ ->ethminer.exe -U -M 30000 - m 00:37:26|main | ethminer version 0.15.0.dev0 - m 00:37:26|main | Build: windows / release +git. 2c02a51 - cu 00:37:26|main | Using grid size 1024 , block size 512 -Benchmarking on platform: CUDA -Preparing DAG for block #30000 - i Warming up...00:37:26|cuda-0 | No work. - - i 00:37:26|cuda-0 | Initialising miner 0 - cu 00:37:26|cuda-0 | Using device: GeForce GTX 1060 6GB (Compute 6.1) - cu 00:37:26|cuda-0 | Set Device to current - cu 00:37:26|cuda-0 | Resetting device - cu 00:37:27|cuda-0 | Allocating light with size: 16907456 - cu 00:37:27|cuda-0 | Generating mining buffers - cu 00:37:27|cuda-0 | Generating DAG for GPU # 0 with dagBytes: 1082130304 gridSize: 1024 - cu 00:37:30|cuda-0 | Finished DAG - cu 00:37:31|cuda-0 | Compile log: - cu 00:37:31|cuda-0 | JIT info: - ptxas info : 202 bytes gmem, 96 bytes cmem[3] -ptxas info : Compiling entry function '_Z14progpow_searchy8hash32_tyPK5dag_tPV14search_resultsb' for 'sm_61' -ptxas info : Function properties for _Z14progpow_searchy8hash32_tyPK5dag_tPV14search_resultsb -ptxas . 32 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads -ptxas info : Used 158 registers, 16384 bytes smem, 385 bytes cmem[0], 4 bytes cmem[2] -ptxas info : Function properties for _Z11keccak_f8008hash32_tyS_ -ptxas . 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads - cu 00:37:31|cuda-0 | JIT err: - - cu 00:37:31|cuda-0 | Mangled name: _Z14progpow_searchy8hash32_tyPK5dag_tPV14search_resultsb - cu 00:37:31|cuda-0 | done compiling -header ffeeddccbbaa9988776655443322110000112233445566778899aabbccddeeff -nonce 123456789abcdef0 -seed ee304846ddd0a47b -cdag[0]=b3e35467 cdag[1]=ae7402e3 cdag[2]=8522a782 cdag[3]=a2d8353b -cdag[4]=ff4723bd cdag[5]=bfbc05ee cdag[6]=de6944de cdag[7]=f0d2b5b8 -cdag[8]=c74cbad3 cdag[9]=b100f797 cdag[10]=05bc60be cdag[11]=4f40840b -cdag[12]=35e47268 cdag[13]=9cd6f993 cdag[14]=6a0e4659 cdag[15]=b838e46e -cdag[4080]=bde0c650 cdag[4081]=57cba482 cdag[4082]=54877c9d cdag[4083]=f9fdc423 -cdag[4084]=fb65141b cdag[4085]=55074ca4 cdag[4086]=c7dd116e cdag[4087]=bc1737d1 -cdag[4088]=126e8847 cdag[4089]=b16983b2 cdag[4090]=f80c058e cdag[4091]=e0ad53b5 -cdag[4092]=d5f3e840 cdag[4093]=ff1bdd89 cdag[4094]=35660a19 cdag[4095]=73244193 -fill_mix lane 0 mix[0]=10c02f0d ... mix[31]=6d175b7e -fill_mix lane 1 mix[0]=f301da0a ... mix[31]=be60fc28 -fill_mix lane 2 mix[0]=81f99723 ... mix[31]=5cbc32d1 -fill_mix lane 3 mix[0]=35e2204b ... mix[31]=4951b6e6 -fill_mix lane 4 mix[0]=1e276b09 ... mix[31]=70db3fa7 -fill_mix lane 5 mix[0]=eb343d86 ... mix[31]=0ae9ecfa -fill_mix lane 6 mix[0]=ff34cf47 ... mix[31]=e2cd3093 -fill_mix lane 7 mix[0]=c7056ce8 ... mix[31]=4c362d17 -fill_mix lane 8 mix[0]=bd42a7b6 ... mix[31]=7da69107 -fill_mix lane 9 mix[0]=95d03571 ... mix[31]=7d22f89e -fill_mix lane 10 mix[0]=af0e74dc ... mix[31]=12f2d96c -fill_mix lane 11 mix[0]=1287d683 ... mix[31]=1931a478 -fill_mix lane 12 mix[0]=96f960c2 ... mix[31]=6f5b9f23 -fill_mix lane 13 mix[0]=4e46d05d ... mix[31]=3837dda3 -fill_mix lane 14 mix[0]=1a9b1d40 ... mix[31]=08b4fc39 -fill_mix lane 15 mix[0]=3344ce0f ... mix[31]=20201012 -loop 0 dag entry 2043727 of 4227071 total -loop 1 dag entry 1878577 of 4227071 total -loop 2 dag entry 1972818 of 4227071 total -loop 3 dag entry 4192557 of 4227071 total -loop 4 dag entry 2908963 of 4227071 total -loop 5 dag entry 650106 of 4227071 total -loop 6 dag entry 3360110 of 4227071 total -loop 7 dag entry 2666972 of 4227071 total -loop 8 dag entry 3262571 of 4227071 total -loop 9 dag entry 1876031 of 4227071 total -loop 10 dag entry 1099946 of 4227071 total -loop 11 dag entry 1058639 of 4227071 total -loop 12 dag entry 4091582 of 4227071 total -loop 13 dag entry 2295331 of 4227071 total -loop 14 dag entry 2587683 of 4227071 total -loop 15 dag entry 950942 of 4227071 total -loop 16 dag entry 2427766 of 4227071 total -loop 17 dag entry 677253 of 4227071 total -loop 18 dag entry 3564299 of 4227071 total -loop 19 dag entry 2373221 of 4227071 total -loop 20 dag entry 2065878 of 4227071 total -loop 21 dag entry 2684534 of 4227071 total -loop 22 dag entry 3563556 of 4227071 total -loop 23 dag entry 909053 of 4227071 total -loop 24 dag entry 3867986 of 4227071 total -loop 25 dag entry 959685 of 4227071 total -loop 26 dag entry 2837635 of 4227071 total -loop 27 dag entry 3312470 of 4227071 total -loop 28 dag entry 3048893 of 4227071 total -loop 29 dag entry 3601694 of 4227071 total -loop 30 dag entry 3536836 of 4227071 total -loop 31 dag entry 533409 of 4227071 total -loop 32 dag entry 3736438 of 4227071 total -loop 33 dag entry 864961 of 4227071 total -loop 34 dag entry 188397 of 4227071 total -loop 35 dag entry 3814381 of 4227071 total -loop 36 dag entry 4108296 of 4227071 total -loop 37 dag entry 3950694 of 4227071 total -loop 38 dag entry 2069968 of 4227071 total -loop 39 dag entry 2745630 of 4227071 total -loop 40 dag entry 1008990 of 4227071 total -loop 41 dag entry 2675149 of 4227071 total -loop 42 dag entry 3352224 of 4227071 total -loop 43 dag entry 472995 of 4227071 total -loop 44 dag entry 3348733 of 4227071 total -loop 45 dag entry 2693084 of 4227071 total -loop 46 dag entry 3406906 of 4227071 total -loop 47 dag entry 2629222 of 4227071 total -loop 48 dag entry 911095 of 4227071 total -loop 49 dag entry 1358231 of 4227071 total -loop 50 dag entry 3574453 of 4227071 total -loop 51 dag entry 4109829 of 4227071 total -loop 52 dag entry 3503254 of 4227071 total -loop 53 dag entry 3897141 of 4227071 total -loop 54 dag entry 4030761 of 4227071 total -loop 55 dag entry 2855865 of 4227071 total -loop 56 dag entry 452603 of 4227071 total -loop 57 dag entry 1258735 of 4227071 total -loop 58 dag entry 2380407 of 4227071 total -loop 59 dag entry 3320172 of 4227071 total -loop 60 dag entry 3180940 of 4227071 total -loop 61 dag entry 1407113 of 4227071 total -loop 62 dag entry 1057525 of 4227071 total -loop 63 dag entry 574671 of 4227071 total -digest lane 0: 5883883e -digest lane 1: 2fb0fd2e -digest lane 2: eadb7563 -digest lane 3: 4a171075 -digest lane 4: ac2758f5 -digest lane 5: aa5b06cf -digest lane 6: 52156e93 -digest lane 7: 4f7a7fff -digest lane 8: fe91e36a -digest lane 9: 9964c8b6 -digest lane 10: 6a3d93e2 -digest lane 11: 3c6d641f -digest lane 12: e90da618 -digest lane 13: 80cd8ab9 -digest lane 14: ce72386f -digest lane 15: 95517d28 -digest: 11f19805c58ab46610ff9c719dcf0a5f18fa2f1605798eef770c47219274767d -result (top 64 bits): 5b7ccd472dbefdd9 \ No newline at end of file