feat(crypto): RandomX PoW hash via CGo bridge

Vendor RandomX source, add bridge_randomx_hash() with static VM lifecycle. Key: LetheanRandomXv1. Input: header_hash || nonce. Co-Authored-By: Charon <charon@lethean.io>
2026-02-21 01:01:23 +00:00 · 2026-02-21 01:01:23 +00:00 · 4fe3fdfbd2
commit 4fe3fdfbd2
parent 7abac5e011
102 changed files with 19678 additions and 2 deletions
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@ -11,6 +11,8 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)

+include(CheckCCompilerFlag)
+
 # Include paths: upstream sources + compat stubs
 include_directories(
    ${CMAKE_CURRENT_SOURCE_DIR}/upstream
@ -43,6 +45,65 @@ set(CXX_SOURCES
    bridge.cpp
 )

+# --- RandomX PoW library ---
+# Common sources. The x86-64 JIT backend is appended further down, once the
+# target architecture has been checked.
+set(RANDOMX_SOURCES
+    randomx/aes_hash.cpp
+    randomx/argon2_ref.c
+    randomx/argon2_ssse3.c
+    randomx/argon2_avx2.c
+    randomx/bytecode_machine.cpp
+    randomx/cpu.cpp
+    randomx/dataset.cpp
+    randomx/soft_aes.cpp
+    randomx/virtual_memory.c
+    randomx/vm_interpreted.cpp
+    randomx/allocator.cpp
+    randomx/assembly_generator_x86.cpp
+    randomx/instruction.cpp
+    randomx/randomx.cpp
+    randomx/superscalar.cpp
+    randomx/vm_compiled.cpp
+    randomx/vm_interpreted_light.cpp
+    randomx/argon2_core.c
+    randomx/blake2_generator.cpp
+    randomx/instructions_portable.cpp
+    randomx/reciprocal.c
+    randomx/virtual_machine.cpp
+    randomx/vm_compiled_light.cpp
+    randomx/blake2/blake2b.c
+)
+
+# The JIT backend listed below (C++ plus hand-written x86 assembly) and the
+# -maes / -mssse3 / -mavx2 flags only make sense when targeting x86-64. Check
+# the target up front and stop with a readable message instead of letting the
+# build die in the assembler on any other architecture.
+string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" RANDOMX_ARCH_ID)
+if(NOT "${RANDOMX_ARCH_ID}" MATCHES "^(x86_64|x86-64|amd64)$")
+    message(FATAL_ERROR
+        "RandomX: only the x86-64 backend is wired into this build "
+        "(CMAKE_SYSTEM_PROCESSOR='${CMAKE_SYSTEM_PROCESSOR}'). "
+        "Add the matching RandomX JIT sources and compiler flags for this "
+        "architecture before building on it.")
+endif()
+
+# ASM has to be enabled for jit_compiler_x86_static.S; do it before the target
+# that contains the file is created.
+enable_language(ASM)
+list(APPEND RANDOMX_SOURCES
+    randomx/jit_compiler_x86.cpp
+    randomx/jit_compiler_x86_static.S
+)
+
+add_library(randomx STATIC ${RANDOMX_SOURCES})
+target_include_directories(randomx PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/randomx
+)
+# RandomX is built as C++11 even though the rest of this file defaults to C++17.
+set_target_properties(randomx PROPERTIES
+    POSITION_INDEPENDENT_CODE ON
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED ON
+)
+
+# Platform-specific flags for RandomX (x86-64 only, see the check above).
+# -maes lets the hardware-AES code paths compile.
+target_compile_options(randomx PRIVATE -maes)
+
+# The SSSE3 / AVX2 Argon2 variants get their ISA flag per file, and only when
+# the compiler accepts it.
+check_c_compiler_flag(-mssse3 HAVE_SSSE3)
+if(HAVE_SSSE3)
+    set_source_files_properties(randomx/argon2_ssse3.c PROPERTIES COMPILE_FLAGS -mssse3)
+endif()
+check_c_compiler_flag(-mavx2 HAVE_AVX2)
+if(HAVE_AVX2)
+    set_source_files_properties(randomx/argon2_avx2.c PROPERTIES COMPILE_FLAGS -mavx2)
+endif()
+
+# Vendored code: silence warnings we are not going to fix locally.
+target_compile_options(randomx PRIVATE
+    -Wno-unused-variable
+    -Wno-unused-function
+    -Wno-sign-compare
+    -Wno-unused-parameter
+    -Wno-implicit-fallthrough
+)
+
 # --- Find system dependencies ---
 find_package(OpenSSL REQUIRED)
 find_package(Boost REQUIRED)
@ -53,12 +114,14 @@ add_library(cryptonote STATIC ${C_SOURCES} ${CXX_SOURCES})
 target_include_directories(cryptonote PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/upstream
     ${CMAKE_CURRENT_SOURCE_DIR}/compat
+    ${CMAKE_CURRENT_SOURCE_DIR}/randomx  # bridge.cpp includes "randomx.h"
     ${OPENSSL_INCLUDE_DIR}
     ${Boost_INCLUDE_DIRS}
 )

 target_link_libraries(cryptonote PRIVATE
     OpenSSL::Crypto
+    randomx  # static RandomX library from this file; the cgo LDFLAGS in crypto.go also name -lrandomx
 )

 # Suppress warnings from upstream code (we don't modify it)
--- a/crypto/bridge.cpp
+++ b/crypto/bridge.cpp
@ -11,6 +11,7 @@
 #include "crypto-ops.h"
 #include "clsag.h"
 #include "hash-ops.h"
+#include "randomx.h"

 extern "C" {

@ -417,4 +418,33 @@ int cn_zarcanum_verify(const uint8_t /*hash*/[32], const uint8_t * /*proof*/,
    return -1; // not implemented
 }

+// ── RandomX PoW Hashing ──────────────────────────────────
+
+int bridge_randomx_hash(const uint8_t* key, size_t key_size,
+                        const uint8_t* input, size_t input_size,
+                        uint8_t* output) {
+    // Computes the RandomX hash of `input` under the cache key `key` and
+    // writes the 32-byte result to `output`.
+    // Returns 0 on success, -1 on invalid arguments or allocation failure.
+    //
+    // Static RandomX state — created on first call and re-keyed whenever a
+    // different key is passed, so the result always matches the supplied key.
+    // Thread safety: not thread-safe; Go wrapper must serialise calls.
+    static randomx_cache* rx_cache = nullptr;
+    static randomx_vm* rx_vm = nullptr;
+    static uint8_t* rx_key = nullptr;   // private copy of the key rx_cache holds
+    static size_t rx_key_size = 0;
+
+    // A null pointer is only acceptable together with a zero length.
+    if (output == nullptr) return -1;
+    if (key == nullptr && key_size != 0) return -1;
+    if (input == nullptr && input_size != 0) return -1;
+
+    // Is the cache already initialised with exactly this key?
+    bool same_key = (rx_cache != nullptr) && (rx_key_size == key_size);
+    for (size_t i = 0; same_key && i < key_size; ++i) {
+        same_key = (rx_key[i] == key[i]);
+    }
+
+    if (!same_key) {
+        // Copy the key before touching any RandomX state, so an allocation
+        // failure leaves the existing cache/VM pair intact and usable.
+        uint8_t* key_copy = nullptr;
+        try {
+            key_copy = new uint8_t[key_size != 0 ? key_size : 1];
+        } catch (...) {
+            return -1;  // never let an exception cross the C boundary
+        }
+        for (size_t i = 0; i < key_size; ++i) {
+            key_copy[i] = key[i];
+        }
+
+        if (rx_cache == nullptr) {
+            // Light mode (no dataset) for verification: the VM below is
+            // created with a cache and a null dataset.
+            randomx_flags flags = randomx_get_flags();
+            rx_cache = randomx_alloc_cache(flags);
+            if (rx_cache == nullptr) {
+                delete[] key_copy;
+                return -1;
+            }
+            randomx_init_cache(rx_cache, key, key_size);
+            rx_vm = randomx_create_vm(flags, rx_cache, nullptr);
+            if (rx_vm == nullptr) {
+                randomx_release_cache(rx_cache);
+                rx_cache = nullptr;
+                delete[] key_copy;
+                return -1;
+            }
+        } else {
+            // Key changed: rebuild the cache contents in place and tell the
+            // existing VM that its cache memory was rewritten.
+            randomx_init_cache(rx_cache, key, key_size);
+            randomx_vm_set_cache(rx_vm, rx_cache);
+        }
+
+        delete[] rx_key;
+        rx_key = key_copy;
+        rx_key_size = key_size;
+    }
+
+    randomx_calculate_hash(rx_vm, input, input_size, output);
+    return 0;
+}
+
 } // extern "C"
--- a/crypto/bridge.h
+++ b/crypto/bridge.h
@ -105,6 +105,15 @@ int cn_bge_verify(const uint8_t context[32], const uint8_t *ring,
 int cn_zarcanum_verify(const uint8_t hash[32], const uint8_t *proof,
                       size_t proof_len);

+// ── RandomX PoW Hashing ──────────────────────────────────
+// key/key_size: RandomX cache key (e.g. "LetheanRandomXv1")
+// input/input_size: block header hash (32 bytes) + nonce (8 bytes LE)
+// output: caller-provided buffer that receives the 32-byte hash result
+// Returns 0 on success, -1 on failure. Keeps static state between calls and is not thread-safe: callers must serialise calls.
+int bridge_randomx_hash(const uint8_t* key, size_t key_size,
+                        const uint8_t* input, size_t input_size,
+                        uint8_t* output);
+
 #ifdef __cplusplus
 }
 #endif
--- a/crypto/crypto.go
+++ b/crypto/crypto.go
@ -3,8 +3,8 @@
 package crypto

 /*
-#cgo CPPFLAGS: -I${SRCDIR}/upstream -I${SRCDIR}/compat
-#cgo LDFLAGS: -L${SRCDIR}/build -lcryptonote -lstdc++ -lssl -lcrypto
+#cgo CPPFLAGS: -I${SRCDIR}/upstream -I${SRCDIR}/compat -I${SRCDIR}/randomx
+#cgo LDFLAGS: -L${SRCDIR}/build -lcryptonote -lrandomx -lstdc++ -lssl -lcrypto -lpthread
 #include "bridge.h"
 */
 import "C"
--- a/crypto/pow.go
+++ b/crypto/pow.go
@ -0,0 +1,29 @@
+// Copyright (c) 2017-2026 Lethean (https://lt.hn)
+//
+// Licensed under the European Union Public Licence (EUPL) version 1.2.
+// SPDX-Licence-Identifier: EUPL-1.2
+
+package crypto
+
+// #include "bridge.h"
+import "C"
+import (
+	"fmt"
+	"unsafe"
+)
+
+// randomXGate serialises entry into bridge_randomx_hash. The C side keeps
+// static, non-thread-safe state and requires the Go wrapper to serialise
+// calls; a buffered channel of capacity one is used as the lock.
+var randomXGate = make(chan struct{}, 1)
+
+// RandomXHash computes the RandomX PoW hash. The key is the cache
+// initialisation key (e.g. "LetheanRandomXv1"). Input is typically
+// the block header hash (32 bytes) concatenated with the nonce (8 bytes LE).
+//
+// Empty key or input slices are passed to the bridge as a nil pointer with a
+// zero length instead of panicking on an out-of-range index. Calls are
+// serialised internally, so RandomXHash may be used from multiple goroutines.
+// A non-zero return code from the bridge is reported as an error.
+func RandomXHash(key, input []byte) ([32]byte, error) {
+	var output [32]byte
+
+	// &s[0] panics on an empty slice, so only take the address when there
+	// is at least one element.
+	var keyPtr, inputPtr *C.uint8_t
+	if len(key) > 0 {
+		keyPtr = (*C.uint8_t)(unsafe.Pointer(&key[0]))
+	}
+	if len(input) > 0 {
+		inputPtr = (*C.uint8_t)(unsafe.Pointer(&input[0]))
+	}
+
+	randomXGate <- struct{}{} // acquire
+	ret := C.bridge_randomx_hash(
+		keyPtr, C.size_t(len(key)),
+		inputPtr, C.size_t(len(input)),
+		(*C.uint8_t)(unsafe.Pointer(&output[0])),
+	)
+	<-randomXGate // release
+
+	if ret != 0 {
+		return output, fmt.Errorf("crypto: RandomX hash failed with code %d", ret)
+	}
+	return output, nil
+}
--- a/crypto/pow_test.go
+++ b/crypto/pow_test.go
@ -0,0 +1,40 @@
+// Copyright (c) 2017-2026 Lethean (https://lt.hn)
+//
+// Licensed under the European Union Public Licence (EUPL) version 1.2.
+// SPDX-Licence-Identifier: EUPL-1.2
+
+package crypto
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestRandomXHash_Good(t *testing.T) {
+	key := []byte("LetheanRandomXv1")
+	input := make([]byte, 40) // 32-byte hash + 8-byte nonce
+
+	hash, err := RandomXHash(key, input)
+	require.NoError(t, err)
+	assert.NotEqual(t, [32]byte{}, hash, "hash should be non-zero")
+
+	// Determinism: same input must produce the same output.
+	hash2, err := RandomXHash(key, input)
+	require.NoError(t, err)
+	assert.Equal(t, hash, hash2, "hash must be deterministic")
+}
+
+func TestRandomXHash_Bad(t *testing.T) {
+	cacheKey := []byte("LetheanRandomXv1")
+
+	// Two 40-byte blobs that differ only in their first byte.
+	zeroBlob := make([]byte, 40)
+	flippedBlob := make([]byte, 40)
+	flippedBlob[0] = 1
+
+	zeroHash, err := RandomXHash(cacheKey, zeroBlob)
+	require.NoError(t, err)
+	flippedHash, err := RandomXHash(cacheKey, flippedBlob)
+	require.NoError(t, err)
+
+	assert.NotEqual(t, zeroHash, flippedHash, "different inputs must produce different hashes")
+}
--- a/crypto/randomx/aes_hash.cpp
+++ b/crypto/randomx/aes_hash.cpp
@ -0,0 +1,379 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "soft_aes.h"
+#include "cpu.hpp"
+#include <cassert>
+
+#ifdef __riscv
+#include "aes_hash_rv64_zvkned.hpp"
+#include "aes_hash_rv64_vector.hpp"
+#endif
+
+//NOTE: The functions below were tuned for maximum performance
+//and are not cryptographically secure outside of the scope of RandomX.
+//It's not recommended to use them as general hash functions and PRNGs.
+
+//AesHash1R:
+//state0, state1, state2, state3 = Blake2b-512("RandomX AesHash1R state")
+//xkey0, xkey1 = Blake2b-256("RandomX AesHash1R xkeys")
+
+#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
+#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
+#define AES_HASH_1R_STATE2 0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017
+#define AES_HASH_1R_STATE3 0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c
+
+#define AES_HASH_1R_XKEY0 0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389
+#define AES_HASH_1R_XKEY1 0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1
+
+/*
+	Calculate a 512-bit hash of 'input' using 4 lanes of AES.
+	The input is treated as a set of round keys for the encryption
+	of the initial state.
+
+	'inputSize' must be a multiple of 64.
+
+	For a 2 MiB input, this has the same security as 32768-round
+	AES encryption.
+
+	Hashing throughput: >20 GiB/s per CPU core with hardware AES
+
+	Parameters:
+	  softAes   - template flag forwarded to aesenc/aesdec (presumably
+	              selects the software AES implementation; see soft_aes.h)
+	  input     - data to hash, consumed 64 bytes per loop iteration
+	  inputSize - size of 'input' in bytes
+	  hash      - receives the 64-byte result (four 128-bit stores)
+*/
+template<bool softAes>
+void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
+	assert(inputSize % 64 == 0); // only enforced when assertions are enabled
+
+#ifdef __riscv // RISC-V builds hand off to dedicated implementations when the CPU reports support
+	if (randomx::cpu.hasAes()) {
+		hashAes1Rx4_zvkned(input, inputSize, hash);
+		return;
+	}
+
+	if (randomx::cpu.hasRVV() && (randomx::cpu.getRVV_Length() >= 256)) {
+		hashAes1Rx4_RVV(input, inputSize, hash);
+		return;
+	}
+#endif
+
+	const uint8_t* inptr = (uint8_t*)input;
+	const uint8_t* inputEnd = inptr + inputSize;
+
+	rx_vec_i128 state0, state1, state2, state3;
+	rx_vec_i128 in0, in1, in2, in3;
+
+	//initial state: the fixed AES_HASH_1R_STATE0..3 constants defined above
+	state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
+	state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
+	state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
+	state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
+
+	//process 64 bytes at a time in 4 lanes (lanes 0 and 2 use aesenc, lanes 1 and 3 use aesdec)
+	while (inptr < inputEnd) {
+		in0 = rx_load_vec_i128((rx_vec_i128*)inptr + 0);
+		in1 = rx_load_vec_i128((rx_vec_i128*)inptr + 1);
+		in2 = rx_load_vec_i128((rx_vec_i128*)inptr + 2);
+		in3 = rx_load_vec_i128((rx_vec_i128*)inptr + 3);
+
+		state0 = aesenc<softAes>(state0, in0);
+		state1 = aesdec<softAes>(state1, in1);
+		state2 = aesenc<softAes>(state2, in2);
+		state3 = aesdec<softAes>(state3, in3);
+
+		inptr += 64;
+	}
+
+	//two extra rounds to achieve full diffusion (same xkey0, then xkey1, applied to every lane)
+	rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
+	rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
+
+	state0 = aesenc<softAes>(state0, xkey0);
+	state1 = aesdec<softAes>(state1, xkey0);
+	state2 = aesenc<softAes>(state2, xkey0);
+	state3 = aesdec<softAes>(state3, xkey0);
+
+	state0 = aesenc<softAes>(state0, xkey1);
+	state1 = aesdec<softAes>(state1, xkey1);
+	state2 = aesenc<softAes>(state2, xkey1);
+	state3 = aesdec<softAes>(state3, xkey1);
+
+	//output hash: the four lane states, 16 bytes each, in lane order
+	rx_store_vec_i128((rx_vec_i128*)hash + 0, state0);
+	rx_store_vec_i128((rx_vec_i128*)hash + 1, state1);
+	rx_store_vec_i128((rx_vec_i128*)hash + 2, state2);
+	rx_store_vec_i128((rx_vec_i128*)hash + 3, state3);
+}
+
+template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash); // explicit instantiation, softAes = false
+template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash); // explicit instantiation, softAes = true
+
+//AesGenerator1R:
+//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator1R keys")
+
+#define AES_GEN_1R_KEY0 0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553
+#define AES_GEN_1R_KEY1 0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07
+#define AES_GEN_1R_KEY2 0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1
+#define AES_GEN_1R_KEY3 0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135
+
+/*
+	Fill 'buffer' with pseudorandom data based on 512-bit 'state'.
+	The state is encrypted using a single AES round per 16 bytes of output
+	in 4 lanes.
+
+	'outputSize' must be a multiple of 64.
+
+	The modified state is written back to 'state' to allow multiple
+	calls to this function.
+
+	Parameters:
+	  softAes    - template flag forwarded to aesenc/aesdec (presumably
+	               selects the software AES implementation; see soft_aes.h)
+	  state      - 64-byte generator state, loaded at entry and overwritten
+	               with the final state at exit
+	  outputSize - number of bytes to write to 'buffer'
+	  buffer     - destination for the generated bytes
+*/
+template<bool softAes>
+void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
+	assert(outputSize % 64 == 0); // only enforced when assertions are enabled
+
+#ifdef __riscv // RISC-V builds hand off to dedicated implementations when the CPU reports support
+	if (randomx::cpu.hasAes()) {
+		fillAes1Rx4_zvkned(state, outputSize, buffer);
+		return;
+	}
+
+	if (randomx::cpu.hasRVV() && (randomx::cpu.getRVV_Length() >= 256)) {
+		fillAes1Rx4_RVV(state, outputSize, buffer);
+		return;
+	}
+#endif
+
+	const uint8_t* outptr = (uint8_t*)buffer;
+	const uint8_t* outputEnd = outptr + outputSize;
+
+	rx_vec_i128 state0, state1, state2, state3;
+	rx_vec_i128 key0, key1, key2, key3;
+
+	key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0); // one fixed round key per lane
+	key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
+	key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
+	key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
+
+	state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
+	state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
+	state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
+	state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
+
+	while (outptr < outputEnd) { // one AES round per lane, then emit the four lane states as 64 output bytes
+		state0 = aesdec<softAes>(state0, key0);
+		state1 = aesenc<softAes>(state1, key1);
+		state2 = aesdec<softAes>(state2, key2);
+		state3 = aesenc<softAes>(state3, key3);
+
+		rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
+		rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
+		rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
+		rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
+
+		outptr += 64;
+	}
+
+	rx_store_vec_i128((rx_vec_i128*)state + 0, state0); // write the advanced state back so a later call continues the stream
+	rx_store_vec_i128((rx_vec_i128*)state + 1, state1);
+	rx_store_vec_i128((rx_vec_i128*)state + 2, state2);
+	rx_store_vec_i128((rx_vec_i128*)state + 3, state3);
+}
+
+template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer); // explicit instantiation, softAes = true
+template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer); // explicit instantiation, softAes = false
+
+//AesGenerator4R:
+//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator4R keys 0-3")
+//key4, key5, key6, key7 = Blake2b-512("RandomX AesGenerator4R keys 4-7")
+
+#define AES_GEN_4R_KEY0 0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd
+#define AES_GEN_4R_KEY1 0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450
+#define AES_GEN_4R_KEY2 0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904
+#define AES_GEN_4R_KEY3 0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763
+#define AES_GEN_4R_KEY4 0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73
+#define AES_GEN_4R_KEY5 0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3
+#define AES_GEN_4R_KEY6 0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7
+#define AES_GEN_4R_KEY7 0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609
+
+template<bool softAes>
+void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { // fill 'buffer' with outputSize bytes derived from the 64-byte 'state', 4 AES rounds per 16 output bytes
+	assert(outputSize % 64 == 0); // only enforced when assertions are enabled
+
+#ifdef __riscv // RISC-V builds hand off to dedicated implementations when the CPU reports support
+	if (randomx::cpu.hasAes()) {
+		fillAes4Rx4_zvkned(state, outputSize, buffer);
+		return;
+	}
+
+	if (randomx::cpu.hasRVV() && (randomx::cpu.getRVV_Length() >= 256)) {
+		fillAes4Rx4_RVV(state, outputSize, buffer);
+		return;
+	}
+#endif
+
+	const uint8_t* outptr = (uint8_t*)buffer;
+	const uint8_t* outputEnd = outptr + outputSize;
+
+	rx_vec_i128 state0, state1, state2, state3;
+	rx_vec_i128 key0, key1, key2, key3, key4, key5, key6, key7;
+
+	key0 = rx_set_int_vec_i128(AES_GEN_4R_KEY0); // key0..key3 are used by lanes 0-1
+	key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1);
+	key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2);
+	key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3);
+	key4 = rx_set_int_vec_i128(AES_GEN_4R_KEY4); // key4..key7 are used by lanes 2-3
+	key5 = rx_set_int_vec_i128(AES_GEN_4R_KEY5);
+	key6 = rx_set_int_vec_i128(AES_GEN_4R_KEY6);
+	key7 = rx_set_int_vec_i128(AES_GEN_4R_KEY7);
+
+	state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
+	state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
+	state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
+	state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
+
+	while (outptr < outputEnd) { // four rounds per lane (lanes 0/2 aesdec, lanes 1/3 aesenc), then emit 64 bytes
+		state0 = aesdec<softAes>(state0, key0);
+		state1 = aesenc<softAes>(state1, key0);
+		state2 = aesdec<softAes>(state2, key4);
+		state3 = aesenc<softAes>(state3, key4);
+
+		state0 = aesdec<softAes>(state0, key1);
+		state1 = aesenc<softAes>(state1, key1);
+		state2 = aesdec<softAes>(state2, key5);
+		state3 = aesenc<softAes>(state3, key5);
+
+		state0 = aesdec<softAes>(state0, key2);
+		state1 = aesenc<softAes>(state1, key2);
+		state2 = aesdec<softAes>(state2, key6);
+		state3 = aesenc<softAes>(state3, key6);
+
+		state0 = aesdec<softAes>(state0, key3);
+		state1 = aesenc<softAes>(state1, key3);
+		state2 = aesdec<softAes>(state2, key7);
+		state3 = aesenc<softAes>(state3, key7);
+
+		rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
+		rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
+		rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
+		rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
+
+		outptr += 64;
+	}
+} // note: this portable path never stores the lane states back to 'state', so the caller's state is left as it was
+
+template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer); // explicit instantiation, softAes = true
+template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer); // explicit instantiation, softAes = false
+
+template<bool softAes>
+void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) { // one pass that hashes the old scratchpad contents into 'hash' and overwrites them with generator output from 'fill_state'
+#ifdef __riscv // RISC-V builds hand off to dedicated implementations when the CPU reports support
+	if (randomx::cpu.hasAes()) {
+		hashAndFillAes1Rx4_zvkned(scratchpad, scratchpadSize, hash, fill_state);
+		return;
+	}
+
+	if (randomx::cpu.hasRVV() && (randomx::cpu.getRVV_Length() >= 256)) {
+		hashAndFillAes1Rx4_RVV(scratchpad, scratchpadSize, hash, fill_state);
+		return;
+	}
+#endif
+
+	uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
+	const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize; // NOTE(review): no size check here; presumably scratchpadSize is a multiple of 64 and at least PREFETCH_DISTANCE — confirm against callers
+
+	// initial state: same AES_HASH_1R_STATE0..3 constants that hashAes1Rx4 starts from
+	rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
+	rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
+	rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
+	rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
+
+	const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0); // same per-lane generator keys as fillAes1Rx4
+	const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
+	const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
+	const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
+
+	rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0);
+	rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1);
+	rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
+	rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
+
+	constexpr int PREFETCH_DISTANCE = 4096; // how many bytes ahead of the current block the prefetch pointer runs
+	const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
+	scratchpadEnd -= PREFETCH_DISTANCE; // pass 0 stops early so prefetchPtr never runs past the end of the scratchpad
+
+	for (int i = 0; i < 2; ++i) { // pass 0: everything except the last PREFETCH_DISTANCE bytes; pass 1: that remaining tail
+		//process 64 bytes at a time in 4 lanes
+		while (scratchpadPtr < scratchpadEnd) {
+			hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0)); // absorb the block's current contents before it is overwritten below
+			hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
+			hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
+			hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
+
+			fill_state0 = aesdec<softAes>(fill_state0, key0); // advance the generator by one round per lane
+			fill_state1 = aesenc<softAes>(fill_state1, key1);
+			fill_state2 = aesdec<softAes>(fill_state2, key2);
+			fill_state3 = aesenc<softAes>(fill_state3, key3);
+
+			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0); // replace the block with the new generator output
+			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
+			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
+			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
+
+			rx_prefetch_t0(prefetchPtr);
+
+			scratchpadPtr += 64;
+			prefetchPtr += 64;
+		}
+		prefetchPtr = (const char*) scratchpad; // the tail pass prefetches from the start of the scratchpad instead
+		scratchpadEnd += PREFETCH_DISTANCE; // restore the real end for the tail pass
+	}
+
+	rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0); // hand the advanced generator state back to the caller
+	rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1);
+	rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2);
+	rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3);
+
+	//two extra rounds to achieve full diffusion (same finalisation as hashAes1Rx4)
+	rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
+	rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
+
+	hash_state0 = aesenc<softAes>(hash_state0, xkey0);
+	hash_state1 = aesdec<softAes>(hash_state1, xkey0);
+	hash_state2 = aesenc<softAes>(hash_state2, xkey0);
+	hash_state3 = aesdec<softAes>(hash_state3, xkey0);
+
+	hash_state0 = aesenc<softAes>(hash_state0, xkey1);
+	hash_state1 = aesdec<softAes>(hash_state1, xkey1);
+	hash_state2 = aesenc<softAes>(hash_state2, xkey1);
+	hash_state3 = aesdec<softAes>(hash_state3, xkey1);
+
+	//output hash: the four lane states, 16 bytes each, in lane order
+	rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0);
+	rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1);
+	rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
+	rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
+}
+
+template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); // explicit instantiation, softAes = false
+template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); // explicit instantiation, softAes = true
--- a/crypto/randomx/aes_hash.hpp
+++ b/crypto/randomx/aes_hash.hpp
@ -0,0 +1,43 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstddef>
+
+template<bool softAes>
+void hashAes1Rx4(const void *input, size_t inputSize, void *hash); // writes a 64-byte hash of 'input' to 'hash'; inputSize must be a multiple of 64
+
+template<bool softAes>
+void fillAes1Rx4(void *state, size_t outputSize, void *buffer); // fills 'buffer' from the 64-byte 'state' (1 AES round per 16 bytes) and writes the updated state back
+
+template<bool softAes>
+void fillAes4Rx4(void *state, size_t outputSize, void *buffer); // fills 'buffer' from the 64-byte 'state' using 4 AES rounds per 16 bytes
+
+template<bool softAes>
+void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state); // hashes 'scratchpad' into 'hash' while overwriting it with output generated from 'fill_state'
--- a/crypto/randomx/aes_hash_rv64_vector.cpp
+++ b/crypto/randomx/aes_hash_rv64_vector.cpp
@ -0,0 +1,274 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2025 SChernykh   <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <riscv_vector.h>
+
+#include "soft_aes.h"
+#include "randomx.h"
+#include "blake2/endian.h"
+
+// Table-driven AES round applied to a vector of 8 x 32-bit words.
+//
+// in        - state words
+// key       - words XORed onto the combined table output
+// i0..i3    - byte-gather index vectors (32 entries each), one per lookup table
+// lut0..3   - lookup tables of 32-bit words
+//
+// For every table the state bytes are permuted with the matching index vector, the
+// result is viewed as 8 words, each word is shifted left by 2 to turn it into a byte
+// offset, and the table is read with an indexed load. The four table outputs and `key`
+// are XORed together to form the return value.
+static FORCE_INLINE vuint32m1_t softaes_vector_double(
+	vuint32m1_t in,
+	vuint32m1_t key,
+	vuint8m1_t i0, vuint8m1_t i1, vuint8m1_t i2, vuint8m1_t i3,
+	const uint32_t* lut0, const uint32_t* lut1, const uint32_t *lut2, const uint32_t* lut3)
+{
+	// Byte view of the input words, used as the gather source.
+	const vuint8m1_t stateBytes = __riscv_vreinterpret_v_u32m1_u8m1(in);
+
+	// Table 0
+	const vuint32m1_t picked0 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(stateBytes, i0, 32));
+	const vuint32m1_t offset0 = __riscv_vsll_vx_u32m1(picked0, 2, 8);
+	const vuint32m1_t t0 = __riscv_vluxei32_v_u32m1(lut0, offset0, 8);
+
+	// Table 1
+	const vuint32m1_t picked1 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(stateBytes, i1, 32));
+	const vuint32m1_t offset1 = __riscv_vsll_vx_u32m1(picked1, 2, 8);
+	const vuint32m1_t t1 = __riscv_vluxei32_v_u32m1(lut1, offset1, 8);
+
+	// Table 2
+	const vuint32m1_t picked2 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(stateBytes, i2, 32));
+	const vuint32m1_t offset2 = __riscv_vsll_vx_u32m1(picked2, 2, 8);
+	const vuint32m1_t t2 = __riscv_vluxei32_v_u32m1(lut2, offset2, 8);
+
+	// Table 3
+	const vuint32m1_t picked3 = __riscv_vreinterpret_v_u8m1_u32m1(__riscv_vrgather_vv_u8m1(stateBytes, i3, 32));
+	const vuint32m1_t offset3 = __riscv_vsll_vx_u32m1(picked3, 2, 8);
+	const vuint32m1_t t3 = __riscv_vluxei32_v_u32m1(lut3, offset3, 8);
+
+	// Combine pairwise, then fold in the key.
+	const vuint32m1_t t01 = __riscv_vxor_vv_u32m1(t0, t1, 8);
+	const vuint32m1_t t23 = __riscv_vxor_vv_u32m1(t2, t3, 8);
+	const vuint32m1_t mixed = __riscv_vxor_vv_u32m1(t01, t23, 8);
+
+	return __riscv_vxor_vv_u32m1(mixed, key, 8);
+}
+
+// Starting values of the two hash state vectors (state02 / state13) loaded by
+// hashAes1Rx4_RVV and hashAndFillAes1Rx4_RVV. Each array holds 8 words.
+static constexpr uint32_t AES_HASH_1R_STATE02[8] = { 0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad, 0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4 };
+static constexpr uint32_t AES_HASH_1R_STATE13[8] = { 0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057, 0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948 };
+
+// Round keys used by the generator loops in fillAes1Rx4_RVV and hashAndFillAes1Rx4_RVV
+// (key02 is applied to state02, key13 to state13).
+static constexpr uint32_t AES_GEN_1R_KEY02[8] = { 0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917, 0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345 };
+static constexpr uint32_t AES_GEN_1R_KEY13[8] = { 0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e, 0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154 };
+
+// Keys for the two extra rounds run after the hashing loop. Each array repeats the
+// same four words twice, so both halves of a state vector receive the same key.
+static constexpr uint32_t AES_HASH_1R_XKEY00[8] = { 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201, 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201 };
+static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b, 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b };
+
+// Byte offsets passed to the indexed load/store intrinsics.
+// X2: two groups of four consecutive words whose starts are 32 bytes apart.
+// X4: two groups of four words in descending order whose bases are 64 bytes apart;
+//     used only to read fillAes4Rx4_Key.
+static constexpr uint32_t AES_HASH_STRIDE_X2[8] = { 0, 4, 8, 12, 32, 36, 40, 44 };
+static constexpr uint32_t AES_HASH_STRIDE_X4[8] = { 12, 8, 4, 0, 76, 72, 68, 64 };
+
+// Hashes `input` with the table-based RVV AES round and writes 64 bytes to `hash`.
+//
+// input     - data to hash, consumed in 64-byte blocks
+// inputSize - length in bytes; every loop iteration reads a whole 64-byte block, so
+//             callers are expected to pass a multiple of 64
+// hash      - destination for the 64-byte result
+void hashAes1Rx4_RVV(const void *input, size_t inputSize, void *hash) {
+	const uint8_t* const end = (const uint8_t*)input + inputSize;
+
+	// Byte offsets that pick two 16-byte groups, 32 bytes apart, per vector.
+	const vuint32m1_t laneOffsets = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
+
+	// Gather index vectors for the encryption tables.
+	const vuint8m1_t encIdx0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
+	const vuint8m1_t encIdx1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
+	const vuint8m1_t encIdx2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
+	const vuint8m1_t encIdx3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
+
+	// Decryption reuses encryption index vectors 0 and 2; only 1 and 3 are loaded separately.
+	const vuint8m1_t& decIdx0 = encIdx0;
+	const vuint8m1_t decIdx1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
+	const vuint8m1_t& decIdx2 = encIdx2;
+	const vuint8m1_t decIdx3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
+
+	// initial state
+	vuint32m1_t lanes02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
+	vuint32m1_t lanes13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
+
+	// One 64-byte block per iteration: lanes 0/2 use the encryption tables,
+	// lanes 1/3 the decryption tables, with the input acting as the round key.
+	for (const uint8_t* block = (const uint8_t*)input; block < end; block += 64) {
+		const vuint32m1_t in02 = __riscv_vluxei32_v_u32m1((uint32_t*)block + 0, laneOffsets, 8);
+		const vuint32m1_t in13 = __riscv_vluxei32_v_u32m1((uint32_t*)block + 4, laneOffsets, 8);
+
+		lanes02 = softaes_vector_double(lanes02, in02, encIdx0, encIdx1, encIdx2, encIdx3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
+		lanes13 = softaes_vector_double(lanes13, in13, decIdx0, decIdx1, decIdx2, decIdx3, lutDec0, lutDec1, lutDec2, lutDec3);
+	}
+
+	// Two extra rounds to achieve full diffusion: first with XKEY00, then with XKEY11.
+	const vuint32m1_t extraKey0 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
+	lanes02 = softaes_vector_double(lanes02, extraKey0, encIdx0, encIdx1, encIdx2, encIdx3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
+	lanes13 = softaes_vector_double(lanes13, extraKey0, decIdx0, decIdx1, decIdx2, decIdx3, lutDec0, lutDec1, lutDec2, lutDec3);
+
+	const vuint32m1_t extraKey1 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
+	lanes02 = softaes_vector_double(lanes02, extraKey1, encIdx0, encIdx1, encIdx2, encIdx3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
+	lanes13 = softaes_vector_double(lanes13, extraKey1, decIdx0, decIdx1, decIdx2, decIdx3, lutDec0, lutDec1, lutDec2, lutDec3);
+
+	// Scatter the two state vectors back into the interleaved 64-byte output layout.
+	uint32_t* const out = (uint32_t*)hash;
+	__riscv_vsuxei32_v_u32m1(out + 0, laneOffsets, lanes02, 8);
+	__riscv_vsuxei32_v_u32m1(out + 4, laneOffsets, lanes13, 8);
+}
+
+// Fills `buffer` with generator output, one AES round per 64-byte block, using the
+// table-based RVV soft-AES round.
+//
+// state      - 64-byte generator state; read at entry and overwritten with the final
+//              state on return
+// outputSize - number of bytes to produce; written in whole 64-byte blocks
+// buffer     - destination for the generated data
+void fillAes1Rx4_RVV(void *state, size_t outputSize, void *buffer) {
+	uint32_t* const stateWords = (uint32_t*)state;
+	const uint8_t* const end = (uint8_t*)buffer + outputSize;
+
+	const vuint32m1_t laneOffsets = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
+
+	const vuint32m1_t genKey02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
+	const vuint32m1_t genKey13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
+
+	// Gather the interleaved state into one vector for lanes 0/2 and one for lanes 1/3.
+	vuint32m1_t lanes02 = __riscv_vluxei32_v_u32m1(stateWords + 0, laneOffsets, 8);
+	vuint32m1_t lanes13 = __riscv_vluxei32_v_u32m1(stateWords + 4, laneOffsets, 8);
+
+	const vuint8m1_t encIdx0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
+	const vuint8m1_t encIdx1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
+	const vuint8m1_t encIdx2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
+	const vuint8m1_t encIdx3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
+
+	// Decryption reuses encryption index vectors 0 and 2.
+	const vuint8m1_t& decIdx0 = encIdx0;
+	const vuint8m1_t decIdx1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
+	const vuint8m1_t& decIdx2 = encIdx2;
+	const vuint8m1_t decIdx3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
+
+	// Lanes 0/2 use the decryption tables, lanes 1/3 the encryption tables.
+	for (const uint8_t* block = (uint8_t*)buffer; block < end; block += 64) {
+		lanes02 = softaes_vector_double(lanes02, genKey02, decIdx0, decIdx1, decIdx2, decIdx3, lutDec0, lutDec1, lutDec2, lutDec3);
+		lanes13 = softaes_vector_double(lanes13, genKey13, encIdx0, encIdx1, encIdx2, encIdx3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
+
+		__riscv_vsuxei32_v_u32m1((uint32_t*)block + 0, laneOffsets, lanes02, 8);
+		__riscv_vsuxei32_v_u32m1((uint32_t*)block + 4, laneOffsets, lanes13, 8);
+	}
+
+	// Persist the final generator state for the next call.
+	__riscv_vsuxei32_v_u32m1(stateWords + 0, laneOffsets, lanes02, 8);
+	__riscv_vsuxei32_v_u32m1(stateWords + 4, laneOffsets, lanes13, 8);
+}
+
+// Key material for fillAes4Rx4_RVV: 8 rows of 4 words. The function reads it through
+// AES_HASH_STRIDE_X4 at word offsets 0, 4, 8 and 12, so each gathered vector combines
+// row k and row k+4 (k = 0..3), with the four words of each row in reversed order.
+static constexpr uint32_t fillAes4Rx4_Key[] = {
+	0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd,
+	0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450,
+	0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904,
+	0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763,
+	0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73,
+	0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3,
+	0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7,
+	0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609,
+};
+
+// Fills `buffer` with generator output, four AES rounds per 64-byte block, using the
+// table-based RVV soft-AES round.
+//
+// state      - 64-byte generator state; only read here, the final state is not
+//              written back
+// outputSize - number of bytes to produce; written in whole 64-byte blocks
+// buffer     - destination for the generated data
+void fillAes4Rx4_RVV(void *state, size_t outputSize, void *buffer) {
+	const uint8_t* const end = (uint8_t*)buffer + outputSize;
+
+	// Gather the four round keys; each vector pairs key row k with key row k+4.
+	const vuint32m1_t keyOffsets = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X4, 8);
+	const vuint32m1_t roundKey0 = __riscv_vluxei32_v_u32m1(fillAes4Rx4_Key +  0, keyOffsets, 8);
+	const vuint32m1_t roundKey1 = __riscv_vluxei32_v_u32m1(fillAes4Rx4_Key +  4, keyOffsets, 8);
+	const vuint32m1_t roundKey2 = __riscv_vluxei32_v_u32m1(fillAes4Rx4_Key +  8, keyOffsets, 8);
+	const vuint32m1_t roundKey3 = __riscv_vluxei32_v_u32m1(fillAes4Rx4_Key + 12, keyOffsets, 8);
+
+	const vuint32m1_t laneOffsets = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
+
+	vuint32m1_t lanes02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, laneOffsets, 8);
+	vuint32m1_t lanes13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, laneOffsets, 8);
+
+	const vuint8m1_t encIdx0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
+	const vuint8m1_t encIdx1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
+	const vuint8m1_t encIdx2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
+	const vuint8m1_t encIdx3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
+
+	// Decryption reuses encryption index vectors 0 and 2.
+	const vuint8m1_t& decIdx0 = encIdx0;
+	const vuint8m1_t decIdx1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
+	const vuint8m1_t& decIdx2 = encIdx2;
+	const vuint8m1_t decIdx3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
+
+	// Lanes 0/2 use the decryption tables, lanes 1/3 the encryption tables; both
+	// vectors take the same key in every round.
+	for (const uint8_t* block = (uint8_t*)buffer; block < end; block += 64) {
+		// round 1
+		lanes02 = softaes_vector_double(lanes02, roundKey0, decIdx0, decIdx1, decIdx2, decIdx3, lutDec0, lutDec1, lutDec2, lutDec3);
+		lanes13 = softaes_vector_double(lanes13, roundKey0, encIdx0, encIdx1, encIdx2, encIdx3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
+
+		// round 2
+		lanes02 = softaes_vector_double(lanes02, roundKey1, decIdx0, decIdx1, decIdx2, decIdx3, lutDec0, lutDec1, lutDec2, lutDec3);
+		lanes13 = softaes_vector_double(lanes13, roundKey1, encIdx0, encIdx1, encIdx2, encIdx3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
+
+		// round 3
+		lanes02 = softaes_vector_double(lanes02, roundKey2, decIdx0, decIdx1, decIdx2, decIdx3, lutDec0, lutDec1, lutDec2, lutDec3);
+		lanes13 = softaes_vector_double(lanes13, roundKey2, encIdx0, encIdx1, encIdx2, encIdx3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
+
+		// round 4
+		lanes02 = softaes_vector_double(lanes02, roundKey3, decIdx0, decIdx1, decIdx2, decIdx3, lutDec0, lutDec1, lutDec2, lutDec3);
+		lanes13 = softaes_vector_double(lanes13, roundKey3, encIdx0, encIdx1, encIdx2, encIdx3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
+
+		__riscv_vsuxei32_v_u32m1((uint32_t*)block + 0, laneOffsets, lanes02, 8);
+		__riscv_vsuxei32_v_u32m1((uint32_t*)block + 4, laneOffsets, lanes13, 8);
+	}
+}
+
+// Hashes the scratchpad and overwrites it with fresh generator output in a single pass,
+// using the table-based RVV soft-AES round.
+//
+// scratchpad     - buffer that is read (hashed) and then overwritten in place
+// scratchpadSize - size in bytes; the loop handles 128 bytes per iteration, so callers
+//                  are expected to pass a multiple of 128
+// hash           - receives the 64-byte hash of the scratchpad's previous contents
+// fill_state     - 64-byte generator state; read at entry, updated before the hash
+//                  is finalized
+void hashAndFillAes1Rx4_RVV(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
+	uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
+	const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
+
+	//initial hash state
+	vuint32m1_t hash_state02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
+	vuint32m1_t hash_state13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
+
+	//generator round keys
+	const vuint32m1_t key02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
+	const vuint32m1_t key13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
+
+	//byte offsets selecting two 16-byte groups, 32 bytes apart, per vector
+	const vuint32m1_t stride = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
+
+	vuint32m1_t fill_state02 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 0, stride, 8);
+	vuint32m1_t fill_state13 = __riscv_vluxei32_v_u32m1((uint32_t*)fill_state + 4, stride, 8);
+
+	const vuint8m1_t lutenc_index0 = __riscv_vle8_v_u8m1(lutEncIndex[0], 32);
+	const vuint8m1_t lutenc_index1 = __riscv_vle8_v_u8m1(lutEncIndex[1], 32);
+	const vuint8m1_t lutenc_index2 = __riscv_vle8_v_u8m1(lutEncIndex[2], 32);
+	const vuint8m1_t lutenc_index3 = __riscv_vle8_v_u8m1(lutEncIndex[3], 32);
+
+	//decryption reuses encryption index vectors 0 and 2
+	const vuint8m1_t& lutdec_index0 = lutenc_index0;
+	const vuint8m1_t lutdec_index1 = __riscv_vle8_v_u8m1(lutDecIndex[1], 32);
+	const vuint8m1_t& lutdec_index2 = lutenc_index2;
+	const vuint8m1_t lutdec_index3 = __riscv_vle8_v_u8m1(lutDecIndex[3], 32);
+
+	//process 128 bytes per iteration: two 64-byte blocks, each in 4 lanes
+	while (scratchpadPtr < scratchpadEnd) {
+//HASH_STATE(k): absorb 64-byte block k (k * 16 words from scratchpadPtr) into the hash state
+#define HASH_STATE(k) \
+		hash_state02 = softaes_vector_double(hash_state02, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, 8), lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
+		hash_state13 = softaes_vector_double(hash_state13, __riscv_vluxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, 8), lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
+
+//FILL_STATE(k): advance the generator one round and overwrite 64-byte block k with its state
+#define FILL_STATE(k) \
+		fill_state02 = softaes_vector_double(fill_state02, key02, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3); \
+		fill_state13 = softaes_vector_double(fill_state13, key13, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3); \
+		__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 0, stride, fill_state02, 8); \
+		__riscv_vsuxei32_v_u32m1((uint32_t*)scratchpadPtr + k * 16 + 4, stride, fill_state13, 8);
+
+		//both blocks are read into the hash before either is overwritten
+		HASH_STATE(0);
+		HASH_STATE(1);
+
+		FILL_STATE(0);
+		FILL_STATE(1);
+
+		scratchpadPtr += 128;
+	}
+
+#undef HASH_STATE
+#undef FILL_STATE
+
+	//write the advanced generator state back to the caller
+	__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 0, stride, fill_state02, 8);
+	__riscv_vsuxei32_v_u32m1((uint32_t*)fill_state + 4, stride, fill_state13, 8);
+
+	//two extra rounds to achieve full diffusion
+	const vuint32m1_t xkey00 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
+	const vuint32m1_t xkey11 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
+
+	hash_state02 = softaes_vector_double(hash_state02, xkey00, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
+	hash_state13 = softaes_vector_double(hash_state13, xkey00, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
+
+	hash_state02 = softaes_vector_double(hash_state02, xkey11, lutenc_index0, lutenc_index1, lutenc_index2, lutenc_index3, lutEnc0, lutEnc1, lutEnc2, lutEnc3);
+	hash_state13 = softaes_vector_double(hash_state13, xkey11, lutdec_index0, lutdec_index1, lutdec_index2, lutdec_index3, lutDec0, lutDec1, lutDec2, lutDec3);
+
+	//output hash
+	__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 0, stride, hash_state02, 8);
+	__riscv_vsuxei32_v_u32m1((uint32_t*)hash + 4, stride, hash_state13, 8);
+}
--- a/crypto/randomx/aes_hash_rv64_vector.hpp
+++ b/crypto/randomx/aes_hash_rv64_vector.hpp
@ -0,0 +1,35 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2025 SChernykh   <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+void hashAes1Rx4_RVV(const void *input, size_t inputSize, void *hash);
+void fillAes1Rx4_RVV(void *state, size_t outputSize, void *buffer);
+void fillAes4Rx4_RVV(void *state, size_t outputSize, void *buffer);
+void hashAndFillAes1Rx4_RVV(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
--- a/crypto/randomx/aes_hash_rv64_zvkned.cpp
+++ b/crypto/randomx/aes_hash_rv64_zvkned.cpp
@ -0,0 +1,210 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2025 SChernykh   <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "aes_hash.hpp"
+#include "randomx.h"
+#include "blake2/endian.h"
+
+#include <riscv_vector.h>
+
+// Applies the vaesem.vv intrinsic to 8 words of state `a` with round key `b`.
+static FORCE_INLINE vuint32m1_t aesenc_zvkned(vuint32m1_t a, vuint32m1_t b)
+{
+	const vuint32m1_t encrypted = __riscv_vaesem_vv_u32m1(a, b, 8);
+	return encrypted;
+}
+// Applies the vaesdm.vv intrinsic to 8 words of state `a` using `zero` as its round
+// key, then XORs the real key `b` onto the result.
+// NOTE(review): presumably done because vaesdm applies its key at a different point in
+// the round than the reference implementation expects - confirm against the Zvkned spec.
+static FORCE_INLINE vuint32m1_t aesdec_zvkned(vuint32m1_t a, vuint32m1_t b, vuint32m1_t zero)
+{
+	const vuint32m1_t unkeyed = __riscv_vaesdm_vv_u32m1(a, zero, 8);
+	return __riscv_vxor_vv_u32m1(unkeyed, b, 8);
+}
+
+// Starting values of the two hash state vectors (state02 / state13) loaded by
+// hashAes1Rx4_zvkned and hashAndFillAes1Rx4_zvkned. Each array holds 8 words.
+static constexpr uint32_t AES_HASH_1R_STATE02[8] = { 0x92b52c0d, 0x9fa856de, 0xcc82db47, 0xd7983aad, 0x6a770017, 0xae62c7d0, 0x5079506b, 0xe8a07ce4 };
+static constexpr uint32_t AES_HASH_1R_STATE13[8] = { 0x338d996e, 0x15c7b798, 0xf59e125a, 0xace78057, 0x630a240c, 0x07ad828d, 0x79a10005, 0x7e994948 };
+
+// Round keys used by the generator loops in fillAes1Rx4_zvkned and
+// hashAndFillAes1Rx4_zvkned (key02 is applied to state02, key13 to state13).
+static constexpr uint32_t AES_GEN_1R_KEY02[8] = { 0x6daca553, 0x62716609, 0xdbb5552b, 0xb4f44917, 0x3f1262f1, 0x9f947ec6, 0xf4c0794f, 0x3e20e345 };
+static constexpr uint32_t AES_GEN_1R_KEY13[8] = { 0x6d7caf07, 0x846a710d, 0x1725d378, 0x0da1dc4e, 0x6aef8135, 0xb1ba317c, 0x16314c88, 0x49169154 };
+
+// Keys for the two extra rounds run after the hashing loop. Each array repeats the
+// same four words twice, so both halves of a state vector receive the same key.
+static constexpr uint32_t AES_HASH_1R_XKEY00[8] = { 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201, 0xf6fa8389, 0x8b24949f, 0x90dc56bf, 0x06890201 };
+static constexpr uint32_t AES_HASH_1R_XKEY11[8] = { 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b, 0x61b263d1, 0x51f4e03c, 0xee1043c6, 0xed18f99b };
+
+// Byte offsets passed to the indexed load/store intrinsics.
+// X2: two groups of four consecutive words whose starts are 32 bytes apart.
+// X4: two groups of four words in descending order whose bases are 64 bytes apart;
+//     used only to read fillAes4Rx4_Key.
+static constexpr uint32_t AES_HASH_STRIDE_X2[8] = { 0, 4, 8, 12, 32, 36, 40, 44 };
+static constexpr uint32_t AES_HASH_STRIDE_X4[8] = { 12, 8, 4, 0, 76, 72, 68, 64 };
+
+// Hashes `input` with the Zvkned vector AES instructions and writes 64 bytes to `hash`.
+//
+// input     - data to hash, consumed in 64-byte blocks
+// inputSize - length in bytes; every loop iteration reads a whole 64-byte block, so
+//             callers are expected to pass a multiple of 64
+// hash      - destination for the 64-byte result
+void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash)
+{
+	const uint8_t* const end = (const uint8_t*)input + inputSize;
+
+	// Byte offsets that pick two 16-byte groups, 32 bytes apart, per vector.
+	const vuint32m1_t laneOffsets = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
+	// Placeholder key handed to aesdec_zvkned.
+	const vuint32m1_t zero = {};
+
+	// initial state
+	vuint32m1_t lanes02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
+	vuint32m1_t lanes13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
+
+	// One 64-byte block per iteration: lanes 0/2 are encrypted, lanes 1/3 decrypted,
+	// with the input acting as the round key.
+	for (const uint8_t* block = (const uint8_t*)input; block < end; block += 64) {
+		const vuint32m1_t in02 = __riscv_vluxei32_v_u32m1((uint32_t*)block + 0, laneOffsets, 8);
+		const vuint32m1_t in13 = __riscv_vluxei32_v_u32m1((uint32_t*)block + 4, laneOffsets, 8);
+
+		lanes02 = aesenc_zvkned(lanes02, in02);
+		lanes13 = aesdec_zvkned(lanes13, in13, zero);
+	}
+
+	// Two extra rounds to achieve full diffusion: first with XKEY00, then with XKEY11.
+	const vuint32m1_t extraKey0 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
+	lanes02 = aesenc_zvkned(lanes02, extraKey0);
+	lanes13 = aesdec_zvkned(lanes13, extraKey0, zero);
+
+	const vuint32m1_t extraKey1 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
+	lanes02 = aesenc_zvkned(lanes02, extraKey1);
+	lanes13 = aesdec_zvkned(lanes13, extraKey1, zero);
+
+	// Scatter the two state vectors back into the interleaved 64-byte output layout.
+	uint32_t* const out = (uint32_t*)hash;
+	__riscv_vsuxei32_v_u32m1(out + 0, laneOffsets, lanes02, 8);
+	__riscv_vsuxei32_v_u32m1(out + 4, laneOffsets, lanes13, 8);
+}
+
+// Fills `buffer` with generator output, one AES round per 64-byte block, using the
+// Zvkned vector AES instructions.
+//
+// state      - 64-byte generator state; read at entry and overwritten with the final
+//              state on return
+// outputSize - number of bytes to produce; written in whole 64-byte blocks
+// buffer     - destination for the generated data
+void fillAes1Rx4_zvkned(void *state, size_t outputSize, void *buffer)
+{
+	uint32_t* const stateWords = (uint32_t*)state;
+	const uint8_t* const end = (uint8_t*)buffer + outputSize;
+
+	const vuint32m1_t laneOffsets = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
+	// Placeholder key handed to aesdec_zvkned.
+	const vuint32m1_t zero = {};
+
+	const vuint32m1_t genKey02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
+	const vuint32m1_t genKey13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
+
+	// Gather the interleaved state into one vector for lanes 0/2 and one for lanes 1/3.
+	vuint32m1_t lanes02 = __riscv_vluxei32_v_u32m1(stateWords + 0, laneOffsets, 8);
+	vuint32m1_t lanes13 = __riscv_vluxei32_v_u32m1(stateWords + 4, laneOffsets, 8);
+
+	// Lanes 0/2 are decrypted, lanes 1/3 encrypted.
+	for (const uint8_t* block = (uint8_t*)buffer; block < end; block += 64) {
+		lanes02 = aesdec_zvkned(lanes02, genKey02, zero);
+		lanes13 = aesenc_zvkned(lanes13, genKey13);
+
+		__riscv_vsuxei32_v_u32m1((uint32_t*)block + 0, laneOffsets, lanes02, 8);
+		__riscv_vsuxei32_v_u32m1((uint32_t*)block + 4, laneOffsets, lanes13, 8);
+	}
+
+	// Persist the final generator state for the next call.
+	__riscv_vsuxei32_v_u32m1(stateWords + 0, laneOffsets, lanes02, 8);
+	__riscv_vsuxei32_v_u32m1(stateWords + 4, laneOffsets, lanes13, 8);
+}
+
+// Key material for fillAes4Rx4_zvkned: 8 rows of 4 words. The function reads it through
+// AES_HASH_STRIDE_X4 at word offsets 0, 4, 8 and 12, so each gathered vector combines
+// row k and row k+4 (k = 0..3), with the four words of each row in reversed order.
+static constexpr uint32_t fillAes4Rx4_Key[] = {
+	0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd,
+	0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450,
+	0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904,
+	0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763,
+	0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73,
+	0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3,
+	0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7,
+	0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609,
+};
+
+// Fills `buffer` with generator output, four AES rounds per 64-byte block, using the
+// Zvkned vector AES instructions.
+//
+// state      - 64-byte generator state; only read here, the final state is not
+//              written back
+// outputSize - number of bytes to produce; written in whole 64-byte blocks
+// buffer     - destination for the generated data
+void fillAes4Rx4_zvkned(void *state, size_t outputSize, void *buffer)
+{
+	const uint8_t* const end = (uint8_t*)buffer + outputSize;
+
+	// Gather the four round keys; each vector pairs key row k with key row k+4.
+	const vuint32m1_t keyOffsets = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X4, 8);
+	const vuint32m1_t roundKey0 = __riscv_vluxei32_v_u32m1(fillAes4Rx4_Key +  0, keyOffsets, 8);
+	const vuint32m1_t roundKey1 = __riscv_vluxei32_v_u32m1(fillAes4Rx4_Key +  4, keyOffsets, 8);
+	const vuint32m1_t roundKey2 = __riscv_vluxei32_v_u32m1(fillAes4Rx4_Key +  8, keyOffsets, 8);
+	const vuint32m1_t roundKey3 = __riscv_vluxei32_v_u32m1(fillAes4Rx4_Key + 12, keyOffsets, 8);
+
+	const vuint32m1_t laneOffsets = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
+	// Placeholder key handed to aesdec_zvkned.
+	const vuint32m1_t zero = {};
+
+	vuint32m1_t lanes02 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 0, laneOffsets, 8);
+	vuint32m1_t lanes13 = __riscv_vluxei32_v_u32m1((uint32_t*)state + 4, laneOffsets, 8);
+
+	// Lanes 0/2 are decrypted, lanes 1/3 encrypted; both vectors take the same key
+	// in every round.
+	for (const uint8_t* block = (uint8_t*)buffer; block < end; block += 64) {
+		// round 1
+		lanes02 = aesdec_zvkned(lanes02, roundKey0, zero);
+		lanes13 = aesenc_zvkned(lanes13, roundKey0);
+
+		// round 2
+		lanes02 = aesdec_zvkned(lanes02, roundKey1, zero);
+		lanes13 = aesenc_zvkned(lanes13, roundKey1);
+
+		// round 3
+		lanes02 = aesdec_zvkned(lanes02, roundKey2, zero);
+		lanes13 = aesenc_zvkned(lanes13, roundKey2);
+
+		// round 4
+		lanes02 = aesdec_zvkned(lanes02, roundKey3, zero);
+		lanes13 = aesenc_zvkned(lanes13, roundKey3);
+
+		__riscv_vsuxei32_v_u32m1((uint32_t*)block + 0, laneOffsets, lanes02, 8);
+		__riscv_vsuxei32_v_u32m1((uint32_t*)block + 4, laneOffsets, lanes13, 8);
+	}
+}
+
+// Hashes the scratchpad and overwrites it with fresh generator output in a single pass,
+// using the Zvkned vector AES instructions.
+//
+// scratchpad     - buffer that is read (hashed) and then overwritten in place
+// scratchpadSize - size in bytes; handled in whole 64-byte blocks
+// hash           - receives the 64-byte hash of the scratchpad's previous contents
+// fill_state     - 64-byte generator state; read at entry, updated before the hash
+//                  is finalized
+void hashAndFillAes1Rx4_zvkned(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state)
+{
+	uint32_t* const fillWords = (uint32_t*)fill_state;
+	const uint8_t* const end = (uint8_t*)scratchpad + scratchpadSize;
+
+	const vuint32m1_t laneOffsets = __riscv_vle32_v_u32m1(AES_HASH_STRIDE_X2, 8);
+	// Placeholder key handed to aesdec_zvkned.
+	const vuint32m1_t zero = {};
+
+	const vuint32m1_t genKey02 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY02, 8);
+	const vuint32m1_t genKey13 = __riscv_vle32_v_u32m1(AES_GEN_1R_KEY13, 8);
+
+	vuint32m1_t hash02 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE02, 8);
+	vuint32m1_t hash13 = __riscv_vle32_v_u32m1(AES_HASH_1R_STATE13, 8);
+
+	vuint32m1_t gen02 = __riscv_vluxei32_v_u32m1(fillWords + 0, laneOffsets, 8);
+	vuint32m1_t gen13 = __riscv_vluxei32_v_u32m1(fillWords + 4, laneOffsets, 8);
+
+	// One 64-byte block per iteration: absorb the old contents into the hash first,
+	// then overwrite the same block with the next generator output.
+	for (uint8_t* block = (uint8_t*)scratchpad; block < end; block += 64) {
+		uint32_t* const words = (uint32_t*)block;
+
+		hash02 = aesenc_zvkned(hash02, __riscv_vluxei32_v_u32m1(words + 0, laneOffsets, 8));
+		hash13 = aesdec_zvkned(hash13, __riscv_vluxei32_v_u32m1(words + 4, laneOffsets, 8), zero);
+
+		gen02 = aesdec_zvkned(gen02, genKey02, zero);
+		gen13 = aesenc_zvkned(gen13, genKey13);
+
+		__riscv_vsuxei32_v_u32m1(words + 0, laneOffsets, gen02, 8);
+		__riscv_vsuxei32_v_u32m1(words + 4, laneOffsets, gen13, 8);
+	}
+
+	// Write the advanced generator state back to the caller.
+	__riscv_vsuxei32_v_u32m1(fillWords + 0, laneOffsets, gen02, 8);
+	__riscv_vsuxei32_v_u32m1(fillWords + 4, laneOffsets, gen13, 8);
+
+	// Two extra rounds to achieve full diffusion: first with XKEY00, then with XKEY11.
+	const vuint32m1_t extraKey0 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY00, 8);
+	hash02 = aesenc_zvkned(hash02, extraKey0);
+	hash13 = aesdec_zvkned(hash13, extraKey0, zero);
+
+	const vuint32m1_t extraKey1 = __riscv_vle32_v_u32m1(AES_HASH_1R_XKEY11, 8);
+	hash02 = aesenc_zvkned(hash02, extraKey1);
+	hash13 = aesdec_zvkned(hash13, extraKey1, zero);
+
+	// Scatter the hash state into the interleaved 64-byte output layout.
+	uint32_t* const out = (uint32_t*)hash;
+	__riscv_vsuxei32_v_u32m1(out + 0, laneOffsets, hash02, 8);
+	__riscv_vsuxei32_v_u32m1(out + 4, laneOffsets, hash13, 8);
+}
--- a/crypto/randomx/aes_hash_rv64_zvkned.hpp
+++ b/crypto/randomx/aes_hash_rv64_zvkned.hpp
@ -0,0 +1,35 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2025 SChernykh   <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash);
+void fillAes1Rx4_zvkned(void *state, size_t outputSize, void *buffer);
+void fillAes4Rx4_zvkned(void *state, size_t outputSize, void *buffer);
+void hashAndFillAes1Rx4_zvkned(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
--- a/crypto/randomx/allocator.cpp
+++ b/crypto/randomx/allocator.cpp
@ -0,0 +1,63 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <new>
+#include "allocator.hpp"
+#include "intrin_portable.h"
+#include "virtual_memory.h"
+#include "common.hpp"
+
+namespace randomx {
+
+	// Allocates `count` bytes aligned to `alignment` through rx_aligned_alloc.
+	// Throws std::bad_alloc when the allocation returns nullptr.
+	template<size_t alignment>
+	void* AlignedAllocator<alignment>::allocMemory(size_t count) {
+		void *mem = rx_aligned_alloc(count, alignment);
+		if (mem == nullptr)
+			throw std::bad_alloc();
+		return mem;
+	}
+
+	// Releases memory obtained from allocMemory. `count` is accepted only so the
+	// signature matches LargePageAllocator::freeMemory; rx_aligned_free does not use it.
+	template<size_t alignment>
+	void AlignedAllocator<alignment>::freeMemory(void* ptr, size_t count) {
+		(void)count; // intentionally unused; avoids an unused-parameter warning
+		rx_aligned_free(ptr);
+	}
+
+	// Explicit instantiation for cache-line alignment; it must follow the member
+	// definitions above so that their code is emitted in this translation unit.
+	template struct AlignedAllocator<CacheLineSize>;
+
+	// Allocates `count` bytes through allocLargePagesMemory.
+	// Throws std::bad_alloc when the allocation returns nullptr.
+	void* LargePageAllocator::allocMemory(size_t count) {
+		void *mem = allocLargePagesMemory(count);
+		if (mem == nullptr)
+			throw std::bad_alloc();
+		return mem;
+	}
+
+	// Releases `count` bytes at `ptr` through freePagedMemory.
+	void LargePageAllocator::freeMemory(void* ptr, size_t count) {
+		freePagedMemory(ptr, count);
+	}
+
+}
--- a/crypto/randomx/allocator.hpp
+++ b/crypto/randomx/allocator.hpp
@ -0,0 +1,46 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstddef>
+
+namespace randomx {
+
+	// Static allocation policy parameterised on the requested alignment.
+	// Only declarations live here; the definitions are provided out of line.
+	template<size_t alignment>
+	struct AlignedAllocator {
+		// Obtains `byteCount` bytes of storage.
+		static void* allocMemory(size_t byteCount);
+		// Returns storage previously obtained from allocMemory.
+		static void freeMemory(void* ptr, size_t byteCount);
+	};
+
+	// Static allocation policy with the same interface as AlignedAllocator,
+	// so the two can be used interchangeably as policy types.
+	struct LargePageAllocator {
+		// Obtains `byteCount` bytes of storage.
+		static void* allocMemory(size_t byteCount);
+		// Returns `byteCount` bytes of storage at `ptr`.
+		static void freeMemory(void* ptr, size_t byteCount);
+	};
+
+}
--- a/crypto/randomx/argon2.h
+++ b/crypto/randomx/argon2.h
@ -0,0 +1,261 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#pragma once
+
+#include <stdint.h>
+#include <stddef.h>
+#include <limits.h>
+
+/*
+ * Argon2 input parameter restrictions
+ */
+
+ /* Minimum and maximum number of lanes (degree of parallelism) */
+#define ARGON2_MIN_LANES UINT32_C(1)
+#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)
+
+/* Minimum and maximum number of threads */
+#define ARGON2_MIN_THREADS UINT32_C(1)
+#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)
+
+/* Number of synchronization points between lanes per pass */
+#define ARGON2_SYNC_POINTS UINT32_C(4)
+
+/* Minimum and maximum digest size in bytes */
+#define ARGON2_MIN_OUTLEN UINT32_C(4)
+#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)
+
+/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
+#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */
+
+/* Function-like minimum. Each argument can be evaluated twice, so only pass
+ * expressions without side effects. */
+#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
+/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB).
+ * The bit count is the pointer width minus 10 minus 1, capped at 32; the
+ * "- 10" presumably reflects 1 KiB blocks (TODO confirm against
+ * ARGON2_BLOCK_SIZE) and the "- 1" is the halving of the address space. */
+#define ARGON2_MAX_MEMORY_BITS                                                 \
+    ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
+/* Upper bound on m_cost: the smaller of 0xFFFFFFFF and 2^ARGON2_MAX_MEMORY_BITS. */
+#define ARGON2_MAX_MEMORY                                                      \
+    ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)
+
+/* Minimum and maximum number of passes */
+#define ARGON2_MIN_TIME UINT32_C(1)
+#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)
+
+/* Minimum and maximum password length in bytes */
+#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
+#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)
+
+/* Minimum and maximum associated data length in bytes */
+#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
+#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)
+
+/* Minimum and maximum salt length in bytes */
+#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
+#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)
+
+/* Minimum and maximum key length in bytes */
+#define ARGON2_MIN_SECRET UINT32_C(0)
+#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)
+
+/* Flags to determine which fields are securely wiped (default = no wipe).
+ * These are bit masks intended for the `flags` member of argon2_context. */
+#define ARGON2_DEFAULT_FLAGS UINT32_C(0)
+#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
+#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
+
+
+/* Error codes.
+ * ARGON2_OK is zero and every failure is a distinct negative value, so
+ * callers can test for success with `== ARGON2_OK`. */
+typedef enum Argon2_ErrorCodes {
+	ARGON2_OK = 0,
+
+	ARGON2_OUTPUT_PTR_NULL = -1,
+
+	ARGON2_OUTPUT_TOO_SHORT = -2,
+	ARGON2_OUTPUT_TOO_LONG = -3,
+
+	ARGON2_PWD_TOO_SHORT = -4,
+	ARGON2_PWD_TOO_LONG = -5,
+
+	ARGON2_SALT_TOO_SHORT = -6,
+	ARGON2_SALT_TOO_LONG = -7,
+
+	ARGON2_AD_TOO_SHORT = -8,
+	ARGON2_AD_TOO_LONG = -9,
+
+	ARGON2_SECRET_TOO_SHORT = -10,
+	ARGON2_SECRET_TOO_LONG = -11,
+
+	ARGON2_TIME_TOO_SMALL = -12,
+	ARGON2_TIME_TOO_LARGE = -13,
+
+	ARGON2_MEMORY_TOO_LITTLE = -14,
+	ARGON2_MEMORY_TOO_MUCH = -15,
+
+	ARGON2_LANES_TOO_FEW = -16,
+	ARGON2_LANES_TOO_MANY = -17,
+
+	ARGON2_PWD_PTR_MISMATCH = -18,    /* NULL ptr with non-zero length */
+	ARGON2_SALT_PTR_MISMATCH = -19,   /* NULL ptr with non-zero length */
+	ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */
+	ARGON2_AD_PTR_MISMATCH = -21,     /* NULL ptr with non-zero length */
+
+	ARGON2_MEMORY_ALLOCATION_ERROR = -22,
+
+	/* Exactly one of the two allocator callbacks was supplied */
+	ARGON2_FREE_MEMORY_CBK_NULL = -23,
+	ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,
+
+	ARGON2_INCORRECT_PARAMETER = -25,
+	ARGON2_INCORRECT_TYPE = -26,
+
+	ARGON2_OUT_PTR_MISMATCH = -27,
+
+	ARGON2_THREADS_TOO_FEW = -28,
+	ARGON2_THREADS_TOO_MANY = -29,
+
+	ARGON2_MISSING_ARGS = -30,
+
+	ARGON2_ENCODING_FAIL = -31,
+
+	ARGON2_DECODING_FAIL = -32,
+
+	ARGON2_THREAD_FAIL = -33,
+
+	ARGON2_DECODING_LENGTH_FAIL = -34,
+
+	ARGON2_VERIFY_MISMATCH = -35
+} argon2_error_codes;
+
+/* Memory allocator types --- for external allocation.
+ * allocate_fptr receives the address of the pointer to fill and a byte count
+ * and returns an int status; deallocate_fptr receives the pointer and the
+ * same byte count. NOTE(review): the meaning of the int return value is not
+ * visible in this header -- presumably 0 means success; confirm with callers. */
+typedef int(*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
+typedef void(*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);
+
+/* Argon2 external data structures */
+
+/*
+	*****
+	* Context: structure to hold Argon2 inputs:
+	*  output array and its length,
+	*  password and its length,
+	*  salt and its length,
+	*  secret and its length,
+	*  associated data and its length,
+	*  number of passes, amount of used memory (in KBytes, can be rounded up a bit)
+	*  number of parallel threads that will be run.
+	* All the parameters above affect the output hash value.
+	* Additionally, two function pointers can be provided to allocate and
+	* deallocate the memory (if NULL, memory will be allocated internally).
+	* The `flags` bit field selects whether to erase the password and the secret
+	* as soon as they are pre-hashed (and thus not needed anymore); this header
+	* defines ARGON2_FLAG_CLEAR_PASSWORD and ARGON2_FLAG_CLEAR_SECRET for it.
+	*****
+	* Simplest situation: you have output array out[8], password is stored in
+	* pwd[32], salt is stored in salt[16], you do not have keys nor associated
+	* data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with
+	* 4 parallel lanes.
+	* You want to erase the password, but you're OK with last pass not being
+	* erased. You want to use the default memory allocator.
+	* Then you initialize:
+	Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false)
+	* NOTE(review): the example above is illustrative only; it does not match
+	* the current field list (there is no value for `version`, and it passes
+	* four booleans where the struct has a single `flags` word).
+	*/
+typedef struct Argon2_Context {
+	uint8_t *out;    /* output array */
+	uint32_t outlen; /* digest length */
+
+	uint8_t *pwd;    /* password array */
+	uint32_t pwdlen; /* password length */
+
+	uint8_t *salt;    /* salt array */
+	uint32_t saltlen; /* salt length */
+
+	uint8_t *secret;    /* key array */
+	uint32_t secretlen; /* key length */
+
+	uint8_t *ad;    /* associated data array */
+	uint32_t adlen; /* associated data length */
+
+	uint32_t t_cost;  /* number of passes */
+	uint32_t m_cost;  /* amount of memory requested (KB) */
+	uint32_t lanes;   /* number of lanes */
+	uint32_t threads; /* maximum number of threads */
+
+	uint32_t version; /* version number */
+
+	allocate_fptr allocate_cbk; /* pointer to memory allocator */
+	deallocate_fptr free_cbk;   /* pointer to memory deallocator */
+
+	uint32_t flags; /* bit mask of ARGON2_FLAG_* options */
+} argon2_context;
+
+/* Argon2 primitive type.
+ * The numeric values are part of the interface: they are given explicitly
+ * rather than left to the compiler. */
+typedef enum Argon2_type {
+	Argon2_d = 0,
+	Argon2_i = 1,
+	Argon2_id = 2
+} argon2_type;
+
+/* Version of the algorithm.
+ * ARGON2_VERSION_NUMBER is an alias for the newest listed version (0x13). */
+typedef enum Argon2_version {
+	ARGON2_VERSION_10 = 0x10,
+	ARGON2_VERSION_13 = 0x13,
+	ARGON2_VERSION_NUMBER = ARGON2_VERSION_13
+} argon2_version;
+
+//Argon2 instance - forward declaration (the struct body is not defined in this header)
+typedef struct Argon2_instance_t argon2_instance_t;
+
+//Argon2 position - forward declaration (the struct body is not defined in this header)
+typedef struct Argon2_position_t argon2_position_t;
+
+//Argon2 implementation function: the signature shared by every segment-filling
+//routine (reference, SSSE3, AVX2). This is a function type, so it is used
+//through pointers (randomx_argon2_impl*).
+typedef void randomx_argon2_impl(const argon2_instance_t* instance,
+	argon2_position_t position);
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Function that fills the segment using previous segments also from other
+ * threads
+ * @param instance Pointer to the current instance
+ * @param position Current position
+ * @pre all block pointers must be valid
+ */
+void randomx_argon2_fill_segment_ref(const argon2_instance_t* instance,
+	argon2_position_t position);
+
+/*
+ * Accessors for the optional SIMD segment-filling routines. Each returns a
+ * pointer with the randomx_argon2_impl signature; a NULL result means the
+ * routine is unavailable and the caller must pick another implementation.
+ * The parameter lists are spelled (void) so that C translation units get a
+ * real prototype instead of an unprototyped declaration.
+ */
+randomx_argon2_impl *randomx_argon2_impl_ssse3(void);
+randomx_argon2_impl *randomx_argon2_impl_avx2(void);
+
+#if defined(__cplusplus)
+}
+#endif
--- a/crypto/randomx/argon2_avx2.c
+++ b/crypto/randomx/argon2_avx2.c
@ -0,0 +1,174 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "argon2.h"
+
+/* Forward declaration; the definition further down is only compiled when
+ * __AVX2__ is defined. */
+void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance,
+	argon2_position_t position);
+
+/*
+ * Returns the AVX2 segment-filling routine when this file is compiled with
+ * AVX2 enabled (__AVX2__ defined), and NULL otherwise so that the caller can
+ * select a different implementation.
+ */
+randomx_argon2_impl* randomx_argon2_impl_avx2(void) {
+#if defined(__AVX2__)
+	return &randomx_argon2_fill_segment_avx2;
+#else
+	return NULL;
+#endif
+}
+
+#if defined(__AVX2__)
+
+#include "argon2_core.h"
+
+#include "blake2/blamka-round-avx2.h"
+#include "blake2/blake2-impl.h"
+#include "blake2/blake2.h"
+
+/*
+ * Mixes `ref_block` into the running `state` and writes the result to
+ * `next_block`.
+ *
+ * state      - ARGON2_HWORDS_IN_BLOCK 256-bit words, updated in place
+ * ref_block  - reference block XORed into the state
+ * next_block - destination block; its previous contents are also XORed in
+ *              when `with_xor` is non-zero
+ * with_xor   - 0 to overwrite the destination, non-zero to XOR into it
+ */
+static void fill_block(__m256i* state, const block* ref_block,
+	block* next_block, int with_xor) {
+	__m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
+	const __m256i* ref_words = (const __m256i*)ref_block->v;
+	const __m256i* next_in = (const __m256i*)next_block->v;
+	__m256i* next_out = (__m256i*)next_block->v;
+	unsigned int i;
+
+	/* state ^= ref; block_XY keeps the pre-round value that is folded back in
+	 * after the rounds (additionally XORed with the old destination contents
+	 * when with_xor is set). Unaligned loads are used throughout. */
+	for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+		state[i] = _mm256_xor_si256(state[i], _mm256_loadu_si256(ref_words + i));
+		if (with_xor) {
+			block_XY[i] = _mm256_xor_si256(state[i], _mm256_loadu_si256(next_in + i));
+		}
+		else {
+			block_XY[i] = state[i];
+		}
+	}
+
+	/* First round pass: four groups of eight consecutive state words. */
+	for (i = 0; i < 4; ++i) {
+		BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
+			state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
+	}
+
+	/* Second round pass: four groups of words taken with a stride of four. */
+	for (i = 0; i < 4; ++i) {
+		BLAKE2_ROUND_2(state[0 + i], state[4 + i], state[8 + i], state[12 + i],
+			state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
+	}
+
+	/* Feed the saved value back in and store the finished block. */
+	for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
+		state[i] = _mm256_xor_si256(state[i], block_XY[i]);
+		_mm256_storeu_si256(next_out + i, state[i]);
+	}
+}
+
+/*
+ * AVX2 variant of the segment-filling routine.
+ *
+ * Fills every block of the segment identified by `position` (pass, lane,
+ * slice) inside instance->memory. For each block, the first 64-bit word of
+ * the previous block supplies the pseudo-random value that selects the
+ * reference lane and, via randomx_argon2_index_alpha, the reference block.
+ * Does nothing when `instance` is NULL.
+ *
+ * @param instance Pointer to the current instance
+ * @param position Current position (taken by value; its index is updated locally)
+ */
+void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance,
+	argon2_position_t position) {
+	block* ref_block = NULL, * curr_block = NULL;
+	uint64_t pseudo_rand, ref_index, ref_lane;
+	uint32_t prev_offset, curr_offset;
+	uint32_t starting_index, i;
+	__m256i state[ARGON2_HWORDS_IN_BLOCK];
+
+	if (instance == NULL) {
+		return;
+	}
+
+	starting_index = 0;
+
+	if ((0 == position.pass) && (0 == position.slice)) {
+		starting_index = 2; /* we have already generated the first two blocks */
+	}
+
+	/* Offset of the current block */
+	curr_offset = position.lane * instance->lane_length +
+		position.slice * instance->segment_length + starting_index;
+
+	if (0 == curr_offset % instance->lane_length) {
+		/* Current block opens the lane: its predecessor is the lane's last block */
+		prev_offset = curr_offset + instance->lane_length - 1;
+	}
+	else {
+		/* Previous block */
+		prev_offset = curr_offset - 1;
+	}
+
+	/* Seed the running state with the predecessor block */
+	memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
+
+	for (i = starting_index; i < instance->segment_length;
+		++i, ++curr_offset, ++prev_offset) {
+		/* 1.1 Rotating prev_offset if needed */
+		if (curr_offset % instance->lane_length == 1) {
+			prev_offset = curr_offset - 1;
+		}
+
+		/* 1.2 Computing the index of the reference block */
+		/* 1.2.1 Taking pseudo-random value from the previous block */
+		pseudo_rand = instance->memory[prev_offset].v[0];
+
+		/* 1.2.2 Computing the lane of the reference block */
+		ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
+
+		if ((position.pass == 0) && (position.slice == 0)) {
+			/* Can not reference other lanes yet */
+			ref_lane = position.lane;
+		}
+
+		/* 1.2.3 Computing the number of possible reference block within the
+		 * lane.
+		 */
+		position.index = i;
+		ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
+			ref_lane == position.lane);
+
+		/* 2 Creating a new block */
+		ref_block =
+			instance->memory + instance->lane_length * ref_lane + ref_index;
+		curr_block = instance->memory + curr_offset;
+		if (ARGON2_VERSION_10 == instance->version) {
+			/* version 1.2.1 and earlier: overwrite, not XOR */
+			fill_block(state, ref_block, curr_block, 0);
+		}
+		else {
+			if (0 == position.pass) {
+				/* first pass: nothing to XOR with yet, so overwrite */
+				fill_block(state, ref_block, curr_block, 0);
+			}
+			else {
+				/* later passes: XOR into the existing block contents */
+				fill_block(state, ref_block, curr_block, 1);
+			}
+		}
+	}
+}
+
+#endif
--- a/crypto/randomx/argon2_core.c
+++ b/crypto/randomx/argon2_core.c
@ -0,0 +1,411 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+ /*For memory wiping*/
+#ifdef _MSC_VER
+#include <windows.h>
+#include <winbase.h> /* For SecureZeroMemory */
+#endif
+#if defined __STDC_LIB_EXT1__
+#define __STDC_WANT_LIB_EXT1__ 1
+#endif
+#define VC_GE_2005(version) (version >= 1400)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "argon2_core.h"
+#include "blake2/blake2.h"
+#include "blake2/blake2-impl.h"
+
+#ifdef GENKAT
+#include "genkat.h"
+#endif
+
+/*
+ * NOT_OPTIMIZED: function attribute that asks the compiler not to optimize
+ * the annotated function (optnone on clang when available, optimize("O0") on
+ * GCC 4.4 or newer). On any other compiler it expands to nothing.
+ * NOTE(review): no use of NOT_OPTIMIZED appears in the remainder of this
+ * file; it looks like a leftover from the upstream memory-wiping helpers.
+ */
+#if defined(__clang__)
+#if __has_attribute(optnone)
+#define NOT_OPTIMIZED __attribute__((optnone))
+#endif
+#elif defined(__GNUC__)
+#define GCC_VERSION                                                            \
+    (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#if GCC_VERSION >= 40400
+#define NOT_OPTIMIZED __attribute__((optimize("O0")))
+#endif
+#endif
+#ifndef NOT_OPTIMIZED
+#define NOT_OPTIMIZED
+#endif
+
+/***************Instance and Position constructors**********/
+
+/*
+ * Reads ARGON2_QWORDS_IN_BLOCK consecutive 64-bit words from the byte buffer
+ * `input` (decoded with load64) into dst->v.
+ */
+static void load_block(block *dst, const void *input) {
+	const uint8_t *cursor = (const uint8_t *)input;
+	unsigned word;
+	for (word = 0; word < ARGON2_QWORDS_IN_BLOCK; ++word) {
+		dst->v[word] = load64(cursor);
+		cursor += sizeof(dst->v[0]);
+	}
+}
+
+/*
+ * Writes the ARGON2_QWORDS_IN_BLOCK 64-bit words of src->v to the byte
+ * buffer `output` (encoded with store64), one after another.
+ */
+static void store_block(void *output, const block *src) {
+	uint8_t *cursor = (uint8_t *)output;
+	unsigned word;
+	for (word = 0; word < ARGON2_QWORDS_IN_BLOCK; ++word) {
+		store64(cursor, src->v[word]);
+		cursor += sizeof(src->v[0]);
+	}
+}
+
+/*
+ * Maps a 32-bit pseudo-random value to the index (within a lane) of the block
+ * to use as reference for the block at `position`.
+ *
+ * @param instance    supplies lane_length and segment_length
+ * @param position    pass / slice / index of the block being built
+ * @param pseudo_rand 32-bit value taken from the previous block
+ * @param same_lane   non-zero when the reference lane is the current lane
+ * @return block index in the range [0, instance->lane_length)
+ */
+uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance,
+	const argon2_position_t *position, uint32_t pseudo_rand,
+	int same_lane) {
+	/*
+	 * Pass 0:
+	 *      This lane : all already finished segments plus already constructed
+	 * blocks in this segment
+	 *      Other lanes : all already finished segments
+	 * Pass 1+:
+	 *      This lane : (SYNC_POINTS - 1) last segments plus already constructed
+	 * blocks in this segment
+	 *      Other lanes : (SYNC_POINTS - 1) last segments
+	 */
+	uint32_t reference_area_size;
+	uint64_t relative_position;
+	uint32_t start_position, absolute_position;
+
+	if (0 == position->pass) {
+		/* First pass */
+		if (0 == position->slice) {
+			/* First slice */
+			reference_area_size =
+				position->index - 1; /* all but the previous */
+		}
+		else {
+			if (same_lane) {
+				/* The same lane => add current segment */
+				reference_area_size =
+					position->slice * instance->segment_length +
+					position->index - 1;
+			}
+			else {
+				/* Other lane: finished segments only. The (-1) term is added
+				 * to an unsigned value, so it acts as "subtract one" when
+				 * index is 0 (assumes the operands are unsigned 32-bit --
+				 * TODO confirm against the struct definitions). */
+				reference_area_size =
+					position->slice * instance->segment_length +
+					((position->index == 0) ? (-1) : 0);
+			}
+		}
+	}
+	else {
+		/* Every pass after the first */
+		if (same_lane) {
+			reference_area_size = instance->lane_length -
+				instance->segment_length + position->index -
+				1;
+		}
+		else {
+			reference_area_size = instance->lane_length -
+				instance->segment_length +
+				((position->index == 0) ? (-1) : 0);
+		}
+	}
+
+	/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
+	 * relative position. The value is squared and its upper 32 bits kept,
+	 * then scaled by the area size, which skews the choice toward the most
+	 * recent blocks. The arithmetic is done in 64 bits. */
+	relative_position = pseudo_rand;
+	relative_position = relative_position * relative_position >> 32;
+	relative_position = reference_area_size - 1 -
+		(reference_area_size * relative_position >> 32);
+
+	/* 1.2.5 Computing starting position */
+	start_position = 0;
+
+	if (0 != position->pass) {
+		/* After the first pass the window starts at the slice following the
+		 * current one, wrapping to 0 after the last slice. */
+		start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
+			? 0
+			: (position->slice + 1) * instance->segment_length;
+	}
+
+	/* 1.2.6. Computing absolute position */
+	absolute_position = (start_position + relative_position) %
+		instance->lane_length; /* absolute position */
+	return absolute_position;
+}
+
+/*
+ * Single-threaded memory fill.
+ * Walks every pass, then every slice (ARGON2_SYNC_POINTS per pass), then
+ * every lane, and hands each resulting segment to instance->impl.
+ * Always returns ARGON2_OK.
+ */
+static int fill_memory_blocks_st(argon2_instance_t *instance) {
+	uint32_t pass, slice, lane;
+
+	for (pass = 0; pass < instance->passes; ++pass) {
+		for (slice = 0; slice < ARGON2_SYNC_POINTS; ++slice) {
+			for (lane = 0; lane < instance->lanes; ++lane) {
+				/* positional initializer: pass, lane, slice, then a zero
+				 * starting index (presumed field order -- verify against
+				 * the argon2_position_t definition) */
+				argon2_position_t position = { pass, lane, (uint8_t)slice, 0 };
+				/* delegate to whichever implementation was selected */
+				instance->impl(instance, position);
+			}
+		}
+	}
+	return ARGON2_OK;
+}
+
+/*
+ * Public entry point for filling the Argon2 memory.
+ * Returns ARGON2_INCORRECT_PARAMETER for a NULL instance or one with zero
+ * lanes; otherwise returns the result of the single-threaded fill.
+ */
+int randomx_argon2_fill_memory_blocks(argon2_instance_t *instance) {
+	if (instance == NULL) {
+		return ARGON2_INCORRECT_PARAMETER;
+	}
+	if (instance->lanes == 0) {
+		return ARGON2_INCORRECT_PARAMETER;
+	}
+	return fill_memory_blocks_st(instance);
+}
+
+/*
+ * Checks an argon2_context against the ARGON2_MIN_* / ARGON2_MAX_* limits.
+ *
+ * The checks run in a fixed order (password, salt, secret, associated data,
+ * memory, time, lanes, threads, allocator callbacks) and the first violation
+ * determines the returned error code.
+ *
+ * @param context context to validate; NULL yields ARGON2_INCORRECT_PARAMETER
+ * @return ARGON2_OK when every check passes, otherwise a negative error code
+ */
+int randomx_argon2_validate_inputs(const argon2_context *context) {
+	if (context == NULL) {
+		return ARGON2_INCORRECT_PARAMETER;
+	}
+
+	/* Password (required): a NULL pointer is only valid with zero length */
+	if (context->pwd == NULL && context->pwdlen != 0) {
+		return ARGON2_PWD_PTR_MISMATCH;
+	}
+	if (context->pwdlen < ARGON2_MIN_PWD_LENGTH) {
+		return ARGON2_PWD_TOO_SHORT;
+	}
+	if (context->pwdlen > ARGON2_MAX_PWD_LENGTH) {
+		return ARGON2_PWD_TOO_LONG;
+	}
+
+	/* Salt (required): a NULL pointer is only valid with zero length */
+	if (context->salt == NULL && context->saltlen != 0) {
+		return ARGON2_SALT_PTR_MISMATCH;
+	}
+	if (context->saltlen < ARGON2_MIN_SALT_LENGTH) {
+		return ARGON2_SALT_TOO_SHORT;
+	}
+	if (context->saltlen > ARGON2_MAX_SALT_LENGTH) {
+		return ARGON2_SALT_TOO_LONG;
+	}
+
+	/* Secret (optional): length limits apply only when a pointer is given */
+	if (context->secret == NULL) {
+		if (context->secretlen != 0) {
+			return ARGON2_SECRET_PTR_MISMATCH;
+		}
+	}
+	else if (context->secretlen < ARGON2_MIN_SECRET) {
+		return ARGON2_SECRET_TOO_SHORT;
+	}
+	else if (context->secretlen > ARGON2_MAX_SECRET) {
+		return ARGON2_SECRET_TOO_LONG;
+	}
+
+	/* Associated data (optional): same rules as the secret */
+	if (context->ad == NULL) {
+		if (context->adlen != 0) {
+			return ARGON2_AD_PTR_MISMATCH;
+		}
+	}
+	else if (context->adlen < ARGON2_MIN_AD_LENGTH) {
+		return ARGON2_AD_TOO_SHORT;
+	}
+	else if (context->adlen > ARGON2_MAX_AD_LENGTH) {
+		return ARGON2_AD_TOO_LONG;
+	}
+
+	/* Memory cost: absolute limits first, then at least 8 blocks per lane */
+	if (context->m_cost < ARGON2_MIN_MEMORY) {
+		return ARGON2_MEMORY_TOO_LITTLE;
+	}
+	if (context->m_cost > ARGON2_MAX_MEMORY) {
+		return ARGON2_MEMORY_TOO_MUCH;
+	}
+	if (context->m_cost < 8 * context->lanes) {
+		return ARGON2_MEMORY_TOO_LITTLE;
+	}
+
+	/* Time cost */
+	if (context->t_cost < ARGON2_MIN_TIME) {
+		return ARGON2_TIME_TOO_SMALL;
+	}
+	if (context->t_cost > ARGON2_MAX_TIME) {
+		return ARGON2_TIME_TOO_LARGE;
+	}
+
+	/* Lanes */
+	if (context->lanes < ARGON2_MIN_LANES) {
+		return ARGON2_LANES_TOO_FEW;
+	}
+	if (context->lanes > ARGON2_MAX_LANES) {
+		return ARGON2_LANES_TOO_MANY;
+	}
+
+	/* Threads */
+	if (context->threads < ARGON2_MIN_THREADS) {
+		return ARGON2_THREADS_TOO_FEW;
+	}
+	if (context->threads > ARGON2_MAX_THREADS) {
+		return ARGON2_THREADS_TOO_MANY;
+	}
+
+	/* Allocator callbacks must be supplied as a pair or not at all */
+	if (context->allocate_cbk != NULL && context->free_cbk == NULL) {
+		return ARGON2_FREE_MEMORY_CBK_NULL;
+	}
+	if (context->allocate_cbk == NULL && context->free_cbk != NULL) {
+		return ARGON2_ALLOCATE_MEMORY_CBK_NULL;
+	}
+
+	return ARGON2_OK;
+}
+
+/*
+ * Creates the first two blocks of every lane.
+ *
+ * For lane l and block index b in {0, 1}, the 8 bytes following the
+ * pre-hash digest inside `blockhash` are set to (b, l) via store32, the
+ * buffer is expanded to ARGON2_BLOCK_SIZE bytes with blake2b_long, and the
+ * result is loaded into instance->memory[l * lane_length + b].
+ * `blockhash` is modified in place.
+ */
+void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
+	uint8_t expanded[ARGON2_BLOCK_SIZE];
+	uint32_t lane, first;
+
+	for (lane = 0; lane < instance->lanes; ++lane) {
+		/* the lane number occupies the second 32-bit slot for both blocks */
+		store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, lane);
+
+		for (first = 0; first < 2; ++first) {
+			/* the block index (0 or 1) occupies the first 32-bit slot */
+			store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, first);
+			blake2b_long(expanded, ARGON2_BLOCK_SIZE, blockhash,
+				ARGON2_PREHASH_SEED_LENGTH);
+			load_block(&instance->memory[lane * instance->lane_length + first],
+				expanded);
+		}
+	}
+}
+
+/* Absorbs one 32-bit value, encoded with store32, into the running hash. */
+static void rxa2_absorb_u32(blake2b_state *hash, uint32_t word) {
+	uint8_t encoded[sizeof(uint32_t)];
+
+	store32(encoded, word);
+	blake2b_update(hash, (const uint8_t *)encoded, sizeof(encoded));
+}
+
+/* Absorbs a length-prefixed field: the length always, the bytes only when
+ * the data pointer is non-NULL. */
+static void rxa2_absorb_field(blake2b_state *hash, const uint8_t *data,
+	uint32_t length) {
+	rxa2_absorb_u32(hash, length);
+	if (data != NULL) {
+		blake2b_update(hash, data, length);
+	}
+}
+
+/*
+ * Computes the Argon2 pre-hash digest of all inputs into `blockhash`.
+ *
+ * Absorbed in order: lanes, outlen, m_cost, t_cost, version, type, then the
+ * length-prefixed password, salt, secret and associated data. The digest is
+ * ARGON2_PREHASH_DIGEST_LENGTH bytes long. Returns without doing anything
+ * when `context` or `blockhash` is NULL.
+ */
+void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type) {
+	blake2b_state BlakeHash;
+
+	if (NULL == context || NULL == blockhash) {
+		return;
+	}
+
+	blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
+
+	/* fixed-size parameters */
+	rxa2_absorb_u32(&BlakeHash, context->lanes);
+	rxa2_absorb_u32(&BlakeHash, context->outlen);
+	rxa2_absorb_u32(&BlakeHash, context->m_cost);
+	rxa2_absorb_u32(&BlakeHash, context->t_cost);
+	rxa2_absorb_u32(&BlakeHash, context->version);
+	rxa2_absorb_u32(&BlakeHash, (uint32_t)type);
+
+	/* variable-length inputs, each preceded by its length */
+	rxa2_absorb_field(&BlakeHash, (const uint8_t *)context->pwd, context->pwdlen);
+	rxa2_absorb_field(&BlakeHash, (const uint8_t *)context->salt, context->saltlen);
+	rxa2_absorb_field(&BlakeHash, (const uint8_t *)context->secret, context->secretlen);
+	rxa2_absorb_field(&BlakeHash, (const uint8_t *)context->ad, context->adlen);
+
+	blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
+}
+
+/*
+ * Prepares `instance` for filling: records the context pointer, computes the
+ * pre-hash digest of all inputs and derives the first two blocks of every
+ * lane from it. No memory is allocated here.
+ *
+ * @param instance instance to initialize; its memory must already be set up
+ * @param context  Argon2 inputs
+ * @return ARGON2_INCORRECT_PARAMETER if either pointer is NULL, otherwise
+ *         ARGON2_OK
+ */
+int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context) {
+	/* H_0 + 8 extra bytes to produce the first blocks */
+	uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
+
+	if (instance == NULL || context == NULL)
+		return ARGON2_INCORRECT_PARAMETER;
+	instance->context_ptr = context;
+
+	/* 1. Memory allocation */
+	//RandomX takes care of memory allocation
+
+	/* 2. Initial hashing */
+	/* Hashing all inputs */
+	rxa2_initial_hash(blockhash, context, instance->type);
+	/* Zeroing 8 extra bytes (disabled in this copy; rxa2_fill_first_blocks
+	 * writes the bytes after the digest with store32 before hashing) */
+	/*rxa2_clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
+		ARGON2_PREHASH_SEED_LENGTH -
+		ARGON2_PREHASH_DIGEST_LENGTH);*/
+
+	/* 3. Creating first blocks, we always have at least two blocks in a slice
+	 */
+	rxa2_fill_first_blocks(blockhash, instance);
+
+	return ARGON2_OK;
+}
--- a/crypto/randomx/argon2_core.h
+++ b/crypto/randomx/argon2_core.h
@ -0,0 +1,163 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef ARGON2_CORE_H
+#define ARGON2_CORE_H
+
+#include <stdint.h>
+#include "argon2.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Cast helper: CONST_CAST(T) expr expands to (T)(uintptr_t) expr, so the
+ * expression is converted to uintptr_t first and then to T. */
+#define CONST_CAST(x) (x)(uintptr_t)
+
+ /**********************Argon2 internal constants*******************************/
+
+/* Compile-time sizes used by the Argon2 core. */
+enum argon2_core_constants {
+	/* Memory block size in bytes */
+	ARGON2_BLOCK_SIZE = 1024,
+	/* Number of 8-, 16-, 32- and 64-byte words that make up one block */
+	ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
+	ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
+	ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32,
+	ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64,
+
+	/* Number of pseudo-random values generated by one call to Blake in
+	   Argon2i to generate reference block positions */
+	ARGON2_ADDRESSES_IN_BLOCK = 128,
+
+	/* Pre-hashing digest length in bytes, and the length of the seed buffer
+	   that holds the digest plus 8 extra bytes */
+	ARGON2_PREHASH_DIGEST_LENGTH = 64,
+	ARGON2_PREHASH_SEED_LENGTH = 72
+};
+
+/*************************Argon2 internal data types***********************/
+
+/*
+ * Structure for the (1KB) memory block implemented as 128 64-bit words.
+ * Memory blocks can be copied and XORed. Individual words are accessed through
+ * the v[] member (no bounds checking).
+ */
+typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block;
+
+/*
+ * Argon2 instance: memory pointer, number of passes, amount of memory, type,
+ * and derived values.
+ * Used to evaluate the number and location of blocks to construct in each
+ * thread
+ */
+typedef struct Argon2_instance_t {
+	block *memory;          /* Memory pointer: base of the block array */
+	uint32_t version;       /* Argon2 version; compared with ARGON2_VERSION_10 to pick overwrite or XOR */
+	uint32_t passes;        /* Number of passes */
+	uint32_t memory_blocks; /* Number of blocks in memory */
+	uint32_t segment_length; /* Number of blocks in one segment (one slice of one lane) */
+	uint32_t lane_length;    /* Number of blocks in one lane */
+	uint32_t lanes;          /* Number of lanes */
+	uint32_t threads;        /* NOTE(review): presumably the number of worker threads - confirm */
+	argon2_type type;        /* Argon2 variant; passed to the initial hash */
+	int print_internals; /* whether to print the memory blocks */
+	argon2_context *context_ptr; /* points back to original context */
+	randomx_argon2_impl *impl; /* NOTE(review): presumably the segment-filling routine to use (reference, SSSE3, ...) - confirm */
+} argon2_instance_t;
+
+/*
+ * Argon2 position: where we construct the block right now. Used to distribute
+ * work between threads.
+ */
+typedef struct Argon2_position_t {
+	uint32_t pass;  /* Current pass over the memory (0 for the first pass) */
+	uint32_t lane;  /* Lane that is being filled */
+	uint8_t slice;  /* Slice (segment number inside the lane) that is being filled */
+	uint32_t index; /* Index of the current block inside the segment */
+} argon2_position_t;
+
+/* Struct that holds the inputs for thread handling FillSegment */
+typedef struct Argon2_thread_data {
+	argon2_instance_t *instance_ptr; /* Instance the segment belongs to */
+	argon2_position_t pos;           /* Position (pass, lane, slice, index) of the segment */
+} argon2_thread_data;
+
+/*************************Argon2 core functions********************************/
+
+/*
+ * Computes absolute position of reference block in the lane following a skewed
+ * distribution and using a pseudo-random value as input
+ * @param instance Pointer to the current instance
+ * @param position Pointer to the current position
+ * @param pseudo_rand 32-bit pseudo-random value used to determine the position
+ * @param same_lane Indicates if the block will be taken from the current lane.
+ * If so we can reference the current segment
+ * @return Index of the reference block inside its lane (callers add it to the
+ * start of the lane to obtain the block address)
+ * @pre All pointers must be valid
+ */
+uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance,
+	const argon2_position_t *position, uint32_t pseudo_rand,
+	int same_lane);
+
+/*
+ * Function that validates all inputs against predefined restrictions and
+ * returns an error code
+ * @param context Pointer to current Argon2 context
+ * @return ARGON2_OK if everything is all right, otherwise one of the error
+ * codes (all defined in <argon2.h>)
+ */
+int randomx_argon2_validate_inputs(const argon2_context *context);
+
+/*
+ * Function hashes the inputs with Blake and creates the first blocks.
+ * It does NOT allocate memory: RandomX takes care of memory allocation.
+ * @param  instance Current Argon2 instance; its context_ptr is set to
+ * @context
+ * @param  context  Pointer to the Argon2 context containing the inputs to
+ * hash
+ * @return ARGON2_INCORRECT_PARAMETER if @instance or @context is NULL,
+ * ARGON2_OK otherwise.
+ */
+int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context);
+
+/*
+ * Function that fills the entire memory t_cost times based on the first two
+ * blocks in each lane
+ * @param instance Pointer to the current instance
+ * @return ARGON2_OK if successful.
+ * NOTE(review): the values returned on failure are not visible from this
+ * header - confirm against the implementation.
+ */
+int randomx_argon2_fill_memory_blocks(argon2_instance_t* instance);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
--- a/crypto/randomx/argon2_ref.c
+++ b/crypto/randomx/argon2_ref.c
@ -0,0 +1,187 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "argon2.h"
+#include "argon2_core.h"
+
+#include "blake2/blamka-round-ref.h"
+#include "blake2/blake2-impl.h"
+#include "blake2/blake2.h"
+
+/* Overwrites every word of *dst with the corresponding word of *src. */
+static void copy_block(block* dst, const block* src) {
+	memcpy(dst->v, src->v, sizeof(dst->v));
+}
+
+/* XORs every 64-bit word of *src into the corresponding word of *dst. */
+static void xor_block(block* dst, const block* src) {
+	uint64_t* out = dst->v;
+	const uint64_t* in = src->v;
+	const uint64_t* const in_end = in + ARGON2_QWORDS_IN_BLOCK;
+
+	while (in != in_end) {
+		*out++ ^= *in++;
+	}
+}
+
+/*
+ * Builds a new memory block from the previous block and a reference block,
+ * either overwriting the destination or XORing the result over its old
+ * contents.
+ * @param prev_block Pointer to the previous block
+ * @param ref_block Pointer to the reference block
+ * @param next_block Pointer to the block to be constructed; it must already
+ * hold valid data when with_xor is set
+ * @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
+ * @pre all block pointers must be valid
+ */
+static void fill_block(const block *prev_block, const block *ref_block,
+	block *next_block, int with_xor) {
+	block mixed, feed;
+	unsigned n;
+
+	/* mixed = ref_block ^ prev_block: the value that gets permuted */
+	copy_block(&mixed, ref_block);
+	xor_block(&mixed, prev_block);
+
+	/* feed = value XORed back in after the permutation; it additionally
+	   absorbs the old contents of next_block when with_xor is set */
+	copy_block(&feed, &mixed);
+	if (with_xor) {
+		xor_block(&feed, next_block);
+	}
+
+	/* Apply Blake2 on groups of 16 consecutive 64-bit words:
+	   (0,1,...,15), then (16,17,...,31), ... finally (112,113,...,127) */
+	for (n = 0; n < 8; ++n) {
+		uint64_t *grp = mixed.v + 16 * n;
+		BLAKE2_ROUND_NOMSG(
+			grp[0], grp[1], grp[2], grp[3],
+			grp[4], grp[5], grp[6], grp[7],
+			grp[8], grp[9], grp[10], grp[11],
+			grp[12], grp[13], grp[14], grp[15]);
+	}
+
+	/* Apply Blake2 on word pairs taken at a stride of 16:
+	   (0,1,16,17,...,112,113), then (2,3,18,19,...,114,115), ... finally
+	   (14,15,30,31,...,126,127) */
+	for (n = 0; n < 8; ++n) {
+		uint64_t *grp = mixed.v + 2 * n;
+		BLAKE2_ROUND_NOMSG(
+			grp[0], grp[1], grp[16], grp[17],
+			grp[32], grp[33], grp[48], grp[49],
+			grp[64], grp[65], grp[80], grp[81],
+			grp[96], grp[97], grp[112], grp[113]);
+	}
+
+	/* next_block = feed ^ permuted value */
+	copy_block(next_block, &feed);
+	xor_block(next_block, &mixed);
+}
+
+/*
+ * Fills one segment of one lane using the portable reference block function.
+ * For every block of the segment it takes a pseudo-random value from the
+ * previous block, derives the reference lane and reference index from it and
+ * builds the new block from the previous and the reference block.
+ * @param instance Argon2 instance describing the memory layout; the function
+ * returns immediately if it is NULL
+ * @param position Pass, lane and slice of the segment to fill (passed by
+ * value; the local copy has its index field updated for every block)
+ */
+void randomx_argon2_fill_segment_ref(const argon2_instance_t *instance,
+	argon2_position_t position) {
+	block *ref_block = NULL, *curr_block = NULL;
+	uint64_t pseudo_rand, ref_index, ref_lane;
+	uint32_t prev_offset, curr_offset;
+	uint32_t starting_index;
+	uint32_t i;
+	int with_xor;
+
+	if (instance == NULL) {
+		return;
+	}
+
+	starting_index = 0;
+
+	if ((0 == position.pass) && (0 == position.slice)) {
+		starting_index = 2; /* we have already generated the first two blocks */
+	}
+
+	/* Version 1.0 always overwrites; later versions overwrite during the
+	 * first pass and XOR over the old block in every following pass. Neither
+	 * input changes inside the loop, so the decision is taken once. */
+	with_xor = (ARGON2_VERSION_10 != instance->version) && (0 != position.pass);
+
+	/* Offset of the current block */
+	curr_offset = position.lane * instance->lane_length +
+		position.slice * instance->segment_length + starting_index;
+
+	if (0 == curr_offset % instance->lane_length) {
+		/* Last block in this lane */
+		prev_offset = curr_offset + instance->lane_length - 1;
+	}
+	else {
+		/* Previous block */
+		prev_offset = curr_offset - 1;
+	}
+
+	for (i = starting_index; i < instance->segment_length;
+		++i, ++curr_offset, ++prev_offset) {
+		/*1.1 Rotating prev_offset if needed */
+		if (curr_offset % instance->lane_length == 1) {
+			prev_offset = curr_offset - 1;
+		}
+
+		/* 1.2 Computing the index of the reference block */
+		/* 1.2.1 Taking pseudo-random value from the previous block */
+		pseudo_rand = instance->memory[prev_offset].v[0];
+
+		/* 1.2.2 Computing the lane of the reference block */
+		ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
+
+		if ((position.pass == 0) && (position.slice == 0)) {
+			/* Can not reference other lanes yet */
+			ref_lane = position.lane;
+		}
+
+		/* 1.2.3 Computing the number of possible reference block within the
+		 * lane.
+		 */
+		position.index = i;
+		ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
+			ref_lane == position.lane);
+
+		/* 2 Creating a new block */
+		ref_block =
+			instance->memory + instance->lane_length * ref_lane + ref_index;
+		curr_block = instance->memory + curr_offset;
+		fill_block(instance->memory + prev_offset, ref_block, curr_block,
+			with_xor);
+	}
+}
--- a/crypto/randomx/argon2_ssse3.c
+++ b/crypto/randomx/argon2_ssse3.c
@ -0,0 +1,182 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "argon2.h"
+
+#if defined(_MSC_VER) //MSVC doesn't define SSSE3
+#define __SSSE3__
+#endif
+
+/* Defined further down in this file when __SSSE3__ is defined. */
+void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance,
+	argon2_position_t position);
+
+/*
+ * Returns the SSSE3 segment-filling routine, or NULL when this file was
+ * compiled without __SSSE3__ defined. Only the compile-time macro is
+ * consulted here; no run-time CPU check is performed by this function.
+ */
+randomx_argon2_impl* randomx_argon2_impl_ssse3() {
+#if defined(__SSSE3__)
+	return &randomx_argon2_fill_segment_ssse3;
+#else
+	return NULL;
+#endif
+}
+
+#if defined(__SSSE3__)
+
+#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
+
+#include "argon2_core.h"
+
+#include "blake2/blamka-round-ssse3.h"
+#include "blake2/blake2-impl.h"
+#include "blake2/blake2.h"
+
+/*
+ * SSSE3 block function. On entry state holds the previous block; on exit it
+ * holds the block that was just written to next_block.
+ * @param state 64 x 128-bit working state (previous block in, new block out)
+ * @param ref_block Pointer to the reference block
+ * @param next_block Pointer to the block to be constructed
+ * @param with_xor Whether to XOR over the old contents of next_block (1) or
+ * just overwrite it (0)
+ */
+static void fill_block(__m128i* state, const block* ref_block,
+	block* next_block, int with_xor) {
+	__m128i feed[ARGON2_OWORDS_IN_BLOCK];
+	const __m128i* ref_words = (const __m128i*)ref_block->v;
+	__m128i* out_words = (__m128i*)next_block->v;
+	unsigned int k;
+
+	/* state ^= ref_block; feed is the value XORed back in after the rounds
+	   and also absorbs the old next_block contents when with_xor is set */
+	for (k = 0; k < ARGON2_OWORDS_IN_BLOCK; k++) {
+		state[k] = _mm_xor_si128(state[k], _mm_loadu_si128(ref_words + k));
+		if (with_xor) {
+			feed[k] = _mm_xor_si128(
+				state[k], _mm_loadu_si128((const __m128i*)out_words + k));
+		}
+		else {
+			feed[k] = state[k];
+		}
+	}
+
+	/* Rounds over groups of 8 consecutive 128-bit words */
+	for (k = 0; k < 8; ++k) {
+		__m128i* grp = state + 8 * k;
+		BLAKE2_ROUND(grp[0], grp[1], grp[2], grp[3],
+			grp[4], grp[5], grp[6], grp[7]);
+	}
+
+	/* Rounds over 128-bit words taken at a stride of 8 */
+	for (k = 0; k < 8; ++k) {
+		__m128i* grp = state + k;
+		BLAKE2_ROUND(grp[0], grp[8], grp[16], grp[24],
+			grp[32], grp[40], grp[48], grp[56]);
+	}
+
+	/* Feed-forward and store; state keeps a copy of the stored block */
+	for (k = 0; k < ARGON2_OWORDS_IN_BLOCK; k++) {
+		state[k] = _mm_xor_si128(state[k], feed[k]);
+		_mm_storeu_si128(out_words + k, state[k]);
+	}
+}
+
+/*
+ * Fills one segment of one lane using the SSSE3 block function. The previous
+ * block is loaded into a local state once; fill_block then leaves the block
+ * it has just written in that state for the next iteration.
+ * @param instance Argon2 instance describing the memory layout; the function
+ * returns immediately if it is NULL
+ * @param position Pass, lane and slice of the segment to fill (passed by
+ * value; the local copy has its index field updated for every block)
+ */
+void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance,
+	argon2_position_t position) {
+	block* ref_block = NULL, * curr_block = NULL;
+	uint64_t pseudo_rand, ref_index, ref_lane;
+	uint32_t prev_offset, curr_offset;
+	uint32_t starting_index, i;
+	int with_xor;
+	__m128i state[ARGON2_OWORDS_IN_BLOCK];
+
+	if (instance == NULL) {
+		return;
+	}
+
+	starting_index = 0;
+
+	if ((0 == position.pass) && (0 == position.slice)) {
+		starting_index = 2; /* we have already generated the first two blocks */
+	}
+
+	/* Version 1.0 always overwrites; later versions overwrite during the
+	 * first pass and XOR over the old block in every following pass. Neither
+	 * input changes inside the loop, so the decision is taken once. */
+	with_xor = (ARGON2_VERSION_10 != instance->version) && (0 != position.pass);
+
+	/* Offset of the current block */
+	curr_offset = position.lane * instance->lane_length +
+		position.slice * instance->segment_length + starting_index;
+
+	if (0 == curr_offset % instance->lane_length) {
+		/* Last block in this lane */
+		prev_offset = curr_offset + instance->lane_length - 1;
+	}
+	else {
+		/* Previous block */
+		prev_offset = curr_offset - 1;
+	}
+
+	/* Seed the working state with the previous block */
+	memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
+
+	for (i = starting_index; i < instance->segment_length;
+		++i, ++curr_offset, ++prev_offset) {
+		/*1.1 Rotating prev_offset if needed */
+		if (curr_offset % instance->lane_length == 1) {
+			prev_offset = curr_offset - 1;
+		}
+
+		/* 1.2 Computing the index of the reference block */
+		/* 1.2.1 Taking pseudo-random value from the previous block */
+		pseudo_rand = instance->memory[prev_offset].v[0];
+
+		/* 1.2.2 Computing the lane of the reference block */
+		ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
+
+		if ((position.pass == 0) && (position.slice == 0)) {
+			/* Can not reference other lanes yet */
+			ref_lane = position.lane;
+		}
+
+		/* 1.2.3 Computing the number of possible reference block within the
+		 * lane.
+		 */
+		position.index = i;
+		ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
+			ref_lane == position.lane);
+
+		/* 2 Creating a new block */
+		ref_block =
+			instance->memory + instance->lane_length * ref_lane + ref_index;
+		curr_block = instance->memory + curr_offset;
+		fill_block(state, ref_block, curr_block, with_xor);
+	}
+}
+
+#endif
--- a/crypto/randomx/asm/configuration.asm
+++ b/crypto/randomx/asm/configuration.asm
@ -0,0 +1,48 @@
+; File start: ..\src\configuration.h
+; Assembler-side copies of the RandomX configuration constants, one symbol per
+; line (EQU for numbers, TEXTEQU for the salt text).
+; NOTE(review): the file markers suggest this list is derived from
+; configuration.h and must be kept in sync with it - confirm.
+; NOTE(review): the trailing t on each number looks like the MASM decimal
+; radix suffix - confirm.
+RANDOMX_ARGON_MEMORY EQU 262144t
+RANDOMX_ARGON_ITERATIONS EQU 3t
+RANDOMX_ARGON_LANES EQU 1t
+RANDOMX_ARGON_SALT TEXTEQU <"RandomX\x03">
+RANDOMX_CACHE_ACCESSES EQU 8t
+RANDOMX_SUPERSCALAR_LATENCY EQU 170t
+RANDOMX_DATASET_BASE_SIZE EQU 2147483648t
+RANDOMX_DATASET_EXTRA_SIZE EQU 33554368t
+RANDOMX_PROGRAM_SIZE EQU 256t
+RANDOMX_PROGRAM_ITERATIONS EQU 2048t
+RANDOMX_PROGRAM_COUNT EQU 8t
+RANDOMX_SCRATCHPAD_L3 EQU 2097152t
+RANDOMX_SCRATCHPAD_L2 EQU 262144t
+RANDOMX_SCRATCHPAD_L1 EQU 16384t
+RANDOMX_JUMP_BITS EQU 8t
+RANDOMX_JUMP_OFFSET EQU 8t
+RANDOMX_FREQ_IADD_RS EQU 16t
+RANDOMX_FREQ_IADD_M EQU 7t
+RANDOMX_FREQ_ISUB_R EQU 16t
+RANDOMX_FREQ_ISUB_M EQU 7t
+RANDOMX_FREQ_IMUL_R EQU 16t
+RANDOMX_FREQ_IMUL_M EQU 4t
+RANDOMX_FREQ_IMULH_R EQU 4t
+RANDOMX_FREQ_IMULH_M EQU 1t
+RANDOMX_FREQ_ISMULH_R EQU 4t
+RANDOMX_FREQ_ISMULH_M EQU 1t
+RANDOMX_FREQ_IMUL_RCP EQU 8t
+RANDOMX_FREQ_INEG_R EQU 2t
+RANDOMX_FREQ_IXOR_R EQU 15t
+RANDOMX_FREQ_IXOR_M EQU 5t
+RANDOMX_FREQ_IROR_R EQU 8t
+RANDOMX_FREQ_IROL_R EQU 2t
+RANDOMX_FREQ_ISWAP_R EQU 4t
+RANDOMX_FREQ_FSWAP_R EQU 4t
+RANDOMX_FREQ_FADD_R EQU 16t
+RANDOMX_FREQ_FADD_M EQU 5t
+RANDOMX_FREQ_FSUB_R EQU 16t
+RANDOMX_FREQ_FSUB_M EQU 5t
+RANDOMX_FREQ_FSCAL_R EQU 6t
+RANDOMX_FREQ_FMUL_R EQU 32t
+RANDOMX_FREQ_FDIV_M EQU 4t
+RANDOMX_FREQ_FSQRT_R EQU 6t
+RANDOMX_FREQ_CBRANCH EQU 25t
+RANDOMX_FREQ_CFROUND EQU 1t
+RANDOMX_FREQ_ISTORE EQU 16t
+RANDOMX_FREQ_NOP EQU 0t
+; File end: ..\src\configuration.h
--- a/crypto/randomx/asm/program_epilogue_linux.inc
+++ b/crypto/randomx/asm/program_epilogue_linux.inc
@ -0,0 +1,10 @@
+	;# restore callee-saved registers - System V AMD64 ABI
+	;# (popped in the reverse of the push order used by program_prologue_linux.inc)
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop rbp
+	pop rbx
+
+	;# program finished
+	;# (ret with an immediate of 0 releases no extra stack bytes)
+	ret 0
--- a/crypto/randomx/asm/program_epilogue_store.inc
+++ b/crypto/randomx/asm/program_epilogue_store.inc
@ -0,0 +1,19 @@
+	;# save VM register values
+	;# The destination pointer is taken from the top of the stack.
+	;# NOTE(review): presumably the register file pointer pushed by the prologue - confirm
+	;# Layout written: r8-r15 at offsets 0-56, xmm0-xmm3 at 64-112,
+	;# xmm4-xmm7 at 128-176 (all relative to the popped pointer).
+	;# movdqa requires the destination addresses to be 16-byte aligned.
+	pop rcx
+	mov qword ptr [rcx+0], r8
+	mov qword ptr [rcx+8], r9
+	mov qword ptr [rcx+16], r10
+	mov qword ptr [rcx+24], r11
+	mov qword ptr [rcx+32], r12
+	mov qword ptr [rcx+40], r13
+	mov qword ptr [rcx+48], r14
+	mov qword ptr [rcx+56], r15
+	movdqa xmmword ptr [rcx+64], xmm0
+	movdqa xmmword ptr [rcx+80], xmm1
+	movdqa xmmword ptr [rcx+96], xmm2
+	movdqa xmmword ptr [rcx+112], xmm3
+	;# advance the base by 64 so the next four stores land at offsets 128-176
+	lea rcx, [rcx+64]
+	movdqa xmmword ptr [rcx+64], xmm4
+	movdqa xmmword ptr [rcx+80], xmm5
+	movdqa xmmword ptr [rcx+96], xmm6
+	movdqa xmmword ptr [rcx+112], xmm7
--- a/crypto/randomx/asm/program_epilogue_win64.inc
+++ b/crypto/randomx/asm/program_epilogue_win64.inc
@ -0,0 +1,24 @@
+	;# restore callee-saved registers - Microsoft x64 calling convention
+	;# Mirror image of program_prologue_win64.inc: first the two 80-byte areas
+	;# holding xmm15-xmm11 and xmm10-xmm6, then the general purpose registers
+	;# in the reverse of their push order.
+	movdqu xmm15, xmmword ptr [rsp]
+	movdqu xmm14, xmmword ptr [rsp+16]
+	movdqu xmm13, xmmword ptr [rsp+32]
+	movdqu xmm12, xmmword ptr [rsp+48]
+	movdqu xmm11, xmmword ptr [rsp+64]
+	add rsp, 80
+	movdqu xmm10, xmmword ptr [rsp]
+	movdqu xmm9, xmmword ptr [rsp+16]
+	movdqu xmm8, xmmword ptr [rsp+32]
+	movdqu xmm7, xmmword ptr [rsp+48]
+	movdqu xmm6, xmmword ptr [rsp+64]
+	add rsp, 80
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop rsi
+	pop rdi
+	pop rbp
+	pop rbx
+
+	;# program finished
+	ret
--- a/crypto/randomx/asm/program_loop_load.inc
+++ b/crypto/randomx/asm/program_loop_load.inc
@ -0,0 +1,28 @@
+	;# Start of one loop iteration: mix 64 bytes at rsi+rax into the integer
+	;# registers and load 64 bytes at rsi+rdx into the floating point registers.
+	;# Both addresses are pushed so that program_loop_store.inc can pop them.
+	;# NOTE(review): rsi is the scratchpad pointer per the prologue; rax and rdx
+	;# are presumably scratchpad offsets prepared by the caller - confirm
+	lea rcx, [rsi+rax]
+	push rcx
+	xor r8,  qword ptr [rcx+0]
+	xor r9,  qword ptr [rcx+8]
+	xor r10, qword ptr [rcx+16]
+	xor r11, qword ptr [rcx+24]
+	xor r12, qword ptr [rcx+32]
+	xor r13, qword ptr [rcx+40]
+	xor r14, qword ptr [rcx+48]
+	xor r15, qword ptr [rcx+56]
+	lea rcx, [rsi+rdx]
+	push rcx
+	;# each cvtdq2pd converts two packed signed 32-bit integers to two doubles
+	cvtdq2pd xmm0, qword ptr [rcx+0]
+	cvtdq2pd xmm1, qword ptr [rcx+8]
+	cvtdq2pd xmm2, qword ptr [rcx+16]
+	cvtdq2pd xmm3, qword ptr [rcx+24]
+	cvtdq2pd xmm4, qword ptr [rcx+32]
+	cvtdq2pd xmm5, qword ptr [rcx+40]
+	cvtdq2pd xmm6, qword ptr [rcx+48]
+	cvtdq2pd xmm7, qword ptr [rcx+56]
+	;# xmm4-xmm7 are additionally masked with xmm13 and then ORed with xmm14
+	;# NOTE(review): xmm13 and xmm14 are presumably a mantissa mask and exponent
+	;# bits set up by the caller - confirm
+	andps xmm4, xmm13
+	andps xmm5, xmm13
+	andps xmm6, xmm13
+	andps xmm7, xmm13
+	orps xmm4, xmm14
+	orps xmm5, xmm14
+	orps xmm6, xmm14
+	orps xmm7, xmm14
--- a/crypto/randomx/asm/program_loop_store.inc
+++ b/crypto/randomx/asm/program_loop_store.inc
@ -0,0 +1,18 @@
+	;# End of one loop iteration: write the registers back to memory.
+	;# The two addresses are popped in the reverse of the order in which
+	;# program_loop_load.inc pushed them (assuming the stack is balanced in
+	;# between): the integer registers go to the address pushed last, the
+	;# floating point results to the address pushed first.
+	pop rcx
+	mov qword ptr [rcx+0], r8
+	mov qword ptr [rcx+8], r9
+	mov qword ptr [rcx+16], r10
+	mov qword ptr [rcx+24], r11
+	mov qword ptr [rcx+32], r12
+	mov qword ptr [rcx+40], r13
+	mov qword ptr [rcx+48], r14
+	mov qword ptr [rcx+56], r15
+	pop rcx
+	;# fold xmm4-xmm7 into xmm0-xmm3 before storing
+	xorpd xmm0, xmm4
+	xorpd xmm1, xmm5
+	xorpd xmm2, xmm6
+	xorpd xmm3, xmm7
+	;# movapd requires the destination addresses to be 16-byte aligned
+	movapd xmmword ptr [rcx+0], xmm0
+	movapd xmmword ptr [rcx+16], xmm1
+	movapd xmmword ptr [rcx+32], xmm2
+	movapd xmmword ptr [rcx+48], xmm3
--- a/crypto/randomx/asm/program_prologue_linux.inc
+++ b/crypto/randomx/asm/program_prologue_linux.inc
@ -0,0 +1,35 @@
+	;# Entry code for the System V AMD64 ABI.
+	;# Arguments as consumed below: rdi = register file, rsi = memory descriptor
+	;# (mx and ma at offset 0, dataset pointer at offset 8), rdx = scratchpad,
+	;# rcx = loop counter.
+	;# callee-saved registers - System V AMD64 ABI
+	push rbx
+	push rbp
+	push r12
+	push r13
+	push r14
+	push r15
+
+	;# function arguments
+	mov rbx, rcx                ;# loop counter
+	push rdi                    ;# RegisterFile& registerFile
+	mov rcx, rdi
+	mov rbp, qword ptr [rsi]    ;# "mx", "ma"
+	mov rdi, qword ptr [rsi+8]  ;# uint8_t* dataset
+	mov rsi, rdx                ;# uint8_t* scratchpad
+
+	;# rax keeps the loaded 64-bit value, rbp gets its two 32-bit halves swapped
+	mov rax, rbp
+	ror rbp, 32
+
+	;# zero integer registers
+	xor r8, r8
+	xor r9, r9
+	xor r10, r10
+	xor r11, r11
+	xor r12, r12
+	xor r13, r13
+	xor r14, r14
+	xor r15, r15
+
+	;# load constant registers
+	;# rcx still holds the register file pointer; after adding 120 the four
+	;# loads read register file offsets 192, 208, 224 and 240
+	lea rcx, [rcx+120]
+	movapd xmm8, xmmword ptr [rcx+72]
+	movapd xmm9, xmmword ptr [rcx+88]
+	movapd xmm10, xmmword ptr [rcx+104]
+	movapd xmm11, xmmword ptr [rcx+120]
--- a/crypto/randomx/asm/program_prologue_win64.inc
+++ b/crypto/randomx/asm/program_prologue_win64.inc
@ -0,0 +1,48 @@
+	;# Entry code for the Microsoft x64 calling convention.
+	;# Arguments as consumed below: rcx = register file, rdx = memory descriptor
+	;# (mx and ma at offset 0, dataset pointer at offset 8), r8 = scratchpad,
+	;# r9 = loop counter.
+	;# callee-saved registers - Microsoft x64 calling convention
+	push rbx
+	push rbp
+	push rdi
+	push rsi
+	push r12
+	push r13
+	push r14
+	push r15
+	;# xmm6-xmm15 are saved in two 80-byte stack areas
+	sub rsp, 80
+	movdqu xmmword ptr [rsp+64], xmm6
+	movdqu xmmword ptr [rsp+48], xmm7
+	movdqu xmmword ptr [rsp+32], xmm8
+	movdqu xmmword ptr [rsp+16], xmm9
+	movdqu xmmword ptr [rsp+0], xmm10
+	sub rsp, 80
+	movdqu xmmword ptr [rsp+64], xmm11
+	movdqu xmmword ptr [rsp+48], xmm12
+	movdqu xmmword ptr [rsp+32], xmm13
+	movdqu xmmword ptr [rsp+16], xmm14
+	movdqu xmmword ptr [rsp+0], xmm15
+
+	;# function arguments
+	push rcx                    ;# RegisterFile& registerFile
+	mov rbp, qword ptr [rdx]    ;# "mx", "ma"
+	mov rdi, qword ptr [rdx+8]  ;# uint8_t* dataset
+	mov rsi, r8                 ;# uint8_t* scratchpad
+	mov rbx, r9                 ;# loop counter
+
+	;# rax keeps the loaded 64-bit value, rbp gets its two 32-bit halves swapped
+	mov rax, rbp
+	ror rbp, 32
+
+	;# zero integer registers
+	xor r8, r8
+	xor r9, r9
+	xor r10, r10
+	xor r11, r11
+	xor r12, r12
+	xor r13, r13
+	xor r14, r14
+	xor r15, r15
+
+	;# load constant registers
+	;# rcx still holds the register file pointer; after adding 120 the four
+	;# loads read register file offsets 192, 208, 224 and 240
+	lea rcx, [rcx+120]
+	movapd xmm8, xmmword ptr [rcx+72]
+	movapd xmm9, xmmword ptr [rcx+88]
+	movapd xmm10, xmmword ptr [rcx+104]
+	movapd xmm11, xmmword ptr [rcx+120]
--- a/crypto/randomx/asm/program_read_dataset.inc
+++ b/crypto/randomx/asm/program_read_dataset.inc
@ -0,0 +1,16 @@
+	;# Mix 64 bytes of the dataset (base pointer in rdi) into r8-r15 and
+	;# prefetch the location derived from the updated mx value.
+	mov ecx, ebp                       ;# ecx = ma
+	and ecx, RANDOMX_DATASET_BASE_MASK
+	xor r8, qword ptr [rdi+rcx]
+	ror rbp, 32                        ;# swap "ma" and "mx"
+	xor rbp, rax                       ;# modify "mx"
+	mov edx, ebp                       ;# edx = mx
+	and edx, RANDOMX_DATASET_BASE_MASK
+	prefetchnta byte ptr [rdi+rdx]
+	;# remaining seven quadwords of the 64-byte item selected through ecx
+	xor r9,  qword ptr [rdi+rcx+8]
+	xor r10, qword ptr [rdi+rcx+16]
+	xor r11, qword ptr [rdi+rcx+24]
+	xor r12, qword ptr [rdi+rcx+32]
+	xor r13, qword ptr [rdi+rcx+40]
+	xor r14, qword ptr [rdi+rcx+48]
+	xor r15, qword ptr [rdi+rcx+56]
+	
--- a/crypto/randomx/asm/program_read_dataset_sshash_fin.inc
+++ b/crypto/randomx/asm/program_read_dataset_sshash_fin.inc
@ -0,0 +1,10 @@
+	;# Counterpart of program_read_dataset_sshash_init.inc: restore rbx, XOR the
+	;# register values saved on the stack into r8-r15 and release the 72-byte
+	;# save area.
+	mov rbx, qword ptr [rsp+64]
+	xor r8, qword ptr [rsp+56]
+	xor r9, qword ptr [rsp+48]
+	xor r10, qword ptr [rsp+40]
+	xor r11, qword ptr [rsp+32]
+	xor r12, qword ptr [rsp+24]
+	xor r13, qword ptr [rsp+16]
+	xor r14, qword ptr [rsp+8]
+	xor r15, qword ptr [rsp+0]
+	add rsp, 72
--- a/crypto/randomx/asm/program_read_dataset_sshash_init.inc
+++ b/crypto/randomx/asm/program_read_dataset_sshash_init.inc
@ -0,0 +1,17 @@
+	;# Reserve a 72-byte save area and store rbx and r8-r15 in it, then compute
+	;# a dataset block number in ebx. program_read_dataset_sshash_fin.inc
+	;# consumes the save area.
+	sub rsp, 72
+	mov qword ptr [rsp+64], rbx
+	mov qword ptr [rsp+56], r8
+	mov qword ptr [rsp+48], r9
+	mov qword ptr [rsp+40], r10
+	mov qword ptr [rsp+32], r11
+	mov qword ptr [rsp+24], r12
+	mov qword ptr [rsp+16], r13
+	mov qword ptr [rsp+8], r14
+	mov qword ptr [rsp+0], r15
+	ror rbp, 32                        ;# swap "ma" and "mx"
+	xor rbp, rax                       ;# modify "mx"
+	mov rbx, rbp                       ;# ebx = ma
+	;# shifting by 38 keeps the upper 32 bits of rbp divided by 64
+	shr rbx, 38
+	and ebx, RANDOMX_DATASET_BASE_MASK / 64 ;# ebx = Dataset block number
+	;# NOTE(review): the two commented instructions below look like placeholders
+	;# for code that is emitted elsewhere - confirm
+	;# add ebx, datasetOffset / 64
+	;# call 32768
--- a/crypto/randomx/asm/program_sshash_constants.inc
+++ b/crypto/randomx/asm/program_sshash_constants.inc
@ -0,0 +1,24 @@
+	;# Eight 64-bit constants, each stored as eight bytes in little-endian
+	;# order. The comment above each byte list gives the decimal value.
+r0_mul:
+	;#/ 6364136223846793005
+	db 45, 127, 149, 76, 45, 244, 81, 88
+r1_add:
+	;#/ 9298411001130361340
+	db 252, 161, 245, 89, 138, 151, 10, 129
+r2_add:
+	;#/ 12065312585734608966
+	db 70, 216, 194, 56, 223, 153, 112, 167
+r3_add:
+	;#/ 9306329213124626780
+	db 92, 73, 34, 191, 28, 185, 38, 129
+r4_add:
+	;#/ 5281919268842080866
+	db 98, 138, 159, 23, 151, 37, 77, 73
+r5_add:
+	;#/ 10536153434571861004
+	db 12, 236, 170, 206, 185, 239, 55, 146
+r6_add:
+	;#/ 3398623926847679864
+	db 120, 45, 230, 108, 116, 86, 42, 47
+r7_add:
+	;#/ 9549104520008361294
+	db 78, 229, 44, 182, 247, 59, 133, 132
--- a/crypto/randomx/asm/program_sshash_load.inc
+++ b/crypto/randomx/asm/program_sshash_load.inc
@ -0,0 +1,8 @@
+	;# XOR the 64 bytes at the address in rbx into r8-r15, one quadword each
+	xor r8, qword ptr [rbx+0]
+	xor r9, qword ptr [rbx+8]
+	xor r10, qword ptr [rbx+16]
+	xor r11, qword ptr [rbx+24]
+	xor r12, qword ptr [rbx+32]
+	xor r13, qword ptr [rbx+40]
+	xor r14, qword ptr [rbx+48]
+	xor r15, qword ptr [rbx+56]
--- a/crypto/randomx/asm/program_sshash_prefetch.inc
+++ b/crypto/randomx/asm/program_sshash_prefetch.inc
@ -0,0 +1,4 @@
+	;# Turn the item number in rbx into an address and prefetch it:
+	;# rbx = rdi + (rbx AND RANDOMX_CACHE_MASK) * 64
+	and rbx, RANDOMX_CACHE_MASK
+	shl rbx, 6
+	add rbx, rdi
+	prefetchnta byte ptr [rbx]
--- a/crypto/randomx/asm/program_xmm_constants.inc
+++ b/crypto/randomx/asm/program_xmm_constants.inc
@ -0,0 +1,6 @@
+	;# Three 16-byte constants, each made of two identical 64-bit lanes stored
+	;# in little-endian byte order.
+	;# mantissaMask: every lane has its low 56 bits set and its top byte clear
+mantissaMask:
+	db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0
+	;# exp240: all bytes are zero in this file
+	;# NOTE(review): presumably a placeholder that is filled in elsewhere - confirm
+exp240:
+	db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	;# scaleMask: every lane has only bits 52-55 and bit 63 set
+scaleMask:
+	db 0, 0, 0, 0, 0, 0, 240, 128, 0, 0, 0, 0, 0, 0, 240, 128
--- a/crypto/randomx/asm/randomx_reciprocal.inc
+++ b/crypto/randomx/asm/randomx_reciprocal.inc
@ -0,0 +1,7 @@
+	;# Computes rax = floor(2^(64+k) / rcx), where k is the index of the highest
+	;# set bit of the divisor passed in rcx. Clobbers rcx, rdx and r8.
+	;# The div instruction faults when the divisor is zero or when the quotient
+	;# does not fit in 64 bits, which is the case for a divisor that is a power
+	;# of two, so callers must not pass such values.
+	mov edx, 1
+	mov r8, rcx
+	xor eax, eax
+	;# rcx = k, then rdx:rax = 2^(64+k)
+	bsr rcx, rcx
+	shl rdx, cl
+	div r8
+	ret
--- a/crypto/randomx/assembly_generator_x86.cpp
+++ b/crypto/randomx/assembly_generator_x86.cpp
@ -0,0 +1,611 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <climits>
+#include "assembly_generator_x86.hpp"
+#include "common.hpp"
+#include "reciprocal.h"
+#include "program.hpp"
+#include "superscalar.hpp"
+
+namespace randomx {
+
+	// x86-64 register names used in the generated listing, indexed by the
+	// instruction's src/dst register number.
+	// Integer registers 0..7 map to r8..r15 (64-bit and 32-bit views).
+	static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
+	static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
+	// Floating-point groups: regFE spans xmm0-7 (used by FSWAP_R), regF is
+	// xmm0-3 (FADD/FSUB/FSCAL destinations), regE is xmm4-7 (FMUL/FDIV/FSQRT
+	// destinations), regA is xmm8-11 (source operand of FADD_R/FSUB_R/FMUL_R).
+	static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
+	static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" };
+	static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" };
+	static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" };
+
+	// Scratch register for values converted from memory (FADD_M/FSUB_M/FDIV_M).
+	static const char* tempRegx = "xmm12";
+	// Masks applied to the divisor in FDIV_M (and, then or).
+	static const char* mantissaMaskReg = "xmm13";
+	static const char* exponentMaskReg = "xmm14";
+	// XORed into the destination by FSCAL_R.
+	static const char* scaleMaskReg = "xmm15";
+	// NOTE(review): regIc/regIc32/regIc8 are not referenced by any handler
+	// visible in this part of the file -- confirm they are still needed.
+	static const char* regIc = "rbx";
+	static const char* regIc32 = "ebx";
+	static const char* regIc8 = "bl";
+	// Base register of every scratchpad memory operand.
+	static const char* regScratchpadAddr = "rsi";
+
+	// Regenerates the listing for a whole program: resets the per-register
+	// "last written by" bookkeeping and the text buffer, then emits a
+	// `randomx_isn_<n>:` label followed by the code for each instruction.
+	// The src/dst fields of every instruction are reduced modulo
+	// RegistersCount in place before dispatch.
+	void AssemblyGeneratorX86::generateProgram(Program& prog) {
+		// -1 means "not written yet"; h_CBRANCH adds 1 to obtain its jump target.
+		for (int& lastWriter : registerUsage) {
+			lastWriter = -1;
+		}
+		asmCode.str(std::string()); //clear
+		unsigned index = 0;
+		while (index < prog.getSize()) {
+			Instruction& current = prog(index);
+			asmCode << "randomx_isn_" << index << ":" << std::endl;
+			current.src %= RegistersCount;
+			current.dst %= RegistersCount;
+			generateCode(current, index);
+			++index;
+		}
+	}
+
+	// Replaces the text buffer with an assembly listing of a superscalar
+	// program. Register index n is printed as the x86 register regR[n]
+	// (r8..r15). Immediates of the add/xor-constant forms are printed as
+	// signed 32-bit values. An opcode outside the listed cases hits UNREACHABLE.
+	// When RANDOMX_ALIGN is defined, an `ALIGN 16` directive and padding
+	// no-ops after the C8/C9 forms are emitted as well.
+	void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) {
+		asmCode.str(std::string()); //clear
+#ifdef RANDOMX_ALIGN
+		asmCode << "ALIGN 16" << std::endl;
+#endif
+		for (unsigned i = 0; i < prog.getSize(); ++i) {
+			Instruction& instr = prog(i);
+			switch ((SuperscalarInstructionType)instr.opcode)
+			{
+			case SuperscalarInstructionType::ISUB_R:
+				asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
+				break;
+			case SuperscalarInstructionType::IXOR_R:
+				asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
+				break;
+			case SuperscalarInstructionType::IADD_RS:
+				// dst += src << shift, expressed as lea with a scale factor
+				asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl;
+				break;
+			case SuperscalarInstructionType::IMUL_R:
+				asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
+				break;
+			case SuperscalarInstructionType::IROR_C:
+				asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl;
+				break;
+			case SuperscalarInstructionType::IADD_C7:
+				asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+				break;
+			case SuperscalarInstructionType::IXOR_C7:
+				asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+				break;
+			case SuperscalarInstructionType::IADD_C8:
+				asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+#ifdef RANDOMX_ALIGN
+				asmCode << "nop" << std::endl;
+#endif
+				break;
+			case SuperscalarInstructionType::IXOR_C8:
+				asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+#ifdef RANDOMX_ALIGN
+				asmCode << "nop" << std::endl;
+#endif
+				break;
+			case SuperscalarInstructionType::IADD_C9:
+				asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+#ifdef RANDOMX_ALIGN
+				asmCode << "xchg ax, ax ;nop" << std::endl;
+#endif
+				break;
+			case SuperscalarInstructionType::IXOR_C9:
+				asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
+#ifdef RANDOMX_ALIGN
+				asmCode << "xchg ax, ax ;nop" << std::endl;
+#endif
+				break;
+			case SuperscalarInstructionType::IMULH_R:
+				// unsigned high half: mul leaves the upper 64 bits in rdx
+				asmCode << "mov rax, " << regR[instr.dst] << std::endl;
+				asmCode << "mul " << regR[instr.src] << std::endl;
+				asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
+				break;
+			case SuperscalarInstructionType::ISMULH_R:
+				// signed high half: one-operand imul leaves the upper 64 bits in rdx
+				asmCode << "mov rax, " << regR[instr.dst] << std::endl;
+				asmCode << "imul " << regR[instr.src] << std::endl;
+				asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
+				break;
+			case SuperscalarInstructionType::IMUL_RCP:
+				// the reciprocal is printed as a signed 64-bit literal
+				asmCode << "mov rax, " << (int64_t)randomx_reciprocal(instr.getImm32()) << std::endl;
+				asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl;
+				break;
+			default:
+				UNREACHABLE;
+			}
+		}
+	}
+
+	// Replaces the text buffer with a self-contained C translation unit for
+	// a superscalar program. The output consists of:
+	//   1. a prelude defining mulh/smulh/rotr helpers (via __int128 where
+	//      available, MSVC intrinsics otherwise) and an #error if any helper
+	//      is missing;
+	//   2. `void superScalar(uint64_t r[8])`, which copies r[0..7] into locals
+	//      named r8..r15, applies one C statement per instruction and copies
+	//      the locals back.
+	// An opcode outside the listed cases hits UNREACHABLE.
+	void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) {
+		asmCode.str(std::string()); //clear
+		asmCode << "#include <stdint.h>" << std::endl;
+		asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl;
+		asmCode << "	static inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
+		asmCode << "		return ((unsigned __int128)a * b) >> 64;" << std::endl;
+		asmCode << "	}" << std::endl;
+		asmCode << "	static inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
+		asmCode << "		return ((__int128)a * b) >> 64;" << std::endl;
+		asmCode << "	}" << std::endl;
+		asmCode << "	#define HAVE_MULH" << std::endl;
+		asmCode << "	#define HAVE_SMULH" << std::endl;
+		asmCode << "#endif" << std::endl;
+		asmCode << "#if defined(_MSC_VER)" << std::endl;
+		asmCode << "	#define HAS_VALUE(X) X ## 0" << std::endl;
+		asmCode << "	#define EVAL_DEFINE(X) HAS_VALUE(X)" << std::endl;
+		asmCode << "	#include <intrin.h>" << std::endl;
+		asmCode << "	#include <stdlib.h>" << std::endl;
+		asmCode << "	static __inline uint64_t rotr(uint64_t x , int c) {" << std::endl;
+		asmCode << "		return _rotr64(x, c);" << std::endl;
+		asmCode << "	}" << std::endl;
+		asmCode << "	#define HAVE_ROTR" << std::endl;
+		asmCode << "	#if EVAL_DEFINE(__MACHINEARM64_X64(1))" << std::endl;
+		asmCode << "		static __inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
+		asmCode << "			return __umulh(a, b);" << std::endl;
+		asmCode << "		}" << std::endl;
+		asmCode << "		#define HAVE_MULH" << std::endl;
+		asmCode << "	#endif" << std::endl;
+		asmCode << "	#if EVAL_DEFINE(__MACHINEX64(1))" << std::endl;
+		asmCode << "		static __inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
+		asmCode << "			int64_t hi;" << std::endl;
+		asmCode << "			_mul128(a, b, &hi);" << std::endl;
+		asmCode << "			return hi;" << std::endl;
+		asmCode << "		}" << std::endl;
+		asmCode << "		#define HAVE_SMULH" << std::endl;
+		asmCode << "	#endif" << std::endl;
+		asmCode << "#endif" << std::endl;
+		// NOTE(review): the portable rotr emitted below shifts by (64 - b), which
+		// is undefined in C for b == 0 -- confirm the generator never produces a
+		// zero rotation count for IROR_C.
+		asmCode << "#ifndef HAVE_ROTR" << std::endl;
+		asmCode << "	static inline uint64_t rotr(uint64_t a, int b) {" << std::endl;
+		asmCode << "		return (a >> b) | (a << (64 - b));" << std::endl;
+		asmCode << "	}" << std::endl;
+		asmCode << "	#define HAVE_ROTR" << std::endl;
+		asmCode << "#endif" << std::endl;
+		asmCode << "#if !defined(HAVE_MULH) || !defined(HAVE_SMULH) || !defined(HAVE_ROTR)" << std::endl;
+		asmCode << "	#error \"Required functions are not defined\"" << std::endl;
+		asmCode << "#endif" << std::endl;
+		asmCode << "void superScalar(uint64_t r[8]) {" << std::endl;
+		// Locals reuse the x86 register names so regR[] can name them directly.
+		asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl;
+		for (unsigned i = 0; i < prog.getSize(); ++i) {
+			Instruction& instr = prog(i);
+			switch ((SuperscalarInstructionType)instr.opcode)
+			{
+			case SuperscalarInstructionType::ISUB_R:
+				asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl;
+				break;
+			case SuperscalarInstructionType::IXOR_R:
+				asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl;
+				break;
+			case SuperscalarInstructionType::IADD_RS:
+				asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift())) << ";" << std::endl;
+				break;
+			case SuperscalarInstructionType::IMUL_R:
+				asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl;
+				break;
+			case SuperscalarInstructionType::IROR_C:
+				asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl;
+				break;
+			// The C7/C8/C9 variants produce the same C statement here.
+			case SuperscalarInstructionType::IADD_C7:
+			case SuperscalarInstructionType::IADD_C8:
+			case SuperscalarInstructionType::IADD_C9:
+				asmCode << regR[instr.dst] << " += " << (int32_t)instr.getImm32() << ";" << std::endl;
+				break;
+			case SuperscalarInstructionType::IXOR_C7:
+			case SuperscalarInstructionType::IXOR_C8:
+			case SuperscalarInstructionType::IXOR_C9:
+				asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl;
+				break;
+			case SuperscalarInstructionType::IMULH_R:
+				asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
+				break;
+			case SuperscalarInstructionType::ISMULH_R:
+				asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
+				break;
+			case SuperscalarInstructionType::IMUL_RCP:
+				// the reciprocal is printed as a signed 64-bit literal
+				asmCode << regR[instr.dst] << " *= " << (int64_t)randomx_reciprocal(instr.getImm32()) << ";" << std::endl;
+				break;
+			default:
+				UNREACHABLE;
+			}
+		}
+		asmCode << "r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;" << std::endl;
+		asmCode << "}" << std::endl;
+	}
+
+	// When tracing is enabled, emits a push of the instruction's integer
+	// destination register; otherwise emits nothing.
+	void AssemblyGeneratorX86::traceint(Instruction& instr) {
+		if (!trace)
+			return;
+		asmCode << "\tpush " << regR[instr.dst] << std::endl;
+	}
+
+	// Trace hook for floating-point instructions: when tracing is enabled,
+	// emits a push of the constant 0. The instruction itself is not inspected.
+	void AssemblyGeneratorX86::traceflt(Instruction& instr) {
+		if (!trace)
+			return;
+		asmCode << "\tpush 0" << std::endl;
+	}
+
+	// Trace hook for instructions without a traced result: when tracing is
+	// enabled, emits a push of the constant 0. The instruction is not inspected.
+	void AssemblyGeneratorX86::tracenop(Instruction& instr) {
+		if (!trace)
+			return;
+		asmCode << "\tpush 0" << std::endl;
+	}
+
+	// Writes the instruction as an assembly comment, then calls the handler
+	// registered for its opcode in the `engine` table.
+	void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
+		asmCode << "\t; " << instr;
+		const InstructionGenerator handler = engine[instr.opcode];
+		(this->*handler)(instr, i);
+	}
+
+	// Emits the scratchpad address computation for a memory source operand:
+	// reg = (32-bit src register + signed imm32), then masked with the L1 or
+	// L2 scratchpad mask depending on the instruction's mod.mem field.
+	// `reg` defaults to "eax"; the default is declared here, on the definition.
+	void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
+		const int32_t displacement = (int32_t)instr.getImm32();
+		asmCode << "\tlea " << reg << ", [" << regR32[instr.src];
+		// showpos supplies the explicit '+'/'-' between base register and displacement
+		asmCode << std::showpos << displacement << std::noshowpos << "]" << std::endl;
+		const auto levelMask = (instr.getModMem()) ? ScratchpadL1Mask : ScratchpadL2Mask;
+		asmCode << "\tand " << reg << ", " << levelMask << std::endl;
+	}
+
+	// Emits the scratchpad address computation for a store: eax = 32-bit dst
+	// register + signed imm32, then masked. The mask is the L3 mask when
+	// mod.cond >= StoreL3Condition, otherwise L1 or L2 by mod.mem; its low
+	// bits are cleared so the address is a multiple of `maskAlign`
+	// (default 8, declared here on the definition).
+	void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
+		const int32_t displacement = (int32_t)instr.getImm32();
+		asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << displacement << std::noshowpos << "]" << std::endl;
+		int levelMask = ScratchpadL3Mask;
+		if (instr.getModCond() < StoreL3Condition) {
+			levelMask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask;
+		}
+		const int alignedMask = levelMask & (-maskAlign);
+		asmCode << "\tand eax, " << alignedMask << std::endl;
+	}
+
+	// Returns the instruction's imm32 masked with the L3 scratchpad mask.
+	// The handlers use it as an absolute scratchpad offset when src == dst.
+	int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
+		const int32_t immediate = (int32_t)instr.getImm32();
+		return immediate & ScratchpadL3Mask;
+	}
+
+	// IADD_RS: dst += src << mod.shift, emitted as a single lea. For the
+	// register equal to RegisterNeedsDisplacement the signed imm32 is
+	// appended as a displacement. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift()));
+		if (instr.dst == RegisterNeedsDisplacement) {
+			asmCode << std::showpos << (int32_t)instr.getImm32() << std::noshowpos;
+		}
+		asmCode << "]" << std::endl;
+		traceint(instr);
+	}
+
+	// IADD_M: dst += 64-bit scratchpad word. With src != dst the address is
+	// computed into eax from the source register; with src == dst the masked
+	// immediate is used as the offset. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		const bool immediateAddress = (instr.src == instr.dst);
+		if (!immediateAddress)
+			genAddressReg(instr);
+		asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+";
+		if (immediateAddress)
+			asmCode << genAddressImm(instr);
+		else
+			asmCode << "rax";
+		asmCode << "]" << std::endl;
+		traceint(instr);
+	}
+
+	// ISUB_R: dst -= src, or dst -= signed imm32 when src == dst.
+	// Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		asmCode << "\tsub " << regR[instr.dst] << ", ";
+		if (instr.src == instr.dst)
+			asmCode << (int32_t)instr.getImm32();
+		else
+			asmCode << regR[instr.src];
+		asmCode << std::endl;
+		traceint(instr);
+	}
+
+	// ISUB_M: dst -= 64-bit scratchpad word. With src != dst the address is
+	// computed into eax from the source register; with src == dst the masked
+	// immediate is used as the offset. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		const bool immediateAddress = (instr.src == instr.dst);
+		if (!immediateAddress)
+			genAddressReg(instr);
+		asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+";
+		if (immediateAddress)
+			asmCode << genAddressImm(instr);
+		else
+			asmCode << "rax";
+		asmCode << "]" << std::endl;
+		traceint(instr);
+	}
+
+	// IMUL_R: dst *= src, or dst *= signed imm32 when src == dst.
+	// Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		asmCode << "\timul " << regR[instr.dst] << ", ";
+		if (instr.src == instr.dst)
+			asmCode << (int32_t)instr.getImm32();
+		else
+			asmCode << regR[instr.src];
+		asmCode << std::endl;
+		traceint(instr);
+	}
+
+	// IMUL_M: dst *= 64-bit scratchpad word. With src != dst the address is
+	// computed into eax from the source register; with src == dst the masked
+	// immediate is used as the offset. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		const bool immediateAddress = (instr.src == instr.dst);
+		if (!immediateAddress)
+			genAddressReg(instr);
+		asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+";
+		if (immediateAddress)
+			asmCode << genAddressImm(instr);
+		else
+			asmCode << "rax";
+		asmCode << "]" << std::endl;
+		traceint(instr);
+	}
+
+	// IMULH_R: dst = upper 64 bits of the unsigned product dst * src.
+	// `mul` leaves that half in rdx. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		const char* target = regR[instr.dst];
+		asmCode << "\tmov rax, " << target << std::endl
+			<< "\tmul " << regR[instr.src] << std::endl
+			<< "\tmov " << target << ", rdx" << std::endl;
+		traceint(instr);
+	}
+
+	// IMULH_M: dst = upper 64 bits of the unsigned product of dst and a
+	// scratchpad word. The address goes through ecx because rax is needed as
+	// the implicit multiplicand. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		const bool immediateAddress = (instr.src == instr.dst);
+		if (!immediateAddress)
+			genAddressReg(instr, "ecx");
+		asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
+		asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+";
+		if (immediateAddress)
+			asmCode << genAddressImm(instr);
+		else
+			asmCode << "rcx";
+		asmCode << "]" << std::endl;
+		asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
+		traceint(instr);
+	}
+
+	// ISMULH_R: dst = upper 64 bits of the signed product dst * src.
+	// One-operand `imul` leaves that half in rdx. Records `i` as the last
+	// writer of dst.
+	void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		const char* target = regR[instr.dst];
+		asmCode << "\tmov rax, " << target << std::endl
+			<< "\timul " << regR[instr.src] << std::endl
+			<< "\tmov " << target << ", rdx" << std::endl;
+		traceint(instr);
+	}
+
+	// ISMULH_M: dst = upper 64 bits of the signed product of dst and a
+	// scratchpad word. The address goes through ecx because rax is needed as
+	// the implicit multiplicand. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		const bool immediateAddress = (instr.src == instr.dst);
+		if (!immediateAddress)
+			genAddressReg(instr, "ecx");
+		asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
+		asmCode << "\timul qword ptr [" << regScratchpadAddr << "+";
+		if (immediateAddress)
+			asmCode << genAddressImm(instr);
+		else
+			asmCode << "rcx";
+		asmCode << "]" << std::endl;
+		asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
+		traceint(instr);
+	}
+
+	// INEG_R: dst = -dst. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) {
+		const char* target = regR[instr.dst];
+		registerUsage[instr.dst] = i;
+		asmCode << "\tneg " << target << std::endl;
+		traceint(instr);
+	}
+
+	// IXOR_R: dst ^= src, or dst ^= signed imm32 when src == dst.
+	// Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		asmCode << "\txor " << regR[instr.dst] << ", ";
+		if (instr.src == instr.dst)
+			asmCode << (int32_t)instr.getImm32();
+		else
+			asmCode << regR[instr.src];
+		asmCode << std::endl;
+		traceint(instr);
+	}
+
+	// IXOR_M: dst ^= 64-bit scratchpad word. With src != dst the address is
+	// computed into eax from the source register; with src == dst the masked
+	// immediate is used as the offset. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		const bool immediateAddress = (instr.src == instr.dst);
+		if (!immediateAddress)
+			genAddressReg(instr);
+		asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+";
+		if (immediateAddress)
+			asmCode << genAddressImm(instr);
+		else
+			asmCode << "rax";
+		asmCode << "]" << std::endl;
+		traceint(instr);
+	}
+
+	// IROR_R: rotate dst right by the low bits of src (through cl), or by
+	// imm32 & 63 when src == dst. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		const char* target = regR[instr.dst];
+		if (instr.src == instr.dst) {
+			asmCode << "\tror " << target << ", " << (instr.getImm32() & 63) << std::endl;
+		}
+		else {
+			// variable rotate counts must be in cl
+			asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
+			asmCode << "\tror " << target << ", cl" << std::endl;
+		}
+		traceint(instr);
+	}
+
+	// IROL_R: rotate dst left by the low bits of src (through cl), or by
+	// imm32 & 63 when src == dst. Records `i` as the last writer of dst.
+	void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		const char* target = regR[instr.dst];
+		if (instr.src == instr.dst) {
+			asmCode << "\trol " << target << ", " << (instr.getImm32() & 63) << std::endl;
+		}
+		else {
+			// variable rotate counts must be in cl
+			asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
+			asmCode << "\trol " << target << ", cl" << std::endl;
+		}
+		traceint(instr);
+	}
+
+	// IMUL_RCP: dst *= randomx_reciprocal(imm32). When imm32 is zero or a
+	// power of two nothing is emitted (only the no-op trace hook runs) and
+	// the register bookkeeping is left untouched.
+	void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
+		const uint32_t divisor = instr.getImm32();
+		if (isZeroOrPowerOf2(divisor)) {
+			tracenop(instr);
+			return;
+		}
+		registerUsage[instr.dst] = i;
+		asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;
+		asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl;
+		traceint(instr);
+	}
+
+	// ISWAP_R: exchange dst and src. Both registers are recorded as written
+	// by instruction `i`. With src == dst nothing is emitted (only the no-op
+	// trace hook runs).
+	void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) {
+		if (instr.src == instr.dst) {
+			tracenop(instr);
+			return;
+		}
+		registerUsage[instr.dst] = i;
+		registerUsage[instr.src] = i;
+		asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
+		traceint(instr);
+	}
+
+	// FSWAP_R: swap the two 64-bit halves of the destination xmm register
+	// (any of xmm0-7) using shufpd with selector 1.
+	void AssemblyGeneratorX86::h_FSWAP_R(Instruction& instr, int i) {
+		const char* target = regFE[instr.dst];
+		asmCode << "\tshufpd " << target << ", " << target << ", 1" << std::endl;
+		traceflt(instr);
+	}
+
+	// FADD_R: F[dst] += A[src] (packed double). Both indices are reduced
+	// modulo RegisterCountFlt in place on the instruction.
+	void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
+		const char* accumulator = regF[instr.dst];
+		const char* operand = regA[instr.src];
+		asmCode << "\taddpd " << accumulator << ", " << operand << std::endl;
+		traceflt(instr);
+	}
+
+	// FADD_M: F[dst] += two 32-bit integers loaded from the scratchpad and
+	// converted to doubles in the temporary xmm register. dst is reduced
+	// modulo RegisterCountFlt in place on the instruction.
+	void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		genAddressReg(instr);
+		const char* accumulator = regF[instr.dst];
+		asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl
+			<< "\taddpd " << accumulator << ", " << tempRegx << std::endl;
+		traceflt(instr);
+	}
+
+	// FSUB_R: F[dst] -= A[src] (packed double). Both indices are reduced
+	// modulo RegisterCountFlt in place on the instruction.
+	void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
+		const char* accumulator = regF[instr.dst];
+		const char* operand = regA[instr.src];
+		asmCode << "\tsubpd " << accumulator << ", " << operand << std::endl;
+		traceflt(instr);
+	}
+
+	// FSUB_M: F[dst] -= two 32-bit integers loaded from the scratchpad and
+	// converted to doubles in the temporary xmm register. dst is reduced
+	// modulo RegisterCountFlt in place on the instruction.
+	void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		genAddressReg(instr);
+		const char* accumulator = regF[instr.dst];
+		asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl
+			<< "\tsubpd " << accumulator << ", " << tempRegx << std::endl;
+		traceflt(instr);
+	}
+
+	// FSCAL_R: XOR F[dst] with the scale-mask register. dst is reduced
+	// modulo RegisterCountFlt in place on the instruction.
+	void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		const char* target = regF[instr.dst];
+		asmCode << "\txorps " << target << ", " << scaleMaskReg << std::endl;
+		traceflt(instr);
+	}
+
+	// FMUL_R: E[dst] *= A[src] (packed double). Both indices are reduced
+	// modulo RegisterCountFlt in place on the instruction.
+	void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
+		const char* accumulator = regE[instr.dst];
+		const char* operand = regA[instr.src];
+		asmCode << "\tmulpd " << accumulator << ", " << operand << std::endl;
+		traceflt(instr);
+	}
+
+	// FDIV_M: E[dst] /= divisor, where the divisor is two 32-bit integers
+	// loaded from the scratchpad, converted to doubles, ANDed with the
+	// mantissa mask and ORed with the exponent mask. dst is reduced modulo
+	// RegisterCountFlt in place on the instruction.
+	void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		genAddressReg(instr);
+		const char* target = regE[instr.dst];
+		asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl
+			<< "\tandps " << tempRegx << ", " << mantissaMaskReg << std::endl
+			<< "\torps " << tempRegx << ", " << exponentMaskReg << std::endl
+			<< "\tdivpd " << target << ", " << tempRegx << std::endl;
+		traceflt(instr);
+	}
+
+	// FSQRT_R: E[dst] = sqrt(E[dst]) (packed double). dst is reduced modulo
+	// RegisterCountFlt in place on the instruction.
+	void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		const char* target = regE[instr.dst];
+		asmCode << "\tsqrtpd " << target << ", " << target << std::endl;
+		traceflt(instr);
+	}
+
+	// CFROUND: load MXCSR from a value derived from the source register.
+	// The register is rotated left so that two of its bits land on bits
+	// 13-14 (mask 24576 = 0x6000), everything else is cleared, the fixed
+	// bits 40896 (0x9FC0) are ORed in, and the result is loaded through a
+	// stack slot with ldmxcsr.
+	void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
+		const int rotate = (13 - (instr.getImm32() & 63)) & 63;
+		asmCode << "\tmov rax, " << regR[instr.src] << std::endl;
+		if (rotate != 0) {
+			asmCode << "\trol rax, " << rotate << std::endl;
+		}
+		asmCode << "\tand eax, 24576" << std::endl
+			<< "\tor eax, 40896" << std::endl
+			<< "\tpush rax" << std::endl
+			<< "\tldmxcsr dword ptr [rsp]" << std::endl
+			<< "\tpop rax" << std::endl;
+		tracenop(instr);
+	}
+
+	// CBRANCH: add a constant to the dst register and jump backwards when a
+	// group of its bits is zero.
+	// The jump target is the instruction following the last one recorded as
+	// writing dst (registerUsage holds -1 when there is none, giving target 0).
+	// The added constant is imm32 with bit `shift` forced to 1 and bit
+	// `shift - 1` forced to 0; the tested bits are ConditionMask << shift.
+	// Afterwards every register is recorded as written by instruction `i`.
+	void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) {
+		int reg = instr.dst;
+		int target = registerUsage[reg] + 1;
+		int shift = instr.getModCond() + ConditionOffset;
+		int32_t imm = instr.getImm32() | (1L << shift);
+		// guard keeps (shift - 1) from going negative when both terms are zero
+		if (ConditionOffset > 0 || shift > 0)
+			imm &= ~(1L << (shift - 1));
+		asmCode << "\tadd " << regR[reg] << ", " << imm << std::endl;
+		asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl;
+		asmCode << "\tjz randomx_isn_" << target << std::endl;
+		//mark all registers as used
+		for (unsigned j = 0; j < RegistersCount; ++j) {
+			registerUsage[j] = i;
+		}
+	}
+
+	// ISTORE: store the 64-bit source register to the scratchpad at the
+	// address derived from the destination register (computed into eax with
+	// the default 8-byte alignment).
+	void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) {
+		genAddressRegDst(instr);
+		const char* value = regR[instr.src];
+		asmCode << "\tmov qword ptr [" << regScratchpadAddr << "+rax], " << value << std::endl;
+		tracenop(instr);
+	}
+
+	// NOP: emits a single `nop`, followed by the no-op trace hook.
+	void AssemblyGeneratorX86::h_NOP(Instruction& instr, int i) {
+		static const char* const mnemonic = "\tnop";
+		asmCode << mnemonic << std::endl;
+		tracenop(instr);
+	}
+
+	// Opcode dispatch table: generateCode() indexes it with the instruction's
+	// opcode byte to find the handler to call.
+	// NOTE(review): REPN and WT come from instruction_weights.hpp, which is
+	// included here inside the namespace. Presumably INST_HANDLE(x) repeats
+	// the handler pointer WT(x) times so that the entries fill all 256 slots
+	// in the order listed -- confirm against that header.
+#include "instruction_weights.hpp"
+#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
+
+	InstructionGenerator AssemblyGeneratorX86::engine[256] = {
+		INST_HANDLE(IADD_RS)
+		INST_HANDLE(IADD_M)
+		INST_HANDLE(ISUB_R)
+		INST_HANDLE(ISUB_M)
+		INST_HANDLE(IMUL_R)
+		INST_HANDLE(IMUL_M)
+		INST_HANDLE(IMULH_R)
+		INST_HANDLE(IMULH_M)
+		INST_HANDLE(ISMULH_R)
+		INST_HANDLE(ISMULH_M)
+		INST_HANDLE(IMUL_RCP)
+		INST_HANDLE(INEG_R)
+		INST_HANDLE(IXOR_R)
+		INST_HANDLE(IXOR_M)
+		INST_HANDLE(IROR_R)
+		INST_HANDLE(IROL_R)
+		INST_HANDLE(ISWAP_R)
+		INST_HANDLE(FSWAP_R)
+		INST_HANDLE(FADD_R)
+		INST_HANDLE(FADD_M)
+		INST_HANDLE(FSUB_R)
+		INST_HANDLE(FSUB_M)
+		INST_HANDLE(FSCAL_R)
+		INST_HANDLE(FMUL_R)
+		INST_HANDLE(FDIV_M)
+		INST_HANDLE(FSQRT_R)
+		INST_HANDLE(CBRANCH)
+		INST_HANDLE(CFROUND)
+		INST_HANDLE(ISTORE)
+		INST_HANDLE(NOP)
+	};
+}
--- a/crypto/randomx/assembly_generator_x86.hpp
+++ b/crypto/randomx/assembly_generator_x86.hpp
@ -0,0 +1,94 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include "common.hpp"
+#include <sstream>
+
+namespace randomx {
+
+	// Forward declarations: these types are only used by reference in this header.
+	class Program;
+	class SuperscalarProgram;
+	class AssemblyGeneratorX86;
+	class Instruction;
+
+	// Pointer to a per-opcode handler: takes the instruction and its index
+	// within the program.
+	typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int);
+
+	// Produces text listings of RandomX programs: x86-64 assembly for regular
+	// and superscalar programs, and C source for superscalar programs.
+	// Each generate* call replaces the previously generated text; printCode
+	// writes the current text to a stream.
+	class AssemblyGeneratorX86 {
+	public:
+		void generateProgram(Program& prog);
+		void generateAsm(SuperscalarProgram& prog);
+		void generateC(SuperscalarProgram& prog);
+		// Writes the generated text to `os`.
+		// A copy of the text is inserted instead of `asmCode.rdbuf()`:
+		// inserting a stream buffer sets failbit on `os` whenever the buffer
+		// yields no characters (nothing generated yet, or already printed
+		// once), which would silently disable the caller's stream.
+		void printCode(std::ostream& os) {
+			os << asmCode.str();
+		}
+	private:
+		// Address helpers; their default arguments are declared on the
+		// out-of-class definitions.
+		void genAddressReg(Instruction&, const char*);
+		void genAddressRegDst(Instruction&, int);
+		int32_t genAddressImm(Instruction&);
+		void generateCode(Instruction&, int);
+		void traceint(Instruction&);
+		void traceflt(Instruction&);
+		void tracenop(Instruction&);
+		// One handler per instruction type, dispatched through `engine`.
+		void h_IADD_RS(Instruction&, int);
+		void h_IADD_M(Instruction&, int);
+		void h_ISUB_R(Instruction&, int);
+		void h_ISUB_M(Instruction&, int);
+		void h_IMUL_R(Instruction&, int);
+		void h_IMUL_M(Instruction&, int);
+		void h_IMULH_R(Instruction&, int);
+		void h_IMULH_M(Instruction&, int);
+		void h_ISMULH_R(Instruction&, int);
+		void h_ISMULH_M(Instruction&, int);
+		void h_IMUL_RCP(Instruction&, int);
+		void h_INEG_R(Instruction&, int);
+		void h_IXOR_R(Instruction&, int);
+		void h_IXOR_M(Instruction&, int);
+		void h_IROR_R(Instruction&, int);
+		void h_IROL_R(Instruction&, int);
+		void h_ISWAP_R(Instruction&, int);
+		void h_FSWAP_R(Instruction&, int);
+		void h_FADD_R(Instruction&, int);
+		void h_FADD_M(Instruction&, int);
+		void h_FSUB_R(Instruction&, int);
+		void h_FSUB_M(Instruction&, int);
+		void h_FSCAL_R(Instruction&, int);
+		void h_FMUL_R(Instruction&, int);
+		void h_FDIV_M(Instruction&, int);
+		void h_FSQRT_R(Instruction&, int);
+		void h_CBRANCH(Instruction&, int);
+		void h_CFROUND(Instruction&, int);
+		void h_ISTORE(Instruction&, int);
+		void h_NOP(Instruction&, int);
+
+		// Handler lookup table indexed by opcode byte.
+		static InstructionGenerator engine[256];
+		// Accumulates the generated text.
+		std::stringstream asmCode;
+		// Index of the last instruction that wrote each integer register.
+		int registerUsage[RegistersCount];
+	};
+}
--- a/crypto/randomx/blake2/blake2-impl.h
+++ b/crypto/randomx/blake2/blake2-impl.h
@ -0,0 +1,76 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef PORTABLE_BLAKE2_IMPL_H
+#define PORTABLE_BLAKE2_IMPL_H
+
+#include <stdint.h>
+
+#include "endian.h"
+
+/* Reads six bytes from src, least-significant byte first, and returns them
+ * as a 48-bit value in the low bits of a uint64_t. */
+static FORCE_INLINE uint64_t load48(const void *src) {
+	const uint8_t *bytes = (const uint8_t *)src;
+	uint64_t value = 0;
+	unsigned i;
+	for (i = 0; i < 6; ++i) {
+		value |= (uint64_t)bytes[i] << (8 * i);
+	}
+	return value;
+}
+
+/* Writes the low 48 bits of w to dst as six bytes, least-significant byte
+ * first. */
+static FORCE_INLINE void store48(void *dst, uint64_t w) {
+	uint8_t *out = (uint8_t *)dst;
+	unsigned i;
+	for (i = 0; i < 6; ++i) {
+		out[i] = (uint8_t)(w >> (8 * i));
+	}
+}
+
+/* Rotates the 32-bit value w right by c bits (c is taken modulo 32).
+ * Both shift counts are masked: the unmasked form `w << (32 - c)` shifts by
+ * the full width when c == 0, which is undefined behaviour in C. Results for
+ * c in 1..31 are unchanged. */
+static FORCE_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
+	return (w >> (c & 31)) | (w << ((32 - c) & 31));
+}
+
+/* Rotates the 64-bit value w right by c bits (c is taken modulo 64).
+ * Both shift counts are masked: the unmasked form `w << (64 - c)` shifts by
+ * the full width when c == 0, which is undefined behaviour in C. Results for
+ * c in 1..63 are unchanged. */
+static FORCE_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
+	return (w >> (c & 63)) | (w << ((64 - c) & 63));
+}
+
+#endif
--- a/crypto/randomx/blake2/blake2.h
+++ b/crypto/randomx/blake2/blake2.h
@ -0,0 +1,116 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef PORTABLE_BLAKE2_H
+#define PORTABLE_BLAKE2_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <limits.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+	/* Size constants, in bytes, used as array lengths and argument limits
+	 * throughout the BLAKE2b code. */
+	enum blake2b_constant {
+		BLAKE2B_BLOCKBYTES = 128,   /* length of blake2b_state.buf (one input block) */
+		BLAKE2B_OUTBYTES = 64,      /* largest digest length accepted by blake2b_init */
+		BLAKE2B_KEYBYTES = 64,      /* largest key length accepted by blake2b_init_key */
+		BLAKE2B_SALTBYTES = 16,     /* length of blake2b_param.salt */
+		BLAKE2B_PERSONALBYTES = 16  /* length of blake2b_param.personal */
+	};
+
+	/* BLAKE2b parameter block. The struct is packed to 1-byte alignment so it
+	 * contains no padding; blake2b_init_param() reads it as raw bytes, eight
+	 * 64-bit words in total. The number in each trailing comment is the
+	 * cumulative size in bytes up to and including that field. */
+#pragma pack(push, 1)
+	typedef struct __blake2b_param {
+		uint8_t digest_length;                   /* 1 */
+		uint8_t key_length;                      /* 2 */
+		uint8_t fanout;                          /* 3 */
+		uint8_t depth;                           /* 4 */
+		uint32_t leaf_length;                    /* 8 */
+		uint64_t node_offset;                    /* 16 */
+		uint8_t node_depth;                      /* 17 */
+		uint8_t inner_length;                    /* 18 */
+		uint8_t reserved[14];                    /* 32 */
+		uint8_t salt[BLAKE2B_SALTBYTES];         /* 48 */
+		uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
+	} blake2b_param;
+#pragma pack(pop)
+
+	/* Streaming hash state shared by the blake2b_* functions. */
+	typedef struct __blake2b_state {
+		uint64_t h[8];                   /* chaining value; starts as IV XOR parameter block */
+		uint64_t t[2];                   /* byte counter: t[0] is the low word, t[1] receives the carry */
+		uint64_t f[2];                   /* finalization flags: f[0] marks the last block, f[1] the last node */
+		uint8_t buf[BLAKE2B_BLOCKBYTES]; /* input bytes not yet compressed */
+		unsigned buflen;                 /* number of valid bytes currently held in buf */
+		unsigned outlen;                 /* digest length, copied from the parameter block */
+		uint8_t last_node;               /* when non-zero, finalization also sets f[1] */
+	} blake2b_state;
+
+	/* Compile-time layout checks ("poor man's static_assert"): a false
+	 * condition makes the divisor !!(cond) equal to 0, and division by zero in
+	 * an enumerator's constant expression is rejected by the compiler.
+	 *  - blake2_size_check_0: bytes are 8 bits wide.
+	 *  - blake2_size_check_2: blake2b_param was not padded. With CHAR_BIT == 8
+	 *    the right-hand side sizeof(uint64_t) * CHAR_BIT evaluates to 64, the
+	 *    expected size of the parameter block in bytes. */
+	enum {
+		blake2_size_check_0 = 1 / !!(CHAR_BIT == 8),
+		blake2_size_check_2 =
+		1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT)
+	};
+
+	/* RandomX namespace: every public BLAKE2b identifier is renamed with a
+	 * randomx_ prefix through these macros, so the functions declared below
+	 * are actually named randomx_blake2b_*. */
+#define blake2b_init        randomx_blake2b_init
+#define blake2b_init_key    randomx_blake2b_init_key
+#define blake2b_init_param  randomx_blake2b_init_param
+#define blake2b_update      randomx_blake2b_update
+#define blake2b_final       randomx_blake2b_final
+#define blake2b             randomx_blake2b
+#define blake2b_long        randomx_blake2b_long
+
+	/* Streaming API. Each function returns 0 on success and -1 on failure. */
+	/* Start an unkeyed hash with a digest of outlen bytes (1..BLAKE2B_OUTBYTES). */
+	int blake2b_init(blake2b_state *S, size_t outlen);
+	/* Start a keyed hash; keylen must be in 1..BLAKE2B_KEYBYTES. */
+	int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
+		size_t keylen);
+	/* Start a hash from an explicit parameter block. */
+	int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
+	/* Absorb inlen bytes of input. */
+	int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
+	/* Finish the hash and write the digest to out; outlen must be at least
+	 * the digest length chosen at initialization. */
+	int blake2b_final(blake2b_state *S, void *out, size_t outlen);
+
+	/* Simple API: one-shot hash of in[0..inlen) into out; pass keylen == 0 for
+	 * an unkeyed hash. Returns 0 on success and -1 on failure. */
+	int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
+		const void *key, size_t keylen);
+
+	/* Argon2 Team - Begin Code */
+	/* Variable-length hash: produces outlen bytes, which may exceed
+	 * BLAKE2B_OUTBYTES. Returns a negative value on failure. */
+	int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
+	/* Argon2 Team - End Code */
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
--- a/crypto/randomx/blake2/blake2b.c
+++ b/crypto/randomx/blake2/blake2b.c
@ -0,0 +1,409 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+/* BLAKE2b initialization vector. blake2b_init0() copies it into the chaining
+ * value S->h, and blake2b_compress() uses it for words 8..15 of the working
+ * vector. */
+static const uint64_t blake2b_IV[8] = {
+	UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
+	UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
+	UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
+	UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
+
+/* Message schedule: row r lists the indices of the sixteen message words in
+ * the order round r of blake2b_compress() consumes them. There are twelve
+ * rows, one per round; rows 10 and 11 repeat rows 0 and 1. */
+static const unsigned int blake2b_sigma[12][16] = {
+	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+	{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
+	{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
+	{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
+	{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
+	{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
+	{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
+	{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
+	{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
+	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
+};
+
+/* Raise the "last node" finalization flag by setting every bit of f[1]. */
+static FORCE_INLINE void blake2b_set_lastnode(blake2b_state *S) {
+	S->f[1] = ~(uint64_t)0;
+}
+
+/* Raise the "last block" finalization flag (all bits of f[0]); when the state
+ * is marked as a last node, raise the "last node" flag as well. */
+static FORCE_INLINE void blake2b_set_lastblock(blake2b_state *S) {
+	S->f[0] = ~(uint64_t)0;
+	if (S->last_node != 0) {
+		blake2b_set_lastnode(S);
+	}
+}
+
+/* Add inc to the two-word byte counter: t[0] holds the low word and t[1] is
+ * bumped by one whenever the addition to t[0] wraps around. */
+static FORCE_INLINE void blake2b_increment_counter(blake2b_state *S,
+	uint64_t inc) {
+	S->t[0] += inc;
+	if (S->t[0] < inc) {
+		/* unsigned wrap-around happened: carry into the high word */
+		S->t[1] += 1;
+	}
+}
+
+/* Put S into a state that blake2b_update() and blake2b_final() reject.
+ *
+ * This function is reached on error paths where S may never have been
+ * initialised (for example blake2b_init() called with an invalid outlen on a
+ * fresh stack object). The state is therefore zeroed first, so that
+ * blake2b_set_lastblock() does not read an indeterminate last_node field.
+ * The memset is for definedness only; it is not a secure wipe (the original
+ * clear_internal_memory() call is not available in this vendored copy). */
+static FORCE_INLINE void blake2b_invalidate_state(blake2b_state *S) {
+	memset(S, 0, sizeof(*S));
+	blake2b_set_lastblock(S); /* f[0] != 0 makes later update/final calls fail */
+}
+
+/* Reset every field of S to zero and load the initialization vector into the
+ * chaining value. */
+static FORCE_INLINE void blake2b_init0(blake2b_state *S) {
+	unsigned int word;
+
+	memset(S, 0, sizeof(*S));
+	for (word = 0; word < 8; ++word) {
+		S->h[word] = blake2b_IV[word];
+	}
+}
+
+/* Initialise S from the parameter block P: the chaining value becomes the IV
+ * XORed with the eight 64-bit words of P, and the digest length is taken from
+ * P->digest_length. Returns 0 on success, -1 if either pointer is NULL. */
+int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
+	const unsigned char *param_bytes;
+	unsigned int word;
+
+	if (S == NULL || P == NULL) {
+		return -1;
+	}
+
+	param_bytes = (const unsigned char *)P;
+	blake2b_init0(S);
+
+	/* IV XOR parameter block, one 64-bit word at a time */
+	for (word = 0; word < 8; ++word) {
+		S->h[word] ^= load64(param_bytes + word * sizeof(uint64_t));
+	}
+
+	S->outlen = P->digest_length;
+	return 0;
+}
+
+/* Sequential, unkeyed BLAKE2b initialisation for a digest of outlen bytes.
+ * Returns 0 on success. Returns -1 if S is NULL, or if outlen is not in
+ * 1..BLAKE2B_OUTBYTES (in which case S is also invalidated). */
+int blake2b_init(blake2b_state *S, size_t outlen) {
+	blake2b_param params;
+
+	if (S == NULL) {
+		return -1;
+	}
+
+	if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
+		blake2b_invalidate_state(S);
+		return -1;
+	}
+
+	/* Unkeyed parameter block: everything is zero except the digest length
+	 * and the fanout/depth pair, which are both 1 in sequential mode. The
+	 * struct is packed, so clearing it covers every field. */
+	memset(&params, 0, sizeof(params));
+	params.digest_length = (uint8_t)outlen;
+	params.fanout = 1;
+	params.depth = 1;
+
+	return blake2b_init_param(S, &params);
+}
+
+/* Keyed BLAKE2b initialisation.
+ *
+ * S      - state to initialise; must not be NULL.
+ * outlen - digest length in bytes, 1..BLAKE2B_OUTBYTES.
+ * key    - key bytes; must not be NULL.
+ * keylen - key length in bytes, 1..BLAKE2B_KEYBYTES.
+ *
+ * The key is zero-padded to one full block and absorbed as the first block of
+ * input. Returns 0 on success and -1 on any failure; except when S itself is
+ * NULL, a failure also invalidates S. */
+int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen) {
+	blake2b_param P;
+	uint8_t block[BLAKE2B_BLOCKBYTES];
+
+	if (S == NULL) {
+		return -1;
+	}
+
+	if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
+		blake2b_invalidate_state(S);
+		return -1;
+	}
+
+	if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) {
+		blake2b_invalidate_state(S);
+		return -1;
+	}
+
+	/* Setup Parameter Block for keyed BLAKE2 */
+	P.digest_length = (uint8_t)outlen;
+	P.key_length = (uint8_t)keylen;
+	P.fanout = 1;
+	P.depth = 1;
+	P.leaf_length = 0;
+	P.node_offset = 0;
+	P.node_depth = 0;
+	P.inner_length = 0;
+	memset(P.reserved, 0, sizeof(P.reserved));
+	memset(P.salt, 0, sizeof(P.salt));
+	memset(P.personal, 0, sizeof(P.personal));
+
+	if (blake2b_init_param(S, &P) < 0) {
+		blake2b_invalidate_state(S);
+		return -1;
+	}
+
+	/* Absorb the zero-padded key as the first input block. A failure here is
+	 * reported to the caller instead of being silently dropped. */
+	memset(block, 0, BLAKE2B_BLOCKBYTES);
+	memcpy(block, key, keylen);
+	if (blake2b_update(S, block, BLAKE2B_BLOCKBYTES) < 0) {
+		blake2b_invalidate_state(S);
+		return -1;
+	}
+	/* NOTE: the padded key copy in `block` is not wiped from the stack; the
+	 * clear_internal_memory() helper is not available in this vendored copy. */
+	return 0;
+}
+
+/* Compress one BLAKE2B_BLOCKBYTES-byte block into the chaining value S->h.
+ * The counter S->t and the finalization flags S->f are read here but not
+ * modified; callers update them before calling. */
+static void blake2b_compress(blake2b_state *S, const uint8_t *block) {
+	uint64_t m[16];
+	uint64_t v[16];
+	unsigned int i, r;
+
+	/* Load the block as sixteen 64-bit message words. */
+	for (i = 0; i < 16; ++i) {
+		m[i] = load64(block + i * sizeof(m[i]));
+	}
+
+	/* Working vector, first half: the current chaining value. */
+	for (i = 0; i < 8; ++i) {
+		v[i] = S->h[i];
+	}
+
+	/* Working vector, second half: the IV, with the counter and the
+	 * finalization flags XORed into its last four words. */
+	v[8] = blake2b_IV[0];
+	v[9] = blake2b_IV[1];
+	v[10] = blake2b_IV[2];
+	v[11] = blake2b_IV[3];
+	v[12] = blake2b_IV[4] ^ S->t[0];
+	v[13] = blake2b_IV[5] ^ S->t[1];
+	v[14] = blake2b_IV[6] ^ S->f[0];
+	v[15] = blake2b_IV[7] ^ S->f[1];
+
+/* G mixes four working words with two message words selected by
+ * blake2b_sigma[r]; the rotation amounts are 32, 24, 16 and 63. */
+#define G(r, i, a, b, c, d)                                                    \
+    do {                                                                       \
+        a = a + b + m[blake2b_sigma[r][2 * i + 0]];                            \
+        d = rotr64(d ^ a, 32);                                                 \
+        c = c + d;                                                             \
+        b = rotr64(b ^ c, 24);                                                 \
+        a = a + b + m[blake2b_sigma[r][2 * i + 1]];                            \
+        d = rotr64(d ^ a, 16);                                                 \
+        c = c + d;                                                             \
+        b = rotr64(b ^ c, 63);                                                 \
+    } while ((void)0, 0)
+
+/* One round: G over the four columns of the 4x4 word matrix, then over its
+ * four diagonals. */
+#define ROUND(r)                                                               \
+    do {                                                                       \
+        G(r, 0, v[0], v[4], v[8], v[12]);                                      \
+        G(r, 1, v[1], v[5], v[9], v[13]);                                      \
+        G(r, 2, v[2], v[6], v[10], v[14]);                                     \
+        G(r, 3, v[3], v[7], v[11], v[15]);                                     \
+        G(r, 4, v[0], v[5], v[10], v[15]);                                     \
+        G(r, 5, v[1], v[6], v[11], v[12]);                                     \
+        G(r, 6, v[2], v[7], v[8], v[13]);                                      \
+        G(r, 7, v[3], v[4], v[9], v[14]);                                      \
+    } while ((void)0, 0)
+
+	/* Twelve rounds, one per row of blake2b_sigma. */
+	for (r = 0; r < 12; ++r) {
+		ROUND(r);
+	}
+
+	/* Fold both halves of the working vector back into the chaining value. */
+	for (i = 0; i < 8; ++i) {
+		S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+	}
+
+#undef G
+#undef ROUND
+}
+
+/* Absorb inlen bytes from in into the hash state.
+ *
+ * Returns 0 on success (including the inlen == 0 case, which returns before
+ * the pointers are examined) and -1 if S or in is NULL or if S has already
+ * been finalized or invalidated (f[0] != 0).
+ *
+ * Input is compressed only when strictly more than one block's worth of data
+ * is available, so between 1 and BLAKE2B_BLOCKBYTES bytes always remain
+ * buffered on return; blake2b_final() compresses them as the last block. */
+int blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
+	const uint8_t *pin = (const uint8_t *)in;
+
+	if (inlen == 0) {
+		return 0;
+	}
+
+	/* Sanity check */
+	if (S == NULL || in == NULL) {
+		return -1;
+	}
+
+	/* Is this a reused state? */
+	if (S->f[0] != 0) {
+		return -1;
+	}
+
+	if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
+		/* Complete current block */
+		size_t left = S->buflen;
+		size_t fill = BLAKE2B_BLOCKBYTES - left;
+		memcpy(&S->buf[left], pin, fill);
+		blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+		blake2b_compress(S, S->buf);
+		S->buflen = 0;
+		inlen -= fill;
+		pin += fill;
+		/* Avoid buffer copies when possible */
+		/* The comparison is strict, so a final full block is left for the
+		 * buffer instead of being compressed here. */
+		while (inlen > BLAKE2B_BLOCKBYTES) {
+			blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+			blake2b_compress(S, pin);
+			inlen -= BLAKE2B_BLOCKBYTES;
+			pin += BLAKE2B_BLOCKBYTES;
+		}
+	}
+	/* Buffer whatever is left (at most one block). */
+	memcpy(&S->buf[S->buflen], pin, inlen);
+	S->buflen += (unsigned int)inlen;
+	return 0;
+}
+
+/* Finish the hash and copy the digest to out.
+ *
+ * Exactly S->outlen bytes are written; outlen is the capacity of out and must
+ * be at least S->outlen. Returns 0 on success and -1 if S or out is NULL, if
+ * out is too small, or if S was already finalized or invalidated. Sensitive
+ * buffers are not wiped afterwards (clear_internal_memory() is unavailable in
+ * this vendored copy). */
+int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
+	uint8_t digest[BLAKE2B_OUTBYTES] = { 0 };
+	unsigned int word;
+
+	if (S == NULL || out == NULL) {
+		return -1;
+	}
+	if (outlen < S->outlen) {
+		return -1;
+	}
+	if (S->f[0] != 0) {
+		/* state was already finalized or invalidated */
+		return -1;
+	}
+
+	/* Account for the buffered bytes, flag the last block, zero-pad the
+	 * buffer to a full block and compress it. */
+	blake2b_increment_counter(S, S->buflen);
+	blake2b_set_lastblock(S);
+	memset(S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen);
+	blake2b_compress(S, S->buf);
+
+	/* Serialize the whole chaining value, then hand out only the requested
+	 * prefix. */
+	for (word = 0; word < 8; ++word) {
+		store64(&digest[word * sizeof(uint64_t)], S->h[word]);
+	}
+	memcpy(out, digest, S->outlen);
+	return 0;
+}
+
+/* One-shot BLAKE2b: hash in[0..inlen) into out[0..outlen), keyed when
+ * keylen > 0 and unkeyed otherwise.
+ *
+ * Returns the result of blake2b_final() on the normal path, and -1 if the
+ * arguments are invalid (NULL in with inlen > 0, NULL out, outlen outside
+ * 1..BLAKE2B_OUTBYTES, NULL key with keylen > 0, keylen > BLAKE2B_KEYBYTES)
+ * or if initialisation or absorbing the input fails. */
+int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
+	const void *key, size_t keylen) {
+	blake2b_state S;
+	int init_status;
+
+	/* Verify parameters */
+	if (in == NULL && inlen > 0) {
+		return -1;
+	}
+	if (out == NULL || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
+		return -1;
+	}
+	if (key == NULL && keylen > 0) {
+		return -1;
+	}
+	if (keylen > BLAKE2B_KEYBYTES) {
+		return -1;
+	}
+
+	init_status = (keylen > 0)
+		? blake2b_init_key(&S, outlen, key, keylen)
+		: blake2b_init(&S, outlen);
+	if (init_status < 0) {
+		return -1;
+	}
+
+	if (blake2b_update(&S, in, inlen) < 0) {
+		return -1;
+	}
+
+	/* The local state is not wiped before returning. */
+	return blake2b_final(&S, out, outlen);
+}
+
+/* Argon2 Team - Begin Code */
+/* Variable-length hash built on BLAKE2b: writes outlen bytes to pout.
+ *
+ * The hashed message is the 4-byte encoding of outlen followed by
+ * in[0..inlen). For outlen <= BLAKE2B_OUTBYTES a single BLAKE2b call with
+ * that digest length is used. For longer outputs a chain of 64-byte hashes is
+ * computed, each one hashing the previous 64-byte result; the first half
+ * (32 bytes) of every intermediate hash is emitted, and the last hash is
+ * emitted in full with a digest length equal to the bytes still missing.
+ *
+ * Returns a negative value on failure (including outlen > UINT32_MAX and
+ * outlen == 0, which blake2b_init() rejects); otherwise the non-negative
+ * result of the last BLAKE2b call. */
+int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
+	uint8_t *out = (uint8_t *)pout;
+	blake2b_state blake_state;
+	uint8_t outlen_bytes[sizeof(uint32_t)] = { 0 };
+	int ret = -1;
+
+	if (outlen > UINT32_MAX) {
+		goto fail;
+	}
+
+	/* Ensure little-endian byte order! */
+	store32(outlen_bytes, (uint32_t)outlen);
+
+/* Evaluate a call, store its result in ret and bail out if it is negative. */
+#define TRY(statement)                                                         \
+    do {                                                                       \
+        ret = statement;                                                       \
+        if (ret < 0) {                                                         \
+            goto fail;                                                         \
+        }                                                                      \
+    } while ((void)0, 0)
+
+	if (outlen <= BLAKE2B_OUTBYTES) {
+		/* Short output: one hash with the requested digest length. */
+		TRY(blake2b_init(&blake_state, outlen));
+		TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
+		TRY(blake2b_update(&blake_state, in, inlen));
+		TRY(blake2b_final(&blake_state, out, outlen));
+	}
+	else {
+		uint32_t toproduce;
+		uint8_t out_buffer[BLAKE2B_OUTBYTES];
+		uint8_t in_buffer[BLAKE2B_OUTBYTES];
+		/* First link of the chain: full-length hash of (outlen || in). */
+		TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
+		TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
+		TRY(blake2b_update(&blake_state, in, inlen));
+		TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
+		memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
+		out += BLAKE2B_OUTBYTES / 2;
+		toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
+
+		/* Middle links: rehash the previous 64-byte result and emit the
+		 * first half, until at most 64 bytes remain to be produced. */
+		while (toproduce > BLAKE2B_OUTBYTES) {
+			memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
+			TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
+				BLAKE2B_OUTBYTES, NULL, 0));
+			memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
+			out += BLAKE2B_OUTBYTES / 2;
+			toproduce -= BLAKE2B_OUTBYTES / 2;
+		}
+
+		/* Last link: digest length equals the number of bytes still due. */
+		memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
+		TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
+			0));
+		memcpy(out, out_buffer, toproduce);
+	}
+fail:
+	//clear_internal_memory(&blake_state, sizeof(blake_state));
+	return ret;
+#undef TRY
+}
+/* Argon2 Team - End Code */
+
--- a/crypto/randomx/blake2/blamka-round-avx2.h
+++ b/crypto/randomx/blake2/blamka-round-avx2.h
@ -0,0 +1,189 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef BLAKE_ROUND_MKA_OPT_H
+#define BLAKE_ROUND_MKA_OPT_H
+
+#include "blake2-impl.h"
+
+#ifdef __GNUC__
+#include <x86intrin.h>
+#else
+#include <intrin.h>
+#endif
+
+/* Right rotations of every 64-bit lane of a 256-bit vector by 32, 24, 16 and
+ * 63 bits. rotr32/24/16 are implemented as word/byte shuffles; rotr63 is
+ * (x >> 63) ^ (x + x), i.e. a left rotation by one bit.
+ * NOTE: the one-argument rotr32 macro has the same name as the two-argument
+ * rotr32() inline function in blake2-impl.h; after this point a call spelled
+ * rotr32(...) expands to the macro. */
+#define rotr32(x)   _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
+#define rotr24(x)   _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
+#define rotr16(x)   _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
+#define rotr63(x)   _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
+
+/* First half of the BlaMka mixing step, applied to two independent register
+ * sets (suffix 0 and suffix 1). Per 64-bit lane:
+ *   a = a + b + 2 * lo32(a) * lo32(b);  d = rotr32(d ^ a);
+ *   c = c + d + 2 * lo32(c) * lo32(d);  b = rotr24(b ^ c);
+ * The arguments must be modifiable lvalues; they are updated in place.
+ * The expansion already ends with ';', and the ROUND macros in this header
+ * invoke it without a trailing semicolon. */
+#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i ml = _mm256_mul_epu32(A0, B0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+        D0 = _mm256_xor_si256(D0, A0); \
+        D0 = rotr32(D0); \
+        \
+        ml = _mm256_mul_epu32(C0, D0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+        \
+        B0 = _mm256_xor_si256(B0, C0); \
+        B0 = rotr24(B0); \
+        \
+        ml = _mm256_mul_epu32(A1, B1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+        D1 = _mm256_xor_si256(D1, A1); \
+        D1 = rotr32(D1); \
+        \
+        ml = _mm256_mul_epu32(C1, D1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+        \
+        B1 = _mm256_xor_si256(B1, C1); \
+        B1 = rotr24(B1); \
+    } while((void)0, 0);
+
+/* Second half of the BlaMka mixing step, applied to two independent register
+ * sets (suffix 0 and suffix 1). Same structure as G1_AVX2, but with rotation
+ * amounts 16 (for d) and 63 (for b). Per 64-bit lane:
+ *   a = a + b + 2 * lo32(a) * lo32(b);  d = rotr16(d ^ a);
+ *   c = c + d + 2 * lo32(c) * lo32(d);  b = rotr63(b ^ c);
+ * The expansion already ends with ';'. */
+#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i ml = _mm256_mul_epu32(A0, B0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
+        D0 = _mm256_xor_si256(D0, A0); \
+        D0 = rotr16(D0); \
+        \
+        ml = _mm256_mul_epu32(C0, D0); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
+        B0 = _mm256_xor_si256(B0, C0); \
+        B0 = rotr63(B0); \
+        \
+        ml = _mm256_mul_epu32(A1, B1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
+        D1 = _mm256_xor_si256(D1, A1); \
+        D1 = rotr16(D1); \
+        \
+        ml = _mm256_mul_epu32(C1, D1); \
+        ml = _mm256_add_epi64(ml, ml); \
+        C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
+        B1 = _mm256_xor_si256(B1, C1); \
+        B1 = rotr63(B1); \
+    } while((void)0, 0);
+
+/* Rotate the four 64-bit lanes of each B register by one position, each C
+ * register by two and each D register by three, so that a following G1/G2
+ * pass operates on diagonals instead of columns. A0 and A1 are accepted for
+ * a uniform signature but are not touched. Undone by UNDIAGONALIZE_1.
+ * Note the parameter order (A0, B0, C0, D0, A1, ...) differs from the G
+ * macros. The expansion already ends with ';'. */
+#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        \
+        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
+        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
+    } while((void)0, 0);
+
+/* Diagonalization used by BLAKE2_ROUND_2: 64-bit lanes are exchanged between
+ * the two registers of the B pair and of the D pair (blend followed by a lane
+ * permutation), and C0 is swapped with C1. A0 and A1 are accepted for a
+ * uniform signature but are not touched. Undone by UNDIAGONALIZE_2.
+ * The loop terminator uses the same `(void)0, 0` form as every other macro in
+ * this header, so compilers that warn about constant loop conditions treat
+ * all of them alike. The expansion already ends with ';'. */
+#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+        B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+        \
+        tmp1 = C0; \
+        C0 = C1; \
+        C1 = tmp1; \
+        \
+        tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
+        tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
+        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+    } while((void)0, 0);
+
+/* Inverse of DIAGONALIZE_1: the B registers are rotated by three lanes, the
+ * C registers by two and the D registers by one, restoring the column
+ * layout. A0 and A1 are not touched. The expansion already ends with ';'. */
+#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    do { \
+        B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
+        C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
+        \
+        B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
+        C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
+        D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
+    } while((void)0, 0);
+
+/* Counterpart of DIAGONALIZE_2, used after the diagonal G pass in
+ * BLAKE2_ROUND_2: lanes are exchanged back between the B pair and the D pair
+ * (note the blend masks and destination registers are mirrored relative to
+ * DIAGONALIZE_2), and C0 is swapped with C1 again. A0 and A1 are not touched.
+ * The expansion already ends with ';'. */
+#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do { \
+        __m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
+        __m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
+        B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+        \
+        tmp1 = C0; \
+        C0 = C1; \
+        C1 = tmp1; \
+        \
+        tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
+        tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
+        D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
+        D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
+    } while((void)0, 0);
+
+/* One full BlaMka round over eight 256-bit registers: G1+G2 on the current
+ * layout, DIAGONALIZE_1, G1+G2 again, then UNDIAGONALIZE_1. The inner macros
+ * are invoked without semicolons because each of their expansions already
+ * ends with one; this macro's expansion ends with ';' as well. */
+#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do{ \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+        \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
+    } while((void)0, 0);
+
+/* Same sequence as BLAKE2_ROUND_1, but using the DIAGONALIZE_2 /
+ * UNDIAGONALIZE_2 lane exchange between the two G passes. The inner macros
+ * are invoked without semicolons because each of their expansions already
+ * ends with one; this macro's expansion ends with ';' as well. */
+#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    do{ \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
+        \
+        UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
+    } while((void)0, 0);
+
+#endif /* BLAKE_ROUND_MKA_OPT_H */
--- a/crypto/randomx/blake2/blamka-round-ref.h
+++ b/crypto/randomx/blake2/blamka-round-ref.h
@ -0,0 +1,73 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef BLAKE_ROUND_MKA_H
+#define BLAKE_ROUND_MKA_H
+
+#include "blake2.h"
+#include "blake2-impl.h"
+
+ /* designed by the Lyra PHC team */
+/* BlaMka combiner: x + y + 2 * lo32(x) * lo32(y), where lo32() is the low
+ * 32 bits of its argument. All arithmetic wraps modulo 2^64. */
+static FORCE_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
+	const uint64_t low_x = x & UINT64_C(0xFFFFFFFF);
+	const uint64_t low_y = y & UINT64_C(0xFFFFFFFF);
+	const uint64_t product = low_x * low_y;
+
+	return x + y + (product + product);
+}
+
+/* Message-free mixing step on four 64-bit words: the same add/xor/rotate
+ * sequence as the G macro in blake2b.c (rotations 32, 24, 16, 63), but with
+ * every addition replaced by fBlaMka() and no message words mixed in. The
+ * arguments must be modifiable lvalues; they are updated in place. */
+#define G(a, b, c, d)                                                          \
+    do {                                                                       \
+        a = fBlaMka(a, b);                                                     \
+        d = rotr64(d ^ a, 32);                                                 \
+        c = fBlaMka(c, d);                                                     \
+        b = rotr64(b ^ c, 24);                                                 \
+        a = fBlaMka(a, b);                                                     \
+        d = rotr64(d ^ a, 16);                                                 \
+        c = fBlaMka(c, d);                                                     \
+        b = rotr64(b ^ c, 63);                                                 \
+    } while ((void)0, 0)
+
+/* One message-free round over sixteen 64-bit words viewed as a 4x4 matrix
+ * (v0..v3 in the first row): G is applied to the four columns and then to
+ * the four diagonals. All sixteen arguments are updated in place. */
+#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,   \
+                           v12, v13, v14, v15)                                 \
+    do {                                                                       \
+        G(v0, v4, v8, v12);                                                    \
+        G(v1, v5, v9, v13);                                                    \
+        G(v2, v6, v10, v14);                                                   \
+        G(v3, v7, v11, v15);                                                   \
+        G(v0, v5, v10, v15);                                                   \
+        G(v1, v6, v11, v12);                                                   \
+        G(v2, v7, v8, v13);                                                    \
+        G(v3, v4, v9, v14);                                                    \
+    } while ((void)0, 0)
+
+#endif
--- a/crypto/randomx/blake2/blamka-round-ssse3.h
+++ b/crypto/randomx/blake2/blamka-round-ssse3.h
@ -0,0 +1,162 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#ifndef BLAKE_ROUND_MKA_OPT_H
+#define BLAKE_ROUND_MKA_OPT_H
+
+#include "blake2-impl.h"
+
+#ifdef __GNUC__
+#include <x86intrin.h>
+#else
+#include <intrin.h>
+#endif
+
+/*
+ * clang may already provide _mm_roti_epi64 as a macro (XOP instruction set);
+ * drop that definition so the SSSE3 emulation below is the one in effect.
+ */
+#ifdef _mm_roti_epi64
+#undef _mm_roti_epi64
+#endif
+
+/*
+ * Byte-shuffle masks for _mm_shuffle_epi8: rotate each 64-bit lane right by
+ * 16 bits (r16) or 24 bits (r24).
+ */
+#define r16                                                                    \
+    (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
+#define r24                                                                    \
+    (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
+
+/*
+ * Rotate each 64-bit lane of x right by -(c) bits. Callers pass the rotation
+ * amount negated (e.g. -32 for a right rotation by 32).
+ *   32     -> 32-bit word swap within each lane
+ *   24, 16 -> byte shuffle with r24 / r16
+ *   63     -> (x >> 63) ^ (x + x)
+ *   other  -> generic shift pair
+ * c must be a compile-time constant and both x and c are expanded more than
+ * once. The whole expansion is parenthesised so the macro stays a single
+ * operand when it is used inside a larger expression.
+ */
+#define _mm_roti_epi64(x, c)                                                   \
+    ((-(c) == 32)                                                              \
+         ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))                     \
+         : (-(c) == 24)                                                        \
+               ? _mm_shuffle_epi8((x), r24)                                    \
+               : (-(c) == 16)                                                  \
+                     ? _mm_shuffle_epi8((x), r16)                              \
+                     : (-(c) == 63)                                            \
+                           ? _mm_xor_si128(_mm_srli_epi64((x), -(c)),          \
+                                           _mm_add_epi64((x), (x)))            \
+                           : _mm_xor_si128(_mm_srli_epi64((x), -(c)),          \
+                                           _mm_slli_epi64((x), 64 - (-(c)))))
+
+/*
+ * Two-lane BlaMka primitive: for each 64-bit lane returns
+ * x + y + 2 * lo32(x) * lo32(y), wrapping modulo 2^64.
+ * _mm_mul_epu32 multiplies the low 32 bits of each 64-bit lane.
+ */
+static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
+    const __m128i sum = _mm_add_epi64(x, y);
+    const __m128i lo_product = _mm_mul_epu32(x, y);
+    const __m128i doubled = _mm_add_epi64(lo_product, lo_product);
+    return _mm_add_epi64(sum, doubled);
+}
+
+/*
+ * First half of the G function on two independent register sets
+ * (A0,B0,C0,D0) and (A1,B1,C1,D1), each __m128i holding two 64-bit lanes:
+ *   A = fBlaMka(A, B); D = rotr(D ^ A, 32);
+ *   C = fBlaMka(C, D); B = rotr(B ^ C, 24);
+ * All arguments are updated in place and evaluated more than once.
+ */
+#define G1(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
+    do {                                                                       \
+        A0 = fBlaMka(A0, B0);                                                  \
+        A1 = fBlaMka(A1, B1);                                                  \
+                                                                               \
+        D0 = _mm_xor_si128(D0, A0);                                            \
+        D1 = _mm_xor_si128(D1, A1);                                            \
+                                                                               \
+        D0 = _mm_roti_epi64(D0, -32);                                          \
+        D1 = _mm_roti_epi64(D1, -32);                                          \
+                                                                               \
+        C0 = fBlaMka(C0, D0);                                                  \
+        C1 = fBlaMka(C1, D1);                                                  \
+                                                                               \
+        B0 = _mm_xor_si128(B0, C0);                                            \
+        B1 = _mm_xor_si128(B1, C1);                                            \
+                                                                               \
+        B0 = _mm_roti_epi64(B0, -24);                                          \
+        B1 = _mm_roti_epi64(B1, -24);                                          \
+    } while ((void)0, 0)
+
+/*
+ * Second half of the G function; same shape as G1 but with right rotations
+ * by 16 (on D) and 63 (on B):
+ *   A = fBlaMka(A, B); D = rotr(D ^ A, 16);
+ *   C = fBlaMka(C, D); B = rotr(B ^ C, 63);
+ * All arguments are updated in place and evaluated more than once.
+ */
+#define G2(A0, B0, C0, D0, A1, B1, C1, D1)                                     \
+    do {                                                                       \
+        A0 = fBlaMka(A0, B0);                                                  \
+        A1 = fBlaMka(A1, B1);                                                  \
+                                                                               \
+        D0 = _mm_xor_si128(D0, A0);                                            \
+        D1 = _mm_xor_si128(D1, A1);                                            \
+                                                                               \
+        D0 = _mm_roti_epi64(D0, -16);                                          \
+        D1 = _mm_roti_epi64(D1, -16);                                          \
+                                                                               \
+        C0 = fBlaMka(C0, D0);                                                  \
+        C1 = fBlaMka(C1, D1);                                                  \
+                                                                               \
+        B0 = _mm_xor_si128(B0, C0);                                            \
+        B1 = _mm_xor_si128(B1, C1);                                            \
+                                                                               \
+        B0 = _mm_roti_epi64(B0, -63);                                          \
+        B1 = _mm_roti_epi64(B1, -63);                                          \
+    } while ((void)0, 0)
+
+/*
+ * Re-aligns the state so that a following column-wise G1/G2 pass operates on
+ * the diagonals. Treating each row as four 64-bit words split over the
+ * register pair (X0 = words 0-1, X1 = words 2-3):
+ *   B: (b0,b1,b2,b3) -> (b1,b2,b3,b0)
+ *   C: (c0,c1,c2,c3) -> (c2,c3,c0,c1)   (halves swapped)
+ *   D: (d0,d1,d2,d3) -> (d3,d0,d1,d2)
+ * A0/A1 are accepted for signature symmetry but are not touched.
+ */
+#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                            \
+    do {                                                                       \
+        __m128i t0 = _mm_alignr_epi8(B1, B0, 8);                               \
+        __m128i t1 = _mm_alignr_epi8(B0, B1, 8);                               \
+        B0 = t0;                                                               \
+        B1 = t1;                                                               \
+                                                                               \
+        t0 = C0;                                                               \
+        C0 = C1;                                                               \
+        C1 = t0;                                                               \
+                                                                               \
+        t0 = _mm_alignr_epi8(D1, D0, 8);                                       \
+        t1 = _mm_alignr_epi8(D0, D1, 8);                                       \
+        D0 = t1;                                                               \
+        D1 = t0;                                                               \
+    } while ((void)0, 0)
+
+/*
+ * Inverse of DIAGONALIZE: restores the column layout after the diagonal pass.
+ * With each row split over the register pair (X0 = words 0-1, X1 = words 2-3):
+ *   B: (b0,b1,b2,b3) -> (b3,b0,b1,b2)
+ *   C: (c0,c1,c2,c3) -> (c2,c3,c0,c1)   (halves swapped)
+ *   D: (d0,d1,d2,d3) -> (d1,d2,d3,d0)
+ * A0/A1 are accepted for signature symmetry but are not touched.
+ */
+#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1)                          \
+    do {                                                                       \
+        __m128i t0 = _mm_alignr_epi8(B0, B1, 8);                               \
+        __m128i t1 = _mm_alignr_epi8(B1, B0, 8);                               \
+        B0 = t0;                                                               \
+        B1 = t1;                                                               \
+                                                                               \
+        t0 = C0;                                                               \
+        C0 = C1;                                                               \
+        C1 = t0;                                                               \
+                                                                               \
+        t0 = _mm_alignr_epi8(D0, D1, 8);                                       \
+        t1 = _mm_alignr_epi8(D1, D0, 8);                                       \
+        D0 = t1;                                                               \
+        D1 = t0;                                                               \
+    } while ((void)0, 0)
+
+/*
+ * One full round on eight __m128i registers (two per row A..D):
+ * G1+G2 on the columns, DIAGONALIZE, G1+G2 again (now acting on the
+ * diagonals), then UNDIAGONALIZE to restore the original layout.
+ * Note the parameter order here is (A0, A1, B0, B1, ...), whereas the
+ * helper macros take (A0, B0, C0, D0, A1, B1, C1, D1).
+ */
+#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1)                           \
+    do {                                                                       \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+                                                                               \
+        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                           \
+                                                                               \
+        G1(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+        G2(A0, B0, C0, D0, A1, B1, C1, D1);                                    \
+                                                                               \
+        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1);                         \
+    } while ((void)0, 0)
+
+
+#endif /* BLAKE_ROUND_MKA_OPT_H */
--- a/crypto/randomx/blake2/endian.h
+++ b/crypto/randomx/blake2/endian.h
@ -0,0 +1,107 @@
+#pragma once
+#include <stdint.h>
+#include <string.h>
+
+/*
+ * FORCE_INLINE: inline specifier applied to the small helpers in this header.
+ * Despite the name it expands to the compiler's ordinary inline keyword
+ * (__inline on MSVC, __inline__ on GCC/clang) and to nothing on any other
+ * compiler.
+ */
+#if defined(_MSC_VER)
+#define FORCE_INLINE __inline
+#elif defined(__GNUC__) || defined(__clang__)
+#define FORCE_INLINE __inline__
+#else
+#define FORCE_INLINE
+#endif
+
+ /* Argon2 Team - Begin Code */
+ /*
+	Not an exhaustive list, but should cover the majority of modern platforms
+	Additionally, the code will always be correct---this is only a performance
+	tweak.
+
+	NATIVE_LITTLE_ENDIAN is defined when one of the known little-endian target
+	macros is present. The load/store helpers in this header then use a plain
+	memcpy; otherwise they assemble or split the value byte by byte.
+ */
+#if (defined(__BYTE_ORDER__) &&                                                \
+     (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) ||                           \
+    defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
+    defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) ||       \
+    defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) ||                \
+    defined(_M_ARM)
+#define NATIVE_LITTLE_ENDIAN
+#endif
+ /* Argon2 Team - End Code */
+
+/*
+ * Reads a 32-bit little-endian value from src.
+ * Uses memcpy on little-endian hosts and byte-wise assembly elsewhere, so
+ * src does not need to be aligned.
+ */
+static FORCE_INLINE uint32_t load32(const void *src) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+	uint32_t value;
+	memcpy(&value, src, sizeof value);
+	return value;
+#else
+	const uint8_t *bytes = (const uint8_t *)src;
+	uint32_t value = 0;
+	unsigned i;
+	/* byte i contributes bits [8*i, 8*i + 7] */
+	for (i = 0; i < 4; ++i)
+		value |= (uint32_t)bytes[i] << (8 * i);
+	return value;
+#endif
+}
+
+/*
+ * Reads a 64-bit value from src in host byte order (no endian conversion).
+ * The copy goes through memcpy, so src does not need to be aligned.
+ */
+static FORCE_INLINE uint64_t load64_native(const void *src) {
+	uint64_t value;
+	memcpy(&value, src, sizeof(uint64_t));
+	return value;
+}
+
+/*
+ * Reads a 64-bit little-endian value from src.
+ * Little-endian hosts defer to load64_native(); other hosts assemble the
+ * value one byte at a time, least-significant byte first.
+ */
+static FORCE_INLINE uint64_t load64(const void *src) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+	return load64_native(src);
+#else
+	const uint8_t *bytes = (const uint8_t *)src;
+	uint64_t value = 0;
+	unsigned i;
+	/* byte i contributes bits [8*i, 8*i + 7] */
+	for (i = 0; i < 8; ++i)
+		value |= (uint64_t)bytes[i] << (8 * i);
+	return value;
+#endif
+}
+
+/*
+ * Writes w to dst as four bytes in little-endian order.
+ * Uses memcpy on little-endian hosts and byte-wise stores elsewhere, so
+ * dst does not need to be aligned.
+ */
+static FORCE_INLINE void store32(void *dst, uint32_t w) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+	memcpy(dst, &w, sizeof w);
+#else
+	uint8_t *out = (uint8_t *)dst;
+	unsigned i;
+	/* least-significant byte goes first */
+	for (i = 0; i < 4; ++i)
+		out[i] = (uint8_t)(w >> (8 * i));
+#endif
+}
+
+/*
+ * Writes w to dst in host byte order (no endian conversion).
+ * The copy goes through memcpy, so dst does not need to be aligned.
+ */
+static FORCE_INLINE void store64_native(void *dst, uint64_t w) {
+	memcpy(dst, &w, sizeof(uint64_t));
+}
+
+/*
+ * Writes w to dst as eight bytes in little-endian order.
+ * Little-endian hosts defer to store64_native(); other hosts emit the value
+ * one byte at a time, least-significant byte first.
+ */
+static FORCE_INLINE void store64(void *dst, uint64_t w) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+	store64_native(dst, w);
+#else
+	uint8_t *out = (uint8_t *)dst;
+	unsigned i;
+	/* least-significant byte goes first */
+	for (i = 0; i < 8; ++i)
+		out[i] = (uint8_t)(w >> (8 * i));
+#endif
+}
--- a/crypto/randomx/blake2_generator.cpp
+++ b/crypto/randomx/blake2_generator.cpp
@ -0,0 +1,62 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stddef.h>
+#include "blake2/blake2.h"
+#include "blake2/endian.h"
+#include "blake2_generator.hpp"
+
+namespace randomx {
+
+	// Number of seed bytes kept in the buffer; the 4 bytes after them hold the nonce.
+	constexpr int maxSeedSize = 60;
+
+	// Builds the initial buffer: up to maxSeedSize bytes of seed (zero-padded),
+	// followed by the nonce stored via store32. dataIndex starts at the end of
+	// the buffer, so the first read goes through the refill path in checkData().
+	Blake2Generator::Blake2Generator(const void* seed, size_t seedSize, int nonce) : dataIndex(sizeof(data)) {
+		size_t copyLength = seedSize;
+		if (copyLength > maxSeedSize) {
+			copyLength = maxSeedSize;
+		}
+		memset(data, 0, sizeof(data));
+		memcpy(data, seed, copyLength);
+		store32(&data[maxSeedSize], nonce);
+	}
+
+	// Returns the next byte of the stream.
+	uint8_t Blake2Generator::getByte() {
+		checkData(1);
+		const uint8_t next = data[dataIndex];
+		++dataIndex;
+		return next;
+	}
+
+	// Returns the next four bytes of the stream, decoded with load32.
+	uint32_t Blake2Generator::getUInt32() {
+		checkData(sizeof(uint32_t));
+		const uint32_t value = load32(data + dataIndex);
+		dataIndex += sizeof(uint32_t);
+		return value;
+	}
+
+	// Ensures bytesNeeded unread bytes are available. When they are not, the
+	// buffer is replaced by the blake2b call below (data is both its output and
+	// its input, with no key) and reading restarts at offset 0; any unread
+	// tail bytes are discarded.
+	void Blake2Generator::checkData(const size_t bytesNeeded) {
+		const bool enoughLeft = dataIndex + bytesNeeded <= sizeof(data);
+		if (enoughLeft) {
+			return;
+		}
+		blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
+		dataIndex = 0;
+	}
+}
--- a/crypto/randomx/blake2_generator.hpp
+++ b/crypto/randomx/blake2_generator.hpp
@ -0,0 +1,46 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+
+namespace randomx {
+
+	// Byte-stream generator backed by a 64-byte buffer.
+	// NOTE(review): size_t is used below but only <cstdint> is included in this
+	// header; it appears to rely on the including file providing size_t.
+	class Blake2Generator {
+	public:
+		// seed/seedSize: seed bytes to initialise the buffer with.
+		// nonce: extra integer mixed into the initial buffer (defaults to 0).
+		Blake2Generator(const void* seed, size_t seedSize, int nonce = 0);
+		// Returns the next byte of the stream.
+		uint8_t getByte();
+		// Returns the next 32-bit value of the stream.
+		uint32_t getUInt32();
+	private:
+		// Makes sure the requested number of unread bytes is available in data.
+		void checkData(const size_t);
+
+		// Current output buffer.
+		uint8_t data[64];
+		// Offset of the next unread byte in data.
+		size_t dataIndex;
+	};
+}
--- a/crypto/randomx/bytecode_machine.cpp
+++ b/crypto/randomx/bytecode_machine.cpp
@ -0,0 +1,482 @@
+/*
+Copyright (c) 2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "bytecode_machine.hpp"
+#include "reciprocal.h"
+
+namespace randomx {
+
+	// Constant zero operand. compileInstruction() points ibc.isrc at it for the
+	// integer memory instructions when src == dst.
+	const int_reg_t BytecodeMachine::zero = 0;
+
+// Expands to a switch case for InstructionType::x that forwards to the matching
+// exe_x handler with the arguments in scope inside executeInstruction(), then
+// breaks out of the switch.
+#define INSTR_CASE(x) case InstructionType::x: \
+	exe_ ## x(ibc, pc, scratchpad, config); \
+	break;
+
+	// Executes one compiled bytecode instruction by dispatching on ibc.type to
+	// the corresponding exe_* handler. The parameter list comes from the
+	// RANDOMX_EXE_ARGS macro (defined elsewhere); the handlers receive
+	// ibc, pc, scratchpad and config.
+	void BytecodeMachine::executeInstruction(RANDOMX_EXE_ARGS) {
+		switch (ibc.type)
+		{
+			INSTR_CASE(IADD_RS)
+			INSTR_CASE(IADD_M)
+			INSTR_CASE(ISUB_R)
+			INSTR_CASE(ISUB_M)
+			INSTR_CASE(IMUL_R)
+			INSTR_CASE(IMUL_M)
+			INSTR_CASE(IMULH_R)
+			INSTR_CASE(IMULH_M)
+			INSTR_CASE(ISMULH_R)
+			INSTR_CASE(ISMULH_M)
+			INSTR_CASE(INEG_R)
+			INSTR_CASE(IXOR_R)
+			INSTR_CASE(IXOR_M)
+			INSTR_CASE(IROR_R)
+			INSTR_CASE(IROL_R)
+			INSTR_CASE(ISWAP_R)
+			INSTR_CASE(FSWAP_R)
+			INSTR_CASE(FADD_R)
+			INSTR_CASE(FADD_M)
+			INSTR_CASE(FSUB_R)
+			INSTR_CASE(FSUB_M)
+			INSTR_CASE(FSCAL_R)
+			INSTR_CASE(FMUL_R)
+			INSTR_CASE(FDIV_M)
+			INSTR_CASE(FSQRT_R)
+			INSTR_CASE(CBRANCH)
+			INSTR_CASE(CFROUND)
+			INSTR_CASE(ISTORE)
+
+		// NOP has no effect.
+		case InstructionType::NOP:
+			break;
+
+		// compileInstruction() never emits IMUL_RCP: it is lowered to IMUL_R
+		// (or NOP), so reaching this label or any unknown type is a logic error.
+		case InstructionType::IMUL_RCP: //executed as IMUL_R
+		default:
+			UNREACHABLE;
+		}
+	}
+
+	// Decodes one program instruction (instr) into its bytecode form (ibc).
+	//
+	// The opcode is compared against the ceil_* thresholds in ascending order;
+	// the first range that contains it decides the instruction type, fills in
+	// the operand pointers / immediate / memory mask, and returns.
+	//
+	// The names used below (instr, i, ibc, nreg, registerUsage) come from the
+	// RANDOMX_GEN_ARGS macro, which is defined elsewhere.
+	// NOTE(review): i looks like the index of the instruction being compiled and
+	// registerUsage[r] the index of the last instruction that modified integer
+	// register r (it is what CBRANCH uses as its jump target) - confirm against
+	// the caller.
+	void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
+		int opcode = instr.opcode;
+
+		if (opcode < ceil_IADD_RS) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::IADD_RS;
+			ibc.idst = &nreg->r[dst];
+			// Both branches use the same source and shift; only the
+			// RegisterNeedsDisplacement destination also gets the immediate.
+			if (dst != RegisterNeedsDisplacement) {
+				ibc.isrc = &nreg->r[src];
+				ibc.shift = instr.getModShift();
+				ibc.imm = 0;
+			}
+			else {
+				ibc.isrc = &nreg->r[src];
+				ibc.shift = instr.getModShift();
+				ibc.imm = signExtend2sCompl(instr.getImm32());
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		// Integer memory instructions (IADD_M and the other *_M forms below)
+		// share one pattern: with src != dst the source register is used with
+		// the L1 or L2 mask chosen by getModMem(); with src == dst the source
+		// is the constant zero and the L3 mask is used.
+		if (opcode < ceil_IADD_M) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::IADD_M;
+			ibc.idst = &nreg->r[dst];
+			ibc.imm = signExtend2sCompl(instr.getImm32());
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+				ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+			}
+			else {
+				ibc.isrc = &zero;
+				ibc.memMask = ScratchpadL3Mask;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		// Integer register instructions (ISUB_R, IMUL_R, IXOR_R, ...): with
+		// src == dst the operand is the sign-extended immediate, stored in
+		// ibc.imm and referenced through ibc.isrc.
+		if (opcode < ceil_ISUB_R) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::ISUB_R;
+			ibc.idst = &nreg->r[dst];
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+			}
+			else {
+				ibc.imm = signExtend2sCompl(instr.getImm32());
+				ibc.isrc = &ibc.imm;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		if (opcode < ceil_ISUB_M) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::ISUB_M;
+			ibc.idst = &nreg->r[dst];
+			ibc.imm = signExtend2sCompl(instr.getImm32());
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+				ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+			}
+			else {
+				ibc.isrc = &zero;
+				ibc.memMask = ScratchpadL3Mask;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		if (opcode < ceil_IMUL_R) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::IMUL_R;
+			ibc.idst = &nreg->r[dst];
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+			}
+			else {
+				ibc.imm = signExtend2sCompl(instr.getImm32());
+				ibc.isrc = &ibc.imm;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		if (opcode < ceil_IMUL_M) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::IMUL_M;
+			ibc.idst = &nreg->r[dst];
+			ibc.imm = signExtend2sCompl(instr.getImm32());
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+				ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+			}
+			else {
+				ibc.isrc = &zero;
+				ibc.memMask = ScratchpadL3Mask;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		// IMULH_R / ISMULH_R always take the source register, even when
+		// src == dst (no immediate form).
+		if (opcode < ceil_IMULH_R) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::IMULH_R;
+			ibc.idst = &nreg->r[dst];
+			ibc.isrc = &nreg->r[src];
+			registerUsage[dst] = i;
+			return;
+		}
+
+		if (opcode < ceil_IMULH_M) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::IMULH_M;
+			ibc.idst = &nreg->r[dst];
+			ibc.imm = signExtend2sCompl(instr.getImm32());
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+				ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+			}
+			else {
+				ibc.isrc = &zero;
+				ibc.memMask = ScratchpadL3Mask;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		if (opcode < ceil_ISMULH_R) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::ISMULH_R;
+			ibc.idst = &nreg->r[dst];
+			ibc.isrc = &nreg->r[src];
+			registerUsage[dst] = i;
+			return;
+		}
+
+		if (opcode < ceil_ISMULH_M) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::ISMULH_M;
+			ibc.idst = &nreg->r[dst];
+			ibc.imm = signExtend2sCompl(instr.getImm32());
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+				ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+			}
+			else {
+				ibc.isrc = &zero;
+				ibc.memMask = ScratchpadL3Mask;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		// IMUL_RCP is lowered here: a divisor that is zero or a power of two
+		// becomes a NOP (and does not update registerUsage); any other divisor
+		// becomes an IMUL_R by the precomputed reciprocal.
+		if (opcode < ceil_IMUL_RCP) {
+			const uint32_t divisor = instr.getImm32();
+			if (!isZeroOrPowerOf2(divisor)) {
+				auto dst = instr.dst % RegistersCount;
+				ibc.type = InstructionType::IMUL_R;
+				ibc.idst = &nreg->r[dst];
+				ibc.imm = randomx_reciprocal(divisor);
+				ibc.isrc = &ibc.imm;
+				registerUsage[dst] = i;
+			}
+			else {
+				ibc.type = InstructionType::NOP;
+			}
+			return;
+		}
+
+		if (opcode < ceil_INEG_R) {
+			auto dst = instr.dst % RegistersCount;
+			ibc.type = InstructionType::INEG_R;
+			ibc.idst = &nreg->r[dst];
+			registerUsage[dst] = i;
+			return;
+		}
+
+		if (opcode < ceil_IXOR_R) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::IXOR_R;
+			ibc.idst = &nreg->r[dst];
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+			}
+			else {
+				ibc.imm = signExtend2sCompl(instr.getImm32());
+				ibc.isrc = &ibc.imm;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		if (opcode < ceil_IXOR_M) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::IXOR_M;
+			ibc.idst = &nreg->r[dst];
+			ibc.imm = signExtend2sCompl(instr.getImm32());
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+				ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+			}
+			else {
+				ibc.isrc = &zero;
+				ibc.memMask = ScratchpadL3Mask;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		// Rotations: unlike the arithmetic instructions above, the immediate
+		// form stores the raw 32-bit immediate without sign extension.
+		if (opcode < ceil_IROR_R) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::IROR_R;
+			ibc.idst = &nreg->r[dst];
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+			}
+			else {
+				ibc.imm = instr.getImm32();
+				ibc.isrc = &ibc.imm;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		if (opcode < ceil_IROL_R) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::IROL_R;
+			ibc.idst = &nreg->r[dst];
+			if (src != dst) {
+				ibc.isrc = &nreg->r[src];
+			}
+			else {
+				ibc.imm = instr.getImm32();
+				ibc.isrc = &ibc.imm;
+			}
+			registerUsage[dst] = i;
+			return;
+		}
+
+		// ISWAP_R modifies both registers, so both usage entries are updated;
+		// swapping a register with itself is compiled to a NOP.
+		if (opcode < ceil_ISWAP_R) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			if (src != dst) {
+				ibc.idst = &nreg->r[dst];
+				ibc.isrc = &nreg->r[src];
+				ibc.type = InstructionType::ISWAP_R;
+				registerUsage[dst] = i;
+				registerUsage[src] = i;
+			}
+			else {
+				ibc.type = InstructionType::NOP;
+			}
+			return;
+		}
+
+		// Floating-point instructions below never touch registerUsage.
+		// FSWAP_R indexes the f group followed by the e group: dst values below
+		// RegisterCountFlt select f, the remaining ones select e.
+		if (opcode < ceil_FSWAP_R) {
+			auto dst = instr.dst % RegistersCount;
+			ibc.type = InstructionType::FSWAP_R;
+			if (dst < RegisterCountFlt)
+				ibc.fdst = &nreg->f[dst];
+			else
+				ibc.fdst = &nreg->e[dst - RegisterCountFlt];
+			return;
+		}
+
+		if (opcode < ceil_FADD_R) {
+			auto dst = instr.dst % RegisterCountFlt;
+			auto src = instr.src % RegisterCountFlt;
+			ibc.type = InstructionType::FADD_R;
+			ibc.fdst = &nreg->f[dst];
+			ibc.fsrc = &nreg->a[src];
+			return;
+		}
+
+		if (opcode < ceil_FADD_M) {
+			auto dst = instr.dst % RegisterCountFlt;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::FADD_M;
+			ibc.fdst = &nreg->f[dst];
+			ibc.isrc = &nreg->r[src];
+			ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+			ibc.imm = signExtend2sCompl(instr.getImm32());
+			return;
+		}
+
+		if (opcode < ceil_FSUB_R) {
+			auto dst = instr.dst % RegisterCountFlt;
+			auto src = instr.src % RegisterCountFlt;
+			ibc.type = InstructionType::FSUB_R;
+			ibc.fdst = &nreg->f[dst];
+			ibc.fsrc = &nreg->a[src];
+			return;
+		}
+
+		if (opcode < ceil_FSUB_M) {
+			auto dst = instr.dst % RegisterCountFlt;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::FSUB_M;
+			ibc.fdst = &nreg->f[dst];
+			ibc.isrc = &nreg->r[src];
+			ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+			ibc.imm = signExtend2sCompl(instr.getImm32());
+			return;
+		}
+
+		if (opcode < ceil_FSCAL_R) {
+			auto dst = instr.dst % RegisterCountFlt;
+			ibc.fdst = &nreg->f[dst];
+			ibc.type = InstructionType::FSCAL_R;
+			return;
+		}
+
+		// FMUL_R, FDIV_M and FSQRT_R write to the e register group.
+		if (opcode < ceil_FMUL_R) {
+			auto dst = instr.dst % RegisterCountFlt;
+			auto src = instr.src % RegisterCountFlt;
+			ibc.type = InstructionType::FMUL_R;
+			ibc.fdst = &nreg->e[dst];
+			ibc.fsrc = &nreg->a[src];
+			return;
+		}
+
+		if (opcode < ceil_FDIV_M) {
+			auto dst = instr.dst % RegisterCountFlt;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::FDIV_M;
+			ibc.fdst = &nreg->e[dst];
+			ibc.isrc = &nreg->r[src];
+			ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+			ibc.imm = signExtend2sCompl(instr.getImm32());
+			return;
+		}
+
+		if (opcode < ceil_FSQRT_R) {
+			auto dst = instr.dst % RegisterCountFlt;
+			ibc.type = InstructionType::FSQRT_R;
+			ibc.fdst = &nreg->e[dst];
+			return;
+		}
+
+		// CBRANCH: the jump target is the recorded usage index of the condition
+		// register. The immediate gets bit `shift` forced on and the bit below
+		// it cleared; memMask holds the condition mask moved to the same
+		// position. Afterwards every integer register is marked as used at i.
+		if (opcode < ceil_CBRANCH) {
+			ibc.type = InstructionType::CBRANCH;
+			//jump condition
+			int creg = instr.dst % RegistersCount;
+			ibc.idst = &nreg->r[creg];
+			ibc.target = registerUsage[creg];
+			int shift = instr.getModCond() + ConditionOffset;
+			ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
+			if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
+				ibc.imm &= ~(1ULL << (shift - 1));
+			ibc.memMask = ConditionMask << shift;
+			//mark all registers as used
+			for (unsigned j = 0; j < RegistersCount; ++j) {
+				registerUsage[j] = i;
+			}
+			return;
+		}
+
+		// CFROUND keeps only the low 6 bits of the immediate.
+		if (opcode < ceil_CFROUND) {
+			auto src = instr.src % RegistersCount;
+			ibc.isrc = &nreg->r[src];
+			ibc.type = InstructionType::CFROUND;
+			ibc.imm = instr.getImm32() & 63;
+			return;
+		}
+
+		// ISTORE does not modify a register, so registerUsage is left alone.
+		// The L3 mask is selected by the mod.cond field reaching
+		// StoreL3Condition rather than by src == dst.
+		if (opcode < ceil_ISTORE) {
+			auto dst = instr.dst % RegistersCount;
+			auto src = instr.src % RegistersCount;
+			ibc.type = InstructionType::ISTORE;
+			ibc.idst = &nreg->r[dst];
+			ibc.isrc = &nreg->r[src];
+			ibc.imm = signExtend2sCompl(instr.getImm32());
+			if (instr.getModCond() < StoreL3Condition)
+				ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
+			else
+				ibc.memMask = ScratchpadL3Mask;
+			return;
+		}
+
+		if (opcode < ceil_NOP) {
+			ibc.type = InstructionType::NOP;
+			return;
+		}
+
+		// Every opcode value is expected to fall below one of the thresholds.
+		UNREACHABLE;
+	}
+}
--- a/crypto/randomx/bytecode_machine.hpp
+++ b/crypto/randomx/bytecode_machine.hpp
@ -0,0 +1,322 @@
+/*
+Copyright (c) 2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include "common.hpp"
+#include "intrin_portable.h"
+#include "instruction.hpp"
+#include "program.hpp"
+
+namespace randomx {
+
+	//Register file in machine (host) byte order, as opposed to the
+	//little-endian RegisterFile declared in common.hpp.
+	//BytecodeMachine keeps a pointer to one of these and the compiled bytecode
+	//stores raw pointers into its arrays.
+	struct NativeRegisterFile {
+		//integer registers; the only group with a default initializer (all zero)
+		int_reg_t r[RegistersCount] = { 0 };
+		//vector register group "f" (not default-initialized)
+		rx_vec_f128 f[RegisterCountFlt];
+		//vector register group "e"; FDIV_M and FSQRT_R bytecode writes into this group
+		rx_vec_f128 e[RegisterCountFlt];
+		//vector register group "a" (not default-initialized)
+		rx_vec_f128 a[RegisterCountFlt];
+	};
+
+	//One pre-decoded instruction as consumed by the exe_* handlers of BytecodeMachine.
+	//Each union holds either the integer or the floating point view of the same slot;
+	//which member is valid depends on `type`.
+	struct InstructionByteCode {
+		//destination register (written through by the handlers)
+		union {
+			int_reg_t* idst;
+			rx_vec_f128* fdst;
+		};
+		//source register; for memory operands isrc supplies the base address value
+		union {
+			const int_reg_t* isrc;
+			const rx_vec_f128* fsrc;
+		};
+		//immediate operand, readable as unsigned or signed 64-bit
+		union {
+			uint64_t imm;
+			int64_t simm;
+		};
+		InstructionType type;
+		//`target` is the program counter value assigned by CBRANCH when the branch is taken;
+		//`shift` is the left shift applied to the source by IADD_RS
+		union {
+			int16_t target;
+			uint16_t shift;
+		};
+		//scratchpad address mask for memory operands and ISTORE;
+		//for CBRANCH it holds the shifted condition mask instead
+		uint32_t memMask;
+	};
+
+	//Cumulative opcode boundaries: ceil_X = ceil_<previous> + RANDOMX_FREQ_X.
+	//An opcode byte belongs to instruction X when it is below ceil_X but not below the
+	//previous boundary, which is how compileInstruction tests them (`opcode < ceil_X`).
+	//The order of the declarations below therefore defines the opcode layout and must not change.
+	//Because the frequencies are asserted to sum to 256 (see wtSum in common.hpp), ceil_NOP is 256.
+#define OPCODE_CEIL_DECLARE(curr, prev) constexpr int ceil_ ## curr = ceil_ ## prev + RANDOMX_FREQ_ ## curr;
+	//Start of the chain; "NULL" is only pasted into an identifier here, never expanded.
+	constexpr int ceil_NULL = 0;
+	OPCODE_CEIL_DECLARE(IADD_RS, NULL);
+	OPCODE_CEIL_DECLARE(IADD_M, IADD_RS);
+	OPCODE_CEIL_DECLARE(ISUB_R, IADD_M);
+	OPCODE_CEIL_DECLARE(ISUB_M, ISUB_R);
+	OPCODE_CEIL_DECLARE(IMUL_R, ISUB_M);
+	OPCODE_CEIL_DECLARE(IMUL_M, IMUL_R);
+	OPCODE_CEIL_DECLARE(IMULH_R, IMUL_M);
+	OPCODE_CEIL_DECLARE(IMULH_M, IMULH_R);
+	OPCODE_CEIL_DECLARE(ISMULH_R, IMULH_M);
+	OPCODE_CEIL_DECLARE(ISMULH_M, ISMULH_R);
+	OPCODE_CEIL_DECLARE(IMUL_RCP, ISMULH_M);
+	OPCODE_CEIL_DECLARE(INEG_R, IMUL_RCP);
+	OPCODE_CEIL_DECLARE(IXOR_R, INEG_R);
+	OPCODE_CEIL_DECLARE(IXOR_M, IXOR_R);
+	OPCODE_CEIL_DECLARE(IROR_R, IXOR_M);
+	OPCODE_CEIL_DECLARE(IROL_R, IROR_R);
+	OPCODE_CEIL_DECLARE(ISWAP_R, IROL_R);
+	OPCODE_CEIL_DECLARE(FSWAP_R, ISWAP_R);
+	OPCODE_CEIL_DECLARE(FADD_R, FSWAP_R);
+	OPCODE_CEIL_DECLARE(FADD_M, FADD_R);
+	OPCODE_CEIL_DECLARE(FSUB_R, FADD_M);
+	OPCODE_CEIL_DECLARE(FSUB_M, FSUB_R);
+	OPCODE_CEIL_DECLARE(FSCAL_R, FSUB_M);
+	OPCODE_CEIL_DECLARE(FMUL_R, FSCAL_R);
+	OPCODE_CEIL_DECLARE(FDIV_M, FMUL_R);
+	OPCODE_CEIL_DECLARE(FSQRT_R, FDIV_M);
+	OPCODE_CEIL_DECLARE(CBRANCH, FSQRT_R);
+	OPCODE_CEIL_DECLARE(CFROUND, CBRANCH);
+	OPCODE_CEIL_DECLARE(ISTORE, CFROUND);
+	OPCODE_CEIL_DECLARE(NOP, ISTORE);
+#undef OPCODE_CEIL_DECLARE
+
+	//Parameter list shared by executeInstruction and every exe_* handler:
+	//the bytecode entry, the program counter (by reference, so a handler can redirect it),
+	//the scratchpad base pointer and the per-program configuration.
+#define RANDOMX_EXE_ARGS InstructionByteCode& ibc, int& pc, uint8_t* scratchpad, ProgramConfiguration& config
+	//Parameter list shared by compileInstruction and the gen_* generators:
+	//the source instruction, its index within the program and the bytecode entry to fill in.
+#define RANDOMX_GEN_ARGS Instruction& instr, int i, InstructionByteCode& ibc
+
+	class BytecodeMachine;
+
+	//Pointer to a BytecodeMachine member function that compiles one instruction (the gen_* members).
+	typedef void(BytecodeMachine::*InstructionGenBytecode)(RANDOMX_GEN_ARGS);
+
+	//Translates a RandomX Program into InstructionByteCode entries bound to a
+	//NativeRegisterFile and provides the portable (non-JIT) handlers that execute them.
+	//The static exe_* handlers operate only on the bytecode entry they are given.
+	class BytecodeMachine {
+	public:
+		//Resets the per-register usage table to -1 and remembers the register file
+		//that subsequently compiled bytecode will point into.
+		void beginCompilation(NativeRegisterFile& regFile) {
+			for (unsigned i = 0; i < RegistersCount; ++i) {
+				registerUsage[i] = -1;
+			}
+			nreg = &regFile;
+		}
+
+		//Compiles all RANDOMX_PROGRAM_SIZE instructions of `program` into `bytecode`.
+		//The resulting entries hold pointers into `regFile`, so it must outlive the bytecode.
+		void compileProgram(Program& program, InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], NativeRegisterFile& regFile) {
+			beginCompilation(regFile);
+			for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
+				auto& instr = program(i);
+				auto& ibc = bytecode[i];
+				compileInstruction(instr, i, ibc);
+			}
+		}
+
+		//Runs one pass over the bytecode. `pc` is handed to each handler by reference:
+		//a taken CBRANCH overwrites it and the loop's ++pc then resumes at target + 1.
+		static void executeBytecode(InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], uint8_t* scratchpad, ProgramConfiguration& config) {
+			for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) {
+				auto& ibc = bytecode[pc];
+				executeInstruction(ibc, pc, scratchpad, config);
+			}
+		}
+
+		//Fills `ibc` for instruction `instr` at program index `i`.
+		//With RANDOMX_GEN_TABLE defined this dispatches through genTable indexed by the
+		//opcode byte; otherwise the function is only declared here and defined out of line.
+		void compileInstruction(RANDOMX_GEN_ARGS)
+#ifdef RANDOMX_GEN_TABLE
+		{
+			auto generator = genTable[instr.opcode];
+			(this->*generator)(instr, i, ibc);
+		}
+#else
+		;
+#endif
+
+		//Executes a single bytecode entry (defined out of line).
+		static void executeInstruction(RANDOMX_EXE_ARGS);
+
+		//dst += (src << shift) + imm
+		static void exe_IADD_RS(RANDOMX_EXE_ARGS) {
+			*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm;
+		}
+
+		//dst += 64-bit value loaded from the scratchpad
+		static void exe_IADD_M(RANDOMX_EXE_ARGS) {
+			*ibc.idst += load64(getScratchpadAddress(ibc, scratchpad));
+		}
+
+		//dst -= src
+		static void exe_ISUB_R(RANDOMX_EXE_ARGS) {
+			*ibc.idst -= *ibc.isrc;
+		}
+
+		//dst -= 64-bit value loaded from the scratchpad
+		static void exe_ISUB_M(RANDOMX_EXE_ARGS) {
+			*ibc.idst -= load64(getScratchpadAddress(ibc, scratchpad));
+		}
+
+		//dst *= src (unsigned 64-bit, wraps on overflow)
+		static void exe_IMUL_R(RANDOMX_EXE_ARGS) {
+			*ibc.idst *= *ibc.isrc;
+		}
+
+		//dst *= 64-bit value loaded from the scratchpad
+		static void exe_IMUL_M(RANDOMX_EXE_ARGS) {
+			*ibc.idst *= load64(getScratchpadAddress(ibc, scratchpad));
+		}
+
+		//dst = mulh(dst, src); presumably the high half of the unsigned 128-bit product - see mulh
+		static void exe_IMULH_R(RANDOMX_EXE_ARGS) {
+			*ibc.idst = mulh(*ibc.idst, *ibc.isrc);
+		}
+
+		//same as IMULH_R with the second operand loaded from the scratchpad
+		static void exe_IMULH_M(RANDOMX_EXE_ARGS) {
+			*ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc, scratchpad)));
+		}
+
+		//dst = smulh(dst, src) with both operands reinterpreted as signed two's complement values
+		static void exe_ISMULH_R(RANDOMX_EXE_ARGS) {
+			*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc));
+		}
+
+		//same as ISMULH_R with the second operand loaded from the scratchpad
+		static void exe_ISMULH_M(RANDOMX_EXE_ARGS) {
+			*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc, scratchpad))));
+		}
+
+		//dst = -dst, computed on the unsigned representation
+		static void exe_INEG_R(RANDOMX_EXE_ARGS) {
+			*ibc.idst = ~(*ibc.idst) + 1; //two's complement negative
+		}
+
+		//dst ^= src
+		static void exe_IXOR_R(RANDOMX_EXE_ARGS) {
+			*ibc.idst ^= *ibc.isrc;
+		}
+
+		//dst ^= 64-bit value loaded from the scratchpad
+		static void exe_IXOR_M(RANDOMX_EXE_ARGS) {
+			*ibc.idst ^= load64(getScratchpadAddress(ibc, scratchpad));
+		}
+
+		//rotate dst right by the low 6 bits of src
+		static void exe_IROR_R(RANDOMX_EXE_ARGS) {
+			*ibc.idst = rotr(*ibc.idst, *ibc.isrc & 63);
+		}
+
+		//rotate dst left by the low 6 bits of src
+		static void exe_IROL_R(RANDOMX_EXE_ARGS) {
+			*ibc.idst = rotl(*ibc.idst, *ibc.isrc & 63);
+		}
+
+		//Swaps the values of the source and destination registers.
+		//isrc is declared pointer-to-const, so constness is cast away for the write;
+		//this relies on isrc pointing at a mutable register for this instruction type.
+		static void exe_ISWAP_R(RANDOMX_EXE_ARGS) {
+			int_reg_t temp = *ibc.isrc;
+			*(int_reg_t*)ibc.isrc = *ibc.idst;
+			*ibc.idst = temp;
+		}
+
+		//dst = rx_swap_vec_f128(dst); presumably exchanges the two lanes of the vector - see intrin_portable.h
+		static void exe_FSWAP_R(RANDOMX_EXE_ARGS) {
+			*ibc.fdst = rx_swap_vec_f128(*ibc.fdst);
+		}
+
+		//dst += src (vector add)
+		static void exe_FADD_R(RANDOMX_EXE_ARGS) {
+			*ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc);
+		}
+
+		//dst += scratchpad operand converted with rx_cvt_packed_int_vec_f128
+		static void exe_FADD_M(RANDOMX_EXE_ARGS) {
+			rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
+			*ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc);
+		}
+
+		//dst -= src (vector subtract)
+		static void exe_FSUB_R(RANDOMX_EXE_ARGS) {
+			*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc);
+		}
+
+		//dst -= scratchpad operand converted with rx_cvt_packed_int_vec_f128
+		static void exe_FSUB_M(RANDOMX_EXE_ARGS) {
+			rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
+			*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc);
+		}
+
+		//XORs dst with the constant 0x80F0000000000000, i.e. bit 63 plus bits 52-55.
+		//In an IEEE-754 double that is the sign bit and the four lowest exponent bits;
+		//presumably rx_set1_vec_f128 replicates the pattern into every lane - confirm in intrin_portable.h.
+		static void exe_FSCAL_R(RANDOMX_EXE_ARGS) {
+			const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000);
+			*ibc.fdst = rx_xor_vec_f128(*ibc.fdst, mask);
+		}
+
+		//dst *= src (vector multiply)
+		static void exe_FMUL_R(RANDOMX_EXE_ARGS) {
+			*ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc);
+		}
+
+		//dst /= scratchpad operand; the converted operand first has its
+		//exponent/mantissa bits forced by maskRegisterExponentMantissa
+		static void exe_FDIV_M(RANDOMX_EXE_ARGS) {
+			rx_vec_f128 fsrc = maskRegisterExponentMantissa(
+				config,
+				rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad))
+			);
+			*ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc);
+		}
+
+		//dst = sqrt(dst) (vector square root)
+		static void exe_FSQRT_R(RANDOMX_EXE_ARGS) {
+			*ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst);
+		}
+
+		//Adds imm to dst, then branches when all bits selected by memMask are zero.
+		//For this instruction memMask carries the condition mask, not a scratchpad mask.
+		static void exe_CBRANCH(RANDOMX_EXE_ARGS) {
+			*ibc.idst += ibc.imm;
+			if ((*ibc.idst & ibc.memMask) == 0) {
+				pc = ibc.target;
+			}
+		}
+
+		//Sets the rounding mode to the low two bits of src rotated right by imm.
+		static void exe_CFROUND(RANDOMX_EXE_ARGS) {
+			rx_set_rounding_mode(rotr(*ibc.isrc, ibc.imm) % 4);
+		}
+
+		//Stores src to the scratchpad at offset (dst + imm) & memMask.
+		static void exe_ISTORE(RANDOMX_EXE_ARGS) {
+			store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc);
+		}
+	protected:
+		//Keeps only the bits of x covered by dynamicMantissaMask and then ORs in the
+		//per-program exponent mask loaded from config.eMask.
+		static rx_vec_f128 maskRegisterExponentMantissa(ProgramConfiguration& config, rx_vec_f128 x) {
+			const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask);
+			const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask);
+			x = rx_and_vec_f128(x, xmantissaMask);
+			x = rx_or_vec_f128(x, xexponentMask);
+			return x;
+		}
+
+	private:
+		//constant defined out of line; NOTE(review): its users are not visible in this header
+		static const int_reg_t zero;
+		//per integer register bookkeeping reset to -1 by beginCompilation;
+		//compileInstruction reads it as the CBRANCH target and overwrites it with instruction indices
+		int registerUsage[RegistersCount];
+		//register file the compiled bytecode points into (set by beginCompilation)
+		NativeRegisterFile* nreg;
+
+		//Address of a memory operand: (src + imm) & memMask, truncated to 32 bits,
+		//applied as a byte offset from the scratchpad base.
+		static void* getScratchpadAddress(InstructionByteCode& ibc, uint8_t* scratchpad) {
+			uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
+			return scratchpad + addr;
+		}
+
+#ifdef RANDOMX_GEN_TABLE
+		//opcode byte -> generator member function, used by compileInstruction
+		static InstructionGenBytecode genTable[256];
+
+		//one generator per instruction type (defined out of line)
+		void gen_IADD_RS(RANDOMX_GEN_ARGS);
+		void gen_IADD_M(RANDOMX_GEN_ARGS);
+		void gen_ISUB_R(RANDOMX_GEN_ARGS);
+		void gen_ISUB_M(RANDOMX_GEN_ARGS);
+		void gen_IMUL_R(RANDOMX_GEN_ARGS);
+		void gen_IMUL_M(RANDOMX_GEN_ARGS);
+		void gen_IMULH_R(RANDOMX_GEN_ARGS);
+		void gen_IMULH_M(RANDOMX_GEN_ARGS);
+		void gen_ISMULH_R(RANDOMX_GEN_ARGS);
+		void gen_ISMULH_M(RANDOMX_GEN_ARGS);
+		void gen_IMUL_RCP(RANDOMX_GEN_ARGS);
+		void gen_INEG_R(RANDOMX_GEN_ARGS);
+		void gen_IXOR_R(RANDOMX_GEN_ARGS);
+		void gen_IXOR_M(RANDOMX_GEN_ARGS);
+		void gen_IROR_R(RANDOMX_GEN_ARGS);
+		void gen_IROL_R(RANDOMX_GEN_ARGS);
+		void gen_ISWAP_R(RANDOMX_GEN_ARGS);
+		void gen_FSWAP_R(RANDOMX_GEN_ARGS);
+		void gen_FADD_R(RANDOMX_GEN_ARGS);
+		void gen_FADD_M(RANDOMX_GEN_ARGS);
+		void gen_FSUB_R(RANDOMX_GEN_ARGS);
+		void gen_FSUB_M(RANDOMX_GEN_ARGS);
+		void gen_FSCAL_R(RANDOMX_GEN_ARGS);
+		void gen_FMUL_R(RANDOMX_GEN_ARGS);
+		void gen_FDIV_M(RANDOMX_GEN_ARGS);
+		void gen_FSQRT_R(RANDOMX_GEN_ARGS);
+		void gen_CBRANCH(RANDOMX_GEN_ARGS);
+		void gen_CFROUND(RANDOMX_GEN_ARGS);
+		void gen_ISTORE(RANDOMX_GEN_ARGS);
+		void gen_NOP(RANDOMX_GEN_ARGS);
+#endif
+	};
+}
--- a/crypto/randomx/common.hpp
+++ b/crypto/randomx/common.hpp
@ -0,0 +1,194 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <iostream>
+#include <climits>
+#include "blake2/endian.h"
+#include "configuration.h"
+#include "randomx.h"
+
+namespace randomx {
+
+	//Compile-time validation of the parameters defined in configuration.h.
+	//Each assertion's message states the constraint it enforces.
+	static_assert(RANDOMX_ARGON_MEMORY >= 8, "RANDOMX_ARGON_MEMORY must be at least 8.");
+	static_assert(RANDOMX_ARGON_MEMORY <= 2097152, "RANDOMX_ARGON_MEMORY must not exceed 2097152.");
+	static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2.");
+	static_assert(RANDOMX_ARGON_ITERATIONS > 0 && RANDOMX_ARGON_ITERATIONS < UINT32_MAX, "RANDOMX_ARGON_ITERATIONS must be a positive 32-bit integer.");
+	static_assert(RANDOMX_ARGON_LANES > 0 && RANDOMX_ARGON_LANES <= 16777215, "RANDOMX_ARGON_LANES out of range");
+	static_assert(RANDOMX_DATASET_BASE_SIZE >= 64, "RANDOMX_DATASET_BASE_SIZE must be at least 64.");
+	static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2.");
+	static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296.");
+	static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64.");
+	static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB.");
+	static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0");
+	static_assert(RANDOMX_PROGRAM_SIZE <= 32768, "RANDOMX_PROGRAM_SIZE must not exceed 32768");
+	static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0");
+	static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0");
+	static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2.");
+	static_assert(RANDOMX_SCRATCHPAD_L3 >= RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2.");
+	static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2.");
+	static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1.");
+	static_assert(RANDOMX_SCRATCHPAD_L1 >= 64, "RANDOMX_SCRATCHPAD_L1 must be at least 64.");
+	static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
+	static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
+	static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0");
+	static_assert(RANDOMX_SUPERSCALAR_LATENCY <= 10000, "RANDOMX_SUPERSCALAR_LATENCY must not exceed 10000");
+	static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
+	static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
+	static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");
+
+	//Sum of all instruction frequencies. The opcode is a single byte, so the
+	//frequencies must partition exactly 256 values (asserted below).
+	constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
+		RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
+		RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
+		RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
+		RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
+		RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
+		RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;
+
+	static_assert(wtSum == 256,	"Sum of instruction frequencies must be 256.");
+
+
+	//Constants derived from configuration.h.
+	constexpr uint32_t ArgonBlockSize = 1024;
+	//Salt length without the terminating NUL. The leading "" is concatenated with the
+	//macro, so RANDOMX_ARGON_SALT must expand to a string literal.
+	constexpr int ArgonSaltSize = sizeof("" RANDOMX_ARGON_SALT) - 1;
+	static_assert(ArgonSaltSize >= 8, "RANDOMX_ARGON_SALT must be at least 8 characters long");
+	constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2;
+	constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
+	constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3;
+	//offset mask within the base dataset with the bits below one cache line cleared
+	constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1);
+	//cache size in bytes (RANDOMX_ARGON_MEMORY is given in KiB)
+	constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize;
+	//total dataset size in bytes
+	constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;
+	constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE;
+	//RANDOMX_JUMP_BITS consecutive one bits; CBRANCH shifts it into position
+	constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1);
+	constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET;
+	//ISTORE uses the L3 scratchpad mask when its mod.cond field is at least this value
+	constexpr int StoreL3Condition = 14;
+
+	//Prevent some unsafe configurations.
+#ifndef RANDOMX_UNSAFE
+	static_assert((uint64_t)ArgonBlockSize * RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY + 33554432 >= (uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE, "Unsafe configuration: Memory-time tradeoffs");
+	static_assert((128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3, "Unsafe configuration: Insufficient Scratchpad writes");
+	static_assert(RANDOMX_PROGRAM_COUNT > 1, "Unsafe configuration: Program filtering strategies");
+	static_assert(RANDOMX_PROGRAM_SIZE >= 64, "Unsafe configuration: Low program entropy");
+	static_assert(RANDOMX_PROGRAM_ITERATIONS >= 400, "Unsafe configuration: High compilation overhead");
+#endif
+
+#ifdef TRACE
+	constexpr bool trace = true;
+#else
+	constexpr bool trace = false;
+#endif
+
+//UNREACHABLE marks a point that control flow can never reach, letting the
+//optimizer drop the fall-through path. It expands to nothing on compilers
+//without a known intrinsic. Both tests use defined() so that an undefined
+//_MSC_VER is never evaluated as an expression (which trips -Wundef).
+#ifndef UNREACHABLE
+#if defined(__GNUC__)
+#define UNREACHABLE __builtin_unreachable()
+#elif defined(_MSC_VER)
+#define UNREACHABLE __assume(false)
+#else
+#define UNREACHABLE
+#endif
+#endif
+
+//Select the JIT compiler back-end for the target architecture.
+//RANDOMX_HAVE_COMPILER is 1 when a native JIT exists (x86-64, AArch64, 64-bit RISC-V)
+//and 0 otherwise, in which case JitCompiler aliases JitCompilerFallback.
+//The classes are only forward-declared here.
+//NOTE(review): the part of crypto/CMakeLists.txt visible in this commit lists only the
+//x86 JIT sources (jit_compiler_x86.cpp / jit_compiler_x86_static.S) - confirm that
+//non-x86 targets are handled elsewhere in the build before relying on the other branches.
+#if defined(_M_X64) || defined(__x86_64__)
+	#define RANDOMX_HAVE_COMPILER 1
+	#define RANDOMX_COMPILER_X86
+	class JitCompilerX86;
+	using JitCompiler = JitCompilerX86;
+#elif defined(__aarch64__)
+	#define RANDOMX_HAVE_COMPILER 1
+	#define RANDOMX_COMPILER_A64
+	class JitCompilerA64;
+	using JitCompiler = JitCompilerA64;
+#elif defined(__riscv) && __riscv_xlen == 64
+	#define RANDOMX_HAVE_COMPILER 1
+	#define RANDOMX_COMPILER_RV64
+	class JitCompilerRV64;
+	using JitCompiler = JitCompilerRV64;
+#else
+	#define RANDOMX_HAVE_COMPILER 0
+	class JitCompilerFallback;
+	using JitCompiler = JitCompilerFallback;
+#endif
+
+	//32-bit address/offset type
+	using addr_t = uint32_t;
+
+	//integer register type (64-bit unsigned)
+	using int_reg_t = uint64_t;
+
+	//floating point register: a pair of doubles
+	struct fpu_reg_t {
+		double lo;
+		double hi;
+	};
+
+	//scratchpad level sizes expressed in 8-byte (int_reg_t) words
+	constexpr uint32_t ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / sizeof(int_reg_t);
+	constexpr uint32_t ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / sizeof(int_reg_t);
+	constexpr uint32_t ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / sizeof(int_reg_t);
+	//byte-offset masks: confine an offset to the level and clear its low 3 bits (8-byte granularity)
+	constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
+	constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
+	//same, with 16-byte granularity
+	constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16;
+	constexpr int ScratchpadL2Mask16 = (ScratchpadL2 / 2 - 1) * 16;
+	constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
+	//L3 mask with 64-byte granularity
+	constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
+	//number of integer registers; the floating point groups have half as many
+	constexpr int RegistersCount = 8;
+	constexpr int RegisterCountFlt = RegistersCount / 2;
+	constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
+	constexpr int RegisterNeedsSib = 4; //x86 r12 register
+
+	//Returns true when x has at most one bit set, i.e. x is zero or a power of two.
+	//Clearing the lowest set bit with x & (x - 1) leaves nothing behind in exactly those cases.
+	inline bool isZeroOrPowerOf2(uint64_t x) {
+		const uint64_t withoutLowestBit = x & (x - 1);
+		if (withoutLowestBit != 0) {
+			return false;
+		}
+		return true;
+	}
+
+	//Bit-layout constants for 64-bit doubles: 52 fraction bits, 11 exponent bits, bias 1023.
+	constexpr int mantissaSize = 52;
+	constexpr int exponentSize = 11;
+	//low 52 bits set
+	constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
+	//low 11 bits set (an unshifted exponent-field mask)
+	constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
+	constexpr int exponentBias = 1023;
+	constexpr int dynamicExponentBits = 4;
+	constexpr int staticExponentBits = 4;
+	constexpr uint64_t constExponentBits = 0x300;
+	//low 56 bits set: the fraction plus the dynamicExponentBits lowest exponent bits;
+	//BytecodeMachine::maskRegisterExponentMantissa ANDs FDIV_M operands with it
+	constexpr uint64_t dynamicMantissaMask = (1ULL << (mantissaSize + dynamicExponentBits)) - 1;
+
+	//Memory-related VM state handed to a compiled program (see ProgramFunc):
+	//two 32-bit address registers and a base pointer that defaults to null.
+	//NOTE(review): `memory` presumably points at the dataset or cache - confirm against the VM code.
+	struct MemoryRegisters {
+		addr_t mx, ma;
+		uint8_t* memory = nullptr;
+	};
+
+	//Register file in little-endian byte order: eight integer registers followed by
+	//the f, e and a groups of four two-double floating point registers each.
+	//The member order fixes the in-memory layout; it is passed by reference to ProgramFunc.
+	struct RegisterFile {
+		int_reg_t r[RegistersCount];
+		fpu_reg_t f[RegisterCountFlt];
+		fpu_reg_t e[RegisterCountFlt];
+		fpu_reg_t a[RegisterCountFlt];
+	};
+
+	//Function types (not pointer types) shared across the implementation.
+	//ProgramFunc: entry point of a compiled program taking the register file, the memory
+	//registers, the scratchpad and a 64-bit argument (NOTE(review): presumably the iteration count).
+	typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
+	//DatasetInitFunc: fills `dataset` from `cache` for the item range startBlock..endBlock.
+	typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
+
+	//Deallocation and initialization callbacks for the dataset and cache objects.
+	typedef void(DatasetDeallocFunc)(randomx_dataset*);
+	typedef void(CacheDeallocFunc)(randomx_cache*);
+	typedef void(CacheInitializeFunc)(randomx_cache*, const void*, size_t);
+}
--- a/crypto/randomx/configuration.h
+++ b/crypto/randomx/configuration.h
@ -0,0 +1,125 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+//NOTE: every value in this file influences the resulting hash or is bounded by the
+//static_asserts in common.hpp; changing any of them changes the proof-of-work.
+
+//Cache size in KiB. Must be a power of 2. (262144 KiB = 256 MiB)
+#define RANDOMX_ARGON_MEMORY       262144
+
+//Number of Argon2d iterations for Cache initialization.
+#define RANDOMX_ARGON_ITERATIONS   3
+
+//Number of parallel lanes for Cache initialization.
+#define RANDOMX_ARGON_LANES        1
+
+//Argon2d salt (8 bytes: "RandomX" followed by the byte 0x03)
+#define RANDOMX_ARGON_SALT         "RandomX\x03"
+
+//Number of random Cache accesses per Dataset item. Minimum is 2.
+#define RANDOMX_CACHE_ACCESSES     8
+
+//Target latency for SuperscalarHash (in cycles of the reference CPU).
+#define RANDOMX_SUPERSCALAR_LATENCY   170
+
+//Dataset base size in bytes. Must be a power of 2. (2147483648 = 2^31 = 2 GiB)
+#define RANDOMX_DATASET_BASE_SIZE  2147483648
+
+//Dataset extra size. Must be divisible by 64. (33554368 = 32 MiB - 64 bytes)
+#define RANDOMX_DATASET_EXTRA_SIZE 33554368
+
+//Number of instructions in a RandomX program. Must be divisible by 8.
+#define RANDOMX_PROGRAM_SIZE       256
+
+//Number of iterations during VM execution.
+#define RANDOMX_PROGRAM_ITERATIONS 2048
+
+//Number of chained VM executions per hash.
+#define RANDOMX_PROGRAM_COUNT      8
+
+//Scratchpad L3 size in bytes. Must be a power of 2. (2097152 = 2 MiB)
+#define RANDOMX_SCRATCHPAD_L3      2097152
+
+//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3. (262144 = 256 KiB)
+#define RANDOMX_SCRATCHPAD_L2      262144
+
+//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2. (16384 = 16 KiB)
+#define RANDOMX_SCRATCHPAD_L1      16384
+
+//Jump condition mask size in bits.
+#define RANDOMX_JUMP_BITS          8
+
+//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
+//(With the values here the sum is exactly 16.)
+#define RANDOMX_JUMP_OFFSET        8
+
+/*
+Instruction frequencies (per 256 opcodes)
+Total sum of frequencies must be 256
+*/
+
+//Integer instructions (subtotal: 120)
+#define RANDOMX_FREQ_IADD_RS       16
+#define RANDOMX_FREQ_IADD_M         7
+#define RANDOMX_FREQ_ISUB_R        16
+#define RANDOMX_FREQ_ISUB_M         7
+#define RANDOMX_FREQ_IMUL_R        16
+#define RANDOMX_FREQ_IMUL_M         4
+#define RANDOMX_FREQ_IMULH_R        4
+#define RANDOMX_FREQ_IMULH_M        1
+#define RANDOMX_FREQ_ISMULH_R       4
+#define RANDOMX_FREQ_ISMULH_M       1
+#define RANDOMX_FREQ_IMUL_RCP       8
+#define RANDOMX_FREQ_INEG_R         2
+#define RANDOMX_FREQ_IXOR_R        15
+#define RANDOMX_FREQ_IXOR_M         5
+#define RANDOMX_FREQ_IROR_R         8
+#define RANDOMX_FREQ_IROL_R         2
+#define RANDOMX_FREQ_ISWAP_R        4
+
+//Floating point instructions (subtotal: 94)
+#define RANDOMX_FREQ_FSWAP_R        4
+#define RANDOMX_FREQ_FADD_R        16
+#define RANDOMX_FREQ_FADD_M         5
+#define RANDOMX_FREQ_FSUB_R        16
+#define RANDOMX_FREQ_FSUB_M         5
+#define RANDOMX_FREQ_FSCAL_R        6
+#define RANDOMX_FREQ_FMUL_R        32
+#define RANDOMX_FREQ_FDIV_M         4
+#define RANDOMX_FREQ_FSQRT_R        6
+
+//Control instructions (subtotal: 26)
+#define RANDOMX_FREQ_CBRANCH       25
+#define RANDOMX_FREQ_CFROUND        1
+
+//Store instruction (subtotal: 16)
+#define RANDOMX_FREQ_ISTORE        16
+
+//No-op instruction (frequency 0: no opcode value maps to NOP)
+#define RANDOMX_FREQ_NOP            0
+/*                               ------
+                                  256
+*/
--- a/crypto/randomx/cpu.cpp
+++ b/crypto/randomx/cpu.cpp
@ -0,0 +1,128 @@
+/*
+Copyright (c) 2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "cpu.hpp"
+#include <cstring>
+
+#if defined(_M_X64) || defined(__x86_64__)
+	#define HAVE_CPUID
+	#if defined(_MSC_VER)
+		#include <intrin.h>
+		#define cpuid(info, x) __cpuidex(info, x, 0)
+	#else //GCC
+		#include <cpuid.h>
+		//Executes CPUID for leaf InfoType with sub-leaf 0 and stores
+		//EAX, EBX, ECX, EDX into info[0..3], mirroring the MSVC __cpuidex branch above.
+		//Declared static so this generically named helper has internal linkage and
+		//cannot clash with another `cpuid` symbol when the static library is linked
+		//into a larger binary.
+		static inline void cpuid(int info[4], int InfoType) {
+			__cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
+		}
+	#endif
+#endif
+
+#if defined(HAVE_HWCAP)
+	#include <sys/auxv.h>
+	#include <asm/hwcap.h>
+#endif
+
+#ifdef __riscv
+#include <signal.h>
+#include <setjmp.h>
+#include <cstdint>
+
+extern "C" uint64_t rv64_test_vector();
+extern "C" uint64_t rv64_test_vector_aes();
+
+static sigjmp_buf jump_buffer;
+static void sigill_handler(int) { siglongjmp(jump_buffer, 1); }
+
+void hashAes1Rx4_zvkned(const void *input, size_t inputSize, void *hash);
+#endif
+
+namespace randomx {
+
+	//Probes the host CPU once and records the feature flags exposed by the accessors.
+	//All flags start out false (see cpu.hpp); a branch that cannot detect a feature
+	//simply leaves it false.
+	Cpu::Cpu()
+	{
+#ifdef HAVE_CPUID
+		//x86-64: leaf 0 reports the highest supported basic leaf in EAX
+		int info[4];
+		cpuid(info, 0);
+		int nIds = info[0];
+		if (nIds >= 0x00000001) {
+			//leaf 1, ECX (info[2]): bit 9 -> ssse3_, bit 25 -> aes_
+			cpuid(info, 0x00000001);
+			ssse3_ = (info[2] & (1 << 9)) != 0;
+			aes_ = (info[2] & (1 << 25)) != 0;
+		}
+		if (nIds >= 0x00000007) {
+			//leaf 7 sub-leaf 0, EBX (info[1]): bit 5 -> avx2_
+			//NOTE(review): only the CPUID bit is tested here; operating system support
+			//for the AVX register state is not checked in this function.
+			cpuid(info, 0x00000007);
+			avx2_ = (info[1] & (1 << 5)) != 0;
+		}
+#elif defined(__aarch64__)
+		//HWCAP_AES is only visible when <asm/hwcap.h> was included, which this file
+		//does under HAVE_HWCAP; without it (and outside Apple targets) aes_ stays false.
+	#if defined(HWCAP_AES)
+		long hwcaps = getauxval(AT_HWCAP);
+		aes_ = (hwcaps & HWCAP_AES) != 0;
+	#elif defined(__APPLE__)
+		//Apple AArch64 targets are assumed to have AES without probing
+		aes_ = true;
+	#endif
+#elif defined(__riscv)
+		//RISC-V: execute probe instructions under a temporary SIGILL handler.
+		//If a probe faults, the handler siglongjmp()s back and the feature stays disabled.
+		struct sigaction new_action, old_action;
+
+		new_action.sa_handler = sigill_handler;
+		sigemptyset(&new_action.sa_mask);
+		new_action.sa_flags = 0;
+
+		if (sigaction(SIGILL, &new_action, &old_action) == 0) {
+			if (sigsetjmp(jump_buffer, 1) == 0) {
+				rvv_length = static_cast<int>(rv64_test_vector());
+				// If execution gets here, vector instructions executed successfully
+				rvv_ = true;
+			}
+
+			if (sigsetjmp(jump_buffer, 1) == 0) {
+				if (rv64_test_vector_aes() == 0) {
+					// If execution gets here, vector AES instructions executed successfully
+					// Now need to check that they actually do what they're supposed to do
+
+					uint64_t input[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+					uint64_t hash[8] = {};
+
+					//expected hashAes1Rx4_zvkned output for the fixed input above
+					static constexpr uint64_t ref_hash[8] = {
+						0x195268637f56cab0ull, 0xdf7d7d3553e9e1d1ull, 0x3067fb6e5efcbee2ull, 0xfab778b414feaf77ull,
+						0x99905a5000820817ull, 0xae359bff2379ff97ull, 0x0d87373e6505c4c3ull, 0xf3f5cffd57f2dd62ull
+					};
+
+					hashAes1Rx4_zvkned(input, sizeof(input), hash);
+
+					aes_ = (memcmp(hash, ref_hash, sizeof(hash)) == 0);
+				}
+			}
+
+			//restore whatever SIGILL disposition was installed before the probes
+			sigaction(SIGILL, &old_action, nullptr);
+		}
+#endif
+		//TODO POWER8 AES
+	}
+
+	const Cpu cpu;
+}
--- a/crypto/randomx/cpu.hpp
+++ b/crypto/randomx/cpu.hpp
@ -0,0 +1,56 @@
+/*
+Copyright (c) 2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+namespace randomx {
+
+	//CPU feature flags relevant to RandomX. The constructor (cpu.cpp) performs the
+	//detection; every flag defaults to false / 0 when detection is unavailable.
+	class Cpu {
+	public:
+		Cpu();
+
+		//hardware AES available
+		bool hasAes() const {
+			return aes_;
+		}
+
+		//SSSE3 available
+		bool hasSsse3() const {
+			return ssse3_;
+		}
+
+		//AVX2 available
+		bool hasAvx2() const {
+			return avx2_;
+		}
+
+#ifdef __riscv
+		//RISC-V vector extension available
+		bool hasRVV() const {
+			return rvv_;
+		}
+
+		//value returned by the vector-length probe (0 when the probe did not run or faulted)
+		int getRVV_Length() const {
+			return rvv_length;
+		}
+#endif
+
+	private:
+		bool aes_ = false;
+		bool ssse3_ = false;
+		bool avx2_ = false;
+#ifdef __riscv
+		bool rvv_ = false;
+		int rvv_length = 0;
+#endif
+	};
+
+	extern const Cpu cpu;
+}
--- a/crypto/randomx/cpu_rv64.S
+++ b/crypto/randomx/cpu_rv64.S
@ -0,0 +1,47 @@
+/*
+Copyright (c) 2018-2020, tevador    <tevador@gmail.com>
+Copyright (c) 2025, SChernykh       <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+.text
+.option arch, rv64gcv_zvkned
+.global rv64_test_vector
+.global rv64_test_vector_aes
+
+// Returns vector register length (in bits)
+// Takes no arguments; the result is left in x10.
+rv64_test_vector:
+	// Ask for up to 1024 elements...
+	li x10, 1024
+	// ...of 64 bits each with LMUL=1; the element count actually granted is
+	// written back to x10.
+	vsetvli x10, x10, e64, m1, ta, ma
+	// Granted element count * 64 (shift left by 6) gives the length in bits.
+	slli x10, x10, 6
+	ret
+
+// Executes one vector AES encrypt-round and one decrypt-round instruction
+// (enabled by the zvkned arch option above) on v0, then returns 0 in x10.
+// v0 is overwritten; its contents are not meaningful afterwards.
+// NOTE(review): presumably the caller detects missing support by trapping the
+// illegal-instruction fault raised here - confirm against the C++ caller.
+rv64_test_vector_aes:
+	// Vector length 8, 32-bit elements, LMUL=1; the resulting vl is discarded.
+	vsetivli zero, 8, e32, m1, ta, ma
+	vaesem.vv v0, v0
+	vaesdm.vv v0, v0
+	// Reaching this point means both instructions executed; report 0.
+	li x10, 0
+	ret
--- a/crypto/randomx/dataset.cpp
+++ b/crypto/randomx/dataset.cpp
@ -0,0 +1,196 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from Argon2 reference source code package used under CC0 Licence
+ * https://github.com/P-H-C/phc-winner-argon2
+ * Copyright 2015
+ * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
+*/
+
+#include <new>
+#include <algorithm>
+#include <stdexcept>
+#include <cstring>
+#include <limits>
+#include <cstring>
+#include <cassert>
+
+#include "common.hpp"
+#include "dataset.hpp"
+#include "virtual_memory.h"
+#include "superscalar.hpp"
+#include "blake2_generator.hpp"
+#include "reciprocal.h"
+#include "blake2/endian.h"
+#include "argon2.h"
+#include "argon2_core.h"
+#include "jit_compiler.hpp"
+#include "intrin_portable.h"
+
+// Compile-time checks on the Argon2 parameters used by initCache():
+// the memory size must split evenly across lanes * sync points, and the Argon2
+// block size must agree with randomx::ArgonBlockSize.
+static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
+static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unexpected value of ARGON2_BLOCK_SIZE");
+
+namespace randomx {
+
+	// Releases what a cache owns: its memory region (through the Allocator that
+	// is named as template argument) and its JIT compiler object.
+	template<class Allocator>
+	void deallocCache(randomx_cache* cache) {
+		if (cache->memory)
+			Allocator::freeMemory(cache->memory, CacheSize);
+		// delete on a null pointer is a no-op, so no separate check is needed.
+		delete cache->jit;
+	}
+
+	// Explicit instantiations for the two allocators used with caches.
+	template void deallocCache<DefaultAllocator>(randomx_cache* cache);
+	template void deallocCache<LargePageAllocator>(randomx_cache* cache);
+
+	/**
+	 * Fills cache->memory using Argon2d keyed with `key`, then regenerates the
+	 * cache's superscalar programs and reciprocal table from the same key.
+	 *
+	 * cache   - cache to initialise; cache->memory and cache->argonImpl are read
+	 *           here and must already be set by the caller.
+	 * key     - key bytes (used as the Argon2 password and the Blake2 generator seed).
+	 * keySize - number of key bytes.
+	 */
+	void initCache(randomx_cache* cache, const void* key, size_t keySize) {
+		uint32_t memory_blocks, segment_length;
+		argon2_instance_t instance;
+		argon2_context context;
+
+		context.out = nullptr;
+		context.outlen = 0;
+		context.pwd = CONST_CAST(uint8_t *)key;
+		context.pwdlen = (uint32_t)keySize;
+		context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT;
+		context.saltlen = (uint32_t)randomx::ArgonSaltSize;
+		context.secret = NULL;
+		context.secretlen = 0;
+		context.ad = NULL;
+		context.adlen = 0;
+		context.t_cost = RANDOMX_ARGON_ITERATIONS;
+		context.m_cost = RANDOMX_ARGON_MEMORY;
+		context.lanes = RANDOMX_ARGON_LANES;
+		context.threads = 1;
+		context.allocate_cbk = NULL;
+		context.free_cbk = NULL;
+		context.flags = ARGON2_DEFAULT_FLAGS;
+		context.version = ARGON2_VERSION_NUMBER;
+
+		int inputsValid = randomx_argon2_validate_inputs(&context);
+		assert(inputsValid == ARGON2_OK);
+		// The result is only consumed by assert(); keep NDEBUG builds free of
+		// an unused-variable warning.
+		(void)inputsValid;
+
+		/* 2. Align memory size */
+		/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
+		memory_blocks = context.m_cost;
+
+		segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS);
+
+		instance.version = context.version;
+		instance.passes = context.t_cost;
+		instance.memory_blocks = memory_blocks;
+		instance.segment_length = segment_length;
+		instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
+		instance.lanes = context.lanes;
+		instance.threads = context.threads;
+		instance.type = Argon2_d;
+		// Argon2 writes directly into the cache's own memory region.
+		instance.memory = (block*)cache->memory;
+		instance.impl = cache->argonImpl;
+
+		if (instance.threads > instance.lanes) {
+			instance.threads = instance.lanes;
+		}
+
+		/* 3. Initialization: Hashing inputs, allocating memory, filling first
+		 * blocks
+		 */
+		randomx_argon2_initialize(&instance, &context);
+
+		randomx_argon2_fill_memory_blocks(&instance);
+
+		// Regenerate the superscalar programs. Each IMUL_RCP immediate is
+		// replaced by the index of its precomputed reciprocal in reciprocalCache.
+		cache->reciprocalCache.clear();
+		randomx::Blake2Generator gen(key, keySize);
+		for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) {
+			randomx::generateSuperscalar(cache->programs[i], gen);
+			for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) {
+				auto& instr = cache->programs[i](j);
+				if ((SuperscalarInstructionType)instr.opcode == SuperscalarInstructionType::IMUL_RCP) {
+					auto rcp = randomx_reciprocal(instr.getImm32());
+					instr.setImm32((uint32_t)cache->reciprocalCache.size());
+					cache->reciprocalCache.push_back(rcp);
+				}
+			}
+		}
+	}
+
+	// Same as initCache(), followed by JIT code generation: the compiler is
+	// switched to writable, emits the superscalar hash and the dataset-init
+	// code, and is then switched back to executable.
+	void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) {
+		initCache(cache, key, keySize);
+
+		auto* compiler = cache->jit;
+		compiler->enableWriting();
+		compiler->generateSuperscalarHash(cache->programs, cache->reciprocalCache);
+		compiler->generateDatasetInitCode();
+		compiler->enableExecution();
+	}
+
+	// Seed constants for initDatasetItem(): register 0 starts as
+	// (itemNumber + 1) * superscalarMul0, and registers 1-7 start as register 0
+	// XORed with superscalarAdd1..superscalarAdd7 (despite the "Add" in the
+	// names, the combination used there is XOR).
+	constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
+	constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL;
+	constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
+	constexpr uint64_t superscalarAdd3 = 9306329213124626780ULL;
+	constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
+	constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
+	constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
+	constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
+
+	// Maps a register value to a cache line inside `memory`: the value is
+	// masked down to a line index and scaled by the cache line size.
+	static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) {
+		constexpr uint32_t mask = CacheSize / CacheLineSize - 1;
+		const uint64_t lineIndex = registerValue & mask;
+		return memory + lineIndex * CacheLineSize;
+	}
+
+	/**
+	 * Computes one dataset item and copies it (CacheLineSize bytes) to `out`.
+	 *
+	 * Eight registers are seeded from itemNumber. Then, for each of the
+	 * RANDOMX_CACHE_ACCESSES superscalar programs, the program is executed on
+	 * the registers and one cache line is XORed in; the register chosen by the
+	 * program selects the cache line for the next round.
+	 */
+	void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber) {
+		const uint64_t seedXor[7] = {
+			superscalarAdd1, superscalarAdd2, superscalarAdd3, superscalarAdd4,
+			superscalarAdd5, superscalarAdd6, superscalarAdd7
+		};
+
+		int_reg_t rl[8];
+		rl[0] = (itemNumber + 1) * superscalarMul0;
+		for (unsigned k = 1; k < 8; ++k)
+			rl[k] = rl[0] ^ seedXor[k - 1];
+
+		// The first cache line is selected by the item number itself.
+		uint64_t nextAddress = itemNumber;
+		for (unsigned pass = 0; pass < RANDOMX_CACHE_ACCESSES; ++pass) {
+			uint8_t* mixBlock = getMixBlock(nextAddress, cache->memory);
+			rx_prefetch_nta(mixBlock);
+			SuperscalarProgram& prog = cache->programs[pass];
+
+			executeSuperscalar(rl, prog, &cache->reciprocalCache);
+
+			for (unsigned q = 0; q < 8; ++q)
+				rl[q] ^= load64_native(mixBlock + 8 * q);
+
+			nextAddress = rl[prog.getAddressRegister()];
+		}
+
+		memcpy(out, rl, CacheLineSize);
+	}
+
+	// Computes items [startItem, endItem) and writes them back to back,
+	// CacheLineSize bytes apart, starting at `dataset`.
+	void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem) {
+		uint8_t* dest = dataset;
+		for (uint32_t item = startItem; item < endItem; ++item) {
+			initDatasetItem(cache, dest, item);
+			dest += CacheLineSize;
+		}
+	}
+}
--- a/crypto/randomx/dataset.hpp
+++ b/crypto/randomx/dataset.hpp
@ -0,0 +1,103 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include <type_traits>
+#include "common.hpp"
+#include "superscalar_program.hpp"
+#include "allocator.hpp"
+#include "argon2.h"
+
+/* Global scope for C binding */
+// Dataset handle: the memory region plus the function used to release it.
+// Both members have initializers so that a default-initialized object never
+// carries an indeterminate pointer.
+struct randomx_dataset {
+	uint8_t* memory = nullptr;
+	randomx::DatasetDeallocFunc* dealloc = nullptr;
+};
+
+/* Global scope for C binding */
+// Cache handle: memory region, function pointers selected for this cache, the
+// optional JIT compiler, the superscalar programs and their reciprocal table.
+// Every raw pointer has a nullptr initializer; randomx::deallocCache() tests
+// `memory` and `jit` against nullptr, so they must never be indeterminate.
+// NOTE(review): std::string is used below but <string> is not included by this
+// header directly; presumably it arrives through common.hpp - confirm.
+struct randomx_cache {
+	uint8_t* memory = nullptr;
+	randomx::CacheDeallocFunc* dealloc = nullptr;
+	randomx::JitCompiler* jit = nullptr;
+	randomx::CacheInitializeFunc* initialize = nullptr;
+	randomx::DatasetInitFunc* datasetInit = nullptr;
+	randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES];
+	std::vector<uint64_t> reciprocalCache;
+	std::string cacheKey;
+	randomx_argon2_impl* argonImpl = nullptr;
+
+	// True once the first superscalar program has a non-zero size.
+	bool isInitialized() {
+		return programs[0].getSize() != 0;
+	}
+};
+
+//A pointer to a standard-layout struct object points to its initial member
+static_assert(std::is_standard_layout<randomx_dataset>(), "randomx_dataset must be a standard-layout struct");
+
+//the following assert fails when compiling Debug in Visual Studio (JIT mode will crash in Debug)
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && defined(_DEBUG)
+#define TO_STR(x) #x
+#define STR(x) TO_STR(x)
+#pragma message ( __FILE__ "(" STR(__LINE__) ") warning: check std::is_standard_layout<randomx_cache>() is disabled for Debug configuration. JIT mode will crash." )
+#undef STR
+#undef TO_STR
+#else
+static_assert(std::is_standard_layout<randomx_cache>(), "randomx_cache must be a standard-layout struct");
+#endif
+
+namespace randomx {
+
+	using DefaultAllocator = AlignedAllocator<CacheLineSize>;
+
+	// Frees the dataset's memory region through Allocator; does nothing when
+	// the region pointer is null.
+	template<class Allocator>
+	void deallocDataset(randomx_dataset* dataset) {
+		if (dataset->memory == nullptr)
+			return;
+		Allocator::freeMemory(dataset->memory, DatasetSize);
+	}
+
+	template<class Allocator>
+	void deallocCache(randomx_cache* cache);
+
+	void initCache(randomx_cache*, const void*, size_t);
+	void initCacheCompile(randomx_cache*, const void*, size_t);
+	void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t blockNumber);
+	void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
+
+	// Picks the Argon2 fill-segment implementation for `flags`. AVX2 takes
+	// precedence over SSSE3; with neither flag set the reference
+	// implementation is returned.
+	// NOTE(review): whether the AVX2/SSSE3 getters can return null is not
+	// visible here - confirm before dereferencing the result.
+	inline randomx_argon2_impl* selectArgonImpl(randomx_flags flags) {
+		return (flags & RANDOMX_FLAG_ARGON2_AVX2) ? randomx_argon2_impl_avx2()
+			: (flags & RANDOMX_FLAG_ARGON2_SSSE3) ? randomx_argon2_impl_ssse3()
+			: &randomx_argon2_fill_segment_ref;
+	}
+}
--- a/crypto/randomx/instruction.cpp
+++ b/crypto/randomx/instruction.cpp
@ -0,0 +1,390 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "instruction.hpp"
+#include "common.hpp"
+
+namespace randomx {
+
+	// Writes the mnemonic for this opcode, a space, and then the operand text
+	// produced by the formatter registered for the opcode in `engine`.
+	void Instruction::print(std::ostream& os) const {
+		const InstructionFormatter formatOperands = engine[opcode];
+		os << names[opcode] << " ";
+		(this->*formatOperands)(os);
+	}
+
+	// Writes a register-relative scratchpad operand: "L1" when mod.mem is
+	// non-zero, otherwise "L2", followed by "[r<src><signed imm32>]".
+	void Instruction::genAddressReg(std::ostream& os, int srcIndex) const {
+		const char* level = getModMem() ? "L1" : "L2";
+		os << level << "[r" << srcIndex;
+		// The immediate is printed with an explicit sign; showpos is switched
+		// off again right after it.
+		os << std::showpos << (int32_t)getImm32() << std::noshowpos;
+		os << "]";
+	}
+
+	// Writes the destination operand of a store: "L3" when mod.cond reaches
+	// StoreL3Condition, otherwise "L1"/"L2" chosen by mod.mem, followed by
+	// "[r<dst><signed imm32>]".
+	void Instruction::genAddressRegDst(std::ostream& os, int dstIndex) const {
+		const char* level = "L3";
+		if (getModCond() < StoreL3Condition)
+			level = getModMem() ? "L1" : "L2";
+		os << level;
+		os << "[r" << dstIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
+	}
+
+	// Writes an immediate scratchpad operand: "L3[<imm32 masked with ScratchpadL3Mask>]".
+	void Instruction::genAddressImm(std::ostream& os) const {
+		const auto offset = getImm32() & ScratchpadL3Mask;
+		os << "L3" << "[" << offset << "]";
+	}
+
+	// IADD_RS operands: "r<dst>, r<src>", the signed immediate only when dst
+	// is the register that needs a displacement, then ", SHFT <mod.shift>".
+	void Instruction::h_IADD_RS(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		const bool withDisplacement = (dstReg == RegisterNeedsDisplacement);
+
+		os << "r" << dstReg << ", r" << srcReg;
+		if (withDisplacement)
+			os << ", " << (int32_t)getImm32();
+		os << ", SHFT " << getModShift() << std::endl;
+	}
+
+	// IADD_M operands: "r<dst>, " plus a register-relative scratchpad address,
+	// or an immediate L3 address when dst and src select the same register.
+	void Instruction::h_IADD_M(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			genAddressImm(os);
+		else
+			genAddressReg(os, srcReg);
+		os << std::endl;
+	}
+
+	// ISUB_R operands: "r<dst>, r<src>", or "r<dst>, <signed imm32>" when dst
+	// and src select the same register.
+	void Instruction::h_ISUB_R(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			os << (int32_t)getImm32();
+		else
+			os << "r" << srcReg;
+		os << std::endl;
+	}
+
+	// ISUB_M operands: "r<dst>, " plus a register-relative scratchpad address,
+	// or an immediate L3 address when dst and src select the same register.
+	void Instruction::h_ISUB_M(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			genAddressImm(os);
+		else
+			genAddressReg(os, srcReg);
+		os << std::endl;
+	}
+
+	// IMUL_R operands: "r<dst>, r<src>", or "r<dst>, <signed imm32>" when dst
+	// and src select the same register.
+	void Instruction::h_IMUL_R(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			os << (int32_t)getImm32();
+		else
+			os << "r" << srcReg;
+		os << std::endl;
+	}
+
+	// IMUL_M operands: "r<dst>, " plus a register-relative scratchpad address,
+	// or an immediate L3 address when dst and src select the same register.
+	void Instruction::h_IMUL_M(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			genAddressImm(os);
+		else
+			genAddressReg(os, srcReg);
+		os << std::endl;
+	}
+
+	// IMULH_R operands: always "r<dst>, r<src>".
+	void Instruction::h_IMULH_R(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg;
+		os << ", r" << srcReg << std::endl;
+	}
+
+	// IMULH_M operands: "r<dst>, " plus a register-relative scratchpad address,
+	// or an immediate L3 address when dst and src select the same register.
+	void Instruction::h_IMULH_M(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			genAddressImm(os);
+		else
+			genAddressReg(os, srcReg);
+		os << std::endl;
+	}
+
+	// ISMULH_R operands: always "r<dst>, r<src>".
+	void Instruction::h_ISMULH_R(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg;
+		os << ", r" << srcReg << std::endl;
+	}
+
+	// ISMULH_M operands: "r<dst>, " plus a register-relative scratchpad
+	// address, or an immediate L3 address when dst and src select the same
+	// register.
+	void Instruction::h_ISMULH_M(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			genAddressImm(os);
+		else
+			genAddressReg(os, srcReg);
+		os << std::endl;
+	}
+
+	// INEG_R operand: the destination register only, "r<dst>".
+	void Instruction::h_INEG_R(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		os << "r" << dstReg;
+		os << std::endl;
+	}
+
+	// IXOR_R operands: "r<dst>, r<src>", or "r<dst>, <signed imm32>" when dst
+	// and src select the same register.
+	void Instruction::h_IXOR_R(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			os << (int32_t)getImm32();
+		else
+			os << "r" << srcReg;
+		os << std::endl;
+	}
+
+	// IXOR_M operands: "r<dst>, " plus a register-relative scratchpad address,
+	// or an immediate L3 address when dst and src select the same register.
+	void Instruction::h_IXOR_M(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			genAddressImm(os);
+		else
+			genAddressReg(os, srcReg);
+		os << std::endl;
+	}
+
+	// IROR_R operands: "r<dst>, r<src>", or "r<dst>, <imm32 & 63>" when dst
+	// and src select the same register.
+	void Instruction::h_IROR_R(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			os << (getImm32() & 63);
+		else
+			os << "r" << srcReg;
+		os << std::endl;
+	}
+
+	// IROL_R operands: "r<dst>, r<src>", or "r<dst>, <imm32 & 63>" when dst
+	// and src select the same register.
+	void Instruction::h_IROL_R(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg << ", ";
+		if (dstReg == srcReg)
+			os << (getImm32() & 63);
+		else
+			os << "r" << srcReg;
+		os << std::endl;
+	}
+
+	// IMUL_RCP operands: "r<dst>, <imm32>" with the immediate printed unsigned.
+	void Instruction::h_IMUL_RCP(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const uint32_t divisor = getImm32();
+		os << "r" << dstReg << ", " << divisor << std::endl;
+	}
+
+	// ISWAP_R operands: always "r<dst>, r<src>".
+	void Instruction::h_ISWAP_R(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		os << "r" << dstReg;
+		os << ", r" << srcReg << std::endl;
+	}
+
+	// FSWAP_R operand: one floating-point register. Indices below
+	// RegisterCountFlt print as "f<n>", the remaining ones as "e<n>", with n
+	// reduced modulo RegisterCountFlt.
+	void Instruction::h_FSWAP_R(std::ostream& os) const {
+		const auto regIndex = dst % RegistersCount;
+		const bool inGroupE = regIndex >= RegisterCountFlt;
+		const char group = inGroupE ? 'e' : 'f';
+		os << group << (regIndex % RegisterCountFlt) << std::endl;
+	}
+
+	// FADD_R operands: "f<dst>, a<src>", both indices modulo RegisterCountFlt.
+	void Instruction::h_FADD_R(std::ostream& os) const {
+		const auto dstReg = dst % RegisterCountFlt;
+		const auto srcReg = src % RegisterCountFlt;
+		os << "f" << dstReg;
+		os << ", a" << srcReg << std::endl;
+	}
+
+	// FADD_M operands: "f<dst>, " followed by a register-relative scratchpad
+	// address built from the integer source register.
+	void Instruction::h_FADD_M(std::ostream& os) const {
+		const auto dstReg = dst % RegisterCountFlt;
+		os << "f" << dstReg << ", ";
+		genAddressReg(os, src % RegistersCount);
+		os << std::endl;
+	}
+
+	// FSUB_R operands: "f<dst>, a<src>", both indices modulo RegisterCountFlt.
+	void Instruction::h_FSUB_R(std::ostream& os) const {
+		const auto dstReg = dst % RegisterCountFlt;
+		const auto srcReg = src % RegisterCountFlt;
+		os << "f" << dstReg;
+		os << ", a" << srcReg << std::endl;
+	}
+
+	// FSUB_M operands: "f<dst>, " followed by a register-relative scratchpad
+	// address built from the integer source register.
+	void Instruction::h_FSUB_M(std::ostream& os) const {
+		const auto dstReg = dst % RegisterCountFlt;
+		os << "f" << dstReg << ", ";
+		genAddressReg(os, src % RegistersCount);
+		os << std::endl;
+	}
+
+	// FSCAL_R operand: the destination register only, "f<dst>".
+	void Instruction::h_FSCAL_R(std::ostream& os) const {
+		const auto dstReg = dst % RegisterCountFlt;
+		os << "f" << dstReg;
+		os << std::endl;
+	}
+
+	// FMUL_R operands: "e<dst>, a<src>", both indices modulo RegisterCountFlt.
+	void Instruction::h_FMUL_R(std::ostream& os) const {
+		const auto dstReg = dst % RegisterCountFlt;
+		const auto srcReg = src % RegisterCountFlt;
+		os << "e" << dstReg;
+		os << ", a" << srcReg << std::endl;
+	}
+
+	// FDIV_M operands: "e<dst>, " followed by a register-relative scratchpad
+	// address built from the integer source register.
+	void Instruction::h_FDIV_M(std::ostream& os) const {
+		const auto dstReg = dst % RegisterCountFlt;
+		os << "e" << dstReg << ", ";
+		genAddressReg(os, src % RegistersCount);
+		os << std::endl;
+	}
+
+	// FSQRT_R operand: the destination register only, "e<dst>".
+	void Instruction::h_FSQRT_R(std::ostream& os) const {
+		const auto dstReg = dst % RegisterCountFlt;
+		os << "e" << dstReg;
+		os << std::endl;
+	}
+
+	// CFROUND operands: "r<src>, <imm32 & 63>".
+	void Instruction::h_CFROUND(std::ostream& os) const {
+		const auto srcReg = src % RegistersCount;
+		const auto rotation = getImm32() & 63;
+		os << "r" << srcReg << ", " << rotation << std::endl;
+	}
+
+	// CBRANCH operands: "r<dst>, <signed imm32>, COND <mod.cond>".
+	// The source register is not part of the printed text, so it is not
+	// computed here.
+	void Instruction::h_CBRANCH(std::ostream& os) const {
+		auto dstIndex = dst % RegistersCount;
+		os << "r" << dstIndex << ", " << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl;
+	}
+
+	// ISTORE operands: the destination address (see genAddressRegDst) followed
+	// by ", r<src>".
+	void Instruction::h_ISTORE(std::ostream& os) const {
+		const auto dstReg = dst % RegistersCount;
+		const auto srcReg = src % RegistersCount;
+		genAddressRegDst(os, dstReg);
+		os << ", r" << srcReg;
+		os << std::endl;
+	}
+
+	// NOP has no operands: only the line terminator (with flush) is written.
+	void Instruction::h_NOP(std::ostream& os) const {
+		std::endl(os);
+	}
+
+// The REP/WT helper macros come from instruction_weights.hpp.
+// INST_NAME(x) expands to RANDOMX_FREQ_x comma-terminated copies of the string
+// "x"; INST_HANDLE(x) does the same with the address of the h_x formatter.
+#include "instruction_weights.hpp"
+#define INST_NAME(x) REPN(#x, WT(x))
+#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
+
+	// Mnemonic lookup table indexed by the 8-bit opcode. Each instruction kind
+	// occupies as many consecutive slots as its RANDOMX_FREQ_* value.
+	// NOTE(review): presumably the frequencies sum to 256 so that every slot is
+	// filled - confirm in the configuration header that defines RANDOMX_FREQ_*.
+	const char* Instruction::names[256] = {
+		INST_NAME(IADD_RS)
+		INST_NAME(IADD_M)
+		INST_NAME(ISUB_R)
+		INST_NAME(ISUB_M)
+		INST_NAME(IMUL_R)
+		INST_NAME(IMUL_M)
+		INST_NAME(IMULH_R)
+		INST_NAME(IMULH_M)
+		INST_NAME(ISMULH_R)
+		INST_NAME(ISMULH_M)
+		INST_NAME(IMUL_RCP)
+		INST_NAME(INEG_R)
+		INST_NAME(IXOR_R)
+		INST_NAME(IXOR_M)
+		INST_NAME(IROR_R)
+		INST_NAME(IROL_R)
+		INST_NAME(ISWAP_R)
+		INST_NAME(FSWAP_R)
+		INST_NAME(FADD_R)
+		INST_NAME(FADD_M)
+		INST_NAME(FSUB_R)
+		INST_NAME(FSUB_M)
+		INST_NAME(FSCAL_R)
+		INST_NAME(FMUL_R)
+		INST_NAME(FDIV_M)
+		INST_NAME(FSQRT_R)
+		INST_NAME(CBRANCH)
+		INST_NAME(CFROUND)
+		INST_NAME(ISTORE)
+		INST_NAME(NOP)
+	};
+
+	// Operand-formatter lookup table indexed by the 8-bit opcode; used by
+	// Instruction::print(). The entries are listed in the same order as in
+	// Instruction::names, so a given opcode resolves to matching name and
+	// formatter.
+	InstructionFormatter Instruction::engine[256] = {
+		INST_HANDLE(IADD_RS)
+		INST_HANDLE(IADD_M)
+		INST_HANDLE(ISUB_R)
+		INST_HANDLE(ISUB_M)
+		INST_HANDLE(IMUL_R)
+		INST_HANDLE(IMUL_M)
+		INST_HANDLE(IMULH_R)
+		INST_HANDLE(IMULH_M)
+		INST_HANDLE(ISMULH_R)
+		INST_HANDLE(ISMULH_M)
+		INST_HANDLE(IMUL_RCP)
+		INST_HANDLE(INEG_R)
+		INST_HANDLE(IXOR_R)
+		INST_HANDLE(IXOR_M)
+		INST_HANDLE(IROR_R)
+		INST_HANDLE(IROL_R)
+		INST_HANDLE(ISWAP_R)
+		INST_HANDLE(FSWAP_R)
+		INST_HANDLE(FADD_R)
+		INST_HANDLE(FADD_M)
+		INST_HANDLE(FSUB_R)
+		INST_HANDLE(FSUB_M)
+		INST_HANDLE(FSCAL_R)
+		INST_HANDLE(FMUL_R)
+		INST_HANDLE(FDIV_M)
+		INST_HANDLE(FSQRT_R)
+		INST_HANDLE(CBRANCH)
+		INST_HANDLE(CFROUND)
+		INST_HANDLE(ISTORE)
+		INST_HANDLE(NOP)
+	};
+
+}
--- a/crypto/randomx/instruction.hpp
+++ b/crypto/randomx/instruction.hpp
@ -0,0 +1,149 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <iostream>
+#include <type_traits>
+#include "blake2/endian.h"
+
+namespace randomx {
+
+	class Instruction;
+
+	typedef void(Instruction::*InstructionFormatter)(std::ostream&) const;
+
+	// Instruction kinds, numbered consecutively from 0 to 29.
+	// NOTE(review): Instruction::opcode is an 8-bit value that indexes
+	// 256-entry tables in which one kind may occupy several slots, so these
+	// enumerator values are presumably not raw opcode bytes - confirm at the
+	// places where the two are converted.
+	enum class InstructionType : uint16_t {
+		IADD_RS = 0,
+		IADD_M = 1,
+		ISUB_R = 2,
+		ISUB_M = 3,
+		IMUL_R = 4,
+		IMUL_M = 5,
+		IMULH_R = 6,
+		IMULH_M = 7,
+		ISMULH_R = 8,
+		ISMULH_M = 9,
+		IMUL_RCP = 10,
+		INEG_R = 11,
+		IXOR_R = 12,
+		IXOR_M = 13,
+		IROR_R = 14,
+		IROL_R = 15,
+		ISWAP_R = 16,
+		FSWAP_R = 17,
+		FADD_R = 18,
+		FADD_M = 19,
+		FSUB_R = 20,
+		FSUB_M = 21,
+		FSCAL_R = 22,
+		FMUL_R = 23,
+		FDIV_M = 24,
+		FSQRT_R = 25,
+		CBRANCH = 26,
+		CFROUND = 27,
+		ISTORE = 28,
+		NOP = 29,
+	};
+
+	/**
+	 * One program instruction: four single-byte fields (opcode, dst, src, mod)
+	 * followed by a 32-bit immediate. The static_asserts after the class pin
+	 * the size to 8 bytes and require standard layout, so the order of the data
+	 * members must not change.
+	 */
+	class Instruction {
+	public:
+		// Reads the immediate through load32().
+		uint32_t getImm32() const {
+			return load32(&imm32);
+		}
+		// Writes the immediate through store32().
+		void setImm32(uint32_t val) {
+			store32(&imm32, val);
+		}
+		// Mnemonic registered for this instruction's opcode.
+		const char* getName() const {
+			return names[opcode];
+		}
+		// Streams the textual form of the instruction (mnemonic and operands).
+		friend std::ostream& operator<<(std::ostream& os, const Instruction& instr) {
+			instr.print(os);
+			return os;
+		}
+		int getModMem() const {
+			return mod & 3; //bits 0-1
+		}
+		int getModShift() const {
+			return (mod >> 2) & 3; //bits 2-3
+		}
+		int getModCond() const {
+			return mod >> 4; //bits 4-7
+		}
+		void setMod(uint8_t val) {
+			mod = val;
+		}
+
+		uint8_t opcode;
+		uint8_t dst;
+		uint8_t src;
+		uint8_t mod;
+		uint32_t imm32;
+	private:
+		void print(std::ostream&) const;
+		// Per-opcode mnemonic and operand-formatter tables (instruction.cpp).
+		static const char* names[256];
+		static InstructionFormatter engine[256];
+		// Shared helpers that write scratchpad address operands.
+		void genAddressReg(std::ostream& os, int) const;
+		void genAddressImm(std::ostream& os) const;
+		void genAddressRegDst(std::ostream&, int) const;
+		// One operand formatter per instruction kind.
+		void h_IADD_RS(std::ostream&) const;
+		void h_IADD_M(std::ostream&) const;
+		void h_ISUB_R(std::ostream&) const;
+		void h_ISUB_M(std::ostream&) const;
+		void h_IMUL_R(std::ostream&) const;
+		void h_IMUL_M(std::ostream&) const;
+		void h_IMULH_R(std::ostream&) const;
+		void h_IMULH_M(std::ostream&) const;
+		void h_ISMULH_R(std::ostream&) const;
+		void h_ISMULH_M(std::ostream&) const;
+		void h_IMUL_RCP(std::ostream&) const;
+		void h_INEG_R(std::ostream&) const;
+		void h_IXOR_R(std::ostream&) const;
+		void h_IXOR_M(std::ostream&) const;
+		void h_IROR_R(std::ostream&) const;
+		void h_IROL_R(std::ostream&) const;
+		void h_ISWAP_R(std::ostream&) const;
+		void h_FSWAP_R(std::ostream&) const;
+		void h_FADD_R(std::ostream&) const;
+		void h_FADD_M(std::ostream&) const;
+		void h_FSUB_R(std::ostream&) const;
+		void h_FSUB_M(std::ostream&) const;
+		void h_FSCAL_R(std::ostream&) const;
+		void h_FMUL_R(std::ostream&) const;
+		void h_FDIV_M(std::ostream&) const;
+		void h_FSQRT_R(std::ostream&) const;
+		void h_CBRANCH(std::ostream&) const;
+		void h_CFROUND(std::ostream&) const;
+		void h_ISTORE(std::ostream&) const;
+		void h_NOP(std::ostream&) const;
+	};
+
+	static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction");
+	static_assert(std::is_standard_layout<Instruction>(), "randomx::Instruction must be a standard-layout struct");
+}
--- a/crypto/randomx/instruction_weights.hpp
+++ b/crypto/randomx/instruction_weights.hpp
@ -0,0 +1,73 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+// Repetition helpers: REPk(x) expands to k copies of "x," (each copy carries
+// its own trailing comma), so the result can be dropped straight into an
+// array initializer. REP0 expands to nothing.
+// Only the counts defined below exist (0-33, 40, 64, 128, 232, 256); a weight
+// with any other value has no matching REP macro.
+#define REP0(x)
+#define REP1(x) x,
+#define REP2(x) REP1(x) x,
+#define REP3(x) REP2(x) x,
+#define REP4(x) REP3(x) x,
+#define REP5(x) REP4(x) x,
+#define REP6(x) REP5(x) x,
+#define REP7(x) REP6(x) x,
+#define REP8(x) REP7(x) x,
+#define REP9(x) REP8(x) x,
+#define REP10(x) REP9(x) x,
+#define REP11(x) REP10(x) x,
+#define REP12(x) REP11(x) x,
+#define REP13(x) REP12(x) x,
+#define REP14(x) REP13(x) x,
+#define REP15(x) REP14(x) x,
+#define REP16(x) REP15(x) x,
+#define REP17(x) REP16(x) x,
+#define REP18(x) REP17(x) x,
+#define REP19(x) REP18(x) x,
+#define REP20(x) REP19(x) x,
+#define REP21(x) REP20(x) x,
+#define REP22(x) REP21(x) x,
+#define REP23(x) REP22(x) x,
+#define REP24(x) REP23(x) x,
+#define REP25(x) REP24(x) x,
+#define REP26(x) REP25(x) x,
+#define REP27(x) REP26(x) x,
+#define REP28(x) REP27(x) x,
+#define REP29(x) REP28(x) x,
+#define REP30(x) REP29(x) x,
+#define REP31(x) REP30(x) x,
+#define REP32(x) REP31(x) x,
+#define REP33(x) REP32(x) x,
+// Larger counts are composed from the smaller ones.
+#define REP40(x) REP32(x) REP8(x)
+#define REP64(x) REP32(x) REP32(x)
+#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
+#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x)
+#define REP256(x) REP128(x) REP128(x)
+// REPN(x, N) goes through REPNX so that N is macro-expanded before it is
+// pasted onto "REP".
+#define REPNX(x,N) REP##N(x)
+#define REPN(x,N) REPNX(x,N)
+#define NUM(x) x
+// WT(x) is the weight of instruction x, i.e. the value of RANDOMX_FREQ_x.
+#define WT(x) NUM(RANDOMX_FREQ_##x)
--- a/crypto/randomx/instructions_portable.cpp
+++ b/crypto/randomx/instructions_portable.cpp
@ -0,0 +1,208 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <cfenv>
+#include <cmath>
+#include "common.hpp"
+#include "intrin_portable.h"
+#include "blake2/endian.h"
+
+#if defined(__SIZEOF_INT128__)
+	// The compiler offers a native 128-bit integer type: form the full product
+	// and keep its upper 64 bits.
+	typedef unsigned __int128 uint128_t;
+	typedef __int128 int128_t;
+	// Upper 64 bits of the unsigned 128-bit product a * b.
+	uint64_t mulh(uint64_t a, uint64_t b) {
+		const uint128_t product = static_cast<uint128_t>(a) * b;
+		return static_cast<uint64_t>(product >> 64);
+	}
+	// Upper 64 bits of the signed 128-bit product a * b.
+	int64_t smulh(int64_t a, int64_t b) {
+		const int128_t product = static_cast<int128_t>(a) * b;
+		return static_cast<int64_t>(product >> 64);
+	}
+	#define HAVE_MULH
+	#define HAVE_SMULH
+#endif
+
+#if defined(_MSC_VER)
+	// MSVC-specific implementations built on compiler intrinsics. Each one
+	// defines the matching HAVE_* macro so the portable fallbacks further down
+	// are skipped.
+	// EVAL_DEFINE(X) expands X and pastes a 0 after the result, so it can be
+	// used in #if to test what X expands to.
+	// NOTE(review): presumably the __MACHINE* macros from <intrin.h> expand to
+	// their argument only on the named targets - confirm against intrin.h.
+	#define HAS_VALUE(X) X ## 0
+	#define EVAL_DEFINE(X) HAS_VALUE(X)
+	#include <intrin.h>
+	#include <stdlib.h>
+
+	// 64-bit rotate left / right via the MSVC rotate intrinsics.
+	uint64_t rotl(uint64_t x, unsigned int c) {
+		return _rotl64(x, c);
+	}
+	uint64_t rotr(uint64_t x, unsigned int c) {
+		return _rotr64(x, c);
+	}
+	#define HAVE_ROTL
+	#define HAVE_ROTR
+
+	#if EVAL_DEFINE(__MACHINEARM64_X64(1))
+		// Upper 64 bits of the unsigned product, via __umulh.
+		uint64_t mulh(uint64_t a, uint64_t b) {
+			return __umulh(a, b);
+		}
+		#define HAVE_MULH
+	#endif
+
+	#if EVAL_DEFINE(__MACHINEX64(1))
+		// Upper 64 bits of the signed product; _mul128 writes them to hi and
+		// its return value is not needed here.
+		int64_t smulh(int64_t a, int64_t b) {
+			int64_t hi;
+			_mul128(a, b, &hi);
+			return hi;
+		}
+		#define HAVE_SMULH
+	#endif
+
+	// Sets the floating-point rounding-control field (_MCW_RC mask) to mode.
+	static void setRoundMode_(uint32_t mode) {
+		_controlfp(mode, _MCW_RC);
+	}
+	#define HAVE_SETROUNDMODE_IMPL
+#endif
+
+#ifndef HAVE_SETROUNDMODE_IMPL
+	// Default implementation: forwards mode (an FE_* constant from <cfenv>) to
+	// fesetround. The status returned by fesetround is not checked.
+	static void setRoundMode_(uint32_t mode) {
+		fesetround(mode);
+	}
+#endif
+
+#ifndef HAVE_ROTR
+	// Portable fallback: rotates the 64-bit value a right by b bit positions.
+	// Both shift counts are reduced modulo 64 so that no shift by 64 or more
+	// bits is ever performed (that would be undefined behaviour in C++).
+	// Results for b in 0..63 are unchanged.
+	uint64_t rotr(uint64_t a, unsigned int b) {
+		return (a >> (b & 63)) | (a << (-b & 63));
+	}
+	#define HAVE_ROTR
+#endif
+
+#ifndef HAVE_ROTL
+	// Portable fallback: rotates the 64-bit value a left by b bit positions.
+	// Both shift counts are reduced modulo 64 so that no shift by 64 or more
+	// bits is ever performed (that would be undefined behaviour in C++).
+	// Results for b in 0..63 are unchanged.
+	uint64_t rotl(uint64_t a, unsigned int b) {
+		return (a << (b & 63)) | (a >> (-b & 63));
+	}
+	#define HAVE_ROTL
+#endif
+
+#ifndef HAVE_MULH
+	// Portable fallback: upper 64 bits of the unsigned 128-bit product a * b,
+	// computed with 64-bit arithmetic only.
+	// LO/HI select the low and high 32-bit halves of a 64-bit value.
+	#define LO(x) ((x)&0xffffffff)
+	#define HI(x) ((x)>>32)
+	uint64_t mulh(uint64_t a, uint64_t b) {
+		// Split both operands into 32-bit halves and form the four partial products.
+		uint64_t ah = HI(a), al = LO(a);
+		uint64_t bh = HI(b), bl = LO(b);
+		uint64_t x00 = al * bl;
+		uint64_t x01 = al * bh;
+		uint64_t x10 = ah * bl;
+		uint64_t x11 = ah * bh;
+		// Sum the partial products column by column (32 bits per column); the
+		// HI part of each column sum is the carry into the next column.
+		uint64_t m1 = LO(x10) + LO(x01) + HI(x00);
+		uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1);
+		uint64_t m3 = HI(x11) + HI(m2);
+
+		// m2 supplies bits 64..95 of the product, m3 bits 96..127.
+		return (m3 << 32) + LO(m2);
+	}
+	#define HAVE_MULH
+#endif
+
+#ifndef HAVE_SMULH
+	// Portable fallback: upper 64 bits of the signed 128-bit product a * b.
+	// Derived from the unsigned high product by subtracting b when a is
+	// negative and a when b is negative. The corrections are carried out in
+	// uint64_t, where wrap-around is well defined; performing them on int64_t
+	// can overflow (for example a = -1, b = INT64_MIN), which is undefined
+	// behaviour.
+	int64_t smulh(int64_t a, int64_t b) {
+		uint64_t hi = mulh((uint64_t)a, (uint64_t)b);
+		if (a < 0LL) hi -= (uint64_t)b;
+		if (b < 0LL) hi -= (uint64_t)a;
+		return (int64_t)hi;
+	}
+	#define HAVE_SMULH
+#endif
+
+#ifdef RANDOMX_DEFAULT_FENV
+
+// Restores the floating-point environment used by the fenv-based build:
+// round-to-nearest, plus the x87 precision adjustment where that applies
+// (rx_set_double_precision is an empty macro unless RANDOMX_USE_X87 is set).
+void rx_reset_float_state() {
+	setRoundMode_(FE_TONEAREST);
+	rx_set_double_precision(); //set precision to 53 bits if needed by the platform
+}
+
+// Translates a RandomX rounding-mode code into the matching <cfenv> constant
+// and applies it through setRoundMode_. Only the low two bits of mode are
+// looked at, so every possible value maps to one of the four cases.
+void rx_set_rounding_mode(uint32_t mode) {
+	const uint32_t selector = mode & 3;
+	switch (selector) {
+	case RoundToNearest:
+		setRoundMode_(FE_TONEAREST);
+		break;
+	case RoundToZero:
+		setRoundMode_(FE_TOWARDZERO);
+		break;
+	case RoundUp:
+		setRoundMode_(FE_UPWARD);
+		break;
+	case RoundDown:
+		setRoundMode_(FE_DOWNWARD);
+		break;
+	default:
+		UNREACHABLE;
+	}
+}
+
+// Reads the current <cfenv> rounding direction and reports it as the
+// corresponding RandomX rounding-mode code.
+uint32_t rx_get_rounding_mode() {
+	const int current = fegetround();
+	switch (current) {
+	case FE_TONEAREST:
+		return RoundToNearest;
+	case FE_TOWARDZERO:
+		return RoundToZero;
+	case FE_UPWARD:
+		return RoundUp;
+	case FE_DOWNWARD:
+		return RoundDown;
+	default:
+		UNREACHABLE;
+	}
+}
+
+#endif
+
+#ifdef RANDOMX_USE_X87
+
+#if defined(_MSC_VER) && defined(_M_IX86)
+
+void rx_set_double_precision() {
+	_control87(_PC_53, _MCW_PC);
+}
+
+#elif defined(__i386)
+
+// i386 variant: rewrites the x87 control word with the two bits under mask
+// 0x300 set to 0x200, leaving every other bit as it was.
+// NOTE(review): 0x300 is presumably the precision-control field and 0x200 the
+// 53-bit setting, matching _PC_53 in the MSVC variant - confirm.
+void rx_set_double_precision() {
+	uint16_t volatile x87cw;
+	asm volatile("fstcw %0" : "=m" (x87cw)); // read the current control word
+	x87cw &= ~0x300;
+	x87cw |= 0x200;
+	asm volatile("fldcw %0" : : "m" (x87cw)); // load the modified control word
+}
+
+#endif
+
+#endif //RANDOMX_USE_X87
+
+union double_ser_t {
+	double f;
+	uint64_t i;
+};
+
+// Reads 8 bytes at addr with load64 and returns them reinterpreted as a
+// double (bit pattern preserved, via the double_ser_t union).
+double loadDoublePortable(const void* addr) {
+	double_ser_t bits;
+	bits.i = load64(addr);
+	return bits.f;
+}
--- a/crypto/randomx/intrin_portable.h
+++ b/crypto/randomx/intrin_portable.h
@ -0,0 +1,751 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include "blake2/endian.h"
+
+// Reinterprets a 32-bit unsigned value as a signed two's-complement integer.
+// The (-1 == ~0) test holds when the platform's own signed representation is
+// two's complement, in which case a plain cast is used; otherwise the value
+// is rebuilt arithmetically.
+constexpr int32_t unsigned32ToSigned2sCompl(uint32_t x) {
+	return (-1 == ~0) ? (int32_t)x : (x > INT32_MAX ? (-(int32_t)(UINT32_MAX - x) - 1) : (int32_t)x);
+}
+
+// 64-bit counterpart: reinterprets an unsigned 64-bit value as a signed
+// two's-complement integer, using a plain cast when (-1 == ~0) holds and an
+// arithmetic reconstruction otherwise.
+constexpr int64_t unsigned64ToSigned2sCompl(uint64_t x) {
+	return (-1 == ~0) ? (int64_t)x : (x > INT64_MAX ? (-(int64_t)(UINT64_MAX - x) - 1) : (int64_t)x);
+}
+
+// Sign-extends a 32-bit value (read as two's complement) to 64 bits and
+// returns the result as an unsigned 64-bit integer. When (-1 == ~0) does not
+// hold, values above INT32_MAX get their upper 32 bits set explicitly.
+constexpr uint64_t signExtend2sCompl(uint32_t x) {
+	return (-1 == ~0) ? (int64_t)(int32_t)(x) : (x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x);
+}
+
+constexpr int RoundToNearest = 0;
+constexpr int RoundDown = 1;
+constexpr int RoundUp = 2;
+constexpr int RoundToZero = 3;
+
+//MSVC doesn't define __SSE2__, so we have to define it manually if SSE2 is available
+#if !defined(__SSE2__) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))
+#define __SSE2__ 1
+#endif
+
+//MSVC doesn't define __AES__
+#if defined(_MSC_VER) && defined(__SSE2__)
+#define __AES__
+#endif
+
+//the library "sqrt" function provided by MSVC for x86 targets doesn't give
+//the correct results, so we have to use inline assembly to call x87 fsqrt directly
+#if !defined(__SSE2__)
+#if defined(_MSC_VER) && defined(_M_IX86)
+inline double __cdecl rx_sqrt(double x) {
+	__asm {
+		fld x
+		fsqrt
+	}
+}
+#define rx_sqrt rx_sqrt
+
+void rx_set_double_precision();
+#define RANDOMX_USE_X87
+
+#elif defined(__i386)
+
+void rx_set_double_precision();
+#define RANDOMX_USE_X87
+
+#endif
+#endif //__SSE2__
+
+#if !defined(rx_sqrt)
+#define rx_sqrt sqrt
+#endif
+
+#if !defined(RANDOMX_USE_X87)
+#define rx_set_double_precision(x)
+#endif
+
+#ifdef __SSE2__
+#ifdef __GNUC__
+#include <x86intrin.h>
+#else
+#include <intrin.h>
+#endif
+
+typedef __m128i rx_vec_i128;
+typedef __m128d rx_vec_f128;
+
+#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
+#define rx_aligned_free(a) _mm_free(a)
+#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
+#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
+
+#define rx_load_vec_f128 _mm_load_pd
+#define rx_store_vec_f128 _mm_store_pd
+#define rx_add_vec_f128 _mm_add_pd
+#define rx_sub_vec_f128 _mm_sub_pd
+#define rx_mul_vec_f128 _mm_mul_pd
+#define rx_div_vec_f128 _mm_div_pd
+#define rx_sqrt_vec_f128 _mm_sqrt_pd
+
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
+	return _mm_shuffle_pd(a, a, 1);
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
+	return _mm_castsi128_pd(_mm_set_epi64x(x1, x0));
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
+	return _mm_castsi128_pd(_mm_set1_epi64x(x));
+}
+
+#define rx_xor_vec_f128 _mm_xor_pd
+#define rx_and_vec_f128 _mm_and_pd
+#define rx_or_vec_f128 _mm_or_pd
+
+#ifdef __AES__
+
+#define rx_aesenc_vec_i128 _mm_aesenc_si128
+#define rx_aesdec_vec_i128 _mm_aesdec_si128
+
+#define HAVE_AES 1
+
+#endif //__AES__
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
+	return _mm_cvtsi128_si32(a);
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
+	return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0x55));
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
+	return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xaa));
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
+	return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xff));
+}
+
+#define rx_set_int_vec_i128 _mm_set_epi32
+#define rx_xor_vec_i128 _mm_xor_si128
+#define rx_load_vec_i128 _mm_load_si128
+#define rx_store_vec_i128 _mm_store_si128
+
+// Loads 64 bits from addr and converts the two packed 32-bit signed integers
+// they contain into a pair of doubles.
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
+	const __m128i* packed = (const __m128i*)addr;
+	return _mm_cvtepi32_pd(_mm_loadl_epi64(packed));
+}
+
+// Baseline MXCSR value used by the SSE2 build.
+constexpr uint32_t rx_mxcsr_default = 0x9FC0; //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled
+
+// Writes the baseline value to MXCSR, which also resets the rounding mode to
+// the default.
+FORCE_INLINE void rx_reset_float_state() {
+	_mm_setcsr(rx_mxcsr_default);
+}
+
+// Selects the SSE rounding mode by writing MXCSR: the baseline control word
+// with the two-bit rounding field (bits 13-14) taken from mode. Only the low
+// two bits of mode are used, the same (mode & 3) reduction the fenv-based
+// implementation applies, so an out-of-range argument cannot set bits above
+// the rounding field. Results for mode in 0..3 are unchanged.
+FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
+	_mm_setcsr(rx_mxcsr_default | ((mode & 3) << 13));
+}
+
+// Returns the current rounding mode: bits 13-14 of MXCSR, shifted down to the
+// range 0..3.
+FORCE_INLINE uint32_t rx_get_rounding_mode() {
+	return (_mm_getcsr() >> 13) & 3;
+}
+
+#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cant use doubles or 64 bit integers with SIMD
+#include <cstdint>
+#include <stdexcept>
+#include <cstdlib>
+#include <altivec.h>
+#undef vector
+#undef pixel
+#undef bool
+
+typedef __vector uint8_t __m128i;
+typedef __vector uint32_t __m128l;
+typedef __vector int      __m128li;
+typedef __vector uint64_t __m128ll;
+typedef __vector double __m128d;
+
+typedef __m128i rx_vec_i128;
+typedef __m128d rx_vec_f128;
+typedef union{
+	rx_vec_i128 i;
+  rx_vec_f128 d;
+  uint64_t u64[2];
+  double   d64[2];
+  uint32_t u32[4];
+	int i32[4];
+} vec_u;
+
+#define rx_aligned_alloc(a, b) malloc(a)
+#define rx_aligned_free(a) free(a)
+#define rx_prefetch_nta(x)
+#define rx_prefetch_t0(x)
+
+/* Splat 64-bit long long to 2 64-bit long longs */
+FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
+{ return (__m128i) vec_splats (scalar); }
+
+// Loads two doubles starting at pd into a vector. With NATIVE_LITTLE_ENDIAN
+// the vector is loaded directly; otherwise each 64-bit element is read
+// through load64 and assembled in a vec_u union.
+// NOTE(review): load64 presumably performs the little-endian conversion -
+// confirm in blake2/endian.h.
+FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+	return (rx_vec_f128)vec_vsx_ld(0,pd);
+#else
+	vec_u t;
+	t.u64[0] = load64(pd + 0);
+	t.u64[1] = load64(pd + 1);
+	return (rx_vec_f128)t.d;
+#endif
+}
+
+// Stores the two doubles of a to mem_addr. With NATIVE_LITTLE_ENDIAN the
+// vector is stored directly; otherwise each 64-bit element is written
+// through store64 after being extracted via a vec_u union.
+FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+	vec_vsx_st(a,0,(rx_vec_f128*)mem_addr);
+#else
+	vec_u _a;
+	_a.d = a;
+	store64(mem_addr + 0, _a.u64[0]);
+	store64(mem_addr + 1, _a.u64[1]);
+#endif
+}
+
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
+	return (rx_vec_f128)vec_perm((__m128i)a,(__m128i)a,(__m128i){8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7});
+}
+
+FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return (rx_vec_f128)vec_add(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return (rx_vec_f128)vec_sub(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return (rx_vec_f128)vec_mul(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return (rx_vec_f128)vec_div(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
+	return (rx_vec_f128)vec_sqrt(a);
+}
+
+FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
+	return (rx_vec_i128)vec_splat2sd(a);
+}
+
+FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
+	return (rx_vec_f128)a;
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
+	return (rx_vec_f128)(__m128ll){x0,x1};
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
+	return (rx_vec_f128)vec_splat2sd(x);
+}
+
+FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return (rx_vec_f128)vec_xor(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return (rx_vec_f128)vec_and(a,b);
+}
+
+FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return (rx_vec_f128)vec_or(a,b);
+}
+
+#if defined(__CRYPTO__)
+
+FORCE_INLINE __m128ll vrev(__m128i v){
+#if defined(NATIVE_LITTLE_ENDIAN)
+	return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0});
+#else
+	return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12});
+#endif
+}
+
+FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
+	__m128ll _v = vrev(v);
+	__m128ll _rkey = vrev(rkey);
+	__m128ll result = vrev((__m128i)__builtin_crypto_vcipher(_v,_rkey));
+	return (rx_vec_i128)result;
+}
+
+FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
+	__m128ll _v = vrev(v);
+	__m128ll zero = (__m128ll){0};
+	__m128ll out = vrev((__m128i)__builtin_crypto_vncipher(_v,zero));
+	return (rx_vec_i128)vec_xor((__m128i)out,rkey);
+}
+#define HAVE_AES 1
+
+#endif //__CRYPTO__
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
+	vec_u _a;
+	_a.i = a;
+  return _a.i32[0];
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
+	vec_u _a;
+	_a.i = a;
+	return _a.i32[1];
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
+	vec_u _a;
+	_a.i = a;
+	return _a.i32[2];
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
+	vec_u _a;
+	_a.i = a;
+	return _a.i32[3];
+}
+
+FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int i3, int i2, int i1, int i0) {
+	return (rx_vec_i128)((__m128li){i0,i1,i2,i3});
+};
+
+FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 a, rx_vec_i128 b) {
+	return (rx_vec_i128)vec_xor(a,b);
+}
+
+FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *p) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+	return *p;
+#else
+	const uint32_t* ptr = (const uint32_t*)p;
+	vec_u c;
+	c.u32[0] = load32(ptr + 0);
+	c.u32[1] = load32(ptr + 1);
+	c.u32[2] = load32(ptr + 2);
+	c.u32[3] = load32(ptr + 3);
+	return (rx_vec_i128)c.i;
+#endif
+}
+
+FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+	*p = b;
+#else
+	uint32_t* ptr = (uint32_t*)p;
+	vec_u B;
+	B.i = b;
+	store32(ptr + 0, B.u32[0]);
+	store32(ptr + 1, B.u32[1]);
+	store32(ptr + 2, B.u32[2]);
+	store32(ptr + 3, B.u32[3]);
+#endif
+}
+
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
+	vec_u x;
+	x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0));
+	x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4));
+	return (rx_vec_f128)x.d;
+}
+
+#define RANDOMX_DEFAULT_FENV
+
+#elif defined(__aarch64__)
+
+#include <stdlib.h>
+#include <arm_neon.h>
+#include <arm_acle.h>
+
+typedef uint8x16_t rx_vec_i128;
+typedef float64x2_t rx_vec_f128;
+
+// Allocates size bytes aligned to align via posix_memalign.
+// Returns the new block, or a null pointer when the allocation fails.
+inline void* rx_aligned_alloc(size_t size, size_t align) {
+	void* block = nullptr;
+	const int status = posix_memalign(&block, align, size);
+	return status == 0 ? block : nullptr;
+}
+
+#define rx_aligned_free(a) free(a)
+
+// Issues a PRFM PLDL1STRM prefetch hint for the address ptr.
+inline void rx_prefetch_nta(void* ptr) {
+	asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
+}
+
+// Issues a PRFM PLDL1STRM prefetch hint for the address ptr.
+// NOTE(review): this is the same streaming hint rx_prefetch_nta uses; a
+// temporal (T0-style) prefetch would normally be PLDL1KEEP - confirm whether
+// the streaming hint is intentional here.
+inline void rx_prefetch_t0(const void* ptr) {
+	asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
+}
+
+FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
+	return vld1q_f64((const float64_t*)pd);
+}
+
+FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) {
+	vst1q_f64((float64_t*)mem_addr, val);
+}
+
+// Exchanges the two double lanes of a: the result holds a[1] in lane 0 and
+// a[0] in lane 1. vextq_f64(a, a, 1) extracts exactly that pair from the
+// concatenation of a with itself and, unlike a chain of lane copies into a
+// scratch vector, never reads an uninitialized value.
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
+	return vextq_f64(a, a, 1);
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
+	uint64x2_t temp0 = vdupq_n_u64(x0);
+	uint64x2_t temp1 = vdupq_n_u64(x1);
+	return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0));
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
+	return vreinterpretq_f64_u64(vdupq_n_u64(x));
+}
+
+#define rx_add_vec_f128 vaddq_f64
+#define rx_sub_vec_f128 vsubq_f64
+#define rx_mul_vec_f128 vmulq_f64
+#define rx_div_vec_f128 vdivq_f64
+#define rx_sqrt_vec_f128 vsqrtq_f64
+
+FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
+}
+
+FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
+}
+
+FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
+}
+
+#ifdef __ARM_FEATURE_CRYPTO
+
+
+FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
+	const uint8x16_t zero = { 0 };
+	return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key;
+}
+
+FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
+	const uint8x16_t zero = { 0 };
+	return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key;
+}
+
+#define HAVE_AES 1
+
+#endif
+
+#define rx_xor_vec_i128 veorq_u8
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
+	return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0);
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
+	return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1);
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
+	return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2);
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
+	return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3);
+}
+
+// Builds a 128-bit integer vector from four 32-bit values; i0 is placed in
+// the lowest lane and i3 in the highest.
+FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int i3, int i2, int i1, int i0) {
+	const int32_t lanes[4] = { i0, i1, i2, i3 };
+	return vreinterpretq_u8_s32(vld1q_s32(lanes));
+}
+
+#define rx_xor_vec_i128 veorq_u8
+
+FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) {
+	return vld1q_u8((const uint8_t*)mem_addr);
+}
+
+FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) {
+	vst1q_u8((uint8_t*)mem_addr, val);
+}
+
+// Reads two 32-bit signed integers from addr (via load32, at offsets 0 and 4)
+// and returns them converted to doubles, the first in lane 0 and the second
+// in lane 1. The vector is seeded with vdupq_n_f64 so that no lane is ever
+// read before it has been written, and the input pointer keeps its const
+// qualification.
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
+	const uint8_t* bytes = (const uint8_t*)addr;
+	double lo = unsigned32ToSigned2sCompl(load32(bytes + 0));
+	double hi = unsigned32ToSigned2sCompl(load32(bytes + 4));
+	rx_vec_f128 x = vdupq_n_f64(lo);
+	return vsetq_lane_f64(hi, x, 1);
+}
+
+#define RANDOMX_DEFAULT_FENV
+
+#else //portable fallback
+
+#include <cstdint>
+#include <stdexcept>
+#include <cstdlib>
+#include <cmath>
+
+typedef union {
+	uint64_t u64[2];
+	uint32_t u32[4];
+	uint16_t u16[8];
+	uint8_t u8[16];
+} rx_vec_i128;
+
+typedef union {
+	struct {
+		double lo;
+		double hi;
+	};
+	rx_vec_i128 i;
+} rx_vec_f128;
+
+#define rx_aligned_alloc(a, b) malloc(a)
+#define rx_aligned_free(a) free(a)
+#define rx_prefetch_nta(x)
+#define rx_prefetch_t0(x)
+
+FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
+	rx_vec_f128 x;
+	x.i.u64[0] = load64(pd + 0);
+	x.i.u64[1] = load64(pd + 1);
+	return x;
+}
+
+FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
+	store64(mem_addr + 0, a.i.u64[0]);
+	store64(mem_addr + 1, a.i.u64[1]);
+}
+
+// Returns a copy of a with its lo and hi doubles exchanged.
+FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
+	rx_vec_f128 swapped;
+	swapped.lo = a.hi;
+	swapped.hi = a.lo;
+	return swapped;
+}
+
+// Lane-wise addition: returns { a.lo + b.lo, a.hi + b.hi }.
+FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	a.lo = a.lo + b.lo;
+	a.hi = a.hi + b.hi;
+	return a;
+}
+
+FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	rx_vec_f128 x;
+	x.lo = a.lo - b.lo;
+	x.hi = a.hi - b.hi;
+	return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	rx_vec_f128 x;
+	x.lo = a.lo * b.lo;
+	x.hi = a.hi * b.hi;
+	return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	rx_vec_f128 x;
+	x.lo = a.lo / b.lo;
+	x.hi = a.hi / b.hi;
+	return x;
+}
+
+// Lane-wise square root: applies rx_sqrt to the lo and hi doubles of a.
+FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
+	a.lo = rx_sqrt(a.lo);
+	a.hi = rx_sqrt(a.hi);
+	return a;
+}
+
+FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
+	rx_vec_i128 x;
+	x.u64[0] = a;
+	x.u64[1] = a;
+	return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
+	rx_vec_f128 x;
+	x.i = a;
+	return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
+	rx_vec_f128 v;
+	v.i.u64[0] = x0;
+	v.i.u64[1] = x1;
+	return v;
+}
+
+FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
+	rx_vec_f128 v;
+	v.i.u64[0] = x;
+	v.i.u64[1] = x;
+	return v;
+}
+
+FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	rx_vec_f128 x;
+	x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0];
+	x.i.u64[1] = a.i.u64[1] ^ b.i.u64[1];
+	return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	rx_vec_f128 x;
+	x.i.u64[0] = a.i.u64[0] & b.i.u64[0];
+	x.i.u64[1] = a.i.u64[1] & b.i.u64[1];
+	return x;
+}
+
+FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
+	rx_vec_f128 x;
+	x.i.u64[0] = a.i.u64[0] | b.i.u64[0];
+	x.i.u64[1] = a.i.u64[1] | b.i.u64[1];
+	return x;
+}
+
+FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
+	return a.u32[0];
+}
+
+FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
+	return a.u32[1];
+}
+
+FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
+	return a.u32[2];
+}
+
+FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
+	return a.u32[3];
+}
+
+// Builds a 128-bit integer vector from four 32-bit values; i0 is stored in
+// u32[0] and i3 in u32[3].
+FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int i3, int i2, int i1, int i0) {
+	rx_vec_i128 lanes;
+	lanes.u32[3] = i3;
+	lanes.u32[2] = i2;
+	lanes.u32[1] = i1;
+	lanes.u32[0] = i0;
+	return lanes;
+}
+
+// Bitwise XOR of two 128-bit integer vectors, one 32-bit lane at a time.
+FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 a, rx_vec_i128 b) {
+	for (int lane = 0; lane < 4; ++lane) {
+		a.u32[lane] ^= b.u32[lane];
+	}
+	return a;
+}
+
+FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const* p) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+	return *p;
+#else
+	const uint32_t* ptr = (const uint32_t*)p;
+	rx_vec_i128 c;
+	c.u32[0] = load32(ptr + 0);
+	c.u32[1] = load32(ptr + 1);
+	c.u32[2] = load32(ptr + 2);
+	c.u32[3] = load32(ptr + 3);
+	return c;
+#endif
+}
+
+FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) {
+#if defined(NATIVE_LITTLE_ENDIAN)
+	*p = b;
+#else
+	uint32_t* ptr = (uint32_t*)p;
+	store32(ptr + 0, b.u32[0]);
+	store32(ptr + 1, b.u32[1]);
+	store32(ptr + 2, b.u32[2]);
+	store32(ptr + 3, b.u32[3]);
+#endif
+}
+
+// Reads two 32-bit signed integers from addr (via load32, at offsets 0 and 4)
+// and returns them converted to doubles: the first in lo, the second in hi.
+FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
+	const uint8_t* bytes = (const uint8_t*)addr;
+	rx_vec_f128 result;
+	result.lo = (double)unsigned32ToSigned2sCompl(load32(bytes + 0));
+	result.hi = (double)unsigned32ToSigned2sCompl(load32(bytes + 4));
+	return result;
+}
+
+#define RANDOMX_DEFAULT_FENV
+
+#endif
+
+#ifndef HAVE_AES
+// None of the platform sections above defined HAVE_AES. The stand-ins below
+// keep callers compiling and throw std::runtime_error if they are ever
+// called; HAVE_AES is then defined as 0.
+static const char* platformError = "Platform doesn't support hardware AES";
+
+#include <stdexcept>
+
+// Stub: always throws, the arguments are ignored.
+FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
+	throw std::runtime_error(platformError);
+}
+
+// Stub: always throws, the arguments are ignored.
+FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
+	throw std::runtime_error(platformError);
+}
+
+#define HAVE_AES 0
+
+#endif
+
+#ifdef RANDOMX_DEFAULT_FENV
+
+void rx_reset_float_state();
+
+void rx_set_rounding_mode(uint32_t mode);
+
+uint32_t rx_get_rounding_mode();
+
+#endif
+
+// Scalar helpers implemented in instructions_portable.cpp.
+// loadDoublePortable: reads 8 bytes with load64 and returns them as a double.
+double loadDoublePortable(const void* addr);
+// mulh / smulh: upper 64 bits of the unsigned / signed 128-bit product.
+uint64_t mulh(uint64_t, uint64_t);
+int64_t smulh(int64_t, int64_t);
+// rotl / rotr: 64-bit rotate left / right by the given bit count.
+uint64_t rotl(uint64_t, unsigned int);
+uint64_t rotr(uint64_t, unsigned int);
--- a/crypto/randomx/jit_compiler.hpp
+++ b/crypto/randomx/jit_compiler.hpp
@ -0,0 +1,79 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include "common.hpp"
+
+namespace randomx {
+
+	// Byte buffer with a write cursor. `code` points at the destination
+	// storage and `codePos` is the offset at which the next emit() writes.
+	// No bounds checking is done here; the buffer must be large enough.
+	// NOTE(review): rcpCount is not used inside this struct - presumably a
+	// counter maintained by the code generator; confirm at the use sites.
+	struct CodeBuffer {
+		uint8_t* code;
+		int32_t codePos;
+		int32_t rcpCount;
+
+		// Appends len bytes from src at the cursor and advances the cursor.
+		void emit(const uint8_t* src, int32_t len) {
+			memcpy(&code[codePos], src, len);
+			codePos += len;
+		}
+
+		// Appends the object representation of src (sizeof(T) bytes) at the
+		// cursor and advances the cursor.
+		template<typename T>
+		void emit(T src) {
+			memcpy(&code[codePos], &src, sizeof(src));
+			codePos += sizeof(src);
+		}
+
+		// Writes len bytes from src at the given offset. The parameter
+		// shadows the member codePos, so the cursor is left untouched.
+		void emitAt(int32_t codePos, const uint8_t* src, int32_t len) {
+			memcpy(&code[codePos], src, len);
+		}
+
+		// Writes the object representation of src at the given offset
+		// without moving the cursor.
+		template<typename T>
+		void emitAt(int32_t codePos, T src) {
+			memcpy(&code[codePos], &src, sizeof(src));
+		}
+	};
+
+	// CodeBuffer extended with per-program bookkeeping: one int32_t slot per
+	// program instruction (RANDOMX_PROGRAM_SIZE entries) and one int per
+	// register (RegistersCount entries).
+	// NOTE(review): presumably instructionOffsets holds code offsets of the
+	// emitted instructions and registerUsage tracks the last instruction that
+	// touched each register - confirm in the compiler implementation.
+	struct CompilerState : public CodeBuffer {
+		int32_t instructionOffsets[RANDOMX_PROGRAM_SIZE];
+		int registerUsage[RegistersCount];
+	};
+}
+
+#if defined(RANDOMX_COMPILER_X86)
+#include "jit_compiler_x86.hpp"
+#elif defined(RANDOMX_COMPILER_A64)
+#include "jit_compiler_a64.hpp"
+#elif defined(RANDOMX_COMPILER_RV64)
+#include "jit_compiler_rv64.hpp"
+#else
+#include "jit_compiler_fallback.hpp"
+#endif
+
+#if defined(__OpenBSD__) || defined(__NetBSD__) || (defined(__APPLE__) && defined(__aarch64__))
+#define RANDOMX_FORCE_SECURE
+#endif
--- a/crypto/randomx/jit_compiler_a64.cpp
+++ b/crypto/randomx/jit_compiler_a64.cpp
--- a/crypto/randomx/jit_compiler_a64.hpp
+++ b/crypto/randomx/jit_compiler_a64.hpp
@ -0,0 +1,128 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2019, SChernykh    <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include <stdexcept>
+#include "common.hpp"
+#include "jit_compiler_a64_static.hpp"
+
+namespace randomx {
+
+	class Program;
+	struct ProgramConfiguration;
+	class SuperscalarProgram;
+	class Instruction;
+
+	typedef void(JitCompilerA64::*InstructionGeneratorA64)(Instruction&, uint32_t&);
+
+	// Declaration of the AArch64 JIT compiler. Only the small inline helpers
+	// are defined here; everything else is implemented outside this header.
+	class JitCompilerA64 {
+	public:
+		JitCompilerA64();
+		~JitCompilerA64();
+
+		void generateProgram(Program&, ProgramConfiguration&);
+		void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
+
+		template<size_t N>
+		void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &);
+
+		// Intentionally empty in this backend.
+		void generateDatasetInitCode() {}
+
+		// The generated program starts at the beginning of the code buffer.
+		ProgramFunc* getProgramFunc() { return reinterpret_cast<ProgramFunc*>(code); }
+		DatasetInitFunc* getDatasetInitFunc();
+		uint8_t* getCode() { return code; }
+		size_t getCodeSize();
+
+		void enableWriting();
+		void enableExecution();
+		void enableAll();
+
+	private:
+		// Table of instruction generators (member-function pointers), 256 entries.
+		static InstructionGeneratorA64 engine[256];
+		uint32_t reg_changed_offset[8];
+		uint8_t* code;
+		uint32_t literalPos;
+		uint32_t num32bitLiterals;
+
+		// Writes the 32-bit value val at code + codePos and advances codePos
+		// by 4. memcpy is used, as in emit64, so the store does not depend on
+		// the alignment of the destination or on accessing the byte buffer
+		// through a uint32_t pointer.
+		static void emit32(uint32_t val, uint8_t* code, uint32_t& codePos)
+		{
+			memcpy(code + codePos, &val, sizeof(val));
+			codePos += sizeof(val);
+		}
+
+		// Writes the 64-bit value val at code + codePos and advances codePos by 8.
+		static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos)
+		{
+			memcpy(code + codePos, &val, sizeof(val));
+			codePos += sizeof(val);
+		}
+
+		void emitMovImmediate(uint32_t dst, uint32_t imm, uint8_t* code, uint32_t& codePos);
+		void emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, uint8_t* code, uint32_t& codePos);
+
+		template<uint32_t tmp_reg>
+		void emitMemLoad(uint32_t dst, uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos);
+
+		template<uint32_t tmp_reg_fp>
+		void emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* code, uint32_t& codePos);
+
+		// One generator per instruction; each matches the
+		// InstructionGeneratorA64 signature (Instruction&, uint32_t&).
+		void h_IADD_RS(Instruction&, uint32_t&);
+		void h_IADD_M(Instruction&, uint32_t&);
+		void h_ISUB_R(Instruction&, uint32_t&);
+		void h_ISUB_M(Instruction&, uint32_t&);
+		void h_IMUL_R(Instruction&, uint32_t&);
+		void h_IMUL_M(Instruction&, uint32_t&);
+		void h_IMULH_R(Instruction&, uint32_t&);
+		void h_IMULH_M(Instruction&, uint32_t&);
+		void h_ISMULH_R(Instruction&, uint32_t&);
+		void h_ISMULH_M(Instruction&, uint32_t&);
+		void h_IMUL_RCP(Instruction&, uint32_t&);
+		void h_INEG_R(Instruction&, uint32_t&);
+		void h_IXOR_R(Instruction&, uint32_t&);
+		void h_IXOR_M(Instruction&, uint32_t&);
+		void h_IROR_R(Instruction&, uint32_t&);
+		void h_IROL_R(Instruction&, uint32_t&);
+		void h_ISWAP_R(Instruction&, uint32_t&);
+		void h_FSWAP_R(Instruction&, uint32_t&);
+		void h_FADD_R(Instruction&, uint32_t&);
+		void h_FADD_M(Instruction&, uint32_t&);
+		void h_FSUB_R(Instruction&, uint32_t&);
+		void h_FSUB_M(Instruction&, uint32_t&);
+		void h_FSCAL_R(Instruction&, uint32_t&);
+		void h_FMUL_R(Instruction&, uint32_t&);
+		void h_FDIV_M(Instruction&, uint32_t&);
+		void h_FSQRT_R(Instruction&, uint32_t&);
+		void h_CBRANCH(Instruction&, uint32_t&);
+		void h_CFROUND(Instruction&, uint32_t&);
+		void h_ISTORE(Instruction&, uint32_t&);
+		void h_NOP(Instruction&, uint32_t&);
+	};
+}
--- a/crypto/randomx/jit_compiler_a64_static.S
+++ b/crypto/randomx/jit_compiler_a64_static.S
@ -0,0 +1,589 @@
+# Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+# Copyright (c) 2019, SChernykh    <https://github.com/SChernykh>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 	* Redistributions of source code must retain the above copyright
+# 	  notice, this list of conditions and the following disclaimer.
+# 	* Redistributions in binary form must reproduce the above copyright
+# 	  notice, this list of conditions and the following disclaimer in the
+# 	  documentation and/or other materials provided with the distribution.
+# 	* Neither the name of the copyright holder nor the
+# 	  names of its contributors may be used to endorse or promote products
+# 	  derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Symbol-name wrapper used for every exported label below: on Apple targets the
+# name gets a leading underscore, elsewhere it is used unchanged.
+#if defined(__APPLE__)
+#define DECL(x) _##x
+#else
+#define DECL(x) x
+#endif
+
+	# Assemble for the ARMv8-A baseline into the text section and export the
+	# labels that jit_compiler_a64_static.hpp declares with C linkage.
+	.arch armv8-a
+	.text
+	.global DECL(randomx_program_aarch64)
+	.global DECL(randomx_program_aarch64_main_loop)
+	.global DECL(randomx_program_aarch64_vm_instructions)
+	.global DECL(randomx_program_aarch64_imul_rcp_literals_end)
+	.global DECL(randomx_program_aarch64_vm_instructions_end)
+	.global DECL(randomx_program_aarch64_cacheline_align_mask1)
+	.global DECL(randomx_program_aarch64_cacheline_align_mask2)
+	.global DECL(randomx_program_aarch64_update_spMix1)
+	.global DECL(randomx_program_aarch64_vm_instructions_end_light)
+	.global DECL(randomx_program_aarch64_light_cacheline_align_mask)
+	.global DECL(randomx_program_aarch64_light_dataset_offset)
+	.global DECL(randomx_init_dataset_aarch64)
+	.global DECL(randomx_init_dataset_aarch64_end)
+	.global DECL(randomx_calc_dataset_item_aarch64)
+	.global DECL(randomx_calc_dataset_item_aarch64_prefetch)
+	.global DECL(randomx_calc_dataset_item_aarch64_mix)
+	.global DECL(randomx_calc_dataset_item_aarch64_store_result)
+	.global DECL(randomx_calc_dataset_item_aarch64_end)
+
+#include "configuration.h"
+
+# Register allocation
+
+# x0  -> pointer to reg buffer and then literal for IMUL_RCP
+# x1  -> pointer to mem buffer and then to dataset
+# x2  -> pointer to scratchpad
+# x3  -> loop counter
+# x4  -> "r0"
+# x5  -> "r1"
+# x6  -> "r2"
+# x7  -> "r3"
+# x8  -> fpcr (reversed bits)
+# x9  -> mx, ma
+# x10 -> spMix1
+# x11 -> literal for IMUL_RCP
+# x12 -> "r4"
+# x13 -> "r5"
+# x14 -> "r6"
+# x15 -> "r7"
+# x16 -> spAddr0
+# x17 -> spAddr1
+# x18 -> unused (platform register, don't touch it)
+# x19 -> temporary
+# x20 -> temporary
+# x21 -> literal for IMUL_RCP
+# x22 -> literal for IMUL_RCP
+# x23 -> literal for IMUL_RCP
+# x24 -> literal for IMUL_RCP
+# x25 -> literal for IMUL_RCP
+# x26 -> literal for IMUL_RCP
+# x27 -> literal for IMUL_RCP
+# x28 -> literal for IMUL_RCP
+# x29 -> literal for IMUL_RCP
+# x30 -> literal for IMUL_RCP
+
+# v0-v15 -> store 32-bit literals
+# v16 -> "f0"
+# v17 -> "f1"
+# v18 -> "f2"
+# v19 -> "f3"
+# v20 -> "e0"
+# v21 -> "e1"
+# v22 -> "e2"
+# v23 -> "e3"
+# v24 -> "a0"
+# v25 -> "a1"
+# v26 -> "a2"
+# v27 -> "a3"
+# v28 -> temporary
+# v29 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
+# v30 -> E 'or' mask  = 0x3*00000000******3*00000000******
+# v31 -> scale mask   = 0x80f000000000000080f0000000000000
+
+# Entry point of the program template. The C prototype in
+# jit_compiler_a64_static.hpp is
+#   void randomx_program_aarch64(void* reg, void* mem, void* scratchpad, uint64_t iterations)
+# which matches the register allocation above: x0 = reg buffer, x1 = mem buffer,
+# x2 = scratchpad, x3 = loop counter.
+# The prologue builds a 192-byte frame of saved registers, zeroes the eight
+# integer VM registers, loads masks and constants, pushes x0 (16 more bytes of
+# stack) and loads the literal slots, then falls through into the main loop.
+	.balign 4
+DECL(randomx_program_aarch64):
+	# Save callee-saved registers
+	sub	sp, sp, 192
+	stp	x16, x17, [sp]
+	str	x19, [sp, 16]
+	stp	x20, x21, [sp, 32]
+	stp	x22, x23, [sp, 48]
+	stp	x24, x25, [sp, 64]
+	stp	x26, x27, [sp, 80]
+	stp	x28, x29, [sp, 96]
+	stp	x8, x30, [sp, 112]
+	stp	d8, d9, [sp, 128]
+	stp	d10, d11, [sp, 144]
+	stp	d12, d13, [sp, 160]
+	stp	d14, d15, [sp, 176]
+
+	# Zero integer registers
+	mov	x4, xzr
+	mov	x5, xzr
+	mov	x6, xzr
+	mov	x7, xzr
+	mov	x12, xzr
+	mov	x13, xzr
+	mov	x14, xzr
+	mov	x15, xzr
+
+	# Load ma, mx and dataset pointer
+	# (x9 takes the first 8 bytes of the mem buffer, x1 is replaced by the next 8)
+	ldp	x9, x1, [x1]
+
+	# Load initial spMix value
+	mov	x10, x9
+
+	# Load group A registers
+	ldp	q24, q25, [x0, 192]
+	ldp	q26, q27, [x0, 224]
+
+	# Load E 'and' mask
+	mov	x16, 0x00FFFFFFFFFFFFFF
+	ins	v29.d[0], x16
+	ins	v29.d[1], x16
+
+	# Load E 'or' mask (stored in reg.f[0])
+	ldr	q30, [x0, 64]
+
+	# Load scale mask
+	mov	x16, 0x80f0000000000000
+	ins	v31.d[0], x16
+	ins	v31.d[1], x16
+
+	# Read fpcr
+	# (x8 holds the bit-reversed value from here on)
+	mrs	x8, fpcr
+	rbit	x8, x8
+
+	# Save x0
+	# (x0 is reused as a literal register below and popped again after the loop)
+	str	x0, [sp, -16]!
+
+	# Read literals
+	# The slots are zero-filled in this file.
+	# NOTE(review): presumably the JIT compiler fills them in - confirm.
+	ldr	x0, literal_x0
+	ldr	x11, literal_x11
+	ldr	x21, literal_x21
+	ldr	x22, literal_x22
+	ldr	x23, literal_x23
+	ldr	x24, literal_x24
+	ldr	x25, literal_x25
+	ldr	x26, literal_x26
+	ldr	x27, literal_x27
+	ldr	x28, literal_x28
+	ldr	x29, literal_x29
+	ldr	x30, literal_x30
+
+	ldr	q0, literal_v0
+	ldr	q1, literal_v1
+	ldr	q2, literal_v2
+	ldr	q3, literal_v3
+	ldr	q4, literal_v4
+	ldr	q5, literal_v5
+	ldr	q6, literal_v6
+	ldr	q7, literal_v7
+	ldr	q8, literal_v8
+	ldr	q9, literal_v9
+	ldr	q10, literal_v10
+	ldr	q11, literal_v11
+	ldr	q12, literal_v12
+	ldr	q13, literal_v13
+	ldr	q14, literal_v14
+	ldr	q15, literal_v15
+
+# Start of one loop iteration (branch target of the loop at the end of the
+# template). Derives spAddr0 and spAddr1 from the low and high halves of spMix1
+# (x10), xors the eight integer registers with the 64 bytes at spAddr0, and
+# loads groups F and E from the 64 bytes at spAddr1: each 32-bit word is
+# sign-extended and converted to double, and group E is then masked with
+# v29 ('and') and v30 ('or').
+DECL(randomx_program_aarch64_main_loop):
+	# spAddr0 = spMix1 & ScratchpadL3Mask64;
+	# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
+	lsr	x20, x10, 32
+
+	# Actual mask will be inserted by JIT compiler
+	and	w16, w10, 1
+	and	w17, w20, 1
+
+	# x16 = scratchpad + spAddr0
+	# x17 = scratchpad + spAddr1
+	add	x16, x16, x2
+	add	x17, x17, x2
+
+	# xor integer registers with scratchpad data (spAddr0)
+	ldp	x20, x19, [x16]
+	eor	x4, x4, x20
+	eor	x5, x5, x19
+	ldp	x20, x19, [x16, 16]
+	eor	x6, x6, x20
+	eor	x7, x7, x19
+	ldp	x20, x19, [x16, 32]
+	eor	x12, x12, x20
+	eor	x13, x13, x19
+	ldp	x20, x19, [x16, 48]
+	eor	x14, x14, x20
+	eor	x15, x15, x19
+
+	# Load group F registers (spAddr1)
+	# ldpsw sign-extends two 32-bit words; scvtf turns both lanes into doubles
+	ldpsw	x20, x19, [x17]
+	ins	v16.d[0], x20
+	ins	v16.d[1], x19
+	ldpsw	x20, x19, [x17, 8]
+	ins	v17.d[0], x20
+	ins	v17.d[1], x19
+	ldpsw	x20, x19, [x17, 16]
+	ins	v18.d[0], x20
+	ins	v18.d[1], x19
+	ldpsw	x20, x19, [x17, 24]
+	ins	v19.d[0], x20
+	ins	v19.d[1], x19
+	scvtf	v16.2d, v16.2d
+	scvtf	v17.2d, v17.2d
+	scvtf	v18.2d, v18.2d
+	scvtf	v19.2d, v19.2d
+
+	# Load group E registers (spAddr1)
+	# Same conversion as group F, followed by the E 'and' / 'or' masks
+	ldpsw	x20, x19, [x17, 32]
+	ins	v20.d[0], x20
+	ins	v20.d[1], x19
+	ldpsw	x20, x19, [x17, 40]
+	ins	v21.d[0], x20
+	ins	v21.d[1], x19
+	ldpsw	x20, x19, [x17, 48]
+	ins	v22.d[0], x20
+	ins	v22.d[1], x19
+	ldpsw	x20, x19, [x17, 56]
+	ins	v23.d[0], x20
+	ins	v23.d[1], x19
+	scvtf	v20.2d, v20.2d
+	scvtf	v21.2d, v21.2d
+	scvtf	v22.2d, v22.2d
+	scvtf	v23.2d, v23.2d
+	and	v20.16b, v20.16b, v29.16b
+	and	v21.16b, v21.16b, v29.16b
+	and	v22.16b, v22.16b, v29.16b
+	and	v23.16b, v23.16b, v29.16b
+	orr	v20.16b, v20.16b, v30.16b
+	orr	v21.16b, v21.16b, v30.16b
+	orr	v22.16b, v22.16b, v30.16b
+	orr	v23.16b, v23.16b, v30.16b
+
+	# Execute VM instructions
+DECL(randomx_program_aarch64_vm_instructions):
+
+	# buffer for generated instructions
+	# FDIV_M is the largest instruction taking up to 12 ARMv8 instructions
+	# (reserves 12 zeroed 4-byte words per program instruction)
+	.fill RANDOMX_PROGRAM_SIZE*12,4,0
+
+# 8-byte literal slots, zero-filled here; the prologue loads them into
+# x0, x11 and x21-x30 with pc-relative ldr.
+literal_x0:  .fill 1,8,0
+literal_x11: .fill 1,8,0
+literal_x21: .fill 1,8,0
+literal_x22: .fill 1,8,0
+literal_x23: .fill 1,8,0
+literal_x24: .fill 1,8,0
+literal_x25: .fill 1,8,0
+literal_x26: .fill 1,8,0
+literal_x27: .fill 1,8,0
+literal_x28: .fill 1,8,0
+literal_x29: .fill 1,8,0
+literal_x30: .fill 1,8,0
+DECL(randomx_program_aarch64_imul_rcp_literals_end):
+
+# 16-byte literal slots, zero-filled here; the prologue loads them into q0-q15.
+literal_v0:  .fill 2,8,0
+literal_v1:  .fill 2,8,0
+literal_v2:  .fill 2,8,0
+literal_v3:  .fill 2,8,0
+literal_v4:  .fill 2,8,0
+literal_v5:  .fill 2,8,0
+literal_v6:  .fill 2,8,0
+literal_v7:  .fill 2,8,0
+literal_v8:  .fill 2,8,0
+literal_v9:  .fill 2,8,0
+literal_v10: .fill 2,8,0
+literal_v11: .fill 2,8,0
+literal_v12: .fill 2,8,0
+literal_v13: .fill 2,8,0
+literal_v14: .fill 2,8,0
+literal_v15: .fill 2,8,0
+
+# Tail of the loop body for the variant that reads dataset memory through x1.
+# It prefetches one dataset line, xors another into the integer registers,
+# writes the registers back to the scratchpad and repeats the main loop until
+# the counter in x3 reaches zero. After the last iteration it stores the
+# register file to the reg buffer, restores the saved registers and returns.
+DECL(randomx_program_aarch64_vm_instructions_end):
+	# Calculate dataset pointer for dataset read
+	# Do it here to break false dependency from readReg2 and readReg3 (see next line)
+	lsr	x10, x9, 32
+
+	# mx ^= r[readReg2] ^ r[readReg3];
+	# Only x20 is xored in here and nothing in this file computes it beforehand.
+	# NOTE(review): presumably generated code leaves r[readReg2] ^ r[readReg3]
+	# in x20 - confirm in the JIT compiler.
+	eor	x9, x9, x20
+
+	# Calculate dataset pointer for dataset prefetch
+	mov	w20, w9
+DECL(randomx_program_aarch64_cacheline_align_mask1):
+	# Actual mask will be inserted by JIT compiler
+	and	x20, x20, 1
+	add	x20, x20, x1
+
+	# Prefetch dataset data
+	prfm	pldl2strm, [x20]
+
+	# mx <-> ma
+	ror	x9, x9, 32
+
+DECL(randomx_program_aarch64_cacheline_align_mask2):
+	# Actual mask will be inserted by JIT compiler
+	and	x10, x10, 1
+	add	x10, x10, x1
+
+# Shared with the light variant, which branches here with x10 pointing at a
+# 64-byte item it has just computed.
+DECL(randomx_program_aarch64_xor_with_dataset_line):
+rx_program_xor_with_dataset_line:
+	# xor integer registers with dataset data
+	ldp	x20, x19, [x10]
+	eor	x4, x4, x20
+	eor	x5, x5, x19
+	ldp	x20, x19, [x10, 16]
+	eor	x6, x6, x20
+	eor	x7, x7, x19
+	ldp	x20, x19, [x10, 32]
+	eor	x12, x12, x20
+	eor	x13, x13, x19
+	ldp	x20, x19, [x10, 48]
+	eor	x14, x14, x20
+	eor	x15, x15, x19
+
+DECL(randomx_program_aarch64_update_spMix1):
+	# JIT compiler will replace it with "eor x10, config.readReg0, config.readReg1"
+	eor	x10, x0, x0
+
+	# Store integer registers to scratchpad (spAddr1)
+	stp	x4, x5, [x17, 0]
+	stp	x6, x7, [x17, 16]
+	stp	x12, x13, [x17, 32]
+	stp	x14, x15, [x17, 48]
+
+	# xor group F and group E registers
+	eor	v16.16b, v16.16b, v20.16b
+	eor	v17.16b, v17.16b, v21.16b
+	eor	v18.16b, v18.16b, v22.16b
+	eor	v19.16b, v19.16b, v23.16b
+
+	# Store FP registers to scratchpad (spAddr0)
+	stp	q16, q17, [x16, 0]
+	stp	q18, q19, [x16, 32]
+
+	# Decrement the loop counter and run another iteration while it is non-zero
+	subs	x3, x3, 1
+	bne	DECL(randomx_program_aarch64_main_loop)
+	
+	# Restore x0
+	# (pops the reg buffer pointer pushed by the prologue)
+	ldr	x0, [sp], 16
+
+	# Store integer registers
+	stp	x4, x5, [x0, 0]
+	stp	x6, x7, [x0, 16]
+	stp	x12, x13, [x0, 32]
+	stp	x14, x15, [x0, 48]
+
+	# Store FP registers
+	# (group F at offset 64, group E at offset 128)
+	stp	q16, q17, [x0, 64]
+	stp	q18, q19, [x0, 96]
+	stp	q20, q21, [x0, 128]
+	stp	q22, q23, [x0, 160]
+
+	# Restore callee-saved registers
+	ldp	x16, x17, [sp]
+	ldr	x19, [sp, 16]
+	ldp	x20, x21, [sp, 32]
+	ldp	x22, x23, [sp, 48]
+	ldp	x24, x25, [sp, 64]
+	ldp	x26, x27, [sp, 80]
+	ldp	x28, x29, [sp, 96]
+	ldp	x8, x30, [sp, 112]
+	ldp	d8, d9, [sp, 128]
+	ldp	d10, d11, [sp, 144]
+	ldp	d12, d13, [sp, 160]
+	ldp	d14, d15, [sp, 176]
+	add	sp, sp, 192
+
+	ret
+
+# Light variant of the loop tail: instead of reading a dataset line it calls
+# rx_calc_dataset_item to compute the 64-byte item into a temporary stack
+# frame, then joins the common path at rx_program_xor_with_dataset_line with
+# x10 pointing at that item.
+# Frame layout (96 bytes): [sp, 0..63] item output, [sp, 64] x0/x1,
+# [sp, 80] x2/x30.
+DECL(randomx_program_aarch64_vm_instructions_end_light):
+	sub	sp, sp, 96
+	stp	x0, x1, [sp, 64]
+	stp	x2, x30, [sp, 80]
+
+	# mx ^= r[readReg2] ^ r[readReg3];
+	eor	x9, x9, x20
+
+	# mx <-> ma
+	ror	x9, x9, 32
+
+	# x0 -> pointer to cache memory
+	mov	x0, x1
+
+	# x1 -> pointer to output
+	mov	x1, sp
+
+DECL(randomx_program_aarch64_light_cacheline_align_mask):
+	# Actual mask will be inserted by JIT compiler
+	and	w2, w9, 1
+
+	# x2 -> item number
+	lsr	x2, x2, 6
+
+DECL(randomx_program_aarch64_light_dataset_offset):
+	# Apply dataset offset (filled in by JIT compiler)
+	add	x2, x2, 0
+	add	x2, x2, 0
+
+	bl	rx_calc_dataset_item
+
+	# x10 -> computed item (bottom of the frame); then restore x0-x2 and x30
+	# NOTE(review): sp is raised past the item before the common path reads it
+	# through x10, so those 64 bytes then lie below sp - confirm nothing can
+	# write there in between (e.g. signal delivery).
+	mov	x10, sp
+	ldp	x0, x1, [sp, 64]
+	ldp	x2, x30, [sp, 80]
+	add	sp, sp, 96
+
+	b	rx_program_xor_with_dataset_line
+
+
+
+# Input parameters
+#
+# x0 -> pointer to cache
+# x1 -> pointer to dataset memory at startItem
+# x2 -> start item
+# x3 -> end item
+#
+# Calls rx_calc_dataset_item once per item number, advancing the output
+# pointer x1 by 64 bytes and the item number x2 by one each time, and stops
+# when x2 equals x3 (the end item itself is not computed).
+# The comparison runs after the loop body, so at least one item is always
+# computed; start item == end item is not treated as an empty range.
+
+DECL(randomx_init_dataset_aarch64):
+	# Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
+	stp	x20, x30, [sp, -16]!
+
+	# Load pointer to cache memory
+	# (x0 points at a location that holds the pointer)
+	ldr	x0, [x0]
+
+DECL(randomx_init_dataset_aarch64_main_loop):
+	# rx_calc_dataset_item saves and restores x0-x13, so x0-x3 survive the call
+	bl	rx_calc_dataset_item
+	add	x1, x1, 64
+	add	x2, x2, 1
+	cmp	x2, x3
+	bne	DECL(randomx_init_dataset_aarch64_main_loop)
+
+	# Restore x20 and x30
+	ldp	x20, x30, [sp], 16
+
+	ret
+
+DECL(randomx_init_dataset_aarch64_end):
+
+# Input parameters
+#
+# x0 -> pointer to cache memory
+# x1 -> pointer to output
+# x2 -> item number
+#
+# Register allocation
+#
+# x0-x7 -> output value (calculated dataset item)
+# x8 -> pointer to cache memory
+# x9 -> pointer to output
+# x10 -> registerValue
+# x11 -> mixBlock
+# x12 -> temporary
+# x13 -> temporary
+#
+# x0-x13 are saved in a 112-byte frame on entry and restored before the
+# return, so the only result visible to the caller is the 64 bytes written
+# at the output pointer.
+
+DECL(randomx_calc_dataset_item_aarch64):
+rx_calc_dataset_item:
+	sub	sp, sp, 112
+	stp	x0, x1, [sp]
+	stp	x2, x3, [sp, 16]
+	stp	x4, x5, [sp, 32]
+	stp	x6, x7, [sp, 48]
+	stp	x8, x9, [sp, 64]
+	stp	x10, x11, [sp, 80]
+	stp	x12, x13, [sp, 96]
+
+	ldr	x12, superscalarMul0
+
+	# Move the arguments out of x0-x2, which become output registers below
+	mov	x8, x0
+	mov	x9, x1
+	mov	x10, x2
+
+	# rl[0] = (itemNumber + 1) * superscalarMul0;
+	# (madd computes x2 * x12 + x12)
+	madd	x0, x2, x12, x12
+
+	# rl[1] = rl[0] ^ superscalarAdd1;
+	ldr	x12, superscalarAdd1
+	eor	x1, x0, x12
+
+	# rl[2] = rl[0] ^ superscalarAdd2;
+	ldr	x12, superscalarAdd2
+	eor	x2, x0, x12
+
+	# rl[3] = rl[0] ^ superscalarAdd3;
+	ldr	x12, superscalarAdd3
+	eor	x3, x0, x12
+
+	# rl[4] = rl[0] ^ superscalarAdd4;
+	ldr	x12, superscalarAdd4
+	eor	x4, x0, x12
+
+	# rl[5] = rl[0] ^ superscalarAdd5;
+	ldr	x12, superscalarAdd5
+	eor	x5, x0, x12
+
+	# rl[6] = rl[0] ^ superscalarAdd6;
+	ldr	x12, superscalarAdd6
+	eor	x6, x0, x12
+
+	# rl[7] = rl[0] ^ superscalarAdd7;
+	ldr	x12, superscalarAdd7
+	eor	x7, x0, x12
+
+	# Jump over the constant pool that follows
+	b	rx_calc_dataset_item_prefetch
+
+superscalarMul0: .quad 6364136223846793005
+superscalarAdd1: .quad 9298411001130361340
+superscalarAdd2: .quad 12065312585734608966
+superscalarAdd3: .quad 9306329213124626780
+superscalarAdd4: .quad 5281919268842080866
+superscalarAdd5: .quad 10536153434571861004
+superscalarAdd6: .quad 3398623926847679864
+superscalarAdd7: .quad 9549104520008361294
+
+# Prefetch -> SuperScalar hash -> Mix will be repeated N times
+
+DECL(randomx_calc_dataset_item_aarch64_prefetch):
+rx_calc_dataset_item_prefetch:
+	# Actual mask will be inserted by JIT compiler
+	# mixBlock = cache + (registerValue & mask) * 64
+	and	x11, x10, 1
+	add	x11, x8, x11, lsl 6
+	prfm	pldl2strm, [x11]
+
+	# Generated SuperScalar hash program goes here
+
+DECL(randomx_calc_dataset_item_aarch64_mix):
+	# xor the 64-byte mixBlock into x0-x7
+	ldp	x12, x13, [x11]
+	eor	x0, x0, x12
+	eor	x1, x1, x13
+	ldp	x12, x13, [x11, 16]
+	eor	x2, x2, x12
+	eor	x3, x3, x13
+	ldp	x12, x13, [x11, 32]
+	eor	x4, x4, x12
+	eor	x5, x5, x13
+	ldp	x12, x13, [x11, 48]
+	eor	x6, x6, x12
+	eor	x7, x7, x13
+
+DECL(randomx_calc_dataset_item_aarch64_store_result):
+	# Write the item to the output pointer, then restore x0-x13 and return
+	stp	x0, x1, [x9]
+	stp	x2, x3, [x9, 16]
+	stp	x4, x5, [x9, 32]
+	stp	x6, x7, [x9, 48]
+
+	ldp	x0, x1, [sp]
+	ldp	x2, x3, [sp, 16]
+	ldp	x4, x5, [sp, 32]
+	ldp	x6, x7, [sp, 48]
+	ldp	x8, x9, [sp, 64]
+	ldp	x10, x11, [sp, 80]
+	ldp	x12, x13, [sp, 96]
+	add	sp, sp, 112
+
+	ret
+
+DECL(randomx_calc_dataset_item_aarch64_end):
--- a/crypto/randomx/jit_compiler_a64_static.hpp
+++ b/crypto/randomx/jit_compiler_a64_static.hpp
@ -0,0 +1,51 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+Copyright (c) 2019, SChernykh    <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+// C-linkage names exported by jit_compiler_a64_static.S.
+// randomx_program_aarch64 is declared with its real call signature. The other
+// names are labels inside that file; several mark positions within a routine
+// rather than function entry points.
+// NOTE(review): presumably only the addresses of those labels are used --
+// confirm at the use sites.
+// This header uses uint64_t but includes nothing itself, so the including file
+// has to provide <cstdint> first.
+extern "C" {
+	void randomx_program_aarch64(void* reg, void* mem, void* scratchpad, uint64_t iterations);
+	void randomx_program_aarch64_main_loop();
+	void randomx_program_aarch64_vm_instructions();
+	void randomx_program_aarch64_imul_rcp_literals_end();
+	void randomx_program_aarch64_vm_instructions_end();
+	void randomx_program_aarch64_cacheline_align_mask1();
+	void randomx_program_aarch64_cacheline_align_mask2();
+	void randomx_program_aarch64_update_spMix1();
+	void randomx_program_aarch64_vm_instructions_end_light();
+	void randomx_program_aarch64_light_cacheline_align_mask();
+	void randomx_program_aarch64_light_dataset_offset();
+	void randomx_init_dataset_aarch64();
+	void randomx_init_dataset_aarch64_end();
+	void randomx_calc_dataset_item_aarch64();
+	void randomx_calc_dataset_item_aarch64_prefetch();
+	void randomx_calc_dataset_item_aarch64_mix();
+	void randomx_calc_dataset_item_aarch64_store_result();
+	void randomx_calc_dataset_item_aarch64_end();
+}
--- a/crypto/randomx/jit_compiler_fallback.hpp
+++ b/crypto/randomx/jit_compiler_fallback.hpp
@ -0,0 +1,76 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include <stdexcept>
+#include "common.hpp"
+
+namespace randomx {
+
+	class Program;
+	class SuperscalarProgram;
+	struct ProgramConfiguration;
+
+	// Stand-in JIT compiler. Construction always throws std::runtime_error, so no
+	// instance can be obtained through the constructor; the remaining members
+	// exist to satisfy the interface and either do nothing or report "no code".
+	class JitCompilerFallback {
+	public:
+		JitCompilerFallback() {
+			throw std::runtime_error("JIT compilation is not supported on this platform");
+		}
+
+		// Code generation requests: accepted and ignored.
+		void generateProgram(Program&, ProgramConfiguration&) {}
+		void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {}
+		template<size_t N>
+		void generateSuperscalarHash(SuperscalarProgram(&)[N], std::vector<uint64_t>&) {}
+		void generateDatasetInitCode() {}
+
+		// Nothing is ever generated: null entry points, null buffer, zero size.
+		ProgramFunc* getProgramFunc() { return nullptr; }
+		DatasetInitFunc* getDatasetInitFunc() { return nullptr; }
+		uint8_t* getCode() { return nullptr; }
+		size_t getCodeSize() { return 0; }
+
+		// Memory-protection switches: no-ops.
+		void enableWriting() {}
+		void enableExecution() {}
+		void enableAll() {}
+	};
+}
--- a/crypto/randomx/jit_compiler_rv64.cpp
+++ b/crypto/randomx/jit_compiler_rv64.cpp
--- a/crypto/randomx/jit_compiler_rv64.hpp
+++ b/crypto/randomx/jit_compiler_rv64.hpp
@ -0,0 +1,78 @@
+/*
+Copyright (c) 2023 tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <vector>
+#include "jit_compiler.hpp"
+
+namespace randomx {
+
+	class Instruction;
+	class Program;
+	class SuperscalarProgram;
+	struct ProgramConfiguration;
+
+	// JIT compiler front end for RV64. The accessors below return the "vector"
+	// code and entry points when vectorCode is set, and the ones held in
+	// state / entryProgram / entryDataInit otherwise.
+	class JitCompilerRV64 {
+	public:
+		JitCompilerRV64();
+		~JitCompilerRV64();
+
+		void generateProgram(Program&, ProgramConfiguration&);
+		void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
+		void generateSuperscalarHash(SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES], std::vector<uint64_t>&);
+		void generateDatasetInitCode() {}
+
+		// Program entry point: the vector one whenever vectorCode is non-null.
+		ProgramFunc* getProgramFunc() {
+			void* entry = entryProgram;
+			if (vectorCode) {
+				entry = entryProgramVector;
+			}
+			return (ProgramFunc*)entry;
+		}
+
+		// Dataset-init entry point: the vector one only when vectorCode is
+		// non-null and vectorRegisterLength is at least 256.
+		DatasetInitFunc* getDatasetInitFunc() {
+			const bool useVector = vectorCode && (vectorRegisterLength >= 256);
+			return (DatasetInitFunc*)(useVector ? entryDataInitVector : entryDataInit);
+		}
+
+		// Start of the active code buffer.
+		uint8_t* getCode() {
+			return vectorCode ? vectorCode : state.code;
+		}
+
+		size_t getCodeSize();
+		void enableWriting();
+		void enableExecution();
+		void enableAll();
+
+		static uint8_t instMap[256];
+	private:
+		CompilerState state;
+
+		uint8_t* vectorCode = nullptr;
+		size_t vectorCodeSize = 0;
+		int vectorRegisterLength = 0;
+
+		void* entryDataInit = nullptr;
+		void* entryDataInitVector = nullptr;
+		void* entryProgram = nullptr;
+		void* entryProgramVector = nullptr;
+	};
+}
--- a/crypto/randomx/jit_compiler_rv64_static.S
+++ b/crypto/randomx/jit_compiler_rv64_static.S
--- a/crypto/randomx/jit_compiler_rv64_static.hpp
+++ b/crypto/randomx/jit_compiler_rv64_static.hpp
@ -0,0 +1,53 @@
+/*
+Copyright (c) 2023 tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+// C-linkage symbols with the randomx_riscv64_ prefix, all declared as
+// parameterless functions returning void.
+// NOTE(review): presumably these are labels exported by
+// jit_compiler_rv64_static.S and used by address only; the contents of that
+// file are not shown in this change -- confirm there.
+extern "C" {
+	void randomx_riscv64_literals();
+	void randomx_riscv64_literals_end();
+	void randomx_riscv64_data_init();
+	void randomx_riscv64_fix_data_call();
+	void randomx_riscv64_prologue();
+	void randomx_riscv64_loop_begin();
+	void randomx_riscv64_data_read();
+	void randomx_riscv64_data_read_light();
+	void randomx_riscv64_fix_loop_call();
+	void randomx_riscv64_spad_store();
+	void randomx_riscv64_spad_store_hardaes();
+	void randomx_riscv64_spad_store_softaes();
+	void randomx_riscv64_loop_end();
+	void randomx_riscv64_fix_continue_loop();
+	void randomx_riscv64_epilogue();
+	void randomx_riscv64_softaes();
+	void randomx_riscv64_program_end();
+	void randomx_riscv64_ssh_init();
+	void randomx_riscv64_ssh_load();
+	void randomx_riscv64_ssh_prefetch();
+	void randomx_riscv64_ssh_end();
+}
--- a/crypto/randomx/jit_compiler_rv64_vector.cpp
+++ b/crypto/randomx/jit_compiler_rv64_vector.cpp
@ -0,0 +1,903 @@
+/*
+Copyright (c) 2023, tevador    <tevador@gmail.com>
+Copyright (c) 2025, SChernykh       <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "configuration.h"
+#include "jit_compiler_rv64_vector.h"
+#include "jit_compiler_rv64_vector_static.h"
+#include "reciprocal.h"
+#include "superscalar.hpp"
+#include "program.hpp"
+
+namespace randomx {
+
+// Compile-time helper: returns prev plus the number of one-bit right shifts
+// needed to reduce x to 1, i.e. prev + log2(x) when x is a power of two.
+// Kept as a single return expression so it stays a valid C++11 constexpr.
+// x must be non-zero, otherwise the recursion never reaches 1.
+constexpr int maskLog2(uint32_t x, int prev) {
+	return (x != 1) ? maskLog2(x >> 1, prev + 1) : prev;
+}
+
+// 32 minus log2 of each scratchpad level size (sizes come from configuration.h).
+// Further down this file they are used as "(int32_t)(imm << shift) >> shift",
+// which keeps the low log2(size) bits of a 32-bit immediate and sign-extends them.
+constexpr int MaskL1Shift = 32 - maskLog2(RANDOMX_SCRATCHPAD_L1, 0);
+constexpr int MaskL2Shift = 32 - maskLog2(RANDOMX_SCRATCHPAD_L2, 0);
+constexpr int MaskL3Shift = 32 - maskLog2(RANDOMX_SCRATCHPAD_L3, 0);
+
+// ADDR(x):    address of symbol x as a byte pointer.
+// DIST(x, y): signed byte distance from symbol x to symbol y, i.e. ADDR(y) - ADDR(x).
+#define ADDR(x) ((uint8_t*) &(x))
+#define DIST(x, y) (ADDR(y) - ADDR(x))
+
+// Translates the given superscalar programs into RISC-V vector code inside buf
+// and returns the entry point of the dataset-init routine.
+//
+// Every offset into buf is taken relative to randomx_riscv64_vector_code_begin,
+// i.e. buf is assumed to already hold a copy of the static code starting at
+// that symbol - TODO confirm against the caller.
+//
+//   buf             - destination code buffer
+//   programs        - array of num_programs superscalar programs
+//   num_programs    - number of entries in programs
+//   reciprocalCache - indexed with the imm32 of an IMUL_RCP instruction to get
+//                     the 64-bit multiplier stored in the literal pool
+//
+// Returns buf + offset of randomx_riscv64_vector_sshash_dataset_init.
+void* generateDatasetInitVectorRV64(uint8_t* buf, SuperscalarProgram* programs, size_t num_programs, std::vector<uint64_t>& reciprocalCache)
+{
+	uint8_t* p = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_generated_instructions);
+
+	uint8_t* literals = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_imul_rcp_literals);
+	uint8_t* cur_literal = literals;
+
+	// Appends one 32-bit instruction word at p and advances p.
+	// A lambda is used instead of a macro so that nothing leaks past this function.
+	const auto emit_insn = [&p](uint32_t insn) {
+		memcpy(p, &insn, 4);
+		p += 4;
+	};
+
+	for (size_t i = 0; i < num_programs; ++i) {
+		// Step 4: copy the static code between the cache_prefetch and xor labels
+		size_t k = DIST(randomx_riscv64_vector_sshash_cache_prefetch, randomx_riscv64_vector_sshash_xor);
+		memcpy(p, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_cache_prefetch), k);
+		p += k;
+
+		// Step 5: translate the superscalar program, one instruction at a time
+		for (uint32_t j = 0; j < programs[i].size; ++j) {
+			const uint32_t dst = programs[i].programBuffer[j].dst & 7;
+			const uint32_t src = programs[i].programBuffer[j].src & 7;
+			const uint32_t modShift = (programs[i].programBuffer[j].mod >> 2) & 3;
+			const uint32_t imm32 = programs[i].programBuffer[j].imm32;
+
+			switch (static_cast<SuperscalarInstructionType>(programs[i].programBuffer[j].opcode)) {
+			case SuperscalarInstructionType::ISUB_R:
+				// 57 00 00 0A	vsub.vv v0, v0, v0
+				emit_insn(0x0A000057 | (dst << 7) | (src << 15) | (dst << 20));
+				break;
+
+			case SuperscalarInstructionType::IXOR_R:
+				// 57 00 00 2E	vxor.vv v0, v0, v0
+				emit_insn(0x2E000057 | (dst << 7) | (src << 15) | (dst << 20));
+				break;
+
+			case SuperscalarInstructionType::IADD_RS:
+				if (modShift == 0) {
+					// 57 00 00 02	vadd.vv v0, v0, v0
+					emit_insn(0x02000057 | (dst << 7) | (src << 15) | (dst << 20));
+				}
+				else {
+					// 57 39 00 96	vsll.vi v18, v0, 0
+					// 57 00 09 02	vadd.vv v0, v0, v18
+					emit_insn(0x96003957 | (modShift << 15) | (src << 20));
+					emit_insn(0x02090057 | (dst << 7) | (dst << 20));
+				}
+				break;
+
+			case SuperscalarInstructionType::IMUL_R:
+				// 57 20 00 96	vmul.vv v0, v0, v0
+				emit_insn(0x96002057 | (dst << 7) | (src << 15) | (dst << 20));
+				break;
+
+			case SuperscalarInstructionType::IROR_C:
+				{
+#ifdef __riscv_zvkb
+					// 57 30 00 52 		vror.vi v0, v0, 0
+					emit_insn(0x52003057 | (dst << 7) | (dst << 20) | ((imm32 & 31) << 15) | ((imm32 & 32) << 21));
+#else // __riscv_zvkb
+					// No vector rotate available: rotate = (v >> n) | (v << (64 - n))
+					const uint32_t shift_right = imm32 & 63;
+					const uint32_t shift_left = 64 - shift_right;
+
+					// Shift amounts below 32 use the .vi form, larger ones go through x5
+					if (shift_right < 32) {
+						// 57 39 00 A2	vsrl.vi v18, v0, 0
+						emit_insn(0xA2003957 | (shift_right << 15) | (dst << 20));
+					}
+					else {
+						// 93 02 00 00	li x5, 0
+						// 57 C9 02 A2	vsrl.vx v18, v0, x5
+						emit_insn(0x00000293 | (shift_right << 20));
+						emit_insn(0xA202C957 | (dst << 20));
+					}
+
+					if (shift_left < 32) {
+						// 57 30 00 96	vsll.vi v0, v0, 0
+						emit_insn(0x96003057 | (dst << 7) | (shift_left << 15) | (dst << 20));
+					}
+					else {
+						// 93 02 00 00	li x5, 0
+						// 57 C0 02 96	vsll.vx v0, v0, x5
+						emit_insn(0x00000293 | (shift_left << 20));
+						emit_insn(0x9602C057 | (dst << 7) | (dst << 20));
+					}
+
+					// 57 00 20 2B vor.vv v0, v18, v0
+					emit_insn(0x2B200057 | (dst << 7) | (dst << 15));
+#endif // __riscv_zvkb
+				}
+				break;
+
+			case SuperscalarInstructionType::IADD_C7:
+			case SuperscalarInstructionType::IADD_C8:
+			case SuperscalarInstructionType::IADD_C9:
+				// B7 02 00 00	lui x5, 0
+				// 9B 82 02 00	addiw x5, x5, 0
+				// 57 C0 02 02	vadd.vx v0, v0, x5
+				// The upper part is rounded up when bit 11 is set because addiw sign-extends its 12-bit immediate
+				emit_insn(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000));
+				emit_insn(0x0002829B | ((imm32 & 0x00000FFF) << 20));
+				emit_insn(0x0202C057 | (dst << 7) | (dst << 20));
+				break;
+
+			case SuperscalarInstructionType::IXOR_C7:
+			case SuperscalarInstructionType::IXOR_C8:
+			case SuperscalarInstructionType::IXOR_C9:
+				// B7 02 00 00	lui x5, 0
+				// 9B 82 02 00	addiw x5, x5, 0
+				// 57 C0 02 2E	vxor.vx v0, v0, x5
+				emit_insn(0x000002B7 | ((imm32 + ((imm32 & 0x800) << 1)) & 0xFFFFF000));
+				emit_insn(0x0002829B | ((imm32 & 0x00000FFF) << 20));
+				emit_insn(0x2E02C057 | (dst << 7) | (dst << 20));
+				break;
+
+			case SuperscalarInstructionType::IMULH_R:
+				// 57 20 00 92	vmulhu.vv v0, v0, v0
+				emit_insn(0x92002057 | (dst << 7) | (src << 15) | (dst << 20));
+				break;
+
+			case SuperscalarInstructionType::ISMULH_R:
+				// 57 20 00 9E	vmulh.vv v0, v0, v0
+				emit_insn(0x9E002057 | (dst << 7) | (src << 15) | (dst << 20));
+				break;
+
+			case SuperscalarInstructionType::IMUL_RCP:
+				{
+					uint32_t offset = cur_literal - literals;
+
+					// The ld below has a 12-bit signed offset, so the literal base
+					// register x15 is advanced every 2040 bytes (255 literals)
+					if (offset == 2040) {
+						literals += 2040;
+						offset = 0;
+
+						// 93 87 87 7F	add x15, x15, 2040
+						emit_insn(0x7F878793);
+					}
+
+					const uint64_t r = reciprocalCache[imm32];
+					memcpy(cur_literal, &r, 8);
+					cur_literal += 8;
+
+					// 83 B2 07 00	ld x5, (x15)
+					// 57 E0 02 96	vmul.vx v0, v0, x5
+					emit_insn(0x0007B283 | (offset << 20));
+					emit_insn(0x9602E057 | (dst << 7) | (dst << 20));
+				}
+				break;
+
+			default:
+				UNREACHABLE;
+			}
+		}
+
+		// Step 6: copy the static code between the xor and end labels
+		k = DIST(randomx_riscv64_vector_sshash_xor, randomx_riscv64_vector_sshash_end);
+		memcpy(p, reinterpret_cast<void*>(randomx_riscv64_vector_sshash_xor), k);
+		p += k;
+
+		// Step 7. Set cacheIndex to the value of the register that has the longest dependency chain in the SuperscalarHash function executed in step 5.
+		if (i + 1 < num_programs) {
+			// vmv.v.v v9, v0 + programs[i].getAddressRegister()
+			const uint32_t t = 0x5E0004D7 + (static_cast<uint32_t>(programs[i].getAddressRegister()) << 15);
+			memcpy(p, &t, 4);
+			p += 4;
+		}
+	}
+
+	// Emit "J randomx_riscv64_vector_sshash_generated_instructions_end" instruction
+	// (forward jump: offset bits [10:1], [11] and [19:12] are placed per the J-type format)
+	const uint8_t* e = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_generated_instructions_end);
+	const uint32_t k = e - p;
+	const uint32_t j = 0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000);
+	memcpy(p, &j, 4);
+
+	char* result = (char*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_dataset_init));
+
+#ifdef __GNUC__
+	// The end of the flushed range is computed from the same base symbol as every
+	// other offset into buf, so it stays correct even if sshash_begin and
+	// code_begin ever stop being the same address.
+	__builtin___clear_cache(result, (char*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_sshash_end)));
+#endif
+
+	return result;
+}
+
+#define emit16(value) { const uint16_t t = value; memcpy(p, &t, 2); p += 2; }
+#define emit32(value) { const uint32_t t = value; memcpy(p, &t, 4); p += 4; }
+#define emit64(value) { const uint64_t t = value; memcpy(p, &t, 8); p += 8; }
+#define emit_data(arr) { memcpy(p, arr, sizeof(arr)); p += sizeof(arr); }
+
+// Emits the shortest of the sequences below that loads the 32-bit immediate
+// `imm` into x5, appending the code at p and advancing p:
+//   li x5, lo                     - upper part is zero
+//   lui x5, hi                    - lower 12 bits are zero
+//   (c.lui | lui) + addiw x5, lo  - everything else
+static void imm_to_x5(uint32_t imm, uint8_t*& p)
+{
+	// The low 12 bits are added as a sign-extended immediate, so the upper
+	// part is rounded up by 0x1000 whenever bit 11 is set.
+	const uint32_t lower = imm & 0x00000FFFU;
+	const uint32_t upper = (imm + ((imm & 0x800) << 1)) & 0xFFFFF000U;
+
+	if (upper == 0) {
+		// li x5, lower
+		emit32(0x00000293 + (lower << 20));
+	}
+	else if (lower == 0) {
+		// lui x5, upper
+		emit32(0x000002B7 + upper);
+	}
+	else {
+		if (upper >= (32 << 12)) {
+			// lui x5, upper
+			emit32(0x000002B7 + upper);
+		}
+		else {
+			// c.lui x5, upper (compressed form, used while upper is below 32 << 12)
+			emit16(0x6281 + (upper >> 10));
+		}
+
+		// addiw x5, x5, lower
+		emit32(0x0002829B | (lower << 20));
+	}
+}
+
+// Emits code that loads a 64-bit value from the scratchpad into x5, appending
+// it at p and advancing p.
+//
+// src != dst: address = ((x20 + src) + sign-extended imm) & mask, added to x12.
+//             (mod & 3) == 0 selects MaskL2Shift and mask register x17,
+//             anything else MaskL1Shift and mask register x16.
+// src == dst: address = imm & (RANDOMX_SCRATCHPAD_L3 - 8), a constant offset from x12.
+//
+// NOTE(review): x12 is presumably the scratchpad base pointer - confirm against
+// the static assembly.
+static void loadFromScratchpad(uint32_t src, uint32_t dst, uint32_t mod, uint32_t imm, uint8_t*& p)
+{
+	if (src != dst) {
+		const bool level2 = ((mod & 3) == 0);
+		const uint32_t sign_shift = level2 ? MaskL2Shift : MaskL1Shift;
+		const uint32_t mask_reg = level2 ? 17 : 16;
+
+		// Keep the low (32 - sign_shift) bits of imm and sign-extend them
+		const uint32_t disp = static_cast<uint32_t>(static_cast<int32_t>(imm << sign_shift) >> sign_shift);
+
+		// disp + 0x800 < 0x1000 holds exactly for 0-0x7FF and 0xFFFFF800-0xFFFFFFFF,
+		// i.e. when disp fits into 12 bit (a single addi instruction)
+		if (disp + 0x800U < 0x1000U) {
+			// addi x5, x20 + src, disp
+			emit32(0x000A0293 + (src << 15) + (disp << 20));
+		}
+		else {
+			imm_to_x5(disp, p);
+			// c.add x5, x20 + src
+			emit16(0x92D2 + (src << 2));
+		}
+
+		// and x5, x5, mask_reg
+		emit32(0x0002F2B3 + (mask_reg << 20));
+		// c.add x5, x12
+		emit16(0x92B2);
+		// ld x5, 0(x5)
+		emit32(0x0002B283);
+
+		return;
+	}
+
+	// src == dst: the address does not depend on any register value
+	const uint32_t addr = imm & (RANDOMX_SCRATCHPAD_L3 - 8);
+
+	if (addr > 2047 * 2) {
+		// lui x5, upper part of addr (rounded up when bit 11 is set)
+		emit32(0x000002B7 | ((addr + ((addr & 0x800) << 1)) & 0xFFFFF000U));
+		// c.add x5, x12
+		emit16(0x92B2);
+		// ld x5, (addr & 0xFFF)(x5)
+		emit32(0x0002B283 | ((addr & 0xFFF) << 20));
+	}
+	else if (addr > 2047) {
+		// addi x5, x12, 2047
+		emit32(0x7FF60293);
+		// ld x5, (addr - 2047)(x5)
+		emit32(0x0002B283 | ((addr - 2047) << 20));
+	}
+	else {
+		// ld x5, addr(x12)
+		emit32(0x00063283 | (addr << 20));
+	}
+}
+
+// Generates the main-loop code for one RandomX program inside buf and returns
+// the program entry point.
+//
+// Every offset into buf is taken relative to randomx_riscv64_vector_code_begin,
+// i.e. buf is assumed to already hold a copy of the static code starting at
+// that symbol - TODO confirm against the caller.
+//
+//   buf                 - code buffer that is patched in place
+//   prog                - program whose instructions are translated
+//   pcfg                - readReg0..readReg3 are patched into four xor instructions
+//   inst_map            - maps an instruction's opcode byte to an InstructionType
+//   entryDataInitScalar - when non-null, it is stored together with datasetOffset
+//                         in the light-mode data slot and the generated code ends
+//                         with a jump to the light-mode continuation
+//   datasetOffset       - only stored when entryDataInitScalar is non-null
+//
+// Returns buf + offset of randomx_riscv64_vector_program_begin.
+void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguration& pcfg, const uint8_t (&inst_map)[256], void* entryDataInitScalar, uint32_t datasetOffset)
+{
+	// Constants read by the static code: scratchpad masks, dataset mask and jump mask
+	uint64_t* params = (uint64_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_params));
+
+	params[0] = RANDOMX_SCRATCHPAD_L1 - 8;
+	params[1] = RANDOMX_SCRATCHPAD_L2 - 8;
+	params[2] = RANDOMX_SCRATCHPAD_L3 - 8;
+	params[3] = RANDOMX_DATASET_BASE_SIZE - 64;
+	params[4] = (1 << RANDOMX_JUMP_BITS) - 1;
+
+	// Literal pool for IMUL_RCP multipliers; cur_literal is the next free slot
+	uint64_t* imul_rcp_literals = (uint64_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_imul_rcp_literals));
+	uint64_t* cur_literal = imul_rcp_literals;
+
+	// Instructions in the static code whose source registers depend on pcfg
+	uint32_t* spaddr_xor	= (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_spaddr_xor));
+	uint32_t* spaddr_xor2	= (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_scratchpad_prefetch));
+	uint32_t* mx_xor	= (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_mx_xor));
+	uint32_t* mx_xor_light	= (uint32_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_mx_xor_light_mode));
+
+	*spaddr_xor			= 0x014A47B3 + (pcfg.readReg0 << 15) + (pcfg.readReg1 << 20);	// xor x15, readReg0, readReg1
+	*spaddr_xor2			= 0x014A42B3 + (pcfg.readReg0 << 15) + (pcfg.readReg1 << 20);	// xor x5,  readReg0, readReg1
+	const uint32_t mx_xor_value	= 0x014A42B3 + (pcfg.readReg2 << 15) + (pcfg.readReg3 << 20);	// xor x5,  readReg2, readReg3
+
+	*mx_xor = mx_xor_value;
+	*mx_xor_light = mx_xor_value;
+
+	if (entryDataInitScalar) {
+		void* light_mode_data = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_light_mode_data);
+
+		// Two 64-bit values: the address of the scalar dataset-init entry point and the dataset offset
+		const uint64_t data[2] = { reinterpret_cast<uint64_t>(entryDataInitScalar), datasetOffset };
+		memcpy(light_mode_data, &data, sizeof(data));
+	}
+
+	// Output cursor: generated instructions are appended here
+	uint8_t* p = (uint8_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_instructions));
+
+	// 57C8025E 		vmv.v.x v16, x5
+	// 57A9034B 		vsext.vf2 v18, v16
+	// 5798214B 		vfcvt.f.x.v v16, v18
+	static constexpr uint8_t group_f_convert[] = {
+		0x57, 0xC8, 0x02, 0x5E, 0x57, 0xA9, 0x03, 0x4B, 0x57, 0x98, 0x21, 0x4B
+	};
+
+	// 57080627 		vand.vv v16, v16, v12
+	// 5788062B 		vor.vv v16, v16, v13
+	static constexpr uint8_t group_e_post_process[] = { 0x57, 0x08, 0x06, 0x27, 0x57, 0x88, 0x06, 0x2B };
+
+	// For each integer register: code position right after the last instruction that
+	// modified it. CBRANCH uses it as the branch target.
+	uint8_t* last_modified[RegistersCount] = { p, p, p, p, p, p, p, p };
+
+	// Flags the registers that feed the "xor readReg0, readReg1" scratchpad address
+	uint8_t readReg01[RegistersCount] = {};
+
+	readReg01[pcfg.readReg0] = 1;
+	readReg01[pcfg.readReg1] = 1;
+
+	uint32_t scratchpad_prefetch_pos = 0;
+
+	// Scan the program backwards and stop at the last instruction that is either a
+	// CBRANCH or an instruction of a type ordered before FSWAP_R that may change
+	// readReg0/readReg1. If none is found the position stays 0.
+	for (int32_t i = static_cast<int32_t>(prog.getSize()) - 1; i >= 0; --i) {
+		Instruction instr = prog(i);
+
+		const InstructionType inst_type = static_cast<InstructionType>(inst_map[instr.opcode]);
+
+		if (inst_type == InstructionType::CBRANCH) {
+			scratchpad_prefetch_pos = i;
+			break;
+		}
+
+		if (inst_type < InstructionType::FSWAP_R) {
+			const uint32_t src = instr.src % RegistersCount;
+			const uint32_t dst = instr.dst % RegistersCount;
+
+			// ISWAP_R also writes to its source register
+			if ((inst_type == InstructionType::ISWAP_R) && (src != dst) && (readReg01[src] || readReg01[dst])) {
+				scratchpad_prefetch_pos = i;
+				break;
+			}
+
+			if ((inst_type == InstructionType::IMUL_RCP) && readReg01[dst] && !isZeroOrPowerOf2(instr.getImm32())) {
+				scratchpad_prefetch_pos = i;
+				break;
+			}
+
+			// Catch-all: any remaining instruction of this group whose dst is readReg0/readReg1
+			if (readReg01[dst]) {
+				scratchpad_prefetch_pos = i;
+				break;
+			}
+		}
+	}
+
+	// Translate the program, one instruction at a time
+	for (uint32_t i = 0, n = prog.getSize(); i < n; ++i) {
+		Instruction instr = prog(i);
+
+		uint32_t src = instr.src % RegistersCount;
+		uint32_t dst = instr.dst % RegistersCount;
+		const uint32_t shift = instr.getModShift();
+		uint32_t imm = instr.getImm32();
+		const uint32_t mod = instr.mod;
+
+		switch (static_cast<InstructionType>(inst_map[instr.opcode])) {
+		case InstructionType::IADD_RS:
+			if (shift == 0) {
+				// c.add x20 + dst, x20 + src
+				emit16(0x9A52 + (src << 2) + (dst << 7));
+			}
+			else {
+#ifdef __riscv_zba
+				// sh{shift}add x20 + dst, x20 + src, x20 + dst
+				emit32(0x214A0A33 + (shift << 13) + (dst << 7) + (src << 15) + (dst << 20));
+#else // __riscv_zba
+				// slli x5, x20 + src, shift
+				emit32(0x000A1293 + (src << 15) + (shift << 20));
+				// c.add x20 + dst, x5
+				emit16(0x9A16 + (dst << 7));
+#endif // __riscv_zba
+			}
+			if (dst == RegisterNeedsDisplacement) {
+				imm_to_x5(imm, p);
+
+				// c.add x20 + dst, x5
+				emit16(0x9A16 + (dst << 7));
+			}
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::IADD_M:
+			loadFromScratchpad(src, dst, mod, imm, p);
+			// c.add x20 + dst, x5
+			emit16(0x9A16 + (dst << 7));
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::ISUB_R:
+			if (src != dst) {
+				// sub x20 + dst, x20 + dst, x20 + src
+				emit32(0x414A0A33 + (dst << 7) + (dst << 15) + (src << 20));
+			}
+			else {
+				// src == dst: subtract the immediate by adding its negation
+				imm_to_x5(-imm, p);
+				// c.add x20 + dst, x5
+				emit16(0x9A16 + (dst << 7));
+			}
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::ISUB_M:
+			loadFromScratchpad(src, dst, mod, imm, p);
+			// sub x20 + dst, x20 + dst, x5
+			emit32(0x405A0A33 + (dst << 7) + (dst << 15));
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::IMUL_R:
+			if (src != dst) {
+				// mul x20 + dst, x20 + dst, x20 + src
+				emit32(0x034A0A33 + (dst << 7) + (dst << 15) + (src << 20));
+			}
+			else {
+				imm_to_x5(imm, p);
+				// mul x20 + dst, x20 + dst, x5
+				emit32(0x025A0A33 + (dst << 7) + (dst << 15));
+			}
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::IMUL_M:
+			loadFromScratchpad(src, dst, mod, imm, p);
+			// mul x20 + dst, x20 + dst, x5
+			emit32(0x025A0A33 + (dst << 7) + (dst << 15));
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::IMULH_R:
+			// mulhu x20 + dst, x20 + dst, x20 + src
+			emit32(0x034A3A33 + (dst << 7) + (dst << 15) + (src << 20));
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::IMULH_M:
+			loadFromScratchpad(src, dst, mod, imm, p);
+			// mulhu x20 + dst, x20 + dst, x5
+			emit32(0x025A3A33 + (dst << 7) + (dst << 15));
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::ISMULH_R:
+			// mulh x20 + dst, x20 + dst, x20 + src
+			emit32(0x034A1A33 + (dst << 7) + (dst << 15) + (src << 20));
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::ISMULH_M:
+			loadFromScratchpad(src, dst, mod, imm, p);
+			// mulh x20 + dst, x20 + dst, x5
+			emit32(0x025A1A33 + (dst << 7) + (dst << 15));
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::IMUL_RCP:
+			// Nothing is emitted (and last_modified is left alone) when imm is zero or a power of 2
+			if (!isZeroOrPowerOf2(imm)) {
+				// Byte offset of this literal inside the literal pool
+				const uint64_t offset = (cur_literal - imul_rcp_literals) * 8;
+				*(cur_literal++) = randomx_reciprocal_fast(imm);
+
+				// Registers used for the first 26 literals instead of a memory load
+				static constexpr uint32_t rcp_regs[26] = {
+					/* Integer */ 8, 10, 28, 29, 30, 31,
+					/* Float   */ 0,  1,  2,  3,  4,  5,  6,  7, 10, 11, 12, 13, 14, 15, 16, 17, 28, 29, 30, 31
+				};
+
+				if (offset < 6 * 8) {
+					// mul x20 + dst, x20 + dst, rcp_reg
+					emit32(0x020A0A33 + (dst << 7) + (dst << 15) + (rcp_regs[offset / 8] << 20));
+				}
+				else if (offset < 26 * 8) {
+					// fmv.x.d x5, rcp_reg
+					emit32(0xE20002D3 + (rcp_regs[offset / 8] << 15));
+					// mul x20 + dst, x20 + dst, x5
+					emit32(0x025A0A33 + (dst << 7) + (dst << 15));
+				}
+				else {
+					// NOTE(review): offset goes into the 12-bit signed ld immediate, so this
+					// presumably relies on at most 256 literals per program (offset <= 2040) - confirm
+					// ld x5, offset(x18)
+					emit32(0x00093283 + (offset << 20));
+					// mul x20 + dst, x20 + dst, x5
+					emit32(0x025A0A33 + (dst << 7) + (dst << 15));
+				}
+
+				last_modified[dst] = p;
+			}
+			break;
+
+		case InstructionType::INEG_R:
+			// sub x20 + dst, x0, x20 + dst
+			emit32(0x41400A33 + (dst << 7) + (dst << 20));
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::IXOR_R:
+			if (src != dst) {
+				// xor x20 + dst, x20 + dst, x20 + src
+				emit32(0x014A4A33 + (dst << 7) + (dst << 15) + (src << 20));
+			}
+			else {
+				imm_to_x5(imm, p);
+				// xor x20 + dst, x20 + dst, x5
+				emit32(0x005A4A33 + (dst << 7) + (dst << 15));
+			}
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::IXOR_M:
+			loadFromScratchpad(src, dst, mod, imm, p);
+			// xor x20 + dst, x20 + dst, x5
+			emit32(0x005A4A33 + (dst << 7) + (dst << 15));
+
+			last_modified[dst] = p;
+			break;
+
+#ifdef __riscv_zbb
+		case InstructionType::IROR_R:
+			if (src != dst) {
+				// ror x20 + dst, x20 + dst, x20 + src
+				emit32(0x614A5A33 + (dst << 7) + (dst << 15) + (src << 20));
+			}
+			else {
+				// rori x20 + dst, x20 + dst, imm
+				emit32(0x600A5A13 + (dst << 7) + (dst << 15) + ((imm & 63) << 20));
+			}
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::IROL_R:
+			if (src != dst) {
+				// rol x20 + dst, x20 + dst, x20 + src
+				emit32(0x614A1A33 + (dst << 7) + (dst << 15) + (src << 20));
+			}
+			else {
+				// rori x20 + dst, x20 + dst, -imm
+				emit32(0x600A5A13 + (dst << 7) + (dst << 15) + ((-imm & 63) << 20));
+			}
+
+			last_modified[dst] = p;
+			break;
+#else // __riscv_zbb
+		// Without Zbb a rotate is built from two shifts and an or
+		case InstructionType::IROR_R:
+			if (src != dst) {
+				// sub x5, x0, x20 + src
+				emit32(0x414002B3 + (src << 20));
+				// srl x6, x20 + dst, x20 + src
+				emit32(0x014A5333 + (dst << 15) + (src << 20));
+				// sll x20 + dst, x20 + dst, x5
+				emit32(0x005A1A33 + (dst << 7) + (dst << 15));
+				// or x20 + dst, x20 + dst, x6
+				emit32(0x006A6A33 + (dst << 7) + (dst << 15));
+			}
+			else {
+				// srli x5, x20 + dst, imm
+				emit32(0x000A5293 + (dst << 15) + ((imm & 63) << 20));
+				// slli x6, x20 + dst, -imm
+				emit32(0x000A1313 + (dst << 15) + ((-imm & 63) << 20));
+				// or x20 + dst, x5, x6
+				emit32(0x0062EA33 + (dst << 7));
+			}
+
+			last_modified[dst] = p;
+			break;
+
+		case InstructionType::IROL_R:
+			if (src != dst) {
+				// sub x5, x0, x20 + src
+				emit32(0x414002B3 + (src << 20));
+				// sll x6, x20 + dst, x20 + src
+				emit32(0x014A1333 + (dst << 15) + (src << 20));
+				// srl x20 + dst, x20 + dst, x5
+				emit32(0x005A5A33 + (dst << 7) + (dst << 15));
+				// or x20 + dst, x20 + dst, x6
+				emit32(0x006A6A33 + (dst << 7) + (dst << 15));
+			}
+			else {
+				// srli x5, x20 + dst, -imm
+				emit32(0x000A5293 + (dst << 15) + ((-imm & 63) << 20));
+				// slli x6, x20 + dst, imm
+				emit32(0x000A1313 + (dst << 15) + ((imm & 63) << 20));
+				// or x20 + dst, x5, x6
+				emit32(0x0062EA33 + (dst << 7));
+			}
+
+			last_modified[dst] = p;
+			break;
+#endif // __riscv_zbb
+
+		case InstructionType::ISWAP_R:
+			if (src != dst) {
+				// c.mv x5, x20 + dst
+				emit16(0x82D2 + (dst << 2));
+				// c.mv x20 + dst, x20 + src
+				emit16(0x8A52 + (src << 2) + (dst << 7));
+				// c.mv x20 + src, x5
+				emit16(0x8A16 + (src << 7));
+
+				// Both registers were written
+				last_modified[src] = p;
+				last_modified[dst] = p;
+			}
+			break;
+
+		case InstructionType::FSWAP_R:
+			// vmv.x.s x5, v0 + dst
+			emit32(0x420022D7 + (dst << 20));
+			// vslide1down.vx v0 + dst, v0 + dst, x5
+			emit32(0x3E02E057 + (dst << 7) + (dst << 20));
+			break;
+
+		case InstructionType::FADD_R:
+			src %= RegisterCountFlt;
+			dst %= RegisterCountFlt;
+
+			// vfadd.vv v0 + dst, v0 + dst, v8 + src
+			emit32(0x02041057 + (dst << 7) + (src << 15) + (dst << 20));
+			break;
+
+		case InstructionType::FADD_M:
+			dst %= RegisterCountFlt;
+
+			// RegistersCount is passed as dst so that src != dst always holds in loadFromScratchpad
+			loadFromScratchpad(src, RegistersCount, mod, imm, p);
+			emit_data(group_f_convert);
+
+			// vfadd.vv v0 + dst, v0 + dst, v16
+			emit32(0x02081057 + (dst << 7) + (dst << 20));
+			break;
+
+		case InstructionType::FSUB_R:
+			src %= RegisterCountFlt;
+			dst %= RegisterCountFlt;
+
+			// vfsub.vv v0 + dst, v0 + dst, v8 + src
+			emit32(0x0A041057 + (dst << 7) + (src << 15) + (dst << 20));
+			break;
+
+		case InstructionType::FSUB_M:
+			dst %= RegisterCountFlt;
+
+			loadFromScratchpad(src, RegistersCount, mod, imm, p);
+			emit_data(group_f_convert);
+
+			// vfsub.vv v0 + dst, v0 + dst, v16
+			emit32(0x0A081057 + (dst << 7) + (dst << 20));
+			break;
+
+		case InstructionType::FSCAL_R:
+			dst %= RegisterCountFlt;
+
+			// vxor.vv v0 + dst, v0 + dst, v14
+			emit32(0x2E070057 + (dst << 7) + (dst << 20));
+			break;
+
+		case InstructionType::FMUL_R:
+			src %= RegisterCountFlt;
+			dst %= RegisterCountFlt;
+
+			// vfmul.vv v4 + dst, v4 + dst, v8 + src
+			emit32(0x92441257 + (dst << 7) + (src << 15) + (dst << 20));
+			break;
+
+		case InstructionType::FDIV_M:
+			dst %= RegisterCountFlt;
+
+			loadFromScratchpad(src, RegistersCount, mod, imm, p);
+			emit_data(group_f_convert);
+			emit_data(group_e_post_process);
+
+			// vfdiv.vv v4 + dst, v4 + dst, v16
+			emit32(0x82481257 + (dst << 7) + (dst << 20));
+			break;
+
+		case InstructionType::FSQRT_R:
+			dst %= RegisterCountFlt;
+
+			// vfsqrt.v v4 + dst, v4 + dst
+			emit32(0x4E401257 + (dst << 7) + (dst << 20));
+			break;
+
+		case InstructionType::CBRANCH:
+			{
+				const uint32_t shift = (mod >> 4) + RANDOMX_JUMP_OFFSET;
+
+				// Set bit `shift` of the immediate and clear the bit below it
+				imm |= (1UL << shift);
+
+				if (RANDOMX_JUMP_OFFSET > 0 || shift > 0) {
+					imm &= ~(1UL << (shift - 1));
+				}
+
+				// slli x6, x7, shift
+				// x6 = branchMask
+				emit32(0x00039313 + (shift << 20));
+
+				// x5 = imm
+				imm_to_x5(imm, p);
+
+				// c.add x20 + dst, x5
+				emit16(0x9A16 + (dst << 7));
+
+				// and x5, x20 + dst, x6
+				emit32(0x006A72B3 + (dst << 15));
+
+				// Distance back to the code right after the last instruction that modified dst
+				const int offset = static_cast<int>(last_modified[dst] - p);
+
+				if (offset >= -4096) {
+					// Target is within reach of a conditional branch
+					// beqz x5, offset
+					const uint32_t k = static_cast<uint32_t>(offset);
+					emit32(0x80028063 | ((k & 0x1E) << 7) | ((k & 0x7E0) << 20) | ((k & 0x800) >> 4));
+				}
+				else {
+					// Too far for a conditional branch: skip over an unconditional jump instead
+					// bnez x5, 8
+					emit32(0x00029463);
+					// j offset
+					// (offset - 4 because the jump sits 4 bytes after the position offset was measured from)
+					const uint32_t k = static_cast<uint32_t>(offset - 4);
+					emit32(0x8000006F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000));
+				}
+
+				// After a branch, every register counts as modified at this point
+				for (uint32_t j = 0; j < RegistersCount; ++j) {
+					last_modified[j] = p;
+				}
+			}
+			break;
+
+		case InstructionType::CFROUND:
+			// Rotating by imm - 1 and masking with 6 leaves the selected two bits in
+			// bit positions 2:1, i.e. x5 = 2 * (two-bit field)
+			if ((imm - 1) & 63) {
+#ifdef __riscv_zbb
+				// rori x5, x20 + src, imm - 1
+				emit32(0x600A5293 + (src << 15) + (((imm - 1) & 63) << 20));
+#else // __riscv_zbb
+				// srli x5, x20 + src, imm - 1
+				emit32(0x000A5293 + (src << 15) + (((imm - 1) & 63) << 20));
+				// slli x6, x20 + src, 1 - imm
+				emit32(0x000A1313 + (src << 15) + (((1 - imm) & 63) << 20));
+				// or x5, x5, x6
+				emit32(0x0062E2B3);
+#endif // __riscv_zbb
+
+				// andi x5, x5, 6
+				emit32(0x0062F293);
+			}
+			else {
+				// andi x5, x20 + src, 6
+				emit32(0x006A7293 + (src << 15));
+			}
+
+			// li x6, 01111000b
+			// x6 = CFROUND lookup table
+			emit32(0x07800313);
+			// srl x5, x6, x5
+			emit32(0x005352B3);
+			// andi x5, x5, 3
+			emit32(0x0032F293);
+			// csrw frm, x5
+			emit32(0x00229073);
+			break;
+
+		case InstructionType::ISTORE:
+			{
+				uint32_t mask_reg;
+				uint32_t shift;
+
+				if ((mod >> 4) >= 14) {
+					shift = MaskL3Shift;
+					mask_reg = 1; // x1 = L3 mask
+				}
+				else {
+					if ((mod & 3) == 0) {
+						shift = MaskL2Shift;
+						mask_reg = 17; // x17 = L2 mask
+					}
+					else {
+						shift = MaskL1Shift;
+						mask_reg = 16; // x16 = L1 mask
+					}
+				}
+
+				// Keep the low (32 - shift) bits of imm and sign-extend them
+				imm = static_cast<uint32_t>(static_cast<int32_t>(imm << shift) >> shift);
+				imm_to_x5(imm, p);
+
+				// c.add x5, x20 + dst
+				emit16(0x92D2 + (dst << 2));
+				// and x5, x5, x0 + mask_reg
+				emit32(0x0002F2B3 + (mask_reg << 20));
+				// c.add x5, x12
+				emit16(0x92B2);
+				// sd x20 + src, 0(x5)
+				emit32(0x0142B023 + (src << 20));
+			}
+			break;
+
+		case InstructionType::NOP:
+			break;
+
+		default:
+			UNREACHABLE;
+		}
+
+		// Prefetch scratchpad lines for the next main loop iteration
+		// scratchpad_prefetch_pos is a conservative estimate of the earliest place in the code where we can do it
+		if (i == scratchpad_prefetch_pos) {
+			// Copies the (already patched) code between the scratchpad_prefetch and
+			// scratchpad_prefetch_end labels from buf into the instruction stream
+			uint8_t* e = (uint8_t*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_scratchpad_prefetch_end));
+			const size_t n = e - ((uint8_t*)spaddr_xor2);
+
+			memcpy(p, spaddr_xor2, n);
+			p += n;
+		}
+	}
+
+	const uint8_t* e;
+
+	if (entryDataInitScalar) {
+		// Emit "J randomx_riscv64_vector_program_main_loop_instructions_end_light_mode" instruction
+		e = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_instructions_end_light_mode);
+	}
+	else {
+		// Emit "J randomx_riscv64_vector_program_main_loop_instructions_end" instruction
+		e = buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_main_loop_instructions_end);
+	}
+
+	// Forward jump: offset bits [10:1], [11] and [19:12] are placed per the J-type format
+	const uint32_t k = e - p;
+	emit32(0x6F | ((k & 0x7FE) << 20) | ((k & 0x800) << 9) | (k & 0xFF000));
+
+#ifdef __GNUC__
+	// Flush the instruction cache for everything between the params and program_end labels
+	char* p1 = (char*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_params));
+	char* p2 = (char*)(buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_end));
+
+	__builtin___clear_cache(p1, p2);
+#endif
+
+	return buf + DIST(randomx_riscv64_vector_code_begin, randomx_riscv64_vector_program_begin);
+}
+
+} // namespace randomx
--- a/crypto/randomx/jit_compiler_rv64_vector.h
+++ b/crypto/randomx/jit_compiler_rv64_vector.h
@ -0,0 +1,45 @@
+/*
+Copyright (c) 2018-2020, tevador    <tevador@gmail.com>
+Copyright (c) 2025, SChernykh       <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <cstdlib>
+#include <vector>
+
+namespace randomx {
+
+// Forward declarations: the prototypes below only take pointers/references to these types
+class SuperscalarProgram;
+struct ProgramConfiguration;
+class Program;
+
+// Translates num_programs superscalar programs into code inside buf and returns
+// a pointer into buf at the dataset-init entry point. reciprocalCache is indexed
+// with the imm32 of IMUL_RCP instructions to obtain their 64-bit multipliers.
+void* generateDatasetInitVectorRV64(uint8_t* buf, SuperscalarProgram* programs, size_t num_programs, std::vector<uint64_t>& reciprocalCache);
+
+// Generates the main-loop code for prog inside buf and returns a pointer into
+// buf at the program entry point. inst_map maps an opcode byte to an instruction
+// type. A non-null entryDataInitScalar selects the light-mode variant, in which
+// case it is stored in buf together with datasetOffset.
+void* generateProgramVectorRV64(uint8_t* buf, Program& prog, ProgramConfiguration& pcfg, const uint8_t (&inst_map)[256], void* entryDataInitScalar, uint32_t datasetOffset);
+
+} // namespace randomx
--- a/crypto/randomx/jit_compiler_rv64_vector_static.S
+++ b/crypto/randomx/jit_compiler_rv64_vector_static.S
@ -0,0 +1,873 @@
+/*
+Copyright (c) 2018-2020, tevador    <tevador@gmail.com>
+Copyright (c) 2025, SChernykh       <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "configuration.h"
+
+// Compatibility macros
+//
+// Map whichever configuration names are present in configuration.h onto the
+// names used by the code below.
+
+#if !defined(RANDOMX_CACHE_ACCESSES) && defined(RANDOMX_CACHE_MAX_ACCESSES)
+#define RANDOMX_CACHE_ACCESSES RANDOMX_CACHE_MAX_ACCESSES
+#endif
+
+// Index mask for 64-byte cache items: (number of 64-byte items) - 1.
+// RANDOMX_ARGON_MEMORY is scaled by 1024 first (presumably KiB to bytes).
+// The expansion is parenthesized so it stays a single operand wherever it is used.
+#if defined(RANDOMX_ARGON_MEMORY)
+#define RANDOMX_CACHE_MASK (RANDOMX_ARGON_MEMORY * 1024 / 64 - 1)
+#elif defined(RANDOMX_CACHE_MAX_SIZE)
+#define RANDOMX_CACHE_MASK (RANDOMX_CACHE_MAX_SIZE / 64 - 1)
+#elif !defined(RANDOMX_CACHE_MASK)
+#error RANDOMX_CACHE_MASK cannot be derived: configuration.h must define RANDOMX_ARGON_MEMORY or RANDOMX_CACHE_MAX_SIZE
+#endif
+
+// Symbol decoration hook: names are used unchanged in this file
+#define DECL(x) x
+
+.text	// everything below (code and literal tables) goes into the text section
+
+#ifndef __riscv_v
+#error This file requires rv64gcv
+#endif
+
+.option pic	// assemble the rest of the file as position-independent code
+
+.global DECL(randomx_riscv64_vector_code_begin)	// first label of the file; shares its address with sshash_begin
+
+.global DECL(randomx_riscv64_vector_sshash_begin)
+.global DECL(randomx_riscv64_vector_sshash_imul_rcp_literals)
+.global DECL(randomx_riscv64_vector_sshash_dataset_init)	// entry point of the dataset init routine
+.global DECL(randomx_riscv64_vector_sshash_generated_instructions)
+.global DECL(randomx_riscv64_vector_sshash_generated_instructions_end)
+.global DECL(randomx_riscv64_vector_sshash_cache_prefetch)
+.global DECL(randomx_riscv64_vector_sshash_xor)
+.global DECL(randomx_riscv64_vector_sshash_end)
+
+.global DECL(randomx_riscv64_vector_program_params)
+.global DECL(randomx_riscv64_vector_program_imul_rcp_literals)
+.global DECL(randomx_riscv64_vector_program_begin)	// entry point of the program routine (prologue starts here)
+.global DECL(randomx_riscv64_vector_program_main_loop_instructions)
+.global DECL(randomx_riscv64_vector_program_main_loop_instructions_end)
+.global DECL(randomx_riscv64_vector_program_main_loop_mx_xor)
+.global DECL(randomx_riscv64_vector_program_main_loop_spaddr_xor)
+
+.global DECL(randomx_riscv64_vector_program_main_loop_light_mode_data)
+.global DECL(randomx_riscv64_vector_program_main_loop_instructions_end_light_mode)
+.global DECL(randomx_riscv64_vector_program_main_loop_mx_xor_light_mode)
+.global DECL(randomx_riscv64_vector_program_scratchpad_prefetch)
+.global DECL(randomx_riscv64_vector_program_scratchpad_prefetch_end)
+
+.global DECL(randomx_riscv64_vector_program_end)
+
+.global DECL(randomx_riscv64_vector_code_end)	// last label of the file
+
+.balign 8	// the .dword tables that follow start on an 8-byte boundary
+
+DECL(randomx_riscv64_vector_code_begin):
+
+DECL(randomx_riscv64_vector_sshash_begin):
+
+sshash_constant_0: .dword 6364136223846793005	// multiplier for r0 = (itemNumber + 1) * constant_0
+sshash_constant_1: .dword 9298411001130361340	// constants 1..7 are XORed with r0 to form r1..r7
+sshash_constant_2: .dword 12065312585734608966
+sshash_constant_3: .dword 9306329213124626780
+sshash_constant_4: .dword 5281919268842080866
+sshash_constant_5: .dword 10536153434571861004
+sshash_constant_6: .dword 3398623926847679864
+sshash_constant_7: .dword 9549104520008361294
+sshash_offsets:    .dword 0,1,2,3	// per-lane item number offsets added to startBlock
+store_offsets:     .dword 0,64,128,192	// per-lane byte offsets of 4 consecutive 64-byte dataset items
+
+DECL(randomx_riscv64_vector_sshash_imul_rcp_literals): .fill 512,8,0	// 512 zeroed 64-bit slots; x15 points here when the generated code starts (presumably filled in by the JIT compiler)
+
+/*
+Reference: https://github.com/tevador/RandomX/blob/master/doc/specs.md#73-dataset-block-generation
+
+Register layout
+---------------
+x5	= temporary
+
+x10	= randomx cache
+x11	= output buffer
+x12	= startBlock
+x13	= endBlock
+
+x14	= cache mask
+x15	= imul_rcp literal pointer
+
+v0-v7	= r0-r7
+v8	= itemNumber
+v9	= cacheIndex, then a pointer into cache->memory (for prefetch), then a byte offset into cache->memory
+
+v10-v17	= sshash constants
+
+v18	= temporary
+
+v19	= dataset item store offsets
+*/
+
+DECL(randomx_riscv64_vector_sshash_dataset_init):	// Entry: x10 = randomx cache, x11 = output buffer, x12 = startBlock, x13 = endBlock. Setup part: loads constants, then falls through into init_item
+	// Process 4 64-bit values at a time
+	vsetivli zero, 4, e64, m1, ta, ma	// NOTE(review): vl is really 4 only when VLEN >= 256 (VLMAX = VLEN / 64 for e64, m1) - presumably checked before this code is used, confirm
+
+	// Load cache->memory pointer
+	ld x10, (x10)	// reads the first 8 bytes of the structure x10 points to
+
+	// Init cache mask
+	li x14, RANDOMX_CACHE_MASK
+
+	// Init dataset item store offsets
+	lla x5, store_offsets
+	vle64.v v19, (x5)	// v19 = (0, 64, 128, 192)
+
+	// Init itemNumber vector to (startBlock, startBlock + 1, startBlock + 2, startBlock + 3)
+	lla x5, sshash_offsets
+	vle64.v v8, (x5)
+	vadd.vx v8, v8, x12
+
+	// Load constants (stride = x0 = 0, so a 64-bit value will be broadcast into each element of a vector)
+	lla x5, sshash_constant_0
+	vlse64.v v10, (x5), x0
+
+	lla x5, sshash_constant_1
+	vlse64.v v11, (x5), x0
+
+	lla x5, sshash_constant_2
+	vlse64.v v12, (x5), x0
+
+	lla x5, sshash_constant_3
+	vlse64.v v13, (x5), x0
+
+	lla x5, sshash_constant_4
+	vlse64.v v14, (x5), x0
+
+	lla x5, sshash_constant_5
+	vlse64.v v15, (x5), x0
+
+	lla x5, sshash_constant_6
+	vlse64.v v16, (x5), x0
+
+	lla x5, sshash_constant_7
+	vlse64.v v17, (x5), x0
+
+	// Calculate the end pointer for dataset init
+	sub x13, x13, x12	// x13 = number of blocks
+	slli x13, x13, 6	// x13 = number of bytes (64 per block)
+	add x13, x13, x11	// x13 = output buffer + total size
+
+init_item:	// loop head: one pass produces 4 dataset items, one per vector lane
+	// Step 1. Init r0-r7
+
+	// r0 = (itemNumber + 1) * 6364136223846793005
+	vmv.v.v v0, v8
+	vmadd.vv v0, v10, v10	// v0 = v0 * v10 + v10, i.e. (itemNumber + 1) * constant_0
+
+	// r_i = r0 ^ c_i for i = 1..7
+	vxor.vv v1, v0, v11
+	vxor.vv v2, v0, v12
+	vxor.vv v3, v0, v13
+	vxor.vv v4, v0, v14
+	vxor.vv v5, v0, v15
+	vxor.vv v6, v0, v16
+	vxor.vv v7, v0, v17
+
+	// Step 2. Let cacheIndex = itemNumber
+	vmv.v.v v9, v8
+
+	// Step 3 is implicit (all iterations are inlined, there is no "i")
+
+	// Init imul_rcp literal pointer
+	lla x15, randomx_riscv64_vector_sshash_imul_rcp_literals	// reset on every pass because this is inside the loop
+
+DECL(randomx_riscv64_vector_sshash_generated_instructions):	// placeholder region inside the init_item loop body
+	// Generated by JIT compiler
+	//
+	// Step 4. randomx_riscv64_vector_sshash_cache_prefetch
+	// Step 5. SuperscalarHash[i]
+	// Step 6. randomx_riscv64_vector_sshash_xor
+	//
+	// Above steps will be repeated RANDOMX_CACHE_ACCESSES times
+	.fill RANDOMX_CACHE_ACCESSES * 2048, 4, 0	// reserves 2048 zeroed 32-bit words (8192 bytes) per cache access
+
+DECL(randomx_riscv64_vector_sshash_generated_instructions_end):	// execution continues here after the generated region: store 4 finished items and loop
+	// Step 9. Concatenate registers r0-r7 in little endian format to get the final Dataset item data.
+	vsuxei64.v v0, (x11), v19	// lane i stores r0 of its item at x11 + 64 * i
+
+	add x5, x11, 8
+	vsuxei64.v v1, (x5), v19	// r1 goes to byte 8 of each item, and so on up to r7 at byte 56
+
+	add x5, x11, 16
+	vsuxei64.v v2, (x5), v19
+
+	add x5, x11, 24
+	vsuxei64.v v3, (x5), v19
+
+	add x5, x11, 32
+	vsuxei64.v v4, (x5), v19
+
+	add x5, x11, 40
+	vsuxei64.v v5, (x5), v19
+
+	add x5, x11, 48
+	vsuxei64.v v6, (x5), v19
+
+	add x5, x11, 56
+	vsuxei64.v v7, (x5), v19
+
+	// Iterate to the next 4 items
+	vadd.vi v8, v8, 4
+	add x11, x11, 256	// 4 items * 64 bytes
+	bltu x11, x13, init_item	// NOTE(review): items are always written in groups of 4, so a block count that is not a multiple of 4 writes past the end pointer - presumably callers guarantee this, confirm
+
+	ret
+
+// Step 4. Load a 64-byte item from the Cache. The item index is given by cacheIndex modulo the total number of 64-byte items in Cache.
+DECL(randomx_riscv64_vector_sshash_cache_prefetch):	// has no return instruction of its own; listed as step 4 of the JIT-generated sequence
+	// v9 = convert from cacheIndex to a direct pointer into cache->memory
+	vand.vx v9, v9, x14	// cacheIndex & cache mask
+	vsll.vi v9, v9, 6	// * 64 bytes per item
+	vadd.vx v9, v9, x10	// + cache->memory
+
+	// Prefetch element 0
+	vmv.x.s x5, v9
+#ifdef __riscv_zicbop
+	prefetch.r (x5)
+#else
+	ld x5, (x5)	// without Zicbop a plain load is used to touch the line; the loaded value is discarded
+#endif
+
+	// Prefetch element 1
+	vslidedown.vi v18, v9, 1	// move lane 1 to lane 0 so vmv.x.s can read it
+	vmv.x.s x5, v18
+#ifdef __riscv_zicbop
+	prefetch.r (x5)
+#else
+	ld x5, (x5)
+#endif
+
+	// Prefetch element 2
+	vslidedown.vi v18, v9, 2
+	vmv.x.s x5, v18
+#ifdef __riscv_zicbop
+	prefetch.r (x5)
+#else
+	ld x5, (x5)
+#endif
+
+	// Prefetch element 3
+	vslidedown.vi v18, v9, 3
+	vmv.x.s x5, v18
+#ifdef __riscv_zicbop
+	prefetch.r (x5)
+#else
+	ld x5, (x5)
+#endif
+
+	// v9 = byte offset into cache->memory
+	vsub.vx v9, v9, x10	// back from pointer to offset, as needed by the indexed loads in sshash_xor
+
+// Step 6. XOR all registers with data loaded from randomx cache
+DECL(randomx_riscv64_vector_sshash_xor):	// has no return instruction of its own; listed as step 6 of the JIT-generated sequence
+	vluxei64.v v18, (x10), v9	// lane i loads the first 8 bytes of its cache item (base x10 + byte offset v9[i])
+	vxor.vv v0, v0, v18
+
+	add x5, x10, 8	// bytes 8..15 of each item pair with r1, and so on up to byte 56 for r7
+	vluxei64.v v18, (x5), v9
+	vxor.vv v1, v1, v18
+
+	add x5, x10, 16
+	vluxei64.v v18, (x5), v9
+	vxor.vv v2, v2, v18
+
+	add x5, x10, 24
+	vluxei64.v v18, (x5), v9
+	vxor.vv v3, v3, v18
+
+	add x5, x10, 32
+	vluxei64.v v18, (x5), v9
+	vxor.vv v4, v4, v18
+
+	add x5, x10, 40
+	vluxei64.v v18, (x5), v9
+	vxor.vv v5, v5, v18
+
+	add x5, x10, 48
+	vluxei64.v v18, (x5), v9
+	vxor.vv v6, v6, v18
+
+	add x5, x10, 56
+	vluxei64.v v18, (x5), v9
+	vxor.vv v7, v7, v18
+
+DECL(randomx_riscv64_vector_sshash_end):	// end marker of the sshash part; nothing is emitted at this label
+
+/*
+Reference: https://github.com/tevador/RandomX/blob/master/doc/specs.md#46-vm-execution
+
+C declarations:
+
+struct RegisterFile {
+	uint64_t r[8];
+	double f[4][2];
+	double e[4][2];
+	double a[4][2];
+};
+
+struct MemoryRegisters {
+	uint32_t mx, ma;
+	uint8_t* memory; // dataset (fast mode) or cache (light mode)
+};
+
+void ProgramFunc(RegisterFile* reg, MemoryRegisters* mem, uint8_t* scratchpad, uint64_t iterations);
+
+Register layout
+---------------
+x0	= zero
+x1	= scratchpad L3 mask
+x2	= stack pointer
+x3	= global pointer (unused)
+x4	= thread pointer (unused)
+x5	= temporary
+x6	= temporary
+x7	= branch mask (unshifted)
+x8	= frame pointer, also 64-bit literal inside the loop
+x9	= scratchpad L3 mask (64-byte aligned)
+x10	= RegisterFile* reg, also 64-bit literal inside the loop
+x11	= MemoryRegisters* mem, then dataset/cache pointer
+x12	= scratchpad
+x13	= iterations
+x14	= mx, ma (always stored with dataset mask applied)
+x15	= spAddr0, spAddr1
+x16	= scratchpad L1 mask
+x17	= scratchpad L2 mask
+x18	= IMUL_RCP literals pointer
+x19	= dataset mask
+x20-x27	= r0-r7
+x28-x31 = 64-bit literals
+
+f0-f7   = 64-bit literals
+f10-f17 = 64-bit literals
+f28-f31 = 64-bit literals
+
+v0-v3	= f0-f3
+v4-v7	= e0-e3
+v8-v11	= a0-a3
+v12	= E 'and' mask = 0x00ffffffffffffff'00ffffffffffffff
+v13	= E 'or' mask  = 0x3*00000000******'3*00000000******
+v14	= scale mask   = 0x80f0000000000000'80f0000000000000
+
+v15	= unused
+v16	= temporary
+v17	= unused
+v18	= temporary
+
+v19-v31	= unused
+*/
+
+.balign 8	// keep the .dword tables below 8-byte aligned
+
+DECL(randomx_riscv64_vector_program_params):
+
+// JIT compiler will adjust these values for different RandomX variants
+randomx_masks:	.dword 16376, 262136, 2097144, 2147483584, 255	// in load order: scratchpad L1 mask (x16), L2 mask (x17), L3 mask (x1), dataset mask (x19), branch mask (x7)
+
+DECL(randomx_riscv64_vector_program_imul_rcp_literals):
+
+imul_rcp_literals:	.fill RANDOMX_PROGRAM_SIZE, 8, 0	// one zeroed 64-bit slot per program instruction; the first 26 slots are preloaded into registers by program_begin
+
+DECL(randomx_riscv64_vector_program_begin):	// Prologue. Entry: x10 = RegisterFile* reg, x11 = MemoryRegisters* mem, x12 = scratchpad, x13 = iterations; falls through into the main loop
+	addi sp, sp, -112	// 112-byte frame: 11 callee-saved registers, x10, old x8 and x1
+	sd x8, 96(sp)		// save old frame pointer
+	addi x8, sp, 112	// setup new frame pointer
+	sd x1, 104(sp)		// save return address
+
+	// Save callee-saved registers
+	sd x9, 0(sp)
+	sd x18, 8(sp)
+	sd x19, 16(sp)
+	sd x20, 24(sp)
+	sd x21, 32(sp)
+	sd x22, 40(sp)
+	sd x23, 48(sp)
+	sd x24, 56(sp)
+	sd x25, 64(sp)
+	sd x26, 72(sp)
+	sd x27, 80(sp)
+
+	// Save x10 as it will be used as an IMUL_RCP literal
+	sd x10, 88(sp)
+
+	// Load mx, ma and dataset pointer
+	ld x14, (x11)	// one 64-bit load covers both 32-bit fields: mx in the low half, ma in the high half
+	ld x11, 8(x11)	// x11 = mem->memory
+
+	// Initialize spAddr0-spAddr1
+	mv x15, x14
+
+	// Set registers r0-r7 to zero
+	li x20, 0
+	li x21, 0
+	li x22, 0
+	li x23, 0
+	li x24, 0
+	li x25, 0
+	li x26, 0
+	li x27, 0
+
+	// Load masks
+	lla x5, randomx_masks
+	ld x16, 0(x5)	// scratchpad L1 mask
+	ld x17, 8(x5)	// scratchpad L2 mask
+	ld x1, 16(x5)	// scratchpad L3 mask (x1 is free because the return address was saved above)
+	ld x19, 24(x5)	// dataset mask
+	ld x7, 32(x5)	// branch mask
+	addi x9, x1, -56	// 64-byte aligned L3 mask; with the default table 2097144 - 56 = 2097088 = 0x1fffc0
+
+	// Set vector registers to 2x64 bit
+	vsetivli zero, 2, e64, m1, ta, ma
+
+	// Apply dataset mask to mx, ma
+	slli x5, x19, 32	// copy the 32-bit mask into the high half...
+	or x5, x5, x19	// ...and keep it in the low half, so both mx and ma are masked at once
+	and x14, x14, x5
+
+	// Load group A registers
+	addi x5, x10, 192	// &reg->a[0] (r: 64 bytes, f: 64 bytes, e: 64 bytes precede it)
+	vle64.v v8, (x5)
+
+	addi x5, x10, 208
+	vle64.v v9, (x5)
+
+	addi x5, x10, 224
+	vle64.v v10, (x5)
+
+	addi x5, x10, 240
+	vle64.v v11, (x5)
+
+	// Load E 'and' mask
+	vmv.v.i v12, -1	// all ones
+	vsrl.vi v12, v12, 8	// >> 8 gives 0x00ffffffffffffff in both lanes
+
+	// Load E 'or' mask (stored in reg.f[0])
+	addi x5, x10, 64	// &reg->f[0]
+	vle64.v v13, (x5)
+
+	// Load scale mask
+	lui x5, 0x80f00	// x5 = 0xffffffff80f00000 (lui result is sign-extended on RV64)
+	slli x5, x5, 32	// x5 = 0x80f0000000000000
+	vmv.v.x v14, x5
+
+	// IMUL_RCP literals pointer
+	lla x18, imul_rcp_literals
+
+	// Load IMUL_RCP literals
+	ld   x8,   0(x18)	// from here on x8 and x10 hold literals, not the frame pointer / reg pointer
+	ld  x10,   8(x18)
+	ld  x28,  16(x18)
+	ld  x29,  24(x18)
+	ld  x30,  32(x18)
+	ld  x31,  40(x18)
+	fld  f0,  48(x18)	// the remaining preloaded literals are kept as raw 64-bit values in FP registers
+	fld  f1,  56(x18)
+	fld  f2,  64(x18)
+	fld  f3,  72(x18)
+	fld  f4,  80(x18)
+	fld  f5,  88(x18)
+	fld  f6,  96(x18)
+	fld  f7, 104(x18)
+	fld f10, 112(x18)
+	fld f11, 120(x18)
+	fld f12, 128(x18)
+	fld f13, 136(x18)
+	fld f14, 144(x18)
+	fld f15, 152(x18)
+	fld f16, 160(x18)
+	fld f17, 168(x18)
+	fld f28, 176(x18)
+	fld f29, 184(x18)
+	fld f30, 192(x18)
+	fld f31, 200(x18)	// slot 25: 26 literals in total are held in registers
+
+randomx_riscv64_vector_program_main_loop:	// loop head: mixes one scratchpad line into r0-r7 and loads f0-f3, e0-e3 from another
+	and x5, x15, x9		// x5 = spAddr0 & 64-byte aligned L3 mask
+	add x5, x5, x12		// x5 = &scratchpad[spAddr0 & 64-byte aligned L3 mask]
+
+	// read a 64-byte line from scratchpad (indexed by spAddr0) and XOR it with r0-r7
+	ld x6, 0(x5)
+	xor x20, x20, x6
+	ld x6, 8(x5)
+	xor x21, x21, x6
+	ld x6, 16(x5)
+	xor x22, x22, x6
+	ld x6, 24(x5)
+	xor x23, x23, x6
+	ld x6, 32(x5)
+	xor x24, x24, x6
+	ld x6, 40(x5)
+	xor x25, x25, x6
+	ld x6, 48(x5)
+	xor x26, x26, x6
+	ld x6, 56(x5)
+	xor x27, x27, x6
+
+	srli x5, x15, 32	// x5 = spAddr1
+	and x5, x5, x9		// x5 = spAddr1 & 64-byte aligned L3 mask
+	add x5, x5, x12		// x5 = &scratchpad[spAddr1 & 64-byte aligned L3 mask]
+
+	// read a 64-byte line from scratchpad (indexed by spAddr1) and initialize f0-f3, e0-e3 registers
+
+	// Set vector registers to 2x32 bit
+	vsetivli zero, 2, e32, m1, ta, ma
+
+	// load f0
+	vle32.v v16, (x5)
+	vfwcvt.f.x.v v0, v16	// widening convert: 2 signed 32-bit integers -> 2 doubles
+
+	// load f1
+	addi x6, x5, 8
+	vle32.v v1, (x6)
+	// Use v16 as an intermediary register because vfwcvt accepts only registers with even numbers here
+	vfwcvt.f.x.v v16, v1
+	vmv1r.v v1, v16	// copy the converted pair into the odd-numbered register
+
+	// load f2
+	addi x6, x5, 16
+	vle32.v v16, (x6)
+	vfwcvt.f.x.v v2, v16
+
+	// load f3
+	addi x6, x5, 24
+	vle32.v v3, (x6)
+	vfwcvt.f.x.v v16, v3
+	vmv1r.v v3, v16
+
+	// load e0
+	addi x6, x5, 32
+	vle32.v v16, (x6)
+	vfwcvt.f.x.v v4, v16
+
+	// load e1
+	addi x6, x5, 40
+	vle32.v v5, (x6)
+	vfwcvt.f.x.v v16, v5
+	vmv1r.v v5, v16
+
+	// load e2
+	addi x6, x5, 48
+	vle32.v v16, (x6)
+	vfwcvt.f.x.v v6, v16
+
+	// load e3
+	addi x6, x5, 56
+	vle32.v v7, (x6)
+	vfwcvt.f.x.v v16, v7
+	vmv1r.v v7, v16
+
+	// Set vector registers back to 2x64 bit
+	vsetivli zero, 2, e64, m1, ta, ma
+
+	// post-process e0-e3
+	vand.vv v4, v4, v12	// apply the E 'and' mask (0x00ffffffffffffff per lane)
+	vand.vv v5, v5, v12
+	vand.vv v6, v6, v12
+	vand.vv v7, v7, v12
+
+	vor.vv v4, v4, v13	// apply the E 'or' mask loaded from reg.f[0] in program_begin
+	vor.vv v5, v5, v13
+	vor.vv v6, v6, v13
+	vor.vv v7, v7, v13
+
+DECL(randomx_riscv64_vector_program_main_loop_instructions):	// placeholder region inside the main loop body
+	// Generated by JIT compiler
+	// FDIV_M can generate up to 50 bytes of code (round it up to 52 - a multiple of 4)
+	// +32 bytes for the scratchpad prefetch and the final jump instruction
+	.fill RANDOMX_PROGRAM_SIZE * 52 + 32, 1, 0	// size is counted in bytes here (element size 1), zero-filled
+
+DECL(randomx_riscv64_vector_program_main_loop_instructions_end):	// dataset read path that loads directly from memory at x11 (the light-mode variant further below calls a function instead)
+	// Calculate dataset pointer for dataset read
+	// Do it here to break false dependency from readReg2 and readReg3 (see below)
+	srli x6, x14, 32	// x6 = ma & dataset mask
+
+DECL(randomx_riscv64_vector_program_main_loop_mx_xor):	// labels the instruction whose source registers the JIT compiler replaces
+	xor x5, x24, x26	// x5 = readReg2 ^ readReg3 (JIT compiler will substitute the actual registers)
+
+	and x5, x5, x19		// x5 = (readReg2 ^ readReg3) & dataset mask
+	xor x14, x14, x5	// mx ^= (readReg2 ^ readReg3) & dataset mask
+
+	and x5, x14, x19	// x5 = mx & dataset mask
+	add x5, x5, x11		// x5 = &dataset[mx & dataset mask]
+
+#ifdef __riscv_zicbop
+	prefetch.r (x5)
+#else
+	ld x5, (x5)	// without Zicbop a plain load is used to touch the line; the loaded value is discarded
+#endif
+
+	add x5, x6, x11		// x5 = &dataset[ma & dataset mask]
+
+	// read a 64-byte line from dataset and XOR it with r0-r7
+	ld x6, 0(x5)
+	xor x20, x20, x6
+	ld x6, 8(x5)
+	xor x21, x21, x6
+	ld x6, 16(x5)
+	xor x22, x22, x6
+	ld x6, 24(x5)
+	xor x23, x23, x6
+	ld x6, 32(x5)
+	xor x24, x24, x6
+	ld x6, 40(x5)
+	xor x25, x25, x6
+	ld x6, 48(x5)
+	xor x26, x26, x6
+	ld x6, 56(x5)
+	xor x27, x27, x6
+
+randomx_riscv64_vector_program_main_loop_swap_mx_ma:	// reached by fall-through from the dataset read above and by the jump at the end of the light-mode path
+	// swap mx <-> ma
+#ifdef __riscv_zbb
+	rori x14, x14, 32
+#else
+	srli x5, x14, 32	// without Zbb the 32-bit rotate is built from two shifts and an or
+	slli x14, x14, 32
+	or x14, x14, x5
+#endif
+
+	srli x5, x15, 32	// x5 = spAddr1
+	and x5, x5, x9		// x5 = spAddr1 & 64-byte aligned L3 mask
+	add x5, x5, x12		// x5 = &scratchpad[spAddr1 & 64-byte aligned L3 mask]
+
+	// store registers r0-r7 to the scratchpad
+	sd x20, 0(x5)
+	sd x21, 8(x5)
+	sd x22, 16(x5)
+	sd x23, 24(x5)
+	sd x24, 32(x5)
+	sd x25, 40(x5)
+	sd x26, 48(x5)
+	sd x27, 56(x5)
+
+	and x5, x15, x9		// x5 = spAddr0 & 64-byte aligned L3 mask
+	add x5, x5, x12		// x5 = &scratchpad[spAddr0 & 64-byte aligned L3 mask]
+
+DECL(randomx_riscv64_vector_program_main_loop_spaddr_xor):	// labels the instruction whose source registers the JIT compiler replaces
+	xor x15, x20, x22	// spAddr0-spAddr1 = readReg0 ^ readReg1 (JIT compiler will substitute the actual registers)
+
+	// store registers f0-f3 to the scratchpad (f0-f3 are first combined with e0-e3)
+	vxor.vv v0, v0, v4
+	vxor.vv v1, v1, v5
+	vxor.vv v2, v2, v6
+	vxor.vv v3, v3, v7
+
+	vse64.v v0, (x5)	// x5 was computed from the previous spAddr0, before x15 was overwritten above
+
+	addi x6, x5, 16
+	vse64.v v1, (x6)
+
+	addi x6, x5, 32
+	vse64.v v2, (x6)
+
+	addi x6, x5, 48
+	vse64.v v3, (x6)
+
+	addi x13, x13, -1	// one iteration done
+	beqz x13, randomx_riscv64_vector_program_main_loop_end
+	j randomx_riscv64_vector_program_main_loop
+
+randomx_riscv64_vector_program_main_loop_end:	// epilogue: write r, f and e back to the RegisterFile, restore saved registers, return
+	// Restore x8 and x10
+	addi x8, sp, 112	// x8 held a literal during the loop
+	ld x10, 88(sp)	// RegisterFile* saved in program_begin
+
+	// Store integer registers
+	sd x20, 0(x10)
+	sd x21, 8(x10)
+	sd x22, 16(x10)
+	sd x23, 24(x10)
+	sd x24, 32(x10)
+	sd x25, 40(x10)
+	sd x26, 48(x10)
+	sd x27, 56(x10)
+
+	// Store FP registers
+	addi x5, x10, 64	// &reg->f[0]
+	vse64.v v0, (x5)
+
+	addi x5, x10, 80
+	vse64.v v1, (x5)
+
+	addi x5, x10, 96
+	vse64.v v2, (x5)
+
+	addi x5, x10, 112
+	vse64.v v3, (x5)
+
+	addi x5, x10, 128	// &reg->e[0]
+	vse64.v v4, (x5)
+
+	addi x5, x10, 144
+	vse64.v v5, (x5)
+
+	addi x5, x10, 160
+	vse64.v v6, (x5)
+
+	addi x5, x10, 176
+	vse64.v v7, (x5)
+
+	// Restore callee-saved registers
+	ld x9, 0(sp)
+	ld x18, 8(sp)
+	ld x19, 16(sp)
+	ld x20, 24(sp)
+	ld x21, 32(sp)
+	ld x22, 40(sp)
+	ld x23, 48(sp)
+	ld x24, 56(sp)
+	ld x25, 64(sp)
+	ld x26, 72(sp)
+	ld x27, 80(sp)
+
+	ld x8, 96(sp)	// old frame pointer
+	ld x1, 104(sp)	// return address
+
+	addi sp, sp, 112
+
+	ret
+
+DECL(randomx_riscv64_vector_program_main_loop_light_mode_data):	// two 64-bit slots read by the light-mode path below; zero here, so they must be filled in before that path runs
+	// 1) Pointer to the scalar dataset init function
+	// 2) Dataset offset
+	.dword 0, 0
+
+DECL(randomx_riscv64_vector_program_main_loop_instructions_end_light_mode):	// light-mode dataset read: the 64-byte item is computed by a function call instead of being loaded from memory
+	// Calculate dataset pointer for dataset read
+	// Do it here to break false dependency from readReg2 and readReg3 (see below)
+	srli x6, x14, 32	// x6 = ma & dataset mask
+
+DECL(randomx_riscv64_vector_program_main_loop_mx_xor_light_mode):	// labels the instruction whose source registers the JIT compiler replaces
+	xor x5, x24, x26	// x5 = readReg2 ^ readReg3 (JIT compiler will substitute the actual registers)
+	and x5, x5, x19		// x5 = (readReg2 ^ readReg3) & dataset mask
+	xor x14, x14, x5	// mx ^= (readReg2 ^ readReg3) & dataset mask
+
+	// Save all registers modified when calling dataset_init_scalar_func_ptr
+	addi sp, sp, -192	// 192 is a multiple of 16, so sp keeps its alignment
+
+	// bytes [0, 127] - saved registers
+	// bytes [128, 191] - output buffer
+
+	sd  x1,   0(sp)	// x1 holds the scratchpad L3 mask here; jalr below overwrites it
+	sd  x7,  16(sp)	// slot 8(sp) is left unused
+	sd x10,  24(sp)
+	sd x11,  32(sp)
+	sd x12,  40(sp)
+	sd x13,  48(sp)
+	sd x14,  56(sp)
+	sd x15,  64(sp)
+	sd x16,  72(sp)
+	sd x17,  80(sp)
+	sd x28,  88(sp)
+	sd x29,  96(sp)
+	sd x30, 104(sp)
+	sd x31, 112(sp)
+
+	// setup the parameters of the dataset init function called below (same x10-x13 roles as documented for randomx_riscv64_vector_sshash_dataset_init)
+
+	// x10 = pointer to pointer to cache memory
+	// pointer to cache memory was saved in "sd x11, 32(sp)", so x10 = sp + 32
+	addi x10, sp, 32
+
+	// x11 = output buffer (64 bytes)
+	addi x11, sp, 128
+
+	// x12 = start block
+	lla x5, randomx_riscv64_vector_program_main_loop_light_mode_data
+	ld x12, 8(x5)	// dataset offset (second slot of light_mode_data)
+	add x12, x12, x6	// + (ma & dataset mask)
+	srli x12, x12, 6	// byte offset -> index of the 64-byte item
+
+	// x13 = end block
+	addi x13, x12, 1	// exactly one item is requested
+
+	ld x5, 0(x5)	// function pointer (first slot of light_mode_data)
+	jalr x1, 0(x5)	// call it; x1 receives the return address
+
+	// restore registers
+	ld  x1,   0(sp)
+	ld  x7,  16(sp)
+	ld x10,  24(sp)
+	ld x11,  32(sp)
+	ld x12,  40(sp)
+	ld x13,  48(sp)
+	ld x14,  56(sp)
+	ld x15,  64(sp)
+	ld x16,  72(sp)
+	ld x17,  80(sp)
+	ld x28,  88(sp)
+	ld x29,  96(sp)
+	ld x30, 104(sp)
+	ld x31, 112(sp)
+
+	// read a 64-byte line from dataset and XOR it with r0-r7
+	ld x5, 128(sp)	// the "line" is the 64-byte output buffer on the stack
+	xor x20, x20, x5
+	ld x5, 136(sp)
+	xor x21, x21, x5
+	ld x5, 144(sp)
+	xor x22, x22, x5
+	ld x5, 152(sp)
+	xor x23, x23, x5
+	ld x5, 160(sp)
+	xor x24, x24, x5
+	ld x5, 168(sp)
+	xor x25, x25, x5
+	ld x5, 176(sp)
+	xor x26, x26, x5
+	ld x5, 184(sp)
+	xor x27, x27, x5
+
+	addi sp, sp, 192
+
+	j randomx_riscv64_vector_program_main_loop_swap_mx_ma	// rejoin the common path
+
+DECL(randomx_riscv64_vector_program_scratchpad_prefetch):	// prefetches the two scratchpad lines addressed by readReg0 ^ readReg1; has no return instruction (presumably copied into the generated code by the JIT compiler - confirm)
+	xor x5, x20, x22	// spAddr0-spAddr1 = readReg0 ^ readReg1 (JIT compiler will substitute the actual registers)
+	srli x6, x5, 32		// x6 = spAddr1
+
+	and x5, x5, x9		// x5 = spAddr0 & 64-byte aligned L3 mask
+	and x6, x6, x9		// x6 = spAddr1 & 64-byte aligned L3 mask
+
+	c.add x5, x12		// x5 = &scratchpad[spAddr0 & 64-byte aligned L3 mask]
+	c.add x6, x12		// x6 = &scratchpad[spAddr1 & 64-byte aligned L3 mask]
+
+#ifdef __riscv_zicbop
+	prefetch.r (x5)
+	prefetch.r (x6)
+#else
+	ld x5, (x5)	// without Zicbop plain loads are used to touch the lines; the loaded values are discarded
+	ld x6, (x6)
+#endif
+
+DECL(randomx_riscv64_vector_program_scratchpad_prefetch_end):	// end marker of the prefetch sequence
+
+DECL(randomx_riscv64_vector_program_end):	// end marker of the program part
+
+DECL(randomx_riscv64_vector_code_end):	// end marker of the whole file
--- a/crypto/randomx/jit_compiler_rv64_vector_static.h
+++ b/crypto/randomx/jit_compiler_rv64_vector_static.h
@ -0,0 +1,74 @@
+/*
+Copyright (c) 2018-2020, tevador    <tevador@gmail.com>
+Copyright (c) 2025, SChernykh       <https://github.com/SChernykh>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#if defined(__cplusplus)
+#include <cstdint>
+#else
+#include <stdint.h>
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+Symbols exported by jit_compiler_rv64_vector_static.S.
+
+All of them are declared with a function type so that their addresses can be
+taken from C or C++. Several of them label data tables or positions in the
+middle of a routine rather than callable functions (for example
+randomx_riscv64_vector_program_params labels a table of .dword values), so for
+those the prototype is only a way to name an address.
+
+Each symbol is declared exactly once, in the order in which the labels appear
+in the assembly file.
+*/
+
+struct randomx_cache;
+
+/* Start of the whole assembly blob */
+void randomx_riscv64_vector_code_begin();
+
+/* Dataset initialization (SuperscalarHash) part */
+void randomx_riscv64_vector_sshash_begin();
+void randomx_riscv64_vector_sshash_imul_rcp_literals();
+/* Callable entry point: arguments arrive in x10-x13 in the order given here */
+void randomx_riscv64_vector_sshash_dataset_init(struct randomx_cache* cache, uint8_t* output_buf, uint32_t startBlock, uint32_t endBlock);
+void randomx_riscv64_vector_sshash_generated_instructions();
+void randomx_riscv64_vector_sshash_generated_instructions_end();
+void randomx_riscv64_vector_sshash_cache_prefetch();
+void randomx_riscv64_vector_sshash_xor();
+void randomx_riscv64_vector_sshash_end();
+
+/* Program execution part */
+void randomx_riscv64_vector_program_params();
+void randomx_riscv64_vector_program_imul_rcp_literals();
+void randomx_riscv64_vector_program_begin();
+void randomx_riscv64_vector_program_main_loop_instructions();
+void randomx_riscv64_vector_program_main_loop_instructions_end();
+void randomx_riscv64_vector_program_main_loop_mx_xor();
+void randomx_riscv64_vector_program_main_loop_spaddr_xor();
+void randomx_riscv64_vector_program_main_loop_light_mode_data();
+void randomx_riscv64_vector_program_main_loop_instructions_end_light_mode();
+void randomx_riscv64_vector_program_main_loop_mx_xor_light_mode();
+void randomx_riscv64_vector_program_scratchpad_prefetch();
+void randomx_riscv64_vector_program_scratchpad_prefetch_end();
+void randomx_riscv64_vector_program_end();
+
+/* End of the whole assembly blob */
+void randomx_riscv64_vector_code_end();
+
+#if defined(__cplusplus)
+}
+#endif
--- a/crypto/randomx/jit_compiler_x86.cpp
+++ b/crypto/randomx/jit_compiler_x86.cpp
@ -0,0 +1,847 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdexcept>
+#include <cstring>
+#include <climits>
+#include "jit_compiler_x86.hpp"
+#include "jit_compiler_x86_static.hpp"
+#include "superscalar.hpp"
+#include "program.hpp"
+#include "reciprocal.h"
+#include "virtual_memory.h"
+
+namespace randomx {
+	/*
+
+	REGISTER ALLOCATION:
+
+	; rax -> temporary
+	; rbx -> iteration counter "ic"
+	; rcx -> temporary
+	; rdx -> temporary
+	; rsi -> scratchpad pointer
+	; rdi -> dataset pointer
+	; rbp -> memory registers "ma" (high 32 bits), "mx" (low 32 bits)
+	; rsp -> stack pointer
+	; r8  -> "r0"
+	; r9  -> "r1"
+	; r10 -> "r2"
+	; r11 -> "r3"
+	; r12 -> "r4"
+	; r13 -> "r5"
+	; r14 -> "r6"
+	; r15 -> "r7"
+	; xmm0 -> "f0"
+	; xmm1 -> "f1"
+	; xmm2 -> "f2"
+	; xmm3 -> "f3"
+	; xmm4 -> "e0"
+	; xmm5 -> "e1"
+	; xmm6 -> "e2"
+	; xmm7 -> "e3"
+	; xmm8 -> "a0"
+	; xmm9 -> "a1"
+	; xmm10 -> "a2"
+	; xmm11 -> "a3"
+	; xmm12 -> temporary
+	; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
+	; xmm14 -> E 'or' mask  = 0x3*00000000******3*00000000******
+	; xmm15 -> scale mask   = 0x81f000000000000081f0000000000000
+
+	*/
+
+	//Calculate the required code buffer size that is sufficient for the largest possible program:
+
+	// Per-instruction worst cases and fixed overheads, all in bytes of generated x86 code.
+	constexpr size_t MaxRandomXInstrCodeSize = 32;   //FDIV_M requires up to 32 bytes of x86 code
+	constexpr size_t MaxSuperscalarInstrSize = 14;   //IMUL_RCP requires 14 bytes of x86 code
+	constexpr size_t SuperscalarProgramHeader = 128; //overhead per superscalar program
+	constexpr size_t CodeAlign = 4096;               //align code size to a multiple of 4 KiB
+	constexpr size_t ReserveCodeSize = CodeAlign;    //function prologue/epilogue + reserve
+
+	// Region for one main program: the reserve plus the worst case for RANDOMX_PROGRAM_SIZE instructions,
+	// passed through alignSize with CodeAlign.
+	constexpr size_t RandomXCodeSize = alignSize(ReserveCodeSize + MaxRandomXInstrCodeSize * RANDOMX_PROGRAM_SIZE, CodeAlign);
+	// Region for the superscalar hash: the reserve plus RANDOMX_CACHE_ACCESSES programs, each with a header
+	// and up to SuperscalarMaxSize instructions, passed through alignSize with CodeAlign.
+	constexpr size_t SuperscalarSize = alignSize(ReserveCodeSize + (SuperscalarProgramHeader + MaxSuperscalarInstrSize * SuperscalarMaxSize) * RANDOMX_CACHE_ACCESSES, CodeAlign);
+
+	// Each region must stay below INT32_MAX / 2 so that their sum and the offsets derived from it
+	// still fit the 32-bit types used below (CodeSize, superScalarHashOffset).
+	static_assert(RandomXCodeSize < INT32_MAX / 2, "RandomXCodeSize is too large");
+	static_assert(SuperscalarSize < INT32_MAX / 2, "SuperscalarSize is too large");
+
+	// Total size of the code buffer: main program region followed by the superscalar hash region.
+	constexpr uint32_t CodeSize = RandomXCodeSize + SuperscalarSize;
+
+	// Offset of the superscalar hash region inside the code buffer (it starts right after the main program region).
+	constexpr int32_t superScalarHashOffset = RandomXCodeSize;
+
+	// ADDR(x) yields the address of symbol x as a uint8_t*.
+	// In MSVC debug / RELWITHDEBINFO builds, when the first byte at &x is 0xE9 (the same value as the JMP
+	// opcode constant defined further down), the 32-bit displacement stored after it is applied
+	// (result = &x + rel32 + 5), so the macro resolves to the jump's destination instead of the jump itself.
+	// NOTE(review): presumably this skips linker-generated jump thunks -- confirm.
+#if defined(_MSC_VER) && (defined(_DEBUG) || defined (RELWITHDEBINFO))
+#define ADDR(x) ((((uint8_t*)&x)[0] == 0xE9) ? (((uint8_t*)&x) + *(const int32_t*)(((uint8_t*)&x) + 1) + 5) : ((uint8_t*)&x))
+#else
+#define ADDR(x) ((uint8_t*)&x)
+#endif
+
+	// Start addresses of the pre-assembled code fragments, taken from the randomx_* symbols via ADDR.
+	const uint8_t* codePrologue = ADDR(randomx_program_prologue);
+	const uint8_t* codeLoopBegin = ADDR(randomx_program_loop_begin);
+	const uint8_t* codeLoopLoad = ADDR(randomx_program_loop_load);
+	const uint8_t* codeProgamStart = ADDR(randomx_program_start);
+	const uint8_t* codeReadDataset = ADDR(randomx_program_read_dataset);
+	const uint8_t* codeReadDatasetLightSshInit = ADDR(randomx_program_read_dataset_sshash_init);
+	const uint8_t* codeReadDatasetLightSshFin = ADDR(randomx_program_read_dataset_sshash_fin);
+	const uint8_t* codeDatasetInit = ADDR(randomx_dataset_init);
+	const uint8_t* codeLoopStore = ADDR(randomx_program_loop_store);
+	const uint8_t* codeLoopEnd = ADDR(randomx_program_loop_end);
+	const uint8_t* codeEpilogue = ADDR(randomx_program_epilogue);
+	const uint8_t* codeProgramEnd = ADDR(randomx_program_end);
+	const uint8_t* codeShhLoad = ADDR(randomx_sshash_load);
+	const uint8_t* codeShhPrefetch = ADDR(randomx_sshash_prefetch);
+	const uint8_t* codeShhEnd = ADDR(randomx_sshash_end);
+	const uint8_t* codeShhInit = ADDR(randomx_sshash_init);
+
+	// Fragment sizes in bytes, each computed as the distance between two of the addresses above.
+	// The subtractions are only meaningful if the second symbol of each pair is placed after the first
+	// with nothing unrelated in between, so these values depend on the symbol order in the assembly source.
+	const int32_t prologueSize = codeLoopBegin - codePrologue;
+	const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
+	const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
+	const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
+	const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
+	const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
+	const int32_t datasetInitSize = codeEpilogue - codeDatasetInit;
+	const int32_t epilogueSize = codeShhLoad - codeEpilogue;
+	const int32_t codeSshLoadSize = codeShhPrefetch - codeShhLoad;
+	const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch;
+	const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
+
+	// The epilogue is copied so that it ends exactly at the end of the CodeSize-byte buffer.
+	const int32_t epilogueOffset = CodeSize - epilogueSize;
+
+	// Raw x86-64 encoding fragments appended to the code buffer by the emit helpers.
+	// Most entries hold only the prefix/opcode bytes; the code that emits them follows up with the
+	// ModRM byte (and SIB/immediate where needed). Entries whose name spells out complete instructions
+	// already contain every byte of those instructions.
+	static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
+	static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 };
+	static const uint8_t REX_SUB_RR[] = { 0x4d, 0x2b };
+	static const uint8_t REX_SUB_RM[] = { 0x4c, 0x2b };
+	static const uint8_t REX_MOV_RR[] = { 0x41, 0x8b };
+	static const uint8_t REX_MOV_RR64[] = { 0x49, 0x8b };
+	static const uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b };
+	static const uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf };
+	static const uint8_t REX_IMUL_RRI[] = { 0x4d, 0x69 };
+	static const uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf };
+	static const uint8_t REX_MUL_R[] = { 0x49, 0xf7 };
+	static const uint8_t REX_MUL_M[] = { 0x48, 0xf7 };
+	static const uint8_t REX_81[] = { 0x49, 0x81 };
+	static const uint8_t AND_EAX_I = 0x25;
+	static const uint8_t MOV_EAX_I = 0xb8;
+	static const uint8_t MOV_RAX_I[] = { 0x48, 0xb8 };
+	static const uint8_t MOV_RCX_I[] = { 0x48, 0xb9 };
+	static const uint8_t REX_LEA[] = { 0x4f, 0x8d };
+	static const uint8_t REX_MUL_MEM[] = { 0x48, 0xf7, 0x24, 0x0e };
+	static const uint8_t REX_IMUL_MEM[] = { 0x48, 0xf7, 0x2c, 0x0e };
+	static const uint8_t REX_SHR_RAX[] = { 0x48, 0xc1, 0xe8 };
+	// add rax, 1 ; sbb rax, 0
+	static const uint8_t RAX_ADD_SBB_1[] = { 0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00 };
+	static const uint8_t MUL_RCX[] = { 0x48, 0xf7, 0xe1 };
+	static const uint8_t REX_SHR_RDX[] = { 0x48, 0xc1, 0xea };
+	static const uint8_t REX_SH[] = { 0x49, 0xc1 };
+	// mov rcx, rax ; sar rcx, 63
+	static const uint8_t MOV_RCX_RAX_SAR_RCX_63[] = { 0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f };
+	static const uint8_t AND_ECX_I[] = { 0x81, 0xe1 };
+	static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 };
+	static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 };
+	static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 };
+	static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 };
+	static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 };
+	static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 };
+	static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 };
+	static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA };
+	static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 };
+	// sets al ; add rdx, rax
+	static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 };
+	static const uint8_t REX_NEG[] = { 0x49, 0xF7 };
+	static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
+	static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
+	static const uint8_t REX_XOR_RM[] = { 0x4c, 0x33 };
+	static const uint8_t REX_ROT_CL[] = { 0x49, 0xd3 };
+	static const uint8_t REX_ROT_I8[] = { 0x49, 0xc1 };
+	static const uint8_t SHUFPD[] = { 0x66, 0x0f, 0xc6 };
+	static const uint8_t REX_ADDPD[] = { 0x66, 0x41, 0x0f, 0x58 };
+	static const uint8_t REX_CVTDQ2PD_XMM12[] = { 0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06 };
+	static const uint8_t REX_SUBPD[] = { 0x66, 0x41, 0x0f, 0x5c };
+	static const uint8_t REX_XORPS[] = { 0x41, 0x0f, 0x57 };
+	static const uint8_t REX_MULPD[] = { 0x66, 0x41, 0x0f, 0x59 };
+	static const uint8_t REX_MAXPD[] = { 0x66, 0x41, 0x0f, 0x5f };
+	static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e };
+	static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 };
+	// and eax, 0x6000 ; or eax, 0x9fc0 ; push rax ; ldmxcsr [rsp] ; pop rax
+	static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58 };
+	static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 };
+	static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 };
+	static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 };
+	static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 };
+	static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 };
+	static const uint8_t REX_MOV_MR[] = { 0x4c, 0x89 };
+	static const uint8_t REX_XOR_EAX[] = { 0x41, 0x33 };
+	static const uint8_t SUB_EBX[] = { 0x83, 0xEB, 0x01 };
+	static const uint8_t JNZ[] = { 0x0f, 0x85 };
+	static const uint8_t JMP = 0xe9;
+	static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
+	static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
+	// andps xmm12, xmm13 ; orps xmm12, xmm14 (the E 'and' / 'or' masks from the register allocation above)
+	static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
+	static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
+	static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
+	static const uint8_t CALL = 0xe8;
+	static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
+	static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
+	static const uint8_t JZ[] = { 0x0f, 0x84 };
+	static const uint8_t RET = 0xc3;
+	static const uint8_t LEA_32[] = { 0x41, 0x8d };
+	static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
+	static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 };
+
+	// NOPn is an n-byte padding sequence.
+	static const uint8_t NOP1[] = { 0x90 };
+	static const uint8_t NOP2[] = { 0x66, 0x90 };
+	static const uint8_t NOP3[] = { 0x66, 0x66, 0x90 };
+	static const uint8_t NOP4[] = { 0x0F, 0x1F, 0x40, 0x00 };
+	static const uint8_t NOP5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 };
+	static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
+	static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
+	static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
+
+	// Lookup by length: NOPX[n - 1] is the n-byte sequence (n = 1..8).
+	static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
+
+	// Reports the size in bytes (CodeSize) of the code buffer owned by a compiler instance.
+	size_t JitCompilerX86::getCodeSize() {
+		const size_t bufferBytes = CodeSize;
+		return bufferBytes;
+	}
+
+	// Allocates the CodeSize-byte code buffer and pre-fills its two fixed parts:
+	// the prologue at offset 0 and the epilogue at epilogueOffset.
+	// Throws std::runtime_error("allocMemoryPages") if allocMemoryPages returns nullptr.
+	JitCompilerX86::JitCompilerX86() {
+		code = (uint8_t*)allocMemoryPages(CodeSize);
+		if (!code) {
+			throw std::runtime_error("allocMemoryPages");
+		}
+		uint8_t* const epilogueDst = code + epilogueOffset;
+		memcpy(code, codePrologue, prologueSize);
+		memcpy(epilogueDst, codeEpilogue, epilogueSize);
+	}
+
+	// Hands the CodeSize-byte code buffer back through freePagedMemory.
+	JitCompilerX86::~JitCompilerX86() {
+		uint8_t* const buffer = code;
+		freePagedMemory(buffer, CodeSize);
+	}
+
+	// Applies setPagesRWX to the whole code buffer.
+	void JitCompilerX86::enableAll() {
+		uint8_t* const buffer = code;
+		setPagesRWX(buffer, CodeSize);
+	}
+
+	// Applies setPagesRW to the whole code buffer.
+	void JitCompilerX86::enableWriting() {
+		uint8_t* const buffer = code;
+		setPagesRW(buffer, CodeSize);
+	}
+
+	// Applies setPagesRX to the whole code buffer.
+	void JitCompilerX86::enableExecution() {
+		uint8_t* const buffer = code;
+		setPagesRX(buffer, CodeSize);
+	}
+
+	// Compiles `prog`: the prologue and translated instructions, then the
+	// pre-assembled dataset read fragment, then the loop tail and epilogue jump.
+	void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
+		generateProgramPrologue(prog, pcfg);
+		// same copy-and-advance as the other fragments, expressed through emit()
+		emit(codeReadDataset, readDatasetSize);
+		generateProgramEpilogue(prog, pcfg);
+	}
+
+	// Light-mode variant of generateProgram: instead of the dataset read fragment it emits the
+	// sshash init fragment, an `add ebx, imm32` with datasetOffset / CacheLineSize, a relative CALL
+	// to the superscalar hash code at superScalarHashOffset, and the sshash fin fragment.
+	void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
+		generateProgramPrologue(prog, pcfg);
+
+		emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
+
+		const uint32_t lineOffset = datasetOffset / CacheLineSize;
+		emit(ADD_EBX_I);
+		emit32(lineOffset);
+
+		emitByte(CALL);
+		// rel32 is measured from the end of the 4-byte displacement that is about to be written
+		const int32_t callDisplacement = superScalarHashOffset - (codePos + 4);
+		emit32(callDisplacement);
+
+		emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
+
+		generateProgramEpilogue(prog, pcfg);
+	}
+
+	// Writes the superscalar hash routine into the code buffer starting at superScalarHashOffset:
+	// the init fragment, then for each of the N programs its translated instructions followed by the
+	// load fragment. Between programs (not after the last one) it also emits `mov rbx, <address register>`
+	// and the prefetch fragment. The routine is terminated with RET.
+	template<size_t N>
+	void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &reciprocalCache) {
+		codePos = superScalarHashOffset;
+		emit(codeShhInit, codeSshInitSize);
+		for (unsigned progIndex = 0; progIndex < N; ++progIndex) {
+			SuperscalarProgram& current = programs[progIndex];
+			for (unsigned k = 0; k < current.getSize(); ++k) {
+				generateSuperscalarCode(current(k), reciprocalCache);
+			}
+			emit(codeShhLoad, codeSshLoadSize);
+			if (progIndex + 1 == N) {
+				continue; // nothing to prefetch after the last program
+			}
+			emit(REX_MOV_RR64);
+			emitByte(0xd8 + current.getAddressRegister());
+			emit(codeShhPrefetch, codeSshPrefetchSize);
+#ifdef RANDOMX_ALIGN
+			// pad with NOPs (at most 8 bytes each) until codePos is a multiple of 16
+			for (int misalign = (codePos % 16); misalign != 0; misalign = (codePos % 16)) {
+				const int gap = 16 - misalign;
+				const int padBytes = (gap > 8) ? 8 : gap;
+				emit(NOPX[padBytes - 1], padBytes);
+			}
+#endif
+		}
+		emitByte(RET);
+	}
+
+	// Explicit instantiation for the only array size used: RANDOMX_CACHE_ACCESSES programs.
+	template
+		void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector<uint64_t> &reciprocalCache);
+
+	// Copies the pre-assembled dataset-init fragment to the start of the code buffer.
+	// codePos is left untouched.
+	void JitCompilerX86::generateDatasetInitCode() {
+		uint8_t* const entry = code;
+		memcpy(entry, codeDatasetInit, datasetInitSize);
+	}
+
+	// Emits everything that precedes the dataset access of a program:
+	//  - resets the per-program bookkeeping (instruction offsets, last-writer table),
+	//  - patches pcfg.eMask into the prologue, 48 bytes before its end,
+	//  - appends the loop-load fragment,
+	//  - translates every instruction after reducing its src/dst modulo RegistersCount
+	//    (the reduction is stored back into the program),
+	//  - emits `mov eax, readReg2 ; xor eax, readReg3`.
+	void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
+		instructionOffsets.clear();
+		for (int& lastWriter : registerUsage) {
+			lastWriter = -1;
+		}
+
+		codePos = prologueSize;
+		memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
+		emit(codeLoopLoad, loopLoadSize);
+
+		for (unsigned index = 0; index < prog.getSize(); ++index) {
+			Instruction& current = prog(index);
+			current.src %= RegistersCount;
+			current.dst %= RegistersCount;
+			generateCode(current, index);
+		}
+
+		emit(REX_MOV_RR);
+		emitByte(0xc0 + pcfg.readReg2);
+		emit(REX_XOR_EAX);
+		emitByte(0xc0 + pcfg.readReg3);
+	}
+
+	// Emits the tail of the program loop:
+	// `mov rax, readReg0 ; xor rax, readReg1`, the scratchpad prefetch fragment, the loop-store fragment,
+	// `sub ebx, 1`, a JNZ back to the loop start (offset prologueSize) and a JMP to the epilogue
+	// located at epilogueOffset. `prog` is not used here.
+	void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) {
+		emit(REX_MOV_RR64);
+		emitByte(0xc0 + pcfg.readReg0);
+		emit(REX_XOR_RAX_R64);
+		emitByte(0xc0 + pcfg.readReg1);
+
+		const uint8_t* const prefetchBegin = ADDR(randomx_prefetch_scratchpad);
+		const uint8_t* const prefetchEnd = ADDR(randomx_prefetch_scratchpad_end);
+		emit(prefetchBegin, prefetchEnd - prefetchBegin);
+
+		emit(codeLoopStore, loopStoreSize);
+		emit(SUB_EBX);
+
+		// both displacements are relative to the end of the 4 bytes being written
+		emit(JNZ);
+		const int32_t loopDisplacement = prologueSize - codePos - 4;
+		emit32(loopDisplacement);
+
+		emitByte(JMP);
+		const int32_t exitDisplacement = epilogueOffset - codePos - 4;
+		emit32(exitDisplacement);
+	}
+
+	// Records the code offset where instruction `i` starts, then dispatches to the
+	// handler registered in `engine` for the instruction's opcode.
+	void JitCompilerX86::generateCode(Instruction& instr, int i) {
+		instructionOffsets.push_back(codePos);
+		const InstructionGeneratorX86 handler = engine[instr.opcode];
+		(this->*handler)(instr, i);
+	}
+
+	// Emits the x86-64 encoding of a single superscalar-program instruction at codePos.
+	// instr.opcode is interpreted as a SuperscalarInstructionType; IMUL_RCP takes its 64-bit
+	// multiplier from reciprocalCache[imm32]. The *_C8 / *_C9 variants are padded with a 1- or
+	// 2-byte NOP only when RANDOMX_ALIGN is defined. Any other opcode hits UNREACHABLE.
+	void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector<uint64_t> &reciprocalCache) {
+		typedef SuperscalarInstructionType Op;
+		const auto dst = instr.dst;
+		const auto src = instr.src;
+		switch ((Op)instr.opcode)
+		{
+		case Op::ISUB_R:
+			emit(REX_SUB_RR);
+			emitByte(0xc0 + 8 * dst + src);
+			break;
+		case Op::IXOR_R:
+			emit(REX_XOR_RR);
+			emitByte(0xc0 + 8 * dst + src);
+			break;
+		case Op::IADD_RS:
+			emit(REX_LEA);
+			emitByte(0x04 + 8 * dst);
+			genSIB(instr.getModShift(), src, dst);
+			break;
+		case Op::IMUL_R:
+			emit(REX_IMUL_RR);
+			emitByte(0xc0 + 8 * dst + src);
+			break;
+		case Op::IROR_C:
+			emit(REX_ROT_I8);
+			emitByte(0xc8 + dst);
+			emitByte(instr.getImm32() & 63);
+			break;
+		case Op::IADD_C7:
+			emit(REX_81);
+			emitByte(0xc0 + dst);
+			emit32(instr.getImm32());
+			break;
+		case Op::IXOR_C7:
+			emit(REX_XOR_RI);
+			emitByte(0xf0 + dst);
+			emit32(instr.getImm32());
+			break;
+		case Op::IADD_C8:
+			emit(REX_81);
+			emitByte(0xc0 + dst);
+			emit32(instr.getImm32());
+#ifdef RANDOMX_ALIGN
+			emit(NOP1);
+#endif
+			break;
+		case Op::IXOR_C8:
+			emit(REX_XOR_RI);
+			emitByte(0xf0 + dst);
+			emit32(instr.getImm32());
+#ifdef RANDOMX_ALIGN
+			emit(NOP1);
+#endif
+			break;
+		case Op::IADD_C9:
+			emit(REX_81);
+			emitByte(0xc0 + dst);
+			emit32(instr.getImm32());
+#ifdef RANDOMX_ALIGN
+			emit(NOP2);
+#endif
+			break;
+		case Op::IXOR_C9:
+			emit(REX_XOR_RI);
+			emitByte(0xf0 + dst);
+			emit32(instr.getImm32());
+#ifdef RANDOMX_ALIGN
+			emit(NOP2);
+#endif
+			break;
+		case Op::IMULH_R:
+			// mov rax, dst ; mul src ; mov dst, rdx
+			emit(REX_MOV_RR64);
+			emitByte(0xc0 + dst);
+			emit(REX_MUL_R);
+			emitByte(0xe0 + src);
+			emit(REX_MOV_R64R);
+			emitByte(0xc2 + 8 * dst);
+			break;
+		case Op::ISMULH_R:
+			// mov rax, dst ; imul src ; mov dst, rdx
+			emit(REX_MOV_RR64);
+			emitByte(0xc0 + dst);
+			emit(REX_MUL_R);
+			emitByte(0xe8 + src);
+			emit(REX_MOV_R64R);
+			emitByte(0xc2 + 8 * dst);
+			break;
+		case Op::IMUL_RCP:
+			// mov rax, <cached reciprocal> ; imul dst, rax
+			emit(MOV_RAX_I);
+			emit64(reciprocalCache[instr.getImm32()]);
+			emit(REX_IMUL_RM);
+			emitByte(0xc0 + 8 * dst);
+			break;
+		default:
+			UNREACHABLE;
+		}
+	}
+
+	// Emits the scratchpad address computation for a memory source operand:
+	// `lea eax|ecx, [src + imm32]` followed by `and eax|ecx, mask`.
+	// rax == true targets eax, rax == false targets ecx. The mask is ScratchpadL1Mask when
+	// instr.getModMem() is non-zero, otherwise ScratchpadL2Mask. An extra SIB byte (0x24) is
+	// emitted when src equals RegisterNeedsSib.
+	void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
+		const int targetBits = rax ? 0 : 8;
+		emit(LEA_32);
+		emitByte(0x80 + instr.src + targetBits);
+		if (instr.src == RegisterNeedsSib) {
+			emitByte(0x24);
+		}
+		emit32(instr.getImm32());
+		if (!rax) {
+			emit(AND_ECX_I);
+		}
+		else {
+			emitByte(AND_EAX_I);
+		}
+		const uint32_t mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask;
+		emit32(mask);
+	}
+
+	// Emits the scratchpad address computation for a store: `lea eax, [dst + imm32] ; and eax, mask`.
+	// The mask is ScratchpadL3Mask when instr.getModCond() >= StoreL3Condition; otherwise it is
+	// ScratchpadL1Mask or ScratchpadL2Mask depending on instr.getModMem(). An extra SIB byte (0x24)
+	// is emitted when dst equals RegisterNeedsSib.
+	void JitCompilerX86::genAddressRegDst(Instruction& instr) {
+		emit(LEA_32);
+		emitByte(0x80 + instr.dst);
+		if (instr.dst == RegisterNeedsSib) {
+			emitByte(0x24);
+		}
+		emit32(instr.getImm32());
+		emitByte(AND_EAX_I);
+
+		uint32_t mask = ScratchpadL3Mask;
+		if (instr.getModCond() < StoreL3Condition) {
+			mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask;
+		}
+		emit32(mask);
+	}
+
+	// Emits the 32-bit displacement used when the address is an immediate: imm32 masked with ScratchpadL3Mask.
+	void JitCompilerX86::genAddressImm(Instruction& instr) {
+		const uint32_t displacement = instr.getImm32() & ScratchpadL3Mask;
+		emit32(displacement);
+	}
+
+	// IADD_RS: `lea dst, [dst + src * 2^shift]`. When dst equals RegisterNeedsDisplacement the
+	// disp32 form (ModRM 0xac) is used and imm32 is appended as the displacement.
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
+		const bool withDisplacement = (instr.dst == RegisterNeedsDisplacement);
+		registerUsage[instr.dst] = i;
+		emit(REX_LEA);
+		emitByte(withDisplacement ? 0xac : 0x04 + 8 * instr.dst);
+		genSIB(instr.getModShift(), instr.src, instr.dst);
+		if (withDisplacement) {
+			emit32(instr.getImm32());
+		}
+	}
+
+	// IADD_M: `add dst, [rsi + addr]`. With src != dst the address is computed into eax first
+	// (genAddressReg); with src == dst the masked immediate is used as a displacement off rsi.
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_IADD_M(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		if (instr.src == instr.dst) {
+			emit(REX_ADD_RM);
+			emitByte(0x86 + 8 * instr.dst);
+			genAddressImm(instr);
+			return;
+		}
+		genAddressReg(instr, true);
+		emit(REX_ADD_RM);
+		emitByte(0x04 + 8 * instr.dst);
+		emitByte(0x06);
+	}
+
+	// Emits one SIB byte: scale in bits 7..6, index in bits 5..3, base in bits 2..0.
+	// The fields are OR-ed together without masking.
+	void JitCompilerX86::genSIB(int scale, int index, int base) {
+		const int sib = (scale << 6) | (index << 3) | base;
+		emitByte(sib);
+	}
+
+	// ISUB_R: `sub dst, src`, or `sub dst, imm32` when src == dst.
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		if (instr.src == instr.dst) {
+			emit(REX_81);
+			emitByte(0xe8 + instr.dst);
+			emit32(instr.getImm32());
+			return;
+		}
+		emit(REX_SUB_RR);
+		emitByte(0xc0 + 8 * instr.dst + instr.src);
+	}
+
+	// ISUB_M: `sub dst, [rsi + addr]`. With src != dst the address is computed into eax first
+	// (genAddressReg); with src == dst the masked immediate is used as a displacement off rsi.
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		if (instr.src == instr.dst) {
+			emit(REX_SUB_RM);
+			emitByte(0x86 + 8 * instr.dst);
+			genAddressImm(instr);
+			return;
+		}
+		genAddressReg(instr, true);
+		emit(REX_SUB_RM);
+		emitByte(0x04 + 8 * instr.dst);
+		emitByte(0x06);
+	}
+
+	// IMUL_R: `imul dst, src`, or `imul dst, dst, imm32` when src == dst.
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		if (instr.src == instr.dst) {
+			emit(REX_IMUL_RRI);
+			emitByte(0xc0 + 9 * instr.dst); // dst in both the reg and r/m fields
+			emit32(instr.getImm32());
+			return;
+		}
+		emit(REX_IMUL_RR);
+		emitByte(0xc0 + 8 * instr.dst + instr.src);
+	}
+
+	// IMUL_M: `imul dst, [rsi + addr]`. With src != dst the address is computed into eax first
+	// (genAddressReg); with src == dst the masked immediate is used as a displacement off rsi.
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		if (instr.src == instr.dst) {
+			emit(REX_IMUL_RM);
+			emitByte(0x86 + 8 * instr.dst);
+			genAddressImm(instr);
+			return;
+		}
+		genAddressReg(instr, true);
+		emit(REX_IMUL_RM);
+		emitByte(0x04 + 8 * instr.dst);
+		emitByte(0x06);
+	}
+
+	// IMULH_R: `mov rax, dst ; mul src ; mov dst, rdx` (ModRM /4 selects unsigned MUL).
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
+		const auto dst = instr.dst;
+		const auto src = instr.src;
+		registerUsage[dst] = i;
+		emit(REX_MOV_RR64);
+		emitByte(0xc0 + dst);
+		emit(REX_MUL_R);
+		emitByte(0xe0 + src);
+		emit(REX_MOV_R64R);
+		emitByte(0xc2 + 8 * dst);
+	}
+
+	// IMULH_M: `mov rax, dst ; mul qword [mem] ; mov dst, rdx`.
+	// With src != dst the address is first computed into ecx (genAddressReg with rax = false) and the
+	// operand is [rsi + rcx]; with src == dst the operand is [rsi + masked imm32].
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) {
+		const auto dst = instr.dst;
+		const bool addressFromRegister = (instr.src != dst);
+		registerUsage[dst] = i;
+		if (addressFromRegister) {
+			genAddressReg(instr, false);
+		}
+		emit(REX_MOV_RR64);
+		emitByte(0xc0 + dst);
+		if (addressFromRegister) {
+			emit(REX_MUL_MEM);
+		}
+		else {
+			emit(REX_MUL_M);
+			emitByte(0xa6);
+			genAddressImm(instr);
+		}
+		emit(REX_MOV_R64R);
+		emitByte(0xc2 + 8 * dst);
+	}
+
+	// ISMULH_R: `mov rax, dst ; imul src ; mov dst, rdx` (ModRM /5 selects one-operand signed IMUL).
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
+		const auto dst = instr.dst;
+		const auto src = instr.src;
+		registerUsage[dst] = i;
+		emit(REX_MOV_RR64);
+		emitByte(0xc0 + dst);
+		emit(REX_MUL_R);
+		emitByte(0xe8 + src);
+		emit(REX_MOV_R64R);
+		emitByte(0xc2 + 8 * dst);
+	}
+
+	// ISMULH_M: `mov rax, dst ; imul qword [mem] ; mov dst, rdx`.
+	// With src != dst the address is first computed into ecx (genAddressReg with rax = false) and the
+	// operand is [rsi + rcx]; with src == dst the operand is [rsi + masked imm32].
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) {
+		const auto dst = instr.dst;
+		const bool addressFromRegister = (instr.src != dst);
+		registerUsage[dst] = i;
+		if (addressFromRegister) {
+			genAddressReg(instr, false);
+		}
+		emit(REX_MOV_RR64);
+		emitByte(0xc0 + dst);
+		if (addressFromRegister) {
+			emit(REX_IMUL_MEM);
+		}
+		else {
+			emit(REX_MUL_M);
+			emitByte(0xae);
+			genAddressImm(instr);
+		}
+		emit(REX_MOV_R64R);
+		emitByte(0xc2 + 8 * dst);
+	}
+
+	// IMUL_RCP: `mov rax, randomx_reciprocal_fast(imm32) ; imul dst, rax`.
+	// Emits nothing (and leaves registerUsage untouched) when isZeroOrPowerOf2(imm32) is true.
+	void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
+		const uint32_t divisor = instr.getImm32();
+		if (isZeroOrPowerOf2(divisor)) {
+			return;
+		}
+		registerUsage[instr.dst] = i;
+		emit(MOV_RAX_I);
+		emit64(randomx_reciprocal_fast(divisor));
+		emit(REX_IMUL_RM);
+		emitByte(0xc0 + 8 * instr.dst);
+	}
+
+	// INEG_R: `neg dst`. Marks dst as last written by instruction i.
+	void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
+		const auto dst = instr.dst;
+		registerUsage[dst] = i;
+		emit(REX_NEG);
+		emitByte(0xd8 + dst);
+	}
+
+	// IXOR_R: `xor dst, src`, or `xor dst, imm32` when src == dst.
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		if (instr.src == instr.dst) {
+			emit(REX_XOR_RI);
+			emitByte(0xf0 + instr.dst);
+			emit32(instr.getImm32());
+			return;
+		}
+		emit(REX_XOR_RR);
+		emitByte(0xc0 + 8 * instr.dst + instr.src);
+	}
+
+	// IXOR_M: `xor dst, [rsi + addr]`. With src != dst the address is computed into eax first
+	// (genAddressReg); with src == dst the masked immediate is used as a displacement off rsi.
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		if (instr.src == instr.dst) {
+			emit(REX_XOR_RM);
+			emitByte(0x86 + 8 * instr.dst);
+			genAddressImm(instr);
+			return;
+		}
+		genAddressReg(instr, true);
+		emit(REX_XOR_RM);
+		emitByte(0x04 + 8 * instr.dst);
+		emitByte(0x06);
+	}
+
+	// IROR_R: `mov ecx, src ; ror dst, cl`, or `ror dst, imm8` with imm8 = imm32 & 63 when src == dst.
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		if (instr.src == instr.dst) {
+			emit(REX_ROT_I8);
+			emitByte(0xc8 + instr.dst);
+			emitByte(instr.getImm32() & 63);
+			return;
+		}
+		emit(REX_MOV_RR);
+		emitByte(0xc8 + instr.src);
+		emit(REX_ROT_CL);
+		emitByte(0xc8 + instr.dst);
+	}
+
+	// IROL_R: `mov ecx, src ; rol dst, cl`, or `rol dst, imm8` with imm8 = imm32 & 63 when src == dst.
+	// Marks dst as last written by instruction i.
+	void JitCompilerX86::h_IROL_R(Instruction& instr, int i) {
+		registerUsage[instr.dst] = i;
+		if (instr.src == instr.dst) {
+			emit(REX_ROT_I8);
+			emitByte(0xc0 + instr.dst);
+			emitByte(instr.getImm32() & 63);
+			return;
+		}
+		emit(REX_MOV_RR);
+		emitByte(0xc8 + instr.src);
+		emit(REX_ROT_CL);
+		emitByte(0xc0 + instr.dst);
+	}
+
+	// ISWAP_R: `xchg dst, src`. When src == dst nothing is emitted and registerUsage is unchanged;
+	// otherwise both registers are marked as last written by instruction i.
+	void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) {
+		if (instr.src == instr.dst) {
+			return;
+		}
+		registerUsage[instr.dst] = i;
+		registerUsage[instr.src] = i;
+		emit(REX_XCHG);
+		emitByte(0xc0 + instr.src + 8 * instr.dst);
+	}
+
+	// FSWAP_R: `shufpd xmm<dst>, xmm<dst>, 1`. dst is used as-is (not reduced modulo RegisterCountFlt).
+	void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) {
+		const int modrm = 0xc0 + 9 * instr.dst; // dst in both the reg and r/m fields
+		emit(SHUFPD);
+		emitByte(modrm);
+		emitByte(1);
+	}
+
+	// FADD_R: `addpd xmm<dst>, xmm<8 + src>` after reducing dst and src modulo RegisterCountFlt
+	// (the reduced values are stored back into instr).
+	void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
+		const int modrm = 0xc0 + instr.src + 8 * instr.dst;
+		emit(REX_ADDPD);
+		emitByte(modrm);
+	}
+
+	// FADD_M: computes the address into eax, then `cvtdq2pd xmm12, [rsi + rax] ; addpd xmm<dst>, xmm12`.
+	// dst is reduced modulo RegisterCountFlt (stored back into instr).
+	void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		const int modrm = 0xc4 + 8 * instr.dst;
+		genAddressReg(instr, true);
+		emit(REX_CVTDQ2PD_XMM12);
+		emit(REX_ADDPD);
+		emitByte(modrm);
+	}
+
+	// FSUB_R: `subpd xmm<dst>, xmm<8 + src>` after reducing dst and src modulo RegisterCountFlt
+	// (the reduced values are stored back into instr).
+	void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
+		const int modrm = 0xc0 + instr.src + 8 * instr.dst;
+		emit(REX_SUBPD);
+		emitByte(modrm);
+	}
+
+	// FSUB_M: computes the address into eax, then `cvtdq2pd xmm12, [rsi + rax] ; subpd xmm<dst>, xmm12`.
+	// dst is reduced modulo RegisterCountFlt (stored back into instr).
+	void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		const int modrm = 0xc4 + 8 * instr.dst;
+		genAddressReg(instr, true);
+		emit(REX_CVTDQ2PD_XMM12);
+		emit(REX_SUBPD);
+		emitByte(modrm);
+	}
+
+	// FSCAL_R: `xorps xmm<dst>, xmm15` (xmm15 holds the scale mask, see the register allocation above).
+	// dst is reduced modulo RegisterCountFlt (stored back into instr).
+	void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		const int modrm = 0xc7 + 8 * instr.dst;
+		emit(REX_XORPS);
+		emitByte(modrm);
+	}
+
+	// FMUL_R: `mulpd xmm<4 + dst>, xmm<8 + src>` after reducing dst and src modulo RegisterCountFlt
+	// (the reduced values are stored back into instr).
+	void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		instr.src %= RegisterCountFlt;
+		const int modrm = 0xe0 + instr.src + 8 * instr.dst;
+		emit(REX_MULPD);
+		emitByte(modrm);
+	}
+
+	// FDIV_M: computes the address into eax, loads the operand with `cvtdq2pd xmm12, [rsi + rax]`,
+	// applies the E masks (`andps xmm12, xmm13 ; orps xmm12, xmm14`) and emits `divpd xmm<4 + dst>, xmm12`.
+	// dst is reduced modulo RegisterCountFlt (stored back into instr).
+	void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		const int modrm = 0xe4 + 8 * instr.dst;
+		genAddressReg(instr, true);
+		emit(REX_CVTDQ2PD_XMM12);
+		emit(REX_ANDPS_XMM12);
+		emit(REX_DIVPD);
+		emitByte(modrm);
+	}
+
+	// FSQRT_R: `sqrtpd xmm<4 + dst>, xmm<4 + dst>`.
+	// dst is reduced modulo RegisterCountFlt (stored back into instr).
+	void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
+		instr.dst %= RegisterCountFlt;
+		const int modrm = 0xe4 + 9 * instr.dst; // same register in the reg and r/m fields
+		emit(SQRTPD);
+		emitByte(modrm);
+	}
+
+	// CFROUND: `mov rax, src`, an optional `rol rax, rotate` with rotate = (13 - (imm32 & 63)) & 63
+	// (skipped when rotate is 0), then the fixed sequence
+	// `and eax, 0x6000 ; or eax, 0x9fc0 ; push rax ; ldmxcsr [rsp] ; pop rax`.
+	void JitCompilerX86::h_CFROUND(Instruction& instr, int i) {
+		const int rotate = (13 - (instr.getImm32() & 63)) & 63;
+		emit(REX_MOV_RR64);
+		emitByte(0xc0 + instr.src);
+		if (rotate) {
+			emit(ROL_RAX);
+			emitByte(rotate);
+		}
+		emit(AND_OR_MOV_LDMXCSR);
+	}
+
+	// CBRANCH: `add dst, imm ; test dst, ConditionMask << shift ; jz <target>`.
+	//  - shift = getModCond() + ConditionOffset;
+	//  - imm is imm32 with bit `shift` set and, unless both ConditionOffset and shift are 0, bit `shift - 1` cleared;
+	//  - the jump target is the instruction following the last one that wrote dst
+	//    (instruction 0 if dst has not been written yet).
+	// Afterwards every register is marked as last written by instruction i.
+	void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) {
+		const int branchReg = instr.dst;
+		const int targetIndex = registerUsage[branchReg] + 1;
+		const int shift = instr.getModCond() + ConditionOffset;
+
+		uint32_t addend = instr.getImm32() | (1UL << shift);
+		if (ConditionOffset > 0 || shift > 0) {
+			addend &= ~(1UL << (shift - 1));
+		}
+
+		emit(REX_ADD_I);
+		emitByte(0xc0 + branchReg);
+		emit32(addend);
+
+		emit(REX_TEST);
+		emitByte(0xc0 + branchReg);
+		emit32(ConditionMask << shift);
+
+		emit(JZ);
+		// rel32 is measured from the end of the 4-byte displacement
+		emit32(instructionOffsets[targetIndex] - (codePos + 4));
+
+		//mark all registers as used
+		for (int& lastWriter : registerUsage) {
+			lastWriter = i;
+		}
+	}
+
+	// ISTORE: computes the store address into eax (genAddressRegDst), then `mov [rsi + rax], src`.
+	void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
+		genAddressRegDst(instr);
+		const int modrm = 0x04 + 8 * instr.src;
+		emit(REX_MOV_MR);
+		emitByte(modrm);
+		emitByte(0x06);
+	}
+
+	// NOP: emits the single-byte NOP1 sequence (0x90).
+	void JitCompilerX86::h_NOP(Instruction& instr, int i) {
+		emit(NOP1, sizeof(NOP1));
+	}
+
+	// Dispatch table used by generateCode: engine[opcode] is the handler for that opcode byte.
+	// NOTE(review): REPN and WT are not defined in this file and presumably come from
+	// instruction_weights.hpp; it looks like each INST_HANDLE(x) expands to WT(x) consecutive copies of
+	// &JitCompilerX86::h_x, so the order of the lines below decides which opcode range maps to which
+	// handler -- confirm against that header before reordering anything.
+#include "instruction_weights.hpp"
+#define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x))
+
+	InstructionGeneratorX86 JitCompilerX86::engine[256] = {
+		INST_HANDLE(IADD_RS)
+		INST_HANDLE(IADD_M)
+		INST_HANDLE(ISUB_R)
+		INST_HANDLE(ISUB_M)
+		INST_HANDLE(IMUL_R)
+		INST_HANDLE(IMUL_M)
+		INST_HANDLE(IMULH_R)
+		INST_HANDLE(IMULH_M)
+		INST_HANDLE(ISMULH_R)
+		INST_HANDLE(ISMULH_M)
+		INST_HANDLE(IMUL_RCP)
+		INST_HANDLE(INEG_R)
+		INST_HANDLE(IXOR_R)
+		INST_HANDLE(IXOR_M)
+		INST_HANDLE(IROR_R)
+		INST_HANDLE(IROL_R)
+		INST_HANDLE(ISWAP_R)
+		INST_HANDLE(FSWAP_R)
+		INST_HANDLE(FADD_R)
+		INST_HANDLE(FADD_M)
+		INST_HANDLE(FSUB_R)
+		INST_HANDLE(FSUB_M)
+		INST_HANDLE(FSCAL_R)
+		INST_HANDLE(FMUL_R)
+		INST_HANDLE(FDIV_M)
+		INST_HANDLE(FSQRT_R)
+		INST_HANDLE(CBRANCH)
+		INST_HANDLE(CFROUND)
+		INST_HANDLE(ISTORE)
+		INST_HANDLE(NOP)
+	};
+
+}
--- a/crypto/randomx/jit_compiler_x86.hpp
+++ b/crypto/randomx/jit_compiler_x86.hpp
@ -0,0 +1,142 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <vector>
+#include "common.hpp"
+
+namespace randomx {
+
+	// Forward declarations: this header only uses these types by reference or pointer.
+	class Program;
+	struct ProgramConfiguration;
+	class SuperscalarProgram;
+	class JitCompilerX86;
+	class Instruction;
+
+	// Pointer to a JitCompilerX86 member function that emits code for one instruction;
+	// the int argument is the instruction's index within the program.
+	typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
+
+	// x86-64 JIT backend. Each instance owns one code buffer of getCodeSize() bytes into which
+	// programs, the superscalar hash routine or the dataset-init routine are generated.
+	class JitCompilerX86 {
+	public:
+		// Allocates the code buffer; throws std::runtime_error if the allocation yields nullptr.
+		JitCompilerX86();
+		// Releases the code buffer.
+		~JitCompilerX86();
+		// The instance is the sole owner of the raw `code` buffer released by the destructor.
+		// An implicit copy would leave two objects releasing the same buffer, so copying is disabled.
+		JitCompilerX86(const JitCompilerX86&) = delete;
+		JitCompilerX86& operator=(const JitCompilerX86&) = delete;
+
+		// Generates the code for a program using the dataset read fragment.
+		void generateProgram(Program&, ProgramConfiguration&);
+		// Generates the code for a program that calls the superscalar hash routine instead;
+		// the uint32_t argument is the dataset offset.
+		void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
+		// Generates the superscalar hash routine from N superscalar programs;
+		// the vector supplies the reciprocals referenced by IMUL_RCP instructions.
+		template<size_t N>
+		void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &);
+		// Copies the dataset-init routine to the start of the code buffer.
+		void generateDatasetInitCode();
+		// Both accessors below return the start of the code buffer, reinterpreted as the respective entry point type.
+		ProgramFunc* getProgramFunc() {
+			return (ProgramFunc*)code;
+		}
+		DatasetInitFunc* getDatasetInitFunc() {
+			return (DatasetInitFunc*)code;
+		}
+		// Raw pointer to the start of the code buffer.
+		uint8_t* getCode() {
+			return code;
+		}
+		// Size of the code buffer in bytes.
+		size_t getCodeSize();
+		// Page-protection switches applied to the whole code buffer.
+		void enableWriting();
+		void enableExecution();
+		void enableAll();
+	private:
+		// Handler lookup table indexed by the instruction opcode byte.
+		static InstructionGeneratorX86 engine[256];
+		// Code offset at which each instruction of the current program starts.
+		std::vector<int32_t> instructionOffsets;
+		// For each integer register, the index of the last instruction that wrote it (-1 = none yet).
+		int registerUsage[RegistersCount];
+		// Start of the owned code buffer.
+		uint8_t* code;
+		// Current write offset into `code`; set by the generate* entry points before anything is emitted.
+		int32_t codePos;
+
+		void generateProgramPrologue(Program&, ProgramConfiguration&);
+		void generateProgramEpilogue(Program&, ProgramConfiguration&);
+		void genAddressReg(Instruction&, bool);
+		void genAddressRegDst(Instruction&);
+		void genAddressImm(Instruction&);
+		void genSIB(int scale, int index, int base);
+
+		void generateCode(Instruction&, int);
+		void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
+
+		// The emit helpers append at codePos and advance it; none of them checks the buffer bounds.
+		void emitByte(uint8_t val) {
+			code[codePos] = val;
+			codePos++;
+		}
+
+		// Appends the 4 bytes of val in host byte order.
+		void emit32(uint32_t val) {
+			memcpy(code + codePos, &val, sizeof val);
+			codePos += sizeof val;
+		}
+
+		// Appends the 8 bytes of val in host byte order.
+		void emit64(uint64_t val) {
+			memcpy(code + codePos, &val, sizeof val);
+			codePos += sizeof val;
+		}
+
+		// Appends a whole byte array; the length is deduced from the array type.
+		template<size_t N>
+		void emit(const uint8_t (&src)[N]) {
+			emit(src, N);
+		}
+
+		// Appends `count` bytes starting at `src`.
+		void emit(const uint8_t* src, size_t count) {
+			memcpy(code + codePos, src, count);
+			codePos += count;
+		}
+
+		// One handler per instruction; the int argument is the instruction's index in the program.
+		void h_IADD_RS(Instruction&, int);
+		void h_IADD_M(Instruction&, int);
+		void h_ISUB_R(Instruction&, int);
+		void h_ISUB_M(Instruction&, int);
+		void h_IMUL_R(Instruction&, int);
+		void h_IMUL_M(Instruction&, int);
+		void h_IMULH_R(Instruction&, int);
+		void h_IMULH_M(Instruction&, int);
+		void h_ISMULH_R(Instruction&, int);
+		void h_ISMULH_M(Instruction&, int);
+		void h_IMUL_RCP(Instruction&, int);
+		void h_INEG_R(Instruction&, int);
+		void h_IXOR_R(Instruction&, int);
+		void h_IXOR_M(Instruction&, int);
+		void h_IROR_R(Instruction&, int);
+		void h_IROL_R(Instruction&, int);
+		void h_ISWAP_R(Instruction&, int);
+		void h_FSWAP_R(Instruction&, int);
+		void h_FADD_R(Instruction&, int);
+		void h_FADD_M(Instruction&, int);
+		void h_FSUB_R(Instruction&, int);
+		void h_FSUB_M(Instruction&, int);
+		void h_FSCAL_R(Instruction&, int);
+		void h_FMUL_R(Instruction&, int);
+		void h_FDIV_M(Instruction&, int);
+		void h_FSQRT_R(Instruction&, int);
+		void h_CBRANCH(Instruction&, int);
+		void h_CFROUND(Instruction&, int);
+		void h_ISTORE(Instruction&, int);
+		void h_NOP(Instruction&, int);
+	};
+
+}
--- a/crypto/randomx/jit_compiler_x86_static.S
+++ b/crypto/randomx/jit_compiler_x86_static.S
@ -0,0 +1,230 @@
+# Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 	* Redistributions of source code must retain the above copyright
+# 	  notice, this list of conditions and the following disclaimer.
+# 	* Redistributions in binary form must reproduce the above copyright
+# 	  notice, this list of conditions and the following disclaimer in the
+# 	  documentation and/or other materials provided with the distribution.
+# 	* Neither the name of the copyright holder nor the
+# 	  names of its contributors may be used to endorse or promote products
+# 	  derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.intel_syntax noprefix
+#if defined(__APPLE__)
+.text
+#define DECL(x) _##x
+#else
+.section .text
+#define DECL(x) x
+#endif
+
+#if defined(__WIN32__) || defined(__CYGWIN__)
+#define WINABI
+#endif
+
+.global DECL(randomx_prefetch_scratchpad)
+.global DECL(randomx_prefetch_scratchpad_end)
+.global DECL(randomx_program_prologue)
+.global DECL(randomx_program_loop_begin)
+.global DECL(randomx_program_loop_load)
+.global DECL(randomx_program_start)
+.global DECL(randomx_program_read_dataset)
+.global DECL(randomx_program_read_dataset_sshash_init)
+.global DECL(randomx_program_read_dataset_sshash_fin)
+.global DECL(randomx_program_loop_store)
+.global DECL(randomx_program_loop_end)
+.global DECL(randomx_dataset_init)
+.global DECL(randomx_program_epilogue)
+.global DECL(randomx_sshash_load)
+.global DECL(randomx_sshash_prefetch)
+.global DECL(randomx_sshash_end)
+.global DECL(randomx_sshash_init)
+.global DECL(randomx_program_end)
+.global DECL(randomx_reciprocal_fast)
+
+#include "configuration.h"
+
+#define RANDOMX_SCRATCHPAD_MASK      (RANDOMX_SCRATCHPAD_L3-64)
+#define RANDOMX_DATASET_BASE_MASK    (RANDOMX_DATASET_BASE_SIZE-64)
+#define RANDOMX_CACHE_MASK           (RANDOMX_ARGON_MEMORY*16-1)
+#define RANDOMX_ALIGN                4096
+#define SUPERSCALAR_OFFSET           ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))
+
+#define db .byte
+
+;# Fragment: treats rax as two 32-bit halves, masks each half with
+;# RANDOMX_SCRATCHPAD_MASK and issues a prefetcht0 for [rsi + masked half].
+;# rax and rdx are overwritten. There is no ret at the end of the fragment;
+;# it is followed directly by the randomx_prefetch_scratchpad_end label.
+DECL(randomx_prefetch_scratchpad):
+	mov rdx, rax
+	and eax, RANDOMX_SCRATCHPAD_MASK
+	prefetcht0 [rsi+rax]
+	ror rdx, 32
+	and edx, RANDOMX_SCRATCHPAD_MASK
+	prefetcht0 [rsi+rdx]
+
+DECL(randomx_prefetch_scratchpad_end):
+
+.balign 64
+;# Program entry fragment. The ABI-specific include is selected by WINABI.
+;# After it, xmm13-xmm15 are loaded with the mantissaMask, exp240 and scaleMask
+;# constants (RIP-relative; NOTE(review): presumably defined in
+;# asm/program_xmm_constants.inc, confirm), rax is split into two masked
+;# scratchpad offsets (low half in eax, high half in edx) and control jumps
+;# to rx_program_loop_begin.
+DECL(randomx_program_prologue):
+#if defined(WINABI)
+	#include "asm/program_prologue_win64.inc"
+#else
+	#include "asm/program_prologue_linux.inc"
+#endif
+	movapd xmm13, xmmword ptr [mantissaMask+rip]
+	movapd xmm14, xmmword ptr [exp240+rip]
+	movapd xmm15, xmmword ptr [scaleMask+rip]
+	mov rdx, rax
+	and eax, RANDOMX_SCRATCHPAD_MASK
+	ror rdx, 32
+	and edx, RANDOMX_SCRATCHPAD_MASK
+	jmp rx_program_loop_begin
+
+.balign 64
+	#include "asm/program_xmm_constants.inc"
+
+.balign 64
+;# Program loop body, laid out as consecutive exported labels. Each label marks
+;# the start of the fragment that follows it. Three of the labels
+;# (loop_begin, program_start, loop_end) are followed by a single nop only.
+;# rx_program_loop_begin is the local alias used as the jmp target by the prologue.
+DECL(randomx_program_loop_begin):
+rx_program_loop_begin:
+	nop
+
+DECL(randomx_program_loop_load):
+	#include "asm/program_loop_load.inc"
+
+DECL(randomx_program_start):
+	nop
+
+DECL(randomx_program_read_dataset):
+	#include "asm/program_read_dataset.inc"
+
+DECL(randomx_program_read_dataset_sshash_init):
+	#include "asm/program_read_dataset_sshash_init.inc"
+
+DECL(randomx_program_read_dataset_sshash_fin):
+	#include "asm/program_read_dataset_sshash_fin.inc"
+
+DECL(randomx_program_loop_store):
+	#include "asm/program_loop_store.inc"
+
+DECL(randomx_program_loop_end):
+	nop
+
+.balign 64
+;# Fills consecutive 64-byte dataset items.
+;# After the ABI-specific setup below: rdi = cache->memory, rsi = output pointer,
+;# rbp = current block index, [rsp] = max. block index (loop bound).
+;# The bound is tested at the bottom of the loop, so one item is always produced.
+DECL(randomx_dataset_init):
+rx_dataset_init:
+	;# save the callee-saved registers that are overwritten below
+	push rbx
+	push rbp
+	push r12
+	push r13
+	push r14
+	push r15
+#if defined(WINABI)
+	;# rdi and rsi are saved as well on this path before being overwritten
+	push rdi
+	push rsi
+	mov rdi, qword ptr [rcx] ;# cache->memory
+	mov rsi, rdx ;# dataset
+	mov rbp, r8  ;# block index
+	push r9      ;# max. block index
+#else
+	mov rdi, qword ptr [rdi] ;# cache->memory
+	;# dataset in rsi
+	mov rbp, rdx  ;# block index
+	push rcx      ;# max. block index
+#endif
+init_block_loop:
+	prefetchw byte ptr [rsi]
+	mov rbx, rbp
+	;# Hand-encoded call rel32. The displacement is chosen so that the target is
+	;# rx_dataset_init + SUPERSCALAR_OFFSET (call_offset is the return address).
+	;# NOTE(review): the callee is not part of this file; it receives the block
+	;# index in rbx and is expected to leave the item in r8-r15. Presumably it is
+	;# code placed at that offset by the JIT compiler; confirm there.
+	.byte 232 ;# 0xE8 = call
+	.int SUPERSCALAR_OFFSET - (call_offset - rx_dataset_init)
+call_offset:
+	;# store the eight result registers as one 64-byte item
+	mov qword ptr [rsi+0], r8
+	mov qword ptr [rsi+8], r9
+	mov qword ptr [rsi+16], r10
+	mov qword ptr [rsi+24], r11
+	mov qword ptr [rsi+32], r12
+	mov qword ptr [rsi+40], r13
+	mov qword ptr [rsi+48], r14
+	mov qword ptr [rsi+56], r15
+	add rbp, 1
+	add rsi, 64
+	cmp rbp, qword ptr [rsp]
+	jb init_block_loop
+	;# drop the saved loop bound from the stack
+	pop rax
+#if defined(WINABI)
+	pop rsi
+	pop rdi
+#endif
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop rbp
+	pop rbx
+	ret
+
+.balign 64
+;# Program exit fragment: the shared store include comes first, followed by the
+;# ABI-specific epilogue include selected by WINABI.
+DECL(randomx_program_epilogue):
+	#include "asm/program_epilogue_store.inc"
+#if defined(WINABI)
+	#include "asm/program_epilogue_win64.inc"
+#else
+	#include "asm/program_epilogue_linux.inc"
+#endif
+
+.balign 64
+DECL(randomx_sshash_load):
+	#include "asm/program_sshash_load.inc"
+
+DECL(randomx_sshash_prefetch):
+	#include "asm/program_sshash_prefetch.inc"
+
+DECL(randomx_sshash_end):
+	nop
+
+.balign 64
+;# Initializes r8-r15 from the index in rbx:
+;#   r8 = (rbx + 1) * r0_mul, and rN = rN_add xor r8 for r9..r15.
+;# The prefetch include sits between the lea and the imul.
+;# NOTE(review): r0_mul and r1_add..r7_add are presumably defined in
+;# asm/program_sshash_constants.inc; confirm.
+;# Ends with a jump to rx_program_end instead of a ret.
+DECL(randomx_sshash_init):
+	lea r8, [rbx+1]
+	#include "asm/program_sshash_prefetch.inc"
+	imul r8, qword ptr [r0_mul+rip]
+	mov r9, qword ptr [r1_add+rip]
+	xor r9, r8
+	mov r10, qword ptr [r2_add+rip]
+	xor r10, r8
+	mov r11, qword ptr [r3_add+rip]
+	xor r11, r8
+	mov r12, qword ptr [r4_add+rip]
+	xor r12, r8
+	mov r13, qword ptr [r5_add+rip]
+	xor r13, r8
+	mov r14, qword ptr [r6_add+rip]
+	xor r14, r8
+	mov r15, qword ptr [r7_add+rip]
+	xor r15, r8
+	jmp rx_program_end
+
+.balign 64
+	#include "asm/program_sshash_constants.inc"
+
+.balign 64
+DECL(randomx_program_end):
+rx_program_end:
+	nop
+
+;# Entry point for the included reciprocal routine. On non-Windows builds the
+;# value in rdi is copied to rcx first, so the included body sees its input
+;# in rcx on every platform.
+DECL(randomx_reciprocal_fast):
+#if !defined(WINABI)
+	mov rcx, rdi
+#endif
+	#include "asm/randomx_reciprocal.inc"
--- a/crypto/randomx/jit_compiler_x86_static.asm
+++ b/crypto/randomx/jit_compiler_x86_static.asm
@ -0,0 +1,223 @@
+; Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+; 	* Redistributions of source code must retain the above copyright
+; 	  notice, this list of conditions and the following disclaimer.
+; 	* Redistributions in binary form must reproduce the above copyright
+; 	  notice, this list of conditions and the following disclaimer in the
+; 	  documentation and/or other materials provided with the distribution.
+; 	* Neither the name of the copyright holder nor the
+; 	  names of its contributors may be used to endorse or promote products
+; 	  derived from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+IFDEF RAX
+
+_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
+
+PUBLIC randomx_prefetch_scratchpad
+PUBLIC randomx_prefetch_scratchpad_end
+PUBLIC randomx_program_prologue
+PUBLIC randomx_program_loop_begin
+PUBLIC randomx_program_loop_load
+PUBLIC randomx_program_start
+PUBLIC randomx_program_read_dataset
+PUBLIC randomx_program_read_dataset_sshash_init
+PUBLIC randomx_program_read_dataset_sshash_fin
+PUBLIC randomx_dataset_init
+PUBLIC randomx_program_loop_store
+PUBLIC randomx_program_loop_end
+PUBLIC randomx_program_epilogue
+PUBLIC randomx_sshash_load
+PUBLIC randomx_sshash_prefetch
+PUBLIC randomx_sshash_end
+PUBLIC randomx_sshash_init
+PUBLIC randomx_program_end
+PUBLIC randomx_reciprocal_fast
+
+include asm/configuration.asm
+
+RANDOMX_SCRATCHPAD_MASK     EQU (RANDOMX_SCRATCHPAD_L3-64)
+RANDOMX_DATASET_BASE_MASK   EQU (RANDOMX_DATASET_BASE_SIZE-64)
+RANDOMX_CACHE_MASK          EQU (RANDOMX_ARGON_MEMORY*16-1)
+RANDOMX_ALIGN               EQU 4096
+SUPERSCALAR_OFFSET          EQU ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))
+
+; Fragment: treats rax as two 32-bit halves, masks each half with
+; RANDOMX_SCRATCHPAD_MASK and issues a prefetcht0 for [rsi + masked half].
+; rax and rdx are overwritten. The PROC contains no ret.
+randomx_prefetch_scratchpad PROC
+	mov rdx, rax
+	and eax, RANDOMX_SCRATCHPAD_MASK
+	prefetcht0 [rsi+rax]
+	ror rdx, 32
+	and edx, RANDOMX_SCRATCHPAD_MASK
+	prefetcht0 [rsi+rdx]
+randomx_prefetch_scratchpad ENDP
+
+randomx_prefetch_scratchpad_end PROC
+randomx_prefetch_scratchpad_end ENDP
+
+ALIGN 64
+; Program entry fragment (Win64 prologue include only in this file).
+; Loads xmm13-xmm15 with the mantissaMask, exp240 and scaleMask constants,
+; splits rax into two masked scratchpad offsets (low half in eax, high half
+; in edx) and jumps to rx_program_loop_begin.
+randomx_program_prologue PROC
+	include asm/program_prologue_win64.inc
+	movapd xmm13, xmmword ptr [mantissaMask]
+	movapd xmm14, xmmword ptr [exp240]
+	movapd xmm15, xmmword ptr [scaleMask]
+	mov rdx, rax
+	and eax, RANDOMX_SCRATCHPAD_MASK
+	ror rdx, 32
+	and edx, RANDOMX_SCRATCHPAD_MASK
+	jmp rx_program_loop_begin
+randomx_program_prologue ENDP
+
+ALIGN 64
+	include asm/program_xmm_constants.inc
+
+ALIGN 64
+randomx_program_loop_begin PROC
+rx_program_loop_begin::
+	nop
+randomx_program_loop_begin ENDP
+
+randomx_program_loop_load PROC
+	include asm/program_loop_load.inc
+randomx_program_loop_load ENDP
+
+randomx_program_start PROC
+	nop
+randomx_program_start ENDP
+
+randomx_program_read_dataset PROC
+	include asm/program_read_dataset.inc
+randomx_program_read_dataset ENDP
+
+randomx_program_read_dataset_sshash_init PROC
+	include asm/program_read_dataset_sshash_init.inc
+randomx_program_read_dataset_sshash_init ENDP
+
+randomx_program_read_dataset_sshash_fin PROC
+	include asm/program_read_dataset_sshash_fin.inc
+randomx_program_read_dataset_sshash_fin ENDP
+
+randomx_program_loop_store PROC
+	include asm/program_loop_store.inc
+randomx_program_loop_store ENDP
+
+randomx_program_loop_end PROC
+	nop
+randomx_program_loop_end ENDP
+
+ALIGN 64
+; Fills consecutive 64-byte dataset items.
+; After the setup below: rdi = cache->memory, rsi = output pointer,
+; rbp = current block index, [rsp] = max. block index (loop bound).
+; The bound is tested at the bottom of the loop, so one item is always produced.
+randomx_dataset_init PROC
+	; save the registers that are overwritten below
+	push rbx
+	push rbp
+	push rdi
+	push rsi
+	push r12
+	push r13
+	push r14
+	push r15
+	mov rdi, qword ptr [rcx] ;# cache->memory
+	mov rsi, rdx ;# dataset
+	mov rbp, r8  ;# block index
+	push r9      ;# max. block index
+init_block_loop:
+	prefetchw byte ptr [rsi]
+	mov rbx, rbp
+	; Hand-encoded call rel32. `distance` is the offset of the return address
+	; from the start of this PROC, so the call target is
+	; randomx_dataset_init + SUPERSCALAR_OFFSET.
+	; NOTE(review): the callee is not part of this file; it receives the block
+	; index in rbx and is expected to leave the item in r8-r15. Presumably it is
+	; code placed at that offset by the JIT compiler; confirm there.
+	db 232 ;# 0xE8 = call
+	dd SUPERSCALAR_OFFSET - distance
+	distance equ $ - offset randomx_dataset_init
+	; store the eight result registers as one 64-byte item
+	mov qword ptr [rsi+0], r8
+	mov qword ptr [rsi+8], r9
+	mov qword ptr [rsi+16], r10
+	mov qword ptr [rsi+24], r11
+	mov qword ptr [rsi+32], r12
+	mov qword ptr [rsi+40], r13
+	mov qword ptr [rsi+48], r14
+	mov qword ptr [rsi+56], r15
+	add rbp, 1
+	add rsi, 64
+	cmp rbp, qword ptr [rsp]
+	jb init_block_loop
+	; drop the saved loop bound from the stack
+	pop r9
+	pop r15
+	pop r14
+	pop r13
+	pop r12
+	pop rsi
+	pop rdi
+	pop rbp
+	pop rbx
+	ret
+randomx_dataset_init ENDP
+
+ALIGN 64
+randomx_program_epilogue PROC
+	include asm/program_epilogue_store.inc
+	include asm/program_epilogue_win64.inc
+randomx_program_epilogue ENDP
+
+ALIGN 64
+randomx_sshash_load PROC
+	include asm/program_sshash_load.inc
+randomx_sshash_load ENDP
+
+randomx_sshash_prefetch PROC
+	include asm/program_sshash_prefetch.inc
+randomx_sshash_prefetch ENDP
+
+randomx_sshash_end PROC
+	nop
+randomx_sshash_end ENDP
+
+ALIGN 64
+; Initializes r8-r15 from the index in rbx:
+;   r8 = (rbx + 1) * r0_mul, and rN = rN_add xor r8 for r9..r15.
+; The prefetch include sits between the lea and the imul.
+; Ends with a jump to rx_program_end instead of a ret.
+randomx_sshash_init PROC
+	lea r8, [rbx+1]
+	include asm/program_sshash_prefetch.inc
+	imul r8, qword ptr [r0_mul]
+	mov r9, qword ptr [r1_add]
+	xor r9, r8
+	mov r10, qword ptr [r2_add]
+	xor r10, r8
+	mov r11, qword ptr [r3_add]
+	xor r11, r8
+	mov r12, qword ptr [r4_add]
+	xor r12, r8
+	mov r13, qword ptr [r5_add]
+	xor r13, r8
+	mov r14, qword ptr [r6_add]
+	xor r14, r8
+	mov r15, qword ptr [r7_add]
+	xor r15, r8
+	jmp rx_program_end
+randomx_sshash_init ENDP
+
+ALIGN 64
+	include asm/program_sshash_constants.inc
+
+ALIGN 64
+randomx_program_end PROC
+rx_program_end::
+	nop
+randomx_program_end ENDP
+
+randomx_reciprocal_fast PROC
+	include asm/randomx_reciprocal.inc
+randomx_reciprocal_fast ENDP
+
+_RANDOMX_JITX86_STATIC ENDS
+
+ENDIF
+
+END
--- a/crypto/randomx/jit_compiler_x86_static.hpp
+++ b/crypto/randomx/jit_compiler_x86_static.hpp
@ -0,0 +1,50 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+// Labels exported by the static x86-64 assembly (jit_compiler_x86_static.S / .asm).
+// Every symbol is declared as void() regardless of how the assembly uses registers.
+// NOTE(review): several of these only mark fragment boundaries (for example the
+// *_end labels) and are presumably meant to be used for their addresses rather
+// than called — confirm against the JIT compiler before invoking any directly.
+extern "C" {
+	void randomx_prefetch_scratchpad();
+	void randomx_prefetch_scratchpad_end();
+	void randomx_program_prologue();
+	void randomx_program_loop_begin();
+	void randomx_program_loop_load();
+	void randomx_program_start();
+	void randomx_program_read_dataset();
+	void randomx_program_read_dataset_sshash_init();
+	void randomx_program_read_dataset_sshash_fin();
+	void randomx_program_loop_store();
+	void randomx_program_loop_end();
+	void randomx_dataset_init();
+	void randomx_program_epilogue();
+	void randomx_sshash_load();
+	void randomx_sshash_prefetch();
+	void randomx_sshash_end();
+	void randomx_sshash_init();
+	void randomx_program_end();
+}
--- a/crypto/randomx/program.hpp
+++ b/crypto/randomx/program.hpp
@ -0,0 +1,71 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <ostream>
+#include "common.hpp"
+#include "instruction.hpp"
+#include "blake2/endian.h"
+
+namespace randomx {
+
+	// Per-program settings: a two-word eMask followed by four readReg selectors.
+	// NOTE(review): the meaning of each field is set by the code that fills this
+	// struct, which is not visible here — confirm there.
+	struct ProgramConfiguration {
+		uint64_t eMask[2];
+		uint32_t readReg0;
+		uint32_t readReg1;
+		uint32_t readReg2;
+		uint32_t readReg3;
+	};
+
+	// A fixed-size program: 16 entropy words followed by RANDOMX_PROGRAM_SIZE instructions.
+	class Program {
+	public:
+		// Mutable access to the instruction at index pc (not bounds-checked).
+		Instruction& operator()(int pc) {
+			return programBuffer[pc];
+		}
+		// Streams the whole program by delegating to the private print() helper.
+		friend std::ostream& operator<<(std::ostream& os, const Program& p) {
+			p.print(os);
+			return os;
+		}
+		// Reads the i-th 64-bit entropy word through load64().
+		uint64_t getEntropy(int i) {
+			return load64(entropyBuffer + i);
+		}
+		// Number of instructions in a program; always RANDOMX_PROGRAM_SIZE.
+		uint32_t getSize() {
+			return RANDOMX_PROGRAM_SIZE;
+		}
+	private:
+		// Writes every instruction to os in buffer order.
+		void print(std::ostream& os) const {
+			for (Instruction instr : programBuffer) {
+				os << instr;
+			}
+		}
+		uint64_t entropyBuffer[16];
+		Instruction programBuffer[RANDOMX_PROGRAM_SIZE];
+	};
+
+	static_assert(sizeof(Program) % 64 == 0, "Invalid size of class randomx::Program");
+}
--- a/crypto/randomx/randomx.cpp
+++ b/crypto/randomx/randomx.cpp
@ -0,0 +1,450 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "randomx.h"
+#include "dataset.hpp"
+#include "vm_interpreted.hpp"
+#include "vm_interpreted_light.hpp"
+#include "vm_compiled.hpp"
+#include "vm_compiled_light.hpp"
+#include "blake2/blake2.h"
+#include "cpu.hpp"
+#include <cassert>
+#include <limits>
+
+#if defined(__SSE__) || defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP > 0))
+#define USE_CSR_INTRINSICS
+#include <xmmintrin.h>
+#else
+#include <cfenv>
+#endif
+
+extern "C" {
+
+	// Builds the flag set suggested for this build and CPU: JIT when the build has a
+	// compiler backend (plus SECURE when RANDOMX_FORCE_SECURE is defined), HARD_AES and
+	// the Argon2 SIMD flags when both the build and the detected CPU provide them.
+	randomx_flags randomx_get_flags() {
+		randomx::Cpu cpu;
+		randomx_flags flags = RANDOMX_FLAG_DEFAULT;
+		if (RANDOMX_HAVE_COMPILER) {
+			flags = RANDOMX_FLAG_JIT;
+#ifdef RANDOMX_FORCE_SECURE
+			flags |= RANDOMX_FLAG_SECURE;
+#endif
+		}
+		if (HAVE_AES && cpu.hasAes()) {
+			flags |= RANDOMX_FLAG_HARD_AES;
+		}
+		// An implementation pointer of nullptr means the variant was not compiled in.
+		const bool avx2Built = randomx_argon2_impl_avx2() != nullptr;
+		if (avx2Built && cpu.hasAvx2()) {
+			flags |= RANDOMX_FLAG_ARGON2_AVX2;
+		}
+		const bool ssse3Built = randomx_argon2_impl_ssse3() != nullptr;
+		if (ssse3Built && cpu.hasSsse3()) {
+			flags |= RANDOMX_FLAG_ARGON2_SSSE3;
+		}
+		return flags;
+	}
+
+	// Allocates a cache object configured by `flags`.
+	// Only RANDOMX_FLAG_JIT and RANDOMX_FLAG_LARGE_PAGES influence the layout chosen here;
+	// the Argon2 implementation is picked by selectArgonImpl(flags).
+	// Returns nullptr when no Argon2 implementation is available, when a std::exception
+	// is thrown during construction, or when the memory allocation yields nullptr.
+	randomx_cache *randomx_alloc_cache(randomx_flags flags) {
+		auto impl = randomx::selectArgonImpl(flags);
+		if (impl == nullptr) {
+			return nullptr;
+		}
+
+		const bool useJit = ((int)(flags & RANDOMX_FLAG_JIT)) != 0;
+		const bool useLargePages = ((int)(flags & RANDOMX_FLAG_LARGE_PAGES)) != 0;
+		randomx_cache *cache = nullptr;
+
+		try {
+			cache = new randomx_cache();
+			cache->argonImpl = impl;
+
+			// The deallocator is set first so that a later failure can be cleaned up.
+			if (useLargePages) {
+				cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
+			}
+			else {
+				cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>;
+			}
+
+			if (useJit) {
+				cache->jit = new randomx::JitCompiler();
+				cache->initialize = &randomx::initCacheCompile;
+				cache->datasetInit = cache->jit->getDatasetInitFunc();
+			}
+			else {
+				cache->jit = nullptr;
+				cache->initialize = &randomx::initCache;
+				cache->datasetInit = &randomx::initDataset;
+			}
+
+			if (useLargePages) {
+				cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize);
+			}
+			else {
+				cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize);
+			}
+		}
+		catch (std::exception &) {
+			if (cache != nullptr) {
+				randomx_release_cache(cache);
+				cache = nullptr;
+			}
+		}
+
+		if (cache != nullptr && cache->memory == nullptr) {
+			randomx_release_cache(cache);
+			cache = nullptr;
+		}
+
+		return cache;
+	}
+
+	// (Re)initializes `cache` with `key`. The work is skipped when the cache already
+	// holds the same key and reports itself as initialized.
+	void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize) {
+		assert(cache != nullptr);
+		assert(keySize == 0 || key != nullptr);
+		std::string cacheKey;
+		cacheKey.assign((const char *)key, keySize);
+		if (cache->cacheKey == cacheKey && cache->isInitialized()) {
+			return;
+		}
+		cache->initialize(cache, key, keySize);
+		cache->cacheKey = cacheKey;
+	}
+
+	// Returns the cache's internal memory pointer; ownership stays with the cache.
+	void *randomx_get_cache_memory(randomx_cache *cache) {
+		assert(cache != nullptr);
+		return cache->memory;
+	}
+
+	// Releases a cache: runs the deallocator stored in the cache, then deletes the object itself.
+	void randomx_release_cache(randomx_cache* cache) {
+		assert(cache != nullptr);
+		cache->dealloc(cache);
+		delete cache;
+	}
+
+	// Allocates a dataset object with DatasetSize bytes of memory, from the large-page
+	// allocator when RANDOMX_FLAG_LARGE_PAGES is set and from the default allocator otherwise.
+	// Returns nullptr when the size does not fit in size_t, when a std::exception is
+	// thrown, or when the allocation yields nullptr.
+	randomx_dataset *randomx_alloc_dataset(randomx_flags flags) {
+
+		// DatasetSize may exceed what size_t can hold (e.g. >= 4 GiB on a 32-bit target)
+		if (randomx::DatasetSize > std::numeric_limits<size_t>::max()) {
+			return nullptr;
+		}
+
+		const bool useLargePages = ((int)(flags & RANDOMX_FLAG_LARGE_PAGES)) != 0;
+		randomx_dataset *dataset = nullptr;
+
+		try {
+			dataset = new randomx_dataset();
+			// The deallocator is set before allocating so that cleanup works on failure.
+			if (useLargePages) {
+				dataset->dealloc = &randomx::deallocDataset<randomx::LargePageAllocator>;
+				dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::DatasetSize);
+			}
+			else {
+				dataset->dealloc = &randomx::deallocDataset<randomx::DefaultAllocator>;
+				dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::DatasetSize);
+			}
+		}
+		catch (std::exception &) {
+			if (dataset != nullptr) {
+				randomx_release_dataset(dataset);
+				dataset = nullptr;
+			}
+		}
+
+		if (dataset != nullptr && dataset->memory == nullptr) {
+			randomx_release_dataset(dataset);
+			dataset = nullptr;
+		}
+
+		return dataset;
+	}
+
+	// Number of RANDOMX_DATASET_ITEM_SIZE-byte items in a full dataset, computed at compile time.
+	constexpr unsigned long DatasetItemCount = randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE;
+
+	// Public accessor for DatasetItemCount.
+	unsigned long randomx_dataset_item_count() {
+		return DatasetItemCount;
+	}
+
+	// Initializes dataset items [startItem, startItem + itemCount) from `cache`.
+	// Every call to cache->datasetInit made here covers a multiple of four items:
+	//  - fewer than four items: four items are generated into a stack buffer and only
+	//    the requested ones are copied out;
+	//  - otherwise the largest multiple-of-four prefix is generated in place and, if
+	//    items remain, the last four items of the range are generated again in place
+	//    (overlapping the prefix).
+	void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) {
+		assert(dataset != nullptr);
+		assert(cache != nullptr);
+		assert(startItem < DatasetItemCount && itemCount <= DatasetItemCount);
+		assert(startItem + itemCount <= DatasetItemCount);
+
+		uint8_t *const firstItem = dataset->memory + startItem * randomx::CacheLineSize;
+
+		if (itemCount < 4) {
+			uint8_t buf[randomx::CacheLineSize * 4];
+			cache->datasetInit(cache, buf, startItem, startItem + 4);
+			memcpy(firstItem, buf, itemCount * randomx::CacheLineSize);
+			return;
+		}
+
+		const unsigned long leftover = itemCount % 4;
+		cache->datasetInit(cache, firstItem, startItem, startItem + itemCount - leftover);
+
+		if (leftover != 0) {
+			const unsigned long tailStart = startItem + (itemCount - 4);
+			cache->datasetInit(cache, dataset->memory + tailStart * randomx::CacheLineSize, tailStart, tailStart + 4);
+		}
+	}
+
+	// Returns the dataset's internal memory pointer; ownership stays with the dataset.
+	void *randomx_get_dataset_memory(randomx_dataset *dataset) {
+		assert(dataset != nullptr);
+		return dataset->memory;
+	}
+
+	// Releases a dataset: runs the deallocator stored in the dataset, then deletes the object itself.
+	void randomx_release_dataset(randomx_dataset *dataset) {
+		assert(dataset != nullptr);
+		dataset->dealloc(dataset);
+		delete dataset;
+	}
+
+	// Creates a virtual machine whose concrete class is selected by `flags`.
+	// FULL_MEM, JIT, HARD_AES and LARGE_PAGES pick the switch case; SECURE is not part
+	// of the switch mask and is only consulted inside the JIT cases.
+	// `cache` and/or `dataset` are attached when non-null, then allocate() is called.
+	// Returns nullptr if a std::exception is thrown at any point (a partially
+	// constructed VM is deleted first).
+	randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset) {
+		// A cache is required unless FULL_MEM is set; a dataset is required when it is.
+		assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM));
+		assert(cache == nullptr || cache->isInitialized());
+		assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM));
+
+		randomx_vm *vm = nullptr;
+
+		try {
+			// Flags outside this mask (SECURE, the ARGON2 flags) do not affect case selection.
+			switch ((int)(flags & (RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES))) {
+				case RANDOMX_FLAG_DEFAULT:
+					vm = new randomx::InterpretedLightVmDefault();
+					break;
+
+				case RANDOMX_FLAG_FULL_MEM:
+					vm = new randomx::InterpretedVmDefault();
+					break;
+
+				case RANDOMX_FLAG_JIT:
+					if (flags & RANDOMX_FLAG_SECURE) {
+						vm = new randomx::CompiledLightVmDefaultSecure();
+					}
+					else {
+						vm = new randomx::CompiledLightVmDefault();
+					}
+					break;
+
+				case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT:
+					if (flags & RANDOMX_FLAG_SECURE) {
+						vm = new randomx::CompiledVmDefaultSecure();
+					}
+					else {
+						vm = new randomx::CompiledVmDefault();
+					}
+					break;
+
+				case RANDOMX_FLAG_HARD_AES:
+					vm = new randomx::InterpretedLightVmHardAes();
+					break;
+
+				case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES:
+					vm = new randomx::InterpretedVmHardAes();
+					break;
+
+				case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
+					if (flags & RANDOMX_FLAG_SECURE) {
+						vm = new randomx::CompiledLightVmHardAesSecure();
+					}
+					else {
+						vm = new randomx::CompiledLightVmHardAes();
+					}
+					break;
+
+				case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
+					if (flags & RANDOMX_FLAG_SECURE) {
+						vm = new randomx::CompiledVmHardAesSecure();
+					}
+					else {
+						vm = new randomx::CompiledVmHardAes();
+					}
+					break;
+
+				case RANDOMX_FLAG_LARGE_PAGES:
+					vm = new randomx::InterpretedLightVmLargePage();
+					break;
+
+				case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_LARGE_PAGES:
+					vm = new randomx::InterpretedVmLargePage();
+					break;
+
+				case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
+					if (flags & RANDOMX_FLAG_SECURE) {
+						vm = new randomx::CompiledLightVmLargePageSecure();
+					}
+					else {
+						vm = new randomx::CompiledLightVmLargePage();
+					}
+					break;
+
+				case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
+					if (flags & RANDOMX_FLAG_SECURE) {
+						vm = new randomx::CompiledVmLargePageSecure();
+					}
+					else {
+						vm = new randomx::CompiledVmLargePage();
+					}
+					break;
+
+				case RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
+					vm = new randomx::InterpretedLightVmLargePageHardAes();
+					break;
+
+				case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
+					vm = new randomx::InterpretedVmLargePageHardAes();
+					break;
+
+				case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
+					if (flags & RANDOMX_FLAG_SECURE) {
+						vm = new randomx::CompiledLightVmLargePageHardAesSecure();
+					}
+					else {
+						vm = new randomx::CompiledLightVmLargePageHardAes();
+					}
+					break;
+
+				case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
+					if (flags & RANDOMX_FLAG_SECURE) {
+						vm = new randomx::CompiledVmLargePageHardAesSecure();
+					}
+					else {
+						vm = new randomx::CompiledVmLargePageHardAes();
+					}
+					break;
+
+				default:
+					// All 16 combinations of the four masked bits are listed above.
+					UNREACHABLE;
+			}
+
+			if(cache != nullptr) {
+				vm->setCache(cache);
+				// Remember which key the attached cache was built with.
+				vm->cacheKey = cache->cacheKey;
+			}
+
+			if(dataset != nullptr)
+				vm->setDataset(dataset);
+
+			vm->allocate();
+		}
+		catch (std::exception &ex) {
+			// vm may still be nullptr here; deleting a null pointer is a no-op.
+			delete vm;
+			vm = nullptr;
+		}
+
+		return vm;
+	}
+
+	// Attaches `cache` to `machine`. Nothing is done when the machine already uses the
+	// same key and its memory pointer equals the cache's memory pointer.
+	void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
+		assert(machine != nullptr);
+		assert(cache != nullptr && cache->isInitialized());
+		const bool sameKey = machine->cacheKey == cache->cacheKey;
+		if (sameKey && machine->getMemory() == cache->memory) {
+			return;
+		}
+		machine->setCache(cache);
+		machine->cacheKey = cache->cacheKey;
+	}
+
+	// Attaches `dataset` to `machine` unconditionally (no change detection, unlike randomx_vm_set_cache).
+	void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) {
+		assert(machine != nullptr);
+		assert(dataset != nullptr);
+		machine->setDataset(dataset);
+	}
+
+	// Destroys a VM created by randomx_create_vm. The attached cache/dataset are not released here.
+	void randomx_destroy_vm(randomx_vm *machine) {
+		assert(machine != nullptr);
+		delete machine;
+	}
+
+	// One-shot hash of `input`: the VM's final result (RANDOMX_HASH_SIZE requested) is
+	// written to `output`.
+	// The caller's floating-point control state (MXCSR when the SSE intrinsics are
+	// available, the whole fenv otherwise) is captured on entry and put back on exit.
+	void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) {
+		assert(machine != nullptr);
+		assert(inputSize == 0 || input != nullptr);
+		assert(output != nullptr);
+
+		// Capture the caller's FP state before the VM touches the rounding mode.
+#ifdef USE_CSR_INTRINSICS
+		const unsigned int fpstate = _mm_getcsr();
+#else
+		fenv_t fpstate;
+		fegetenv(&fpstate);
+#endif
+
+		// Seed: 64-byte Blake2b digest of the input.
+		alignas(16) uint64_t tempHash[8];
+		int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
+		assert(blakeResult == 0);
+		machine->initScratchpad(&tempHash);
+		machine->resetRoundingMode();
+		// All programs but the last: run, then rehash the register file into the next seed.
+		for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
+			machine->run(&tempHash);
+			blakeResult = blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
+			assert(blakeResult == 0);
+		}
+		// Last program: its result is taken from the VM instead of being rehashed.
+		machine->run(&tempHash);
+		machine->getFinalResult(output, RANDOMX_HASH_SIZE);
+
+		// Put the caller's FP state back.
+#ifdef USE_CSR_INTRINSICS
+		_mm_setcsr(fpstate);
+#else
+		fesetenv(&fpstate);
+#endif
+	}
+
+	// First step of the pipelined hashing API: stores the Blake2b digest of `input` in
+	// machine->tempHash and initializes the scratchpad from it. No program is run here;
+	// the hash itself is produced by a following randomx_calculate_hash_next or
+	// randomx_calculate_hash_last call on the same machine.
+	void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
+		// Same argument checks as randomx_calculate_hash.
+		assert(machine != nullptr);
+		assert(inputSize == 0 || input != nullptr);
+
+		blake2b(machine->tempHash, sizeof(machine->tempHash), input, inputSize, nullptr, 0);
+		machine->initScratchpad(machine->tempHash);
+	}
+
+	void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output) {
+		machine->resetRoundingMode();
+		for (uint32_t chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
+			machine->run(machine->tempHash);
+			blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
+		}
+		machine->run(machine->tempHash);
+
+		// Finish current hash and fill the scratchpad for the next hash at the same time
+		blake2b(machine->tempHash, sizeof(machine->tempHash), nextInput, nextInputSize, nullptr, 0);
+		machine->hashAndFill(output, RANDOMX_HASH_SIZE, machine->tempHash);
+	}
+
+	void randomx_calculate_hash_last(randomx_vm* machine, void* output) {
+		machine->resetRoundingMode();
+		for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) {
+			machine->run(machine->tempHash);
+			blake2b(machine->tempHash, sizeof(machine->tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
+		}
+		machine->run(machine->tempHash);
+		machine->getFinalResult(output, RANDOMX_HASH_SIZE);
+	}
+
+	// Computes a RANDOMX_HASH_SIZE-byte Blake2b digest over `input` followed by the
+	// RANDOMX_HASH_SIZE bytes at `hash_in`, and writes it to `com_out`.
+	// The return values of the blake2b_* calls are not checked.
+	void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out) {
+		assert(inputSize == 0 || input != nullptr);
+		assert(hash_in != nullptr);
+		assert(com_out != nullptr);
+		blake2b_state state;
+		blake2b_init(&state, RANDOMX_HASH_SIZE);
+		blake2b_update(&state, input, inputSize);
+		blake2b_update(&state, hash_in, RANDOMX_HASH_SIZE);
+		blake2b_final(&state, com_out, RANDOMX_HASH_SIZE);
+	}
+}
--- a/crypto/randomx/randomx.h
+++ b/crypto/randomx/randomx.h
@ -0,0 +1,288 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef RANDOMX_H
+#define RANDOMX_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define RANDOMX_HASH_SIZE 32
+#define RANDOMX_DATASET_ITEM_SIZE 64
+
+#ifndef RANDOMX_EXPORT
+#define RANDOMX_EXPORT
+#endif
+
+/* Bit flags accepted by the randomx_alloc_* / randomx_create_vm functions. */
+typedef enum {
+  RANDOMX_FLAG_DEFAULT = 0,                /* no flag set */
+  RANDOMX_FLAG_LARGE_PAGES = 1 << 0,       /* 1 */
+  RANDOMX_FLAG_HARD_AES = 1 << 1,          /* 2 */
+  RANDOMX_FLAG_FULL_MEM = 1 << 2,          /* 4 */
+  RANDOMX_FLAG_JIT = 1 << 3,               /* 8 */
+  RANDOMX_FLAG_SECURE = 1 << 4,            /* 16 */
+  RANDOMX_FLAG_ARGON2_SSSE3 = 1 << 5,      /* 32 */
+  RANDOMX_FLAG_ARGON2_AVX2 = 1 << 6,       /* 64 */
+  /* 96: both Argon2 bits together */
+  RANDOMX_FLAG_ARGON2 = RANDOMX_FLAG_ARGON2_SSSE3 | RANDOMX_FLAG_ARGON2_AVX2
+} randomx_flags;
+
+typedef struct randomx_dataset randomx_dataset;
+typedef struct randomx_cache randomx_cache;
+typedef struct randomx_vm randomx_vm;
+
+
+#if defined(__cplusplus)
+
+#ifdef __cpp_constexpr
+#define CONSTEXPR constexpr
+#else
+#define CONSTEXPR
+#endif
+
+/* Bitwise OR of two flag sets. Kept as a single return statement so it stays valid as a C++11 constexpr function. */
+inline CONSTEXPR randomx_flags operator |(randomx_flags lhs, randomx_flags rhs) {
+	return static_cast<randomx_flags>(
+		static_cast<int>(lhs) | static_cast<int>(rhs));
+}
+/* Bitwise AND of two flag sets. Kept as a single return statement for the same reason. */
+inline CONSTEXPR randomx_flags operator &(randomx_flags lhs, randomx_flags rhs) {
+	return static_cast<randomx_flags>(
+		static_cast<int>(lhs) & static_cast<int>(rhs));
+}
+/* ORs rhs into lhs in place and returns a reference to lhs. */
+inline randomx_flags& operator |=(randomx_flags& lhs, randomx_flags rhs) {
+	lhs = lhs | rhs;
+	return lhs;
+}
+
+extern "C" {
+#endif
+
+/**
+ * @return The recommended flags to be used on the current machine.
+ *         Does not include:
+ *            RANDOMX_FLAG_LARGE_PAGES
+ *            RANDOMX_FLAG_FULL_MEM
+ *            RANDOMX_FLAG_SECURE
+ *         These flags must be added manually if desired.
+ *         On OpenBSD RANDOMX_FLAG_SECURE is enabled by default in JIT mode as W^X is enforced by the OS.
+ */
+RANDOMX_EXPORT randomx_flags randomx_get_flags(void);
+
+/**
+ * Creates a randomx_cache structure and allocates memory for RandomX Cache.
+ *
+ * @param flags is any combination of these 2 flags (each flag can be set or not set):
+ *        RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
+ *        RANDOMX_FLAG_JIT - create cache structure with JIT compilation support; this makes
+ *                           subsequent Dataset initialization faster
+ *        Optionally, one of these two flags may be selected:
+ *        RANDOMX_FLAG_ARGON2_SSSE3 - optimized Argon2 for CPUs with the SSSE3 instruction set
+ *                                   makes subsequent cache initialization faster
+ *        RANDOMX_FLAG_ARGON2_AVX2 - optimized Argon2 for CPUs with the AVX2 instruction set
+ *                                   makes subsequent cache initialization faster
+ *
+ * @return Pointer to an allocated randomx_cache structure.
+ *         Returns NULL if:
+ *         (1) memory allocation fails
+ *         (2) the RANDOMX_FLAG_JIT is set and JIT compilation is not supported on the current platform
+ *         (3) an invalid or unsupported RANDOMX_FLAG_ARGON2 value is set
+ */
+RANDOMX_EXPORT randomx_cache *randomx_alloc_cache(randomx_flags flags);
+
+/**
+ * Initializes the cache memory and SuperscalarHash using the provided key value.
+ * Does nothing if called again with the same key value.
+ *
+ * @param cache is a pointer to a previously allocated randomx_cache structure. Must not be NULL.
+ * @param key is a pointer to memory which contains the key value. Must not be NULL.
+ * @param keySize is the number of bytes of the key.
+*/
+RANDOMX_EXPORT void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize);
+
+/**
+ * Returns a pointer to the internal memory buffer of the cache structure. The size
+ * of the internal memory buffer is RANDOMX_ARGON_MEMORY KiB.
+ *
+ * @param cache is a pointer to a previously allocated randomx_cache structure. Must not be NULL.
+ *
+ * @return Pointer to the internal memory buffer of the cache structure.
+*/
+RANDOMX_EXPORT void *randomx_get_cache_memory(randomx_cache *cache);
+
+/**
+ * Releases all memory occupied by the randomx_cache structure.
+ *
+ * @param cache is a pointer to a previously allocated randomx_cache structure.
+*/
+RANDOMX_EXPORT void randomx_release_cache(randomx_cache* cache);
+
+/**
+ * Creates a randomx_dataset structure and allocates memory for RandomX Dataset.
+ *
+ * @param flags is the initialization flags. Only one flag is supported (can be set or not set):
+ *        RANDOMX_FLAG_LARGE_PAGES - allocate memory in large pages
+ *
+ * @return Pointer to an allocated randomx_dataset structure.
+ *         NULL is returned if memory allocation fails.
+ */
+RANDOMX_EXPORT randomx_dataset *randomx_alloc_dataset(randomx_flags flags);
+
+/**
+ * Gets the number of items contained in the dataset.
+ *
+ * @return the number of items contained in the dataset.
+*/
+RANDOMX_EXPORT unsigned long randomx_dataset_item_count(void);
+
+/**
+ * Initializes dataset items.
+ *
+ * Note: In order to use the Dataset, all items from 0 to (randomx_dataset_item_count() - 1) must be initialized.
+ * This may be done by several calls to this function using non-overlapping item sequences.
+ *
+ * @param dataset is a pointer to a previously allocated randomx_dataset structure. Must not be NULL.
+ * @param cache is a pointer to a previously allocated and initialized randomx_cache structure. Must not be NULL.
+ * @param startItem is the item number where initialization should start.
+ * @param itemCount is the number of items that should be initialized.
+*/
+RANDOMX_EXPORT void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount);
+
+/**
+ * Returns a pointer to the internal memory buffer of the dataset structure. The size
+ * of the internal memory buffer is randomx_dataset_item_count() * RANDOMX_DATASET_ITEM_SIZE.
+ *
+ * @param dataset is a pointer to a previously allocated randomx_dataset structure. Must not be NULL.
+ *
+ * @return Pointer to the internal memory buffer of the dataset structure.
+*/
+RANDOMX_EXPORT void *randomx_get_dataset_memory(randomx_dataset *dataset);
+
+/**
+ * Releases all memory occupied by the randomx_dataset structure.
+ *
+ * @param dataset is a pointer to a previously allocated randomx_dataset structure.
+*/
+RANDOMX_EXPORT void randomx_release_dataset(randomx_dataset *dataset);
+
+/**
+ * Creates and initializes a RandomX virtual machine.
+ *
+ * @param flags is any combination of these 5 flags (each flag can be set or not set):
+ *        RANDOMX_FLAG_LARGE_PAGES - allocate scratchpad memory in large pages
+ *        RANDOMX_FLAG_HARD_AES - virtual machine will use hardware accelerated AES
+ *        RANDOMX_FLAG_FULL_MEM - virtual machine will use the full dataset
+ *        RANDOMX_FLAG_JIT - virtual machine will use a JIT compiler
+ *        RANDOMX_FLAG_SECURE - when combined with RANDOMX_FLAG_JIT, the JIT pages are never
+ *                              writable and executable at the same time (W^X policy)
+ *        The numeric values of the first 4 flags are ordered so that a higher value will provide
+ *        faster hash calculation and a lower numeric value will provide higher portability.
+ *        Using RANDOMX_FLAG_DEFAULT (all flags not set) works on all platforms, but is the slowest.
+ * @param cache is a pointer to an initialized randomx_cache structure. Can be
+ *        NULL if RANDOMX_FLAG_FULL_MEM is set.
+ * @param dataset is a pointer to a randomx_dataset structure. Can be NULL
+ *        if RANDOMX_FLAG_FULL_MEM is not set.
+ *
+ * @return Pointer to an initialized randomx_vm structure.
+ *         Returns NULL if:
+ *         (1) Scratchpad memory allocation fails.
+ *         (2) The requested initialization flags are not supported on the current platform.
+ *         (3) cache parameter is NULL and RANDOMX_FLAG_FULL_MEM is not set
+ *         (4) dataset parameter is NULL and RANDOMX_FLAG_FULL_MEM is set
+*/
+RANDOMX_EXPORT randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset);
+
+/**
+ * Reinitializes a virtual machine with a new Cache. This function should be called anytime
+ * the Cache is reinitialized with a new key. Does nothing if called with a Cache containing
+ * the same key value as already set.
+ *
+ * @param machine is a pointer to a randomx_vm structure that was initialized
+ *        without RANDOMX_FLAG_FULL_MEM. Must not be NULL.
+ * @param cache is a pointer to an initialized randomx_cache structure. Must not be NULL.
+*/
+RANDOMX_EXPORT void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache);
+
+/**
+ * Reinitializes a virtual machine with a new Dataset.
+ *
+ * @param machine is a pointer to a randomx_vm structure that was initialized
+ *        with RANDOMX_FLAG_FULL_MEM. Must not be NULL.
+ * @param dataset is a pointer to an initialized randomx_dataset structure. Must not be NULL.
+*/
+RANDOMX_EXPORT void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset);
+
+/**
+ * Releases all memory occupied by the randomx_vm structure.
+ *
+ * @param machine is a pointer to a previously created randomx_vm structure.
+*/
+RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
+
+/**
+ * Calculates a RandomX hash value.
+ *
+ * @param machine is a pointer to a randomx_vm structure. Must not be NULL.
+ * @param input is a pointer to memory to be hashed. Must not be NULL.
+ * @param inputSize is the number of bytes to be hashed.
+ * @param output is a pointer to memory where the hash will be stored. Must not
+ *        be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
+*/
+RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
+
+/**
+ * Set of functions used to calculate multiple RandomX hashes more efficiently.
+ * randomx_calculate_hash_first will begin a hash calculation.
+ * randomx_calculate_hash_next  will output the hash value of the previous input
+ *                              and begin the calculation of the next hash.
+ * randomx_calculate_hash_last  will output the hash value of the previous input.
+ *
+ * WARNING: These functions may alter the floating point rounding mode of the calling thread.
+ *
+ * @param machine is a pointer to a randomx_vm structure. Must not be NULL.
+ * @param input is a pointer to memory to be hashed. Must not be NULL.
+ * @param inputSize is the number of bytes to be hashed.
+ * @param nextInput is a pointer to memory to be hashed for the next hash. Must not be NULL.
+ * @param nextInputSize is the number of bytes to be hashed for the next hash.
+ * @param output is a pointer to memory where the hash will be stored. Must not
+ *        be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
+*/
+RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize);
+RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);
+RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output);
+
+/**
+ * Calculate a RandomX commitment from a RandomX hash and its input.
+ *
+ * @param input is a pointer to memory that was hashed. May be NULL only if inputSize is 0.
+ * @param inputSize is the number of bytes in the input.
+ * @param hash_in is the output from randomx_calculate_hash* (RANDOMX_HASH_SIZE bytes).
+ *        Must not be NULL.
+ * @param com_out is a pointer to memory where the commitment will be stored. Must not
+ *        be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
+*/
+RANDOMX_EXPORT void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
--- a/crypto/randomx/reciprocal.c
+++ b/crypto/randomx/reciprocal.c
@ -0,0 +1,72 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <assert.h>
+#include "reciprocal.h"
+
+/*
+	Calculates rcp = 2**x / divisor for highest integer x such that rcp < 2**64.
+	divisor must not be 0 or a power of 2 (both are checked with assert).
+	For a power of 2 the exact quotient would be 2**64, which does not fit in
+	64 bits; without the check the shifted quotient wraps and 0 is returned.
+
+	Equivalent x86 assembly (divisor in rcx):
+
+	mov edx, 1
+	mov r8, rcx
+	xor eax, eax
+	bsr rcx, rcx
+	shl rdx, cl
+	div r8
+	ret
+
+*/
+uint64_t randomx_reciprocal(uint32_t divisor) {
+
+	assert(divisor != 0);
+	/* a power of 2 has exactly one bit set, so clearing the lowest set bit leaves 0 */
+	assert((divisor & (divisor - 1)) != 0);
+
+	/* quotient and remainder of 2**63 / divisor */
+	const uint64_t p2exp63 = 1ULL << 63;
+	const uint64_t q = p2exp63 / divisor;
+	const uint64_t r = p2exp63 % divisor;
+
+	/* shift = number of significant bits in divisor (1..32) */
+#ifdef __GNUC__
+	const uint32_t shift = 64 - __builtin_clzll(divisor);
+#else
+	uint32_t shift = 32;
+	for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
+		--shift;
+#endif
+
+	/* floor(2**(63 + shift) / divisor); r < 2**32 and shift <= 32, so r << shift cannot overflow */
+	return (q << shift) + ((r << shift) / divisor);
+}
+
+#if !RANDOMX_HAVE_FAST_RECIPROCAL
+
+/*
+	Portable fallback, compiled only when RANDOMX_HAVE_FAST_RECIPROCAL is 0
+	(reciprocal.h sets it to 0 on targets other than x86-64). It simply
+	forwards to randomx_reciprocal, so the same divisor restrictions apply.
+	NOTE(review): when the macro is 1 the definition is presumably supplied
+	elsewhere (not visible in this file) - confirm.
+*/
+uint64_t randomx_reciprocal_fast(uint32_t divisor) {
+	return randomx_reciprocal(divisor);
+}
+
+#endif
--- a/crypto/randomx/reciprocal.h
+++ b/crypto/randomx/reciprocal.h
@ -0,0 +1,48 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <stdint.h>
+
+/*
+	RANDOMX_HAVE_FAST_RECIPROCAL is 1 when compiling for x86-64
+	(_M_X64 or __x86_64__ is defined) and 0 otherwise. reciprocal.c only
+	defines randomx_reciprocal_fast itself when the value is 0.
+*/
+#if defined(_M_X64) || defined(__x86_64__)
+#define RANDOMX_HAVE_FAST_RECIPROCAL 1
+#else
+#define RANDOMX_HAVE_FAST_RECIPROCAL 0
+#endif
+
+/* C linkage so the functions can be called from both C and C++ sources */
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Both take a 32-bit divisor and return a 64-bit reciprocal. */
+uint64_t randomx_reciprocal(uint32_t);
+uint64_t randomx_reciprocal_fast(uint32_t);
+
+#if defined(__cplusplus)
+}
+#endif
--- a/crypto/randomx/soft_aes.cpp
+++ b/crypto/randomx/soft_aes.cpp
@ -0,0 +1,378 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "soft_aes.h"
+
+// AES forward substitution box: sbox[b] is the SubBytes image of byte b
+// (the values match the S-box published in FIPS 197). 16-byte aligned.
+// soft_aesenc/soft_aesdec in this file do not read it; they use the combined
+// lutEnc*/lutDec* tables instead.
+// NOTE(review): no extern declaration for sbox is visible in soft_aes.h; if
+// there is none, this const array has internal linkage and is unused here.
+alignas(16) const uint8_t sbox[256] = {
+	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
+};
+
+// Encryption round table 0, indexed by the lowest byte (word & 0xff) of an
+// input word in soft_aesenc. With s the AES S-box value of the index, each
+// entry packs, from most- to least-significant byte: 3*s, s, s, 2*s
+// (multiplication in GF(2^8)); e.g. index 0 -> s = 0x63 -> 0xa56363c6.
+alignas(16) const uint32_t lutEnc0[256] = {
+	0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
+	0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, 0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec,
+	0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
+	0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b,
+	0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, 0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83,
+	0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
+	0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f,
+	0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df, 0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea,
+	0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
+	0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413,
+	0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, 0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6,
+	0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
+	0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511,
+	0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, 0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b,
+	0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
+	0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf,
+	0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, 0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e,
+	0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
+	0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b,
+	0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, 0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad,
+	0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
+	0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2,
+	0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, 0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949,
+	0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
+	0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697,
+	0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, 0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f,
+	0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
+	0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27,
+	0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, 0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433,
+	0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
+	0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0,
+	0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, 0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c,
+};
+
+// Encryption round table 1, indexed by byte 1 ((word >> 8) & 0xff) in
+// soft_aesenc. Each entry is the matching lutEnc0 entry rotated left by
+// 8 bits (e.g. 0xa56363c6 -> 0x6363c6a5).
+alignas(16) const uint32_t lutEnc1[256] = {
+	0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, 0x6f6fdeb1, 0xc5c59154,
+	0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, 0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a,
+	0xcaca8f45, 0x82821f9d, 0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b,
+	0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7, 0x7272e496, 0xc0c09b5b,
+	0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, 0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f,
+	0x3434685c, 0xa5a551f4, 0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f,
+	0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, 0x05050a0f, 0x9a9a2fb5,
+	0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, 0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f,
+	0x0909121b, 0x83831d9e, 0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb,
+	0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, 0x2f2f5e71, 0x84841397,
+	0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, 0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed,
+	0x6a6ad4be, 0xcbcb8d46, 0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a,
+	0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, 0x33336655, 0x85851194,
+	0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, 0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3,
+	0x5151a2f3, 0xa3a35dfe, 0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104,
+	0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, 0xf3f3fd0e, 0xd2d2bf6d,
+	0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, 0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39,
+	0xc4c49357, 0xa7a755f2, 0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695,
+	0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, 0x90903bab, 0x88880b83,
+	0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, 0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76,
+	0xe0e0db3b, 0x32326456, 0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4,
+	0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, 0xe4e4d337, 0x7979f28b,
+	0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, 0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0,
+	0x6c6cd8b4, 0x5656acfa, 0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018,
+	0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, 0xb4b473c7, 0xc6c69751,
+	0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, 0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85,
+	0x7070e090, 0x3e3e7c42, 0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12,
+	0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, 0x1d1d3a27, 0x9e9e27b9,
+	0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, 0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7,
+	0x9b9b2db6, 0x1e1e3c22, 0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a,
+	0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, 0x424284c6, 0x6868d0b8,
+	0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, 0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a,
+};
+
+// Encryption round table 2, indexed by byte 2 ((word >> 16) & 0xff) in
+// soft_aesenc. Each entry is the matching lutEnc0 entry rotated left by
+// 16 bits (e.g. 0xa56363c6 -> 0x63c6a563).
+alignas(16) const uint32_t lutEnc2[256] = {
+	0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, 0x6fdeb16f, 0xc59154c5,
+	0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, 0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76,
+	0xca8f45ca, 0x821f9d82, 0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0,
+	0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, 0x72e49672, 0xc09b5bc0,
+	0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, 0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc,
+	0x34685c34, 0xa551f4a5, 0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15,
+	0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, 0x050a0f05, 0x9a2fb59a,
+	0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, 0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75,
+	0x09121b09, 0x831d9e83, 0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0,
+	0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, 0x2f5e712f, 0x84139784,
+	0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, 0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b,
+	0x6ad4be6a, 0xcb8d46cb, 0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf,
+	0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, 0x33665533, 0x85119485,
+	0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, 0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8,
+	0x51a2f351, 0xa35dfea3, 0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5,
+	0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, 0xf3fd0ef3, 0xd2bf6dd2,
+	0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, 0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917,
+	0xc49357c4, 0xa755f2a7, 0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573,
+	0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, 0x903bab90, 0x880b8388,
+	0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, 0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db,
+	0xe0db3be0, 0x32645632, 0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c,
+	0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, 0xe4d337e4, 0x79f28b79,
+	0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, 0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9,
+	0x6cd8b46c, 0x56acfa56, 0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808,
+	0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, 0xb473c7b4, 0xc69751c6,
+	0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, 0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a,
+	0x70e09070, 0x3e7c423e, 0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e,
+	0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, 0x1d3a271d, 0x9e27b99e,
+	0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, 0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794,
+	0x9b2db69b, 0x1e3c221e, 0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf,
+	0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, 0x4284c642, 0x68d0b868,
+	0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, 0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16,
+};
+
+// Encryption round table 3, indexed by the top byte (word >> 24) in
+// soft_aesenc. Each entry is the matching lutEnc0 entry rotated left by
+// 24 bits (e.g. 0xa56363c6 -> 0xc6a56363).
+alignas(16) const uint32_t lutEnc3[256] = {
+	0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, 0xdeb16f6f, 0x9154c5c5,
+	0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, 0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676,
+	0x8f45caca, 0x1f9d8282, 0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0,
+	0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, 0xe4967272, 0x9b5bc0c0,
+	0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, 0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc,
+	0x685c3434, 0x51f4a5a5, 0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515,
+	0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, 0x0a0f0505, 0x2fb59a9a,
+	0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, 0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575,
+	0x121b0909, 0x1d9e8383, 0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0,
+	0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, 0x5e712f2f, 0x13978484,
+	0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, 0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b,
+	0xd4be6a6a, 0x8d46cbcb, 0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf,
+	0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, 0x66553333, 0x11948585,
+	0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, 0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8,
+	0xa2f35151, 0x5dfea3a3, 0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5,
+	0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, 0xfd0ef3f3, 0xbf6dd2d2,
+	0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, 0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717,
+	0x9357c4c4, 0x55f2a7a7, 0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373,
+	0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, 0x3bab9090, 0x0b838888,
+	0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, 0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb,
+	0xdb3be0e0, 0x64563232, 0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c,
+	0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, 0xd337e4e4, 0xf28b7979,
+	0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, 0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9,
+	0xd8b46c6c, 0xacfa5656, 0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808,
+	0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, 0x73c7b4b4, 0x9751c6c6,
+	0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, 0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a,
+	0xe0907070, 0x7c423e3e, 0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e,
+	0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, 0x3a271d1d, 0x27b99e9e,
+	0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, 0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494,
+	0x2db69b9b, 0x3c221e1e, 0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf,
+	0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, 0x84c64242, 0xd0b86868,
+	0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, 0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616,
+};
+
+// Decryption round table 0, indexed by the lowest byte (word & 0xff) of an
+// input word in soft_aesdec. With s the AES inverse S-box value of the
+// index, each entry packs, from most- to least-significant byte:
+// 0x0b*s, 0x0d*s, 0x09*s, 0x0e*s (multiplication in GF(2^8));
+// e.g. index 0 -> s = 0x52 -> 0x50a7f451.
+alignas(16) const uint32_t lutDec0[256] = {
+	0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
+	0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, 0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5,
+	0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
+	0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e,
+	0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, 0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d,
+	0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
+	0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566,
+	0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, 0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed,
+	0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
+	0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd,
+	0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, 0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060,
+	0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
+	0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c,
+	0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, 0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624,
+	0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
+	0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14,
+	0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, 0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b,
+	0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
+	0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177,
+	0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, 0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322,
+	0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
+	0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382,
+	0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, 0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb,
+	0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
+	0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235,
+	0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, 0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117,
+	0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
+	0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d,
+	0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, 0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a,
+	0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
+	0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff,
+	0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, 0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0,
+};
+
+// Decryption round table 1, indexed by byte 1 ((word >> 8) & 0xff) in
+// soft_aesdec. Each entry is the matching lutDec0 entry rotated left by
+// 8 bits (e.g. 0x50a7f451 -> 0xa7f45150).
+alignas(16) const uint32_t lutDec1[256] = {
+	0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, 0x58faacab, 0x03e34b93,
+	0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, 0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f,
+	0x5ab1de49, 0x1bba2567, 0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6,
+	0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, 0x69e04929, 0xc8c98e44,
+	0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, 0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4,
+	0x4adf6318, 0x311ae582, 0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994,
+	0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, 0x8f1fe357, 0xab55662a,
+	0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, 0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c,
+	0x1ccf8a2b, 0xb479a792, 0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a,
+	0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, 0x9f715e06, 0x106ebd51,
+	0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, 0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff,
+	0xfb981924, 0xe9bdd697, 0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db,
+	0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, 0x70111eac, 0x725a6c4e,
+	0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, 0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a,
+	0x670a0cb1, 0xe757930f, 0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16,
+	0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, 0xa8b62db9, 0xa91e14c8,
+	0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, 0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34,
+	0x29438b76, 0xc623cbdc, 0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420,
+	0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3, 0x52860dec, 0xe3c177d0,
+	0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, 0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef,
+	0x4e4987c7, 0xd138d9c1, 0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4,
+	0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, 0xf7392e5e, 0xafc382f5,
+	0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, 0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b,
+	0x7826cd09, 0x18596ef4, 0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6,
+	0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, 0x94a5c630, 0x66a235c0,
+	0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, 0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f,
+	0xd64d768d, 0xb0ef434d, 0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f,
+	0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, 0x5610e933, 0x47d66d13,
+	0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, 0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c,
+	0xdfd29c59, 0x73f2553f, 0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886,
+	0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, 0x493c288b, 0x950dff41,
+	0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, 0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042,
+};
+
+// Decryption round table 2, indexed by byte 2 ((word >> 16) & 0xff) in
+// soft_aesdec. Each entry is the matching lutDec0 entry rotated left by
+// 16 bits (e.g. 0x50a7f451 -> 0xf45150a7).
+alignas(16) const uint32_t lutDec2[256] = {
+	0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, 0xfaacab58, 0xe34b9303,
+	0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, 0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3,
+	0xb1de495a, 0xba25671b, 0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9,
+	0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, 0xe0492969, 0xc98e44c8,
+	0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, 0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a,
+	0xdf63184a, 0x1ae58231, 0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b,
+	0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202, 0x1fe3578f, 0x55662aab,
+	0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, 0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82,
+	0xcf8a2b1c, 0x79a792b4, 0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe,
+	0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, 0x715e069f, 0x6ebd5110,
+	0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, 0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15,
+	0x981924fb, 0xbdd697e9, 0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee,
+	0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, 0x111eac70, 0x5a6c4e72,
+	0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, 0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e,
+	0x0a0cb167, 0x57930fe7, 0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a,
+	0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, 0xb62db9a8, 0x1e14c8a9,
+	0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, 0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e,
+	0x438b7629, 0x23cbdcc6, 0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011,
+	0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, 0x860dec52, 0xc177d0e3,
+	0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, 0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90,
+	0x4987c74e, 0x38d9c1d1, 0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf,
+	0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, 0x392e5ef7, 0xc382f5af,
+	0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, 0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb,
+	0x26cd0978, 0x596ef418, 0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8,
+	0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, 0xa5c63094, 0xa235c066,
+	0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, 0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6,
+	0x4d768dd6, 0xef434db0, 0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51,
+	0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, 0x10e93356, 0xd66d1347,
+	0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, 0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1,
+	0xd29c59df, 0xf2553f73, 0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db,
+	0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, 0x3c288b49, 0x0dff4195,
+	0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, 0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257,
+};
+
+// Decryption round table 3, indexed by the top byte (word >> 24) in
+// soft_aesdec. Each entry is the matching lutDec0 entry rotated left by
+// 24 bits (e.g. 0x50a7f451 -> 0x5150a7f4).
+alignas(16) const uint32_t lutDec3[256] = {
+	0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, 0xacab58fa, 0x4b9303e3,
+	0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362,
+	0xde495ab1, 0x25671bba, 0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3,
+	0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, 0x492969e0, 0x8e44c8c9,
+	0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, 0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace,
+	0x63184adf, 0xe582311a, 0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08,
+	0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, 0xe3578f1f, 0x662aab55,
+	0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, 0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216,
+	0x8a2b1ccf, 0xa792b479, 0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6,
+	0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, 0x5e069f71, 0xbd51106e,
+	0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, 0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550,
+	0x1924fb98, 0xd697e9bd, 0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8,
+	0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, 0x1eac7011, 0x6c4e725a,
+	0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, 0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36,
+	0x0cb1670a, 0x930fe757, 0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12,
+	0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, 0x2db9a8b6, 0x14c8a91e,
+	0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, 0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb,
+	0x8b762943, 0xcbdcc623, 0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6,
+	0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, 0x0dec5286, 0x77d0e3c1,
+	0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, 0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033,
+	0x87c74e49, 0xd9c1d138, 0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad,
+	0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, 0x2e5ef739, 0x82f5afc3,
+	0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, 0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b,
+	0xcd097826, 0x6ef41859, 0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815,
+	0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, 0xc63094a5, 0x35c066a2,
+	0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, 0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691,
+	0x768dd64d, 0x434db0ef, 0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165,
+	0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, 0xe9335610, 0x6d1347d6,
+	0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, 0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147,
+	0x9c59dfd2, 0x553f73f2, 0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44,
+	0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, 0x288b493c, 0xff41950d,
+	0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, 0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8,
+};
+
+// Byte-selection indices for the encryption direction: 4 rows of 32 entries.
+// Every fourth entry is a source byte offset in 0..31; the three entries
+// after it are 255. Row r selects byte r of each 32-bit word, taking the
+// word r positions further on (wrapping within each 16-byte half), i.e. the
+// same byte/word pairing soft_aesenc uses for lutEnc<r>.
+// Not referenced in this file.
+// NOTE(review): presumably consumed as shuffle masks by a vectorized AES
+// path elsewhere, with 255 marking "no source byte" -- confirm at the user.
+alignas(16) const uint8_t lutEncIndex[4][32] = {
+	{ 0, 255, 255, 255, 4, 255, 255, 255, 8, 255, 255, 255, 12, 255, 255, 255, 16, 255, 255, 255, 20, 255, 255, 255, 24, 255, 255, 255, 28, 255, 255, 255 },
+	{ 5, 255, 255, 255, 9, 255, 255, 255, 13, 255, 255, 255, 1, 255, 255, 255, 21, 255, 255, 255, 25, 255, 255, 255, 29, 255, 255, 255, 17, 255, 255, 255 },
+	{ 10, 255, 255, 255, 14, 255, 255, 255, 2, 255, 255, 255, 6, 255, 255, 255, 26, 255, 255, 255, 30, 255, 255, 255, 18, 255, 255, 255, 22, 255, 255, 255 },
+	{ 15, 255, 255, 255, 3, 255, 255, 255, 7, 255, 255, 255, 11, 255, 255, 255, 31, 255, 255, 255, 19, 255, 255, 255, 23, 255, 255, 255, 27, 255, 255, 255 }
+};
+
+// Byte-selection indices for the decryption direction: same layout as
+// lutEncIndex (4 rows of 32, a byte offset in every fourth entry, 255
+// elsewhere). Rows 0 and 2 are identical to lutEncIndex; rows 1 and 3 take
+// the word from the opposite rotation direction (row 1: 13, 1, 5, 9 ...;
+// row 3: 7, 11, 15, 3 ...) within each 16-byte half.
+// Not referenced in this file.
+// NOTE(review): presumably consumed as shuffle masks by a vectorized AES
+// path elsewhere, with 255 marking "no source byte" -- confirm at the user.
+alignas(16) const uint8_t lutDecIndex[4][32] = {
+	{ 0, 255, 255, 255, 4, 255, 255, 255, 8, 255, 255, 255, 12, 255, 255, 255, 16, 255, 255, 255, 20, 255, 255, 255, 24, 255, 255, 255, 28, 255, 255, 255 },
+	{ 13, 255, 255, 255, 1, 255, 255, 255, 5, 255, 255, 255, 9, 255, 255, 255, 29, 255, 255, 255, 17, 255, 255, 255, 21, 255, 255, 255, 25, 255, 255, 255 },
+	{ 10, 255, 255, 255, 14, 255, 255, 255, 2, 255, 255, 255, 6, 255, 255, 255, 26, 255, 255, 255, 30, 255, 255, 255, 18, 255, 255, 255, 22, 255, 255, 255 },
+	{ 7, 255, 255, 255, 11, 255, 255, 255, 15, 255, 255, 255, 3, 255, 255, 255, 23, 255, 255, 255, 27, 255, 255, 255, 31, 255, 255, 255, 19, 255, 255, 255 }
+};
+
+// Table-driven software AES encryption round.
+//
+// Each of the four 32-bit output words is the XOR of one lookup in each of
+// lutEnc0..lutEnc3, where the four index bytes (byte 0..3) are taken from
+// four different input words. The assembled 128-bit value is then XORed
+// with `key` and returned.
+//
+// NOTE(review): by name and structure this stands in for the x86 AESENC
+// instruction on targets without hardware AES -- confirm against callers.
+rx_vec_i128 soft_aesenc(rx_vec_i128 in, rx_vec_i128 key) {
+	// The four 32-bit words of the input, named after the accessor used.
+	const uint32_t inW = rx_vec_i128_w(in);
+	const uint32_t inZ = rx_vec_i128_z(in);
+	const uint32_t inY = rx_vec_i128_y(in);
+	const uint32_t inX = rx_vec_i128_x(in);
+
+	// First argument of rx_set_int_vec_i128: low byte comes from W.
+	uint32_t outA = lutEnc0[inW & 0xff];
+	outA ^= lutEnc1[(inX >> 8) & 0xff];
+	outA ^= lutEnc2[(inY >> 16) & 0xff];
+	outA ^= lutEnc3[inZ >> 24];
+
+	// Second argument: low byte comes from Z.
+	uint32_t outB = lutEnc0[inZ & 0xff];
+	outB ^= lutEnc1[(inW >> 8) & 0xff];
+	outB ^= lutEnc2[(inX >> 16) & 0xff];
+	outB ^= lutEnc3[inY >> 24];
+
+	// Third argument: low byte comes from Y.
+	uint32_t outC = lutEnc0[inY & 0xff];
+	outC ^= lutEnc1[(inZ >> 8) & 0xff];
+	outC ^= lutEnc2[(inW >> 16) & 0xff];
+	outC ^= lutEnc3[inX >> 24];
+
+	// Fourth argument: low byte comes from X.
+	uint32_t outD = lutEnc0[inX & 0xff];
+	outD ^= lutEnc1[(inY >> 8) & 0xff];
+	outD ^= lutEnc2[(inZ >> 16) & 0xff];
+	outD ^= lutEnc3[inW >> 24];
+
+	rx_vec_i128 mixed = rx_set_int_vec_i128(outA, outB, outC, outD);
+
+	// Final step: combine with the round key.
+	return rx_xor_vec_i128(mixed, key);
+}
+
+// Table-driven software AES decryption round.
+//
+// Each of the four 32-bit output words is the XOR of one lookup in each of
+// lutDec0..lutDec3, where the four index bytes (byte 0..3) are taken from
+// four different input words; the word order is the reverse rotation of the
+// one used by soft_aesenc. The assembled 128-bit value is then XORed with
+// `key` and returned.
+//
+// NOTE(review): by name and structure this stands in for the x86 AESDEC
+// instruction on targets without hardware AES -- confirm against callers.
+rx_vec_i128 soft_aesdec(rx_vec_i128 in, rx_vec_i128 key) {
+	// The four 32-bit words of the input, named after the accessor used.
+	const uint32_t inW = rx_vec_i128_w(in);
+	const uint32_t inZ = rx_vec_i128_z(in);
+	const uint32_t inY = rx_vec_i128_y(in);
+	const uint32_t inX = rx_vec_i128_x(in);
+
+	// First argument of rx_set_int_vec_i128: low byte comes from W.
+	uint32_t outA = lutDec0[inW & 0xff];
+	outA ^= lutDec1[(inZ >> 8) & 0xff];
+	outA ^= lutDec2[(inY >> 16) & 0xff];
+	outA ^= lutDec3[inX >> 24];
+
+	// Second argument: low byte comes from Z.
+	uint32_t outB = lutDec0[inZ & 0xff];
+	outB ^= lutDec1[(inY >> 8) & 0xff];
+	outB ^= lutDec2[(inX >> 16) & 0xff];
+	outB ^= lutDec3[inW >> 24];
+
+	// Third argument: low byte comes from Y.
+	uint32_t outC = lutDec0[inY & 0xff];
+	outC ^= lutDec1[(inX >> 8) & 0xff];
+	outC ^= lutDec2[(inW >> 16) & 0xff];
+	outC ^= lutDec3[inZ >> 24];
+
+	// Fourth argument: low byte comes from X.
+	uint32_t outD = lutDec0[inX & 0xff];
+	outD ^= lutDec1[(inW >> 8) & 0xff];
+	outD ^= lutDec2[(inZ >> 16) & 0xff];
+	outD ^= lutDec3[inY >> 24];
+
+	rx_vec_i128 mixed = rx_set_int_vec_i128(outA, outB, outC, outD);
+
+	// Final step: combine with the round key.
+	return rx_xor_vec_i128(mixed, key);
+}
--- a/crypto/randomx/soft_aes.h
+++ b/crypto/randomx/soft_aes.h
@ -0,0 +1,58 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <stdint.h>
+#include "intrin_portable.h"
+
+//256-entry lookup tables, one per byte position of a 32-bit word.
+//lutEnc0..3 are read by soft_aesenc and lutDec0..3 by soft_aesdec.
+//NOTE(review): presumably defined in soft_aes.cpp - confirm.
+extern const uint32_t lutEnc0[256];
+extern const uint32_t lutEnc1[256];
+extern const uint32_t lutEnc2[256];
+extern const uint32_t lutEnc3[256];
+extern const uint32_t lutDec0[256];
+extern const uint32_t lutDec1[256];
+extern const uint32_t lutDec2[256];
+extern const uint32_t lutDec3[256];
+
+//Byte-index tables (4 rows of 32 entries each).
+//NOTE(review): not used by the two functions declared below; their consumer is not visible here.
+extern const uint8_t lutEncIndex[4][32];
+extern const uint8_t lutDecIndex[4][32];
+
+//Software (table lookup) replacement for rx_aesenc_vec_i128; the result is XORed with 'key'.
+rx_vec_i128 soft_aesenc(rx_vec_i128 in, rx_vec_i128 key);
+
+//Software (table lookup) replacement for rx_aesdec_vec_i128; the result is XORed with 'key'.
+rx_vec_i128 soft_aesdec(rx_vec_i128 in, rx_vec_i128 key);
+
+//Compile-time dispatch: aesenc<true> uses the table-based soft_aesenc,
+//aesenc<false> forwards to rx_aesenc_vec_i128.
+template<bool soft>
+inline rx_vec_i128 aesenc(rx_vec_i128 in, rx_vec_i128 key) {
+	if (soft) {
+		return soft_aesenc(in, key);
+	}
+	return rx_aesenc_vec_i128(in, key);
+}
+
+//Compile-time dispatch: aesdec<true> uses the table-based soft_aesdec,
+//aesdec<false> forwards to rx_aesdec_vec_i128.
+template<bool soft>
+inline rx_vec_i128 aesdec(rx_vec_i128 in, rx_vec_i128 key) {
+	if (soft) {
+		return soft_aesdec(in, key);
+	}
+	return rx_aesdec_vec_i128(in, key);
+}
--- a/crypto/randomx/superscalar.cpp
+++ b/crypto/randomx/superscalar.cpp
@ -0,0 +1,903 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "configuration.h"
+#include "program.hpp"
+#include "blake2/endian.h"
+#include <iostream>
+#include <vector>
+#include <algorithm>
+#include <stdexcept>
+#include <iomanip>
+#include "superscalar.hpp"
+#include "intrin_portable.h"
+#include "reciprocal.h"
+#include "common.hpp"
+
+namespace randomx {
+
+	//Returns true for the four multiplication instruction types
+	//(IMUL_R, IMULH_R, ISMULH_R, IMUL_RCP) and false for everything else.
+	static bool isMultiplication(SuperscalarInstructionType type) {
+		switch (type) {
+		case SuperscalarInstructionType::IMUL_R:
+		case SuperscalarInstructionType::IMULH_R:
+		case SuperscalarInstructionType::ISMULH_R:
+		case SuperscalarInstructionType::IMUL_RCP:
+			return true;
+		default:
+			return false;
+		}
+	}
+
+	//A uOP (micro-op) is described solely by the set of execution ports it may be issued to.
+	//The set is a bit mask: one bit per port, combined masks for uOPs that accept several ports.
+	namespace ExecutionPort {
+		typedef int type;
+		constexpr type Null = 0;       //no port: the uOP slot is empty
+		constexpr type P0 = 1 << 0;
+		constexpr type P1 = 1 << 1;
+		constexpr type P5 = 1 << 2;
+		constexpr type P01 = P0 | P1;
+		constexpr type P05 = P0 | P5;
+		constexpr type P015 = P01 | P5;
+	}
+
+	//A macro-operation as produced by the x86 decoder.
+	//One macro-op usually corresponds to one x86 instruction, although two instructions are
+	//sometimes fused into a single macro-op. A macro-op is made of at most 2 uOPs; a macro-op
+	//without any uOP is "eliminated" and a macro-op with exactly one uOP is "simple".
+	class MacroOp {
+	public:
+		//macro-op without uOPs and with zero latency
+		MacroOp(const char* name, int size)
+			: MacroOp(name, size, 0, ExecutionPort::Null, ExecutionPort::Null) {}
+		//macro-op with a single uOP
+		MacroOp(const char* name, int size, int latency, ExecutionPort::type uop)
+			: MacroOp(name, size, latency, uop, ExecutionPort::Null) {}
+		//macro-op with two uOPs
+		MacroOp(const char* name, int size, int latency, ExecutionPort::type uop1, ExecutionPort::type uop2)
+			: name_(name), size_(size), latency_(latency), uop1_(uop1), uop2_(uop2) {}
+		//copy of 'parent' with the dependency flag set explicitly
+		MacroOp(const MacroOp& parent, bool dependent)
+			: MacroOp(parent.name_, parent.size_, parent.latency_, parent.uop1_, parent.uop2_) {
+			dependent_ = dependent;
+		}
+
+		const char* getName() const { return name_; }
+		int getSize() const { return size_; }        //encoded size in bytes
+		int getLatency() const { return latency_; }
+		ExecutionPort::type getUop1() const { return uop1_; }
+		ExecutionPort::type getUop2() const { return uop2_; }
+		//true if there is no second uOP
+		bool isSimple() const { return uop2_ == ExecutionPort::Null; }
+		//true if there is no uOP at all (nothing to schedule)
+		bool isEliminated() const { return uop1_ == ExecutionPort::Null; }
+		//true if this macro-op has to wait for the preceding macro-op of the same instruction
+		bool isDependent() const { return dependent_; }
+
+		static const MacroOp Add_rr;
+		static const MacroOp Add_ri;
+		static const MacroOp Lea_sib;
+		static const MacroOp Sub_rr;
+		static const MacroOp Imul_rr;
+		static const MacroOp Imul_r;
+		static const MacroOp Mul_r;
+		static const MacroOp Mov_rr;
+		static const MacroOp Mov_ri64;
+		static const MacroOp Xor_rr;
+		static const MacroOp Xor_ri;
+		static const MacroOp Ror_rcl;
+		static const MacroOp Ror_ri;
+		static const MacroOp TestJz_fused;
+		static const MacroOp Xor_self;
+		static const MacroOp Cmp_ri;
+		static const MacroOp Setcc_r;
+
+	private:
+		const char* name_;
+		int size_;
+		int latency_;
+		ExecutionPort::type uop1_;
+		ExecutionPort::type uop2_;
+		bool dependent_ = false;
+	};
+
+	//Definitions of the predefined macro-ops.
+	//Constructor arguments: (name, size in bytes, latency, port mask of uOP 1 [, port mask of uOP 2]).
+	//The two-argument form (name, size) creates a macro-op without uOPs, i.e. isEliminated() is true.
+
+	//Size: 3 bytes
+	const MacroOp MacroOp::Add_rr = MacroOp("add r,r", 3, 1, ExecutionPort::P015);
+	const MacroOp MacroOp::Sub_rr = MacroOp("sub r,r", 3, 1, ExecutionPort::P015);
+	const MacroOp MacroOp::Xor_rr = MacroOp("xor r,r", 3, 1, ExecutionPort::P015);
+	const MacroOp MacroOp::Imul_r = MacroOp("imul r", 3, 4, ExecutionPort::P1, ExecutionPort::P5);
+	const MacroOp MacroOp::Mul_r = MacroOp("mul r", 3, 4, ExecutionPort::P1, ExecutionPort::P5);
+	const MacroOp MacroOp::Mov_rr = MacroOp("mov r,r", 3);
+
+	//Size: 4 bytes
+	const MacroOp MacroOp::Lea_sib = MacroOp("lea r,r+r*s", 4, 1, ExecutionPort::P01);
+	const MacroOp MacroOp::Imul_rr = MacroOp("imul r,r", 4, 3, ExecutionPort::P1);
+	const MacroOp MacroOp::Ror_ri = MacroOp("ror r,i", 4, 1, ExecutionPort::P05);
+
+	//Size: 7 bytes (can be optionally padded with nop to 8 or 9 bytes)
+	const MacroOp MacroOp::Add_ri = MacroOp("add r,i", 7, 1, ExecutionPort::P015);
+	const MacroOp MacroOp::Xor_ri = MacroOp("xor r,i", 7, 1, ExecutionPort::P015);
+
+	//Size: 10 bytes
+	const MacroOp MacroOp::Mov_ri64 = MacroOp("mov rax,i64", 10, 1, ExecutionPort::P015);
+
+	//Unused:
+	const MacroOp MacroOp::Ror_rcl = MacroOp("ror r,cl", 3, 1, ExecutionPort::P0, ExecutionPort::P5);
+	const MacroOp MacroOp::Xor_self = MacroOp("xor rcx,rcx", 3);
+	const MacroOp MacroOp::Cmp_ri = MacroOp("cmp r,i", 7, 1, ExecutionPort::P015);
+	const MacroOp MacroOp::Setcc_r = MacroOp("setcc cl", 3, 1, ExecutionPort::P05);
+	const MacroOp MacroOp::TestJz_fused = MacroOp("testjz r,i", 13, 0, ExecutionPort::P5);
+
+	//Macro-op sequences of the instructions that decode to more than one macro-op.
+	//The arrays copy the constants above, so they must stay below them in this file.
+	const MacroOp IMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Mul_r, MacroOp::Mov_rr };
+	const MacroOp ISMULH_R_ops_array[] = { MacroOp::Mov_rr, MacroOp::Imul_r, MacroOp::Mov_rr };
+	//the multiplication is marked as dependent on the preceding "mov rax,i64"
+	const MacroOp IMUL_RCP_ops_array[] = { MacroOp::Mov_ri64, MacroOp(MacroOp::Imul_rr, true) };
+
+	//Static description of one superscalar instruction type: its name, its type, the sequence of
+	//macro-ops it decodes to, its total latency and the indices of the macro-ops at which
+	//the source register, the destination register and the result are handled.
+	//Instances can be created only through the static constants declared below.
+	class SuperscalarInstructionInfo {
+	public:
+		const char* getName() const {
+			return name_;
+		}
+		//number of macro-ops of this instruction (0 for NOP)
+		int getSize() const {
+			return static_cast<int>(ops_.size());
+		}
+		//true if the instruction consists of a single macro-op
+		bool isSimple() const {
+			return getSize() == 1;
+		}
+		//sum of the latencies of all macro-ops
+		int getLatency() const {
+			return latency_;
+		}
+		//'index' must be in the range [0, getSize()); it is not checked
+		const MacroOp& getOp(int index) const {
+			return ops_[index];
+		}
+		SuperscalarInstructionType getType() const {
+			return type_;
+		}
+		//index of the macro-op that produces the result
+		int getResultOp() const {
+			return resultOp_;
+		}
+		//index of the macro-op at which the destination register is selected
+		int getDstOp() const {
+			return dstOp_;
+		}
+		//index of the macro-op at which the source register is selected; -1 = no source register
+		int getSrcOp() const {
+			return srcOp_;
+		}
+		static const SuperscalarInstructionInfo ISUB_R;
+		static const SuperscalarInstructionInfo IXOR_R;
+		static const SuperscalarInstructionInfo IADD_RS;
+		static const SuperscalarInstructionInfo IMUL_R;
+		static const SuperscalarInstructionInfo IROR_C;
+		static const SuperscalarInstructionInfo IADD_C7;
+		static const SuperscalarInstructionInfo IXOR_C7;
+		static const SuperscalarInstructionInfo IADD_C8;
+		static const SuperscalarInstructionInfo IXOR_C8;
+		static const SuperscalarInstructionInfo IADD_C9;
+		static const SuperscalarInstructionInfo IXOR_C9;
+		static const SuperscalarInstructionInfo IMULH_R;
+		static const SuperscalarInstructionInfo ISMULH_R;
+		static const SuperscalarInstructionInfo IMUL_RCP;
+		static const SuperscalarInstructionInfo NOP;
+	private:
+		const char* name_;
+		SuperscalarInstructionType type_;
+		std::vector<MacroOp> ops_;
+		int latency_;
+		int resultOp_ = 0;
+		int dstOp_ = 0;
+		//defaults to "no source operand" so that the name-only constructor (NOP)
+		//does not leave the member uninitialized
+		int srcOp_ = -1;
+
+		//placeholder without macro-ops (used for NOP)
+		SuperscalarInstructionInfo(const char* name)
+			: name_(name), type_(SuperscalarInstructionType::INVALID), latency_(0) {}
+		//instruction consisting of a single macro-op
+		SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp& op, int srcOp)
+			: name_(name), type_(type), latency_(op.getLatency()), srcOp_(srcOp) {
+			ops_.push_back(MacroOp(op));
+		}
+		//instruction consisting of several macro-ops; the latency is the sum of the macro-op latencies
+		template <size_t N>
+		SuperscalarInstructionInfo(const char* name, SuperscalarInstructionType type, const MacroOp(&arr)[N], int resultOp, int dstOp, int srcOp)
+			: name_(name), type_(type), latency_(0), resultOp_(resultOp), dstOp_(dstOp), srcOp_(srcOp) {
+			static_assert(N > 1, "Invalid array size");
+			ops_.reserve(N);
+			for (unsigned i = 0; i < N; ++i) {
+				ops_.push_back(MacroOp(arr[i]));
+				latency_ += ops_.back().getLatency();
+			}
+		}
+	};
+
+	//Single macro-op instructions. Constructor arguments: (name, type, macro-op, srcOp).
+	//srcOp is the macro-op index at which a source register is selected; -1 means the instruction
+	//has no source register (its second operand is a constant).
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISUB_R = SuperscalarInstructionInfo("ISUB_R", SuperscalarInstructionType::ISUB_R, MacroOp::Sub_rr, 0);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_R = SuperscalarInstructionInfo("IXOR_R", SuperscalarInstructionType::IXOR_R, MacroOp::Xor_rr, 0);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_RS = SuperscalarInstructionInfo("IADD_RS", SuperscalarInstructionType::IADD_RS, MacroOp::Lea_sib, 0);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_R = SuperscalarInstructionInfo("IMUL_R", SuperscalarInstructionType::IMUL_R, MacroOp::Imul_rr, 0);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IROR_C = SuperscalarInstructionInfo("IROR_C", SuperscalarInstructionType::IROR_C, MacroOp::Ror_ri, -1);
+
+	//The C7/C8/C9 variants share the same 7-byte macro-op; they differ only in their instruction type.
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C7 = SuperscalarInstructionInfo("IADD_C7", SuperscalarInstructionType::IADD_C7, MacroOp::Add_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C7 = SuperscalarInstructionInfo("IXOR_C7", SuperscalarInstructionType::IXOR_C7, MacroOp::Xor_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C8 = SuperscalarInstructionInfo("IADD_C8", SuperscalarInstructionType::IADD_C8, MacroOp::Add_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C8 = SuperscalarInstructionInfo("IXOR_C8", SuperscalarInstructionType::IXOR_C8, MacroOp::Xor_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IADD_C9 = SuperscalarInstructionInfo("IADD_C9", SuperscalarInstructionType::IADD_C9, MacroOp::Add_ri, -1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IXOR_C9 = SuperscalarInstructionInfo("IXOR_C9", SuperscalarInstructionType::IXOR_C9, MacroOp::Xor_ri, -1);
+
+	//Multi macro-op instructions. Constructor arguments: (name, type, macro-op array, resultOp, dstOp, srcOp).
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMULH_R = SuperscalarInstructionInfo("IMULH_R", SuperscalarInstructionType::IMULH_R, IMULH_R_ops_array, 1, 0, 1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::ISMULH_R = SuperscalarInstructionInfo("ISMULH_R", SuperscalarInstructionType::ISMULH_R, ISMULH_R_ops_array, 1, 0, 1);
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::IMUL_RCP = SuperscalarInstructionInfo("IMUL_RCP", SuperscalarInstructionType::IMUL_RCP, IMUL_RCP_ops_array, 1, 1, -1);
+	
+	//Placeholder without macro-ops; its type is SuperscalarInstructionType::INVALID.
+	const SuperscalarInstructionInfo SuperscalarInstructionInfo::NOP = SuperscalarInstructionInfo("NOP");
+
+	//These are some of the ways to split a 16-byte window into 3 or 4 x86 instructions.
+	//RandomX uses instructions with a native size of 3 (sub, xor, mul, mov), 4 (lea, mul), 7 (xor, add immediate) or 10 bytes (mov 64-bit immediate).
+	//Slots with sizes of 8 or 9 bytes need to be padded with a nop instruction.
+	//Each array lists the slot sizes in bytes; every layout adds up to 16.
+	const int buffer0[] = { 4, 8, 4 };
+	const int buffer1[] = { 7, 3, 3, 3 };
+	const int buffer2[] = { 3, 7, 3, 3 };
+	const int buffer3[] = { 4, 9, 3 };
+	const int buffer4[] = { 4, 4, 4, 4 };
+	const int buffer5[] = { 3, 3, 10 };
+
+	//One decode (fetch) configuration: a named split of the 16-byte decode window into slots.
+	//The available configurations are private static constants; callers start from
+	//DecoderBuffer::Default and obtain real configurations through fetchNext().
+	class DecoderBuffer {
+	public:
+		//Empty placeholder (index -1, no slots) that serves only as the starting point for fetchNext().
+		static const DecoderBuffer Default;
+		//name:  human-readable layout, used for tracing
+		//index: identifier of the configuration
+		//arr:   slot sizes in bytes; the array is referenced, not copied
+		template <size_t N>
+		DecoderBuffer(const char* name, int index, const int(&arr)[N])
+			: name_(name), index_(index), counts_(arr), opsCount_(N) {}
+		//slot sizes in bytes (getSize() entries); null for Default
+		const int* getCounts() const {
+			return counts_;
+		}
+		//number of slots
+		int getSize() const {
+			return opsCount_;
+		}
+		int getIndex() const {
+			return index_;
+		}
+		const char* getName() const {
+			return name_;
+		}
+		//Selects the configuration for the next decode cycle.
+		//instrType: type of the instruction currently being issued
+		//cycle:     current decode cycle
+		//mulCount:  number of multiplications generated so far
+		//gen:       random generator; a byte is consumed only when a random choice is needed
+		const DecoderBuffer* fetchNext(SuperscalarInstructionType instrType, int cycle, int mulCount, Blake2Generator& gen) const {
+			//If the current RandomX instruction is "IMULH", the next fetch configuration must be 3-3-10
+			//because the full 128-bit multiplication instruction is 3 bytes long and decodes to 2 uOPs on Intel CPUs.
+			//Intel CPUs can decode at most 4 uOPs per cycle, so this requires a 2-1-1 configuration for a total of 3 macro ops.
+			if (instrType == SuperscalarInstructionType::IMULH_R || instrType == SuperscalarInstructionType::ISMULH_R)
+				return &decodeBuffer3310;
+
+			//To make sure that the multiplication port is saturated, a 4-4-4-4 configuration is generated if the number of multiplications
+			//is lower than the number of cycles.
+			if (mulCount < cycle + 1)
+				return &decodeBuffer4444;
+
+			//If the current RandomX instruction is "IMUL_RCP", the next buffer must begin with a 4-byte slot for multiplication.
+			if(instrType == SuperscalarInstructionType::IMUL_RCP)
+				return (gen.getByte() & 1) ? &decodeBuffer484 : &decodeBuffer493;
+
+			//Default: select a random fetch configuration.
+			return fetchNextDefault(gen);
+		}
+	private:
+		const char* name_;
+		int index_;
+		const int* counts_;
+		int opsCount_;
+		//Initializes every member so that the static Default object holds no indeterminate values.
+		DecoderBuffer() : name_(""), index_(-1), counts_(nullptr), opsCount_(0) {}
+		static const DecoderBuffer decodeBuffer484;
+		static const DecoderBuffer decodeBuffer7333;
+		static const DecoderBuffer decodeBuffer3733;
+		static const DecoderBuffer decodeBuffer493;
+		static const DecoderBuffer decodeBuffer4444;
+		static const DecoderBuffer decodeBuffer3310;
+		//configurations eligible for the random default selection
+		static const DecoderBuffer* decodeBuffers[4];
+		//picks one of the 4 entries of decodeBuffers using the low 2 bits of a generator byte
+		const DecoderBuffer* fetchNextDefault(Blake2Generator& gen) const {
+			return decodeBuffers[gen.getByte() & 3];
+		}
+	};
+
+	//The six decode configurations. Constructor arguments: (name, index, slot size array).
+	//SuperscalarInstruction::createForSlot compares the index with 4 to recognize the 4-4-4-4 layout.
+	const DecoderBuffer DecoderBuffer::decodeBuffer484 = DecoderBuffer("4,8,4", 0, buffer0);
+	const DecoderBuffer DecoderBuffer::decodeBuffer7333 = DecoderBuffer("7,3,3,3", 1, buffer1);
+	const DecoderBuffer DecoderBuffer::decodeBuffer3733 = DecoderBuffer("3,7,3,3", 2, buffer2);
+	const DecoderBuffer DecoderBuffer::decodeBuffer493 = DecoderBuffer("4,9,3", 3, buffer3);
+	const DecoderBuffer DecoderBuffer::decodeBuffer4444 = DecoderBuffer("4,4,4,4", 4, buffer4);
+	const DecoderBuffer DecoderBuffer::decodeBuffer3310 = DecoderBuffer("3,3,10", 5, buffer5);
+
+	//Configurations that fetchNextDefault chooses from. The array must keep exactly 4 entries
+	//because the selection masks a random byte with 3. The 4-4-4-4 and 3-3-10 layouts are
+	//returned only by the explicit rules in fetchNext.
+	const DecoderBuffer* DecoderBuffer::decodeBuffers[4] = {
+			&DecoderBuffer::decodeBuffer484,
+			&DecoderBuffer::decodeBuffer7333,
+			&DecoderBuffer::decodeBuffer3733,
+			&DecoderBuffer::decodeBuffer493,
+	};
+
+	//Placeholder created by the private default constructor (index -1).
+	const DecoderBuffer DecoderBuffer::Default = DecoderBuffer();
+
+	//Candidate instructions for each slot size; SuperscalarInstruction::createForSlot indexes these
+	//tables with the low bit(s) of a random byte, so their lengths (2 or 4) must not change.
+	//slot_3L is used instead of slot_3 when the 3-byte slot is the last one of the decode buffer.
+	const SuperscalarInstructionInfo* slot_3[]  = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R };
+	const SuperscalarInstructionInfo* slot_3L[] = { &SuperscalarInstructionInfo::ISUB_R, &SuperscalarInstructionInfo::IXOR_R, &SuperscalarInstructionInfo::IMULH_R, &SuperscalarInstructionInfo::ISMULH_R };
+	const SuperscalarInstructionInfo* slot_4[]  = { &SuperscalarInstructionInfo::IROR_C, &SuperscalarInstructionInfo::IADD_RS };
+	const SuperscalarInstructionInfo* slot_7[]  = { &SuperscalarInstructionInfo::IXOR_C7, &SuperscalarInstructionInfo::IADD_C7 };
+	const SuperscalarInstructionInfo* slot_8[] = { &SuperscalarInstructionInfo::IXOR_C8, &SuperscalarInstructionInfo::IADD_C8 };
+	const SuperscalarInstructionInfo* slot_9[] = { &SuperscalarInstructionInfo::IXOR_C9, &SuperscalarInstructionInfo::IADD_C9 };
+	//a 10-byte slot always holds IMUL_RCP
+	const SuperscalarInstructionInfo* slot_10   = &SuperscalarInstructionInfo::IMUL_RCP;
+
+	//Picks one register out of 'availableRegisters' and stores it in 'reg'.
+	//Returns false (leaving 'reg' untouched) if the list is empty.
+	//The generator is consulted only if there is an actual choice, i.e. more than one candidate.
+	static bool selectRegister(std::vector<int>& availableRegisters, Blake2Generator& gen, int& reg) {
+		const auto candidateCount = availableRegisters.size();
+		if (candidateCount == 0)
+			return false;
+
+		int chosen = 0;
+		if (candidateCount > 1)
+			chosen = gen.getUInt32() % candidateCount;
+
+		reg = availableRegisters[chosen];
+		return true;
+	}
+
+	//Scheduling state kept for every register while a program is being generated.
+	class RegisterInfo {
+	public:
+		//cycle at which the register value becomes available
+		int latency = 0;
+		//group and group parameter of the last instruction applied to the register;
+		//SuperscalarInstruction::selectDestination compares them with the candidate instruction
+		SuperscalarInstructionType lastOpGroup = SuperscalarInstructionType::INVALID;
+		int lastOpPar = -1;
+		//NOTE(review): not referenced in the visible part of this file
+		int value = 0;
+	};
+
+	//"SuperscalarInstruction" consists of one or more macro-ops
+	class SuperscalarInstruction {
+	public:
+		//Translate to a RandomX instruction format.
+		//If no source register was selected (src_ < 0), the destination register is stored as the source.
+		void toInstr(Instruction& instr) const {
+			instr.opcode = (int)getType();
+			instr.dst = dst_;
+			instr.src = src_ >= 0 ? src_ : dst_;
+			instr.setMod(mod_);
+			instr.setImm32(imm32_);
+		}
+
+		//Creates a random instruction whose first macro-op fits into a slot of 'slotSize' bytes.
+		//fetchType: index of the current decode buffer (4 = the 4-4-4-4 configuration)
+		//isLast:    true if the slot is the last one of the decode buffer
+		//isFirst:   currently unused
+		void createForSlot(Blake2Generator& gen, int slotSize, int fetchType, bool isLast, bool isFirst) {
+			switch (slotSize)
+			{
+			case 3:
+				//if this is the last slot, we can also select "IMULH" instructions
+				if (isLast) {
+					create(slot_3L[gen.getByte() & 3], gen);
+				}
+				else {
+					create(slot_3[gen.getByte() & 1], gen);
+				}
+				break;
+			case 4:
+				//if this is the 4-4-4-4 buffer, issue multiplications as the first 3 instructions
+				if (fetchType == 4 && !isLast) {
+					create(&SuperscalarInstructionInfo::IMUL_R, gen);
+				}
+				else {
+					create(slot_4[gen.getByte() & 1], gen);
+				}
+				break;
+			case 7:
+				create(slot_7[gen.getByte() & 1], gen);
+				break;
+			case 8:
+				create(slot_8[gen.getByte() & 1], gen);
+				break;
+			case 9:
+				create(slot_9[gen.getByte() & 1], gen);
+				break;
+			case 10:
+				create(slot_10, gen);
+				break;
+			default:
+				UNREACHABLE;
+			}
+		}
+
+		//Initializes this instruction from 'info' and draws its random parameters (mod, imm32).
+		//Also sets the operation group and group parameter that selectDestination compares with the
+		//last operation applied to a register. For register-source instructions the group parameter
+		//is the source register and is filled in later by selectSource.
+		void create(const SuperscalarInstructionInfo* info, Blake2Generator& gen) {
+			info_ = info;
+			reset();
+			switch (info->getType())
+			{
+			case SuperscalarInstructionType::ISUB_R: {
+				mod_ = 0;
+				imm32_ = 0;
+				//ISUB_R shares its group with IADD_RS
+				opGroup_ = SuperscalarInstructionType::IADD_RS;
+				groupParIsSource_ = true;
+			} break;
+
+			case SuperscalarInstructionType::IXOR_R: {
+				mod_ = 0;
+				imm32_ = 0;
+				opGroup_ = SuperscalarInstructionType::IXOR_R;
+				groupParIsSource_ = true;
+			} break;
+
+			case SuperscalarInstructionType::IADD_RS: {
+				mod_ = gen.getByte();
+				imm32_ = 0;
+				opGroup_ = SuperscalarInstructionType::IADD_RS;
+				groupParIsSource_ = true;
+			} break;
+
+			case SuperscalarInstructionType::IMUL_R: {
+				mod_ = 0;
+				imm32_ = 0;
+				opGroup_ = SuperscalarInstructionType::IMUL_R;
+				groupParIsSource_ = true;
+			} break;
+
+			case SuperscalarInstructionType::IROR_C: {
+				mod_ = 0;
+				//the rotation count must be non-zero
+				do {
+					imm32_ = gen.getByte() & 63;
+				} while (imm32_ == 0);
+				opGroup_ = SuperscalarInstructionType::IROR_C;
+				opGroupPar_ = -1;
+			} break;
+
+			case SuperscalarInstructionType::IADD_C7:
+			case SuperscalarInstructionType::IADD_C8:
+			case SuperscalarInstructionType::IADD_C9: {
+				mod_ = 0;
+				imm32_ = gen.getUInt32();
+				//all IADD_Cx variants belong to one group
+				opGroup_ = SuperscalarInstructionType::IADD_C7;
+				opGroupPar_ = -1;
+			} break;
+
+			case SuperscalarInstructionType::IXOR_C7:
+			case SuperscalarInstructionType::IXOR_C8:
+			case SuperscalarInstructionType::IXOR_C9: {
+				mod_ = 0;
+				imm32_ = gen.getUInt32();
+				//all IXOR_Cx variants belong to one group
+				opGroup_ = SuperscalarInstructionType::IXOR_C7;
+				opGroupPar_ = -1;
+			} break;
+
+			case SuperscalarInstructionType::IMULH_R: {
+				//source and destination may be the same register
+				canReuse_ = true;
+				mod_ = 0;
+				imm32_ = 0;
+				opGroup_ = SuperscalarInstructionType::IMULH_R;
+				opGroupPar_ = gen.getUInt32();
+			} break;
+
+			case SuperscalarInstructionType::ISMULH_R: {
+				//source and destination may be the same register
+				canReuse_ = true;
+				mod_ = 0;
+				imm32_ = 0;
+				opGroup_ = SuperscalarInstructionType::ISMULH_R;
+				opGroupPar_ = gen.getUInt32();
+			} break;
+
+			case SuperscalarInstructionType::IMUL_RCP: {
+				mod_ = 0;
+				//draw again until the value is neither zero nor a power of 2
+				do {
+					imm32_ = gen.getUInt32();
+				} while (isZeroOrPowerOf2(imm32_));
+				opGroup_ = SuperscalarInstructionType::IMUL_RCP;
+				opGroupPar_ = -1;
+			} break;
+
+			default:
+				break;
+			}
+		}
+
+		//Selects a random destination register among those that satisfy the conditions listed below.
+		//Returns false if no register qualifies.
+		bool selectDestination(int cycle, bool allowChainedMul, RegisterInfo (&registers)[8], Blake2Generator& gen) {
+			std::vector<int> availableRegisters;
+			//Conditions for the destination register:
+			// * value must be ready at the required cycle
+			// * cannot be the same as the source register unless the instruction allows it
+			//   - this avoids optimizable instructions such as "xor r, r" or "sub r, r"
+			// * register cannot be multiplied twice in a row unless allowChainedMul is true
+			//   - this avoids accumulation of trailing zeroes in registers due to excessive multiplication
+			//   - allowChainedMul is set to true if an attempt to find source/destination registers failed (this is quite rare, but prevents a catastrophic failure of the generator)
+			// * either the last instruction applied to the register or its source must be different than this instruction
+			//   - this avoids optimizable instruction sequences such as "xor r1, r2; xor r1, r2" or "ror r, C1; ror r, C2" or "add r, C1; add r, C2"
+			// * register r5 cannot be the destination of the IADD_RS instruction (limitation of the x86 lea instruction)
+			for (int i = 0; i < 8; ++i) {
+				const RegisterInfo& candidate = registers[i];
+				const bool ready = candidate.latency <= cycle;
+				const bool notSource = canReuse_ || i != src_;
+				const bool noChainedMul = allowChainedMul || opGroup_ != SuperscalarInstructionType::IMUL_R || candidate.lastOpGroup != SuperscalarInstructionType::IMUL_R;
+				const bool notRepeated = candidate.lastOpGroup != opGroup_ || candidate.lastOpPar != opGroupPar_;
+				const bool encodable = info_->getType() != SuperscalarInstructionType::IADD_RS || i != RegisterNeedsDisplacement;
+				if (ready && notSource && noChainedMul && notRepeated && encodable)
+					availableRegisters.push_back(i);
+			}
+			return selectRegister(availableRegisters, gen, dst_);
+		}
+
+		//Selects a random source register among those that are ready at 'cycle'.
+		//Returns false if no register is ready.
+		bool selectSource(int cycle, RegisterInfo(&registers)[8], Blake2Generator& gen) {
+			std::vector<int> availableRegisters;
+			//all registers that are ready at the cycle
+			for (unsigned i = 0; i < 8; ++i) {
+				if (registers[i].latency <= cycle)
+					availableRegisters.push_back(i);
+			}
+			//if there are only 2 available registers for IADD_RS and one of them is r5, select it as the source because it cannot be the destination
+			if (availableRegisters.size() == 2 && info_->getType() == SuperscalarInstructionType::IADD_RS) {
+				if (availableRegisters[0] == RegisterNeedsDisplacement || availableRegisters[1] == RegisterNeedsDisplacement) {
+					opGroupPar_ = src_ = RegisterNeedsDisplacement;
+					return true;
+				}
+			}
+			if (selectRegister(availableRegisters, gen, src_)) {
+				if (groupParIsSource_)
+					opGroupPar_ = src_;
+				return true;
+			}
+			return false;
+		}
+
+		SuperscalarInstructionType getType() const {
+			return info_->getType();
+		}
+		//selected source register or -1
+		int getSource() const {
+			return src_;
+		}
+		//selected destination register or -1
+		int getDestination() const {
+			return dst_;
+		}
+		SuperscalarInstructionType getGroup() const {
+			return opGroup_;
+		}
+		int getGroupPar() const {
+			return opGroupPar_;
+		}
+
+		const SuperscalarInstructionInfo& getInfo() const {
+			return *info_;
+		}
+
+		//Placeholder instruction that refers to SuperscalarInstructionInfo::NOP.
+		static const SuperscalarInstruction Null;
+
+	private:
+		const SuperscalarInstructionInfo* info_;
+		int src_ = -1;
+		int dst_ = -1;
+		//The following four members are assigned by create()/selectSource(). They carry defaults so
+		//that copying the Null placeholder never reads indeterminate values.
+		int mod_ = 0;
+		uint32_t imm32_ = 0;
+		SuperscalarInstructionType opGroup_ = SuperscalarInstructionType::INVALID;
+		int opGroupPar_ = -1;
+		//true if the destination may be the same register as the source
+		bool canReuse_ = false;
+		//true if the group parameter is the source register
+		bool groupParIsSource_ = false;
+
+		//clears the register selection and the flags before a new instruction is created
+		void reset() {
+			src_ = dst_ = -1;
+			canReuse_ = groupParIsSource_ = false;
+		}
+
+		SuperscalarInstruction(const SuperscalarInstructionInfo* info) : info_(info) {
+		}
+	};
+
+	//Placeholder instruction bound to the NOP info; generateSuperscalar starts from it and
+	//assigns it again when a decode buffer is aborted.
+	const SuperscalarInstruction SuperscalarInstruction::Null = SuperscalarInstruction(&SuperscalarInstructionInfo::NOP);
+
+	//number of cycles tracked by the port occupancy map (first dimension of portBusy)
+	constexpr int CYCLE_MAP_SIZE = RANDOMX_SUPERSCALAR_LATENCY + 4;
+	//how many cycles the generator waits for a suitable source/destination register
+	constexpr int LOOK_FORWARD_CYCLES = 4;
+	//maximum number of instructions that may be discarded because no register was available
+	constexpr int MAX_THROWAWAY_COUNT = 256;
+
+	//Finds the first cycle >= 'cycle' in which one of the ports accepted by 'uop' is free.
+	//Returns that cycle or -1 if there is none within CYCLE_MAP_SIZE.
+	//If 'commit' is true, the chosen port is also marked as busy in 'portBusy'.
+	template<bool commit>
+	static int scheduleUop(ExecutionPort::type uop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle) {
+		//The scheduling here is done optimistically by checking port availability in order P5 -> P0 -> P1 to not overload
+		//port P1 (multiplication) by instructions that can go to any port.
+		struct PortChoice {
+			ExecutionPort::type mask; //bit of the port in the uOP port mask
+			int column;               //column of the port in portBusy
+			const char* traceText;
+		};
+		const PortChoice preference[3] = {
+			{ ExecutionPort::P5, 2, "; P5 at cycle " },
+			{ ExecutionPort::P0, 0, "; P0 at cycle " },
+			{ ExecutionPort::P1, 1, "; P1 at cycle " },
+		};
+
+		while (cycle < CYCLE_MAP_SIZE) {
+			for (const PortChoice& port : preference) {
+				if ((uop & port.mask) == 0 || portBusy[cycle][port.column])
+					continue;
+				if (commit) {
+					if (trace) std::cout << port.traceText << cycle << std::endl;
+					portBusy[cycle][port.column] = uop;
+				}
+				return cycle;
+			}
+			++cycle;
+		}
+		return -1;
+	}
+
+	//Finds the first cycle >= 'cycle' in which all uOPs of the macro-op can be executed.
+	//Returns that cycle or -1 if there is none within CYCLE_MAP_SIZE.
+	//If 'commit' is true, the ports used are also marked as busy in 'portBusy'.
+	//depCycle is the earliest cycle allowed for a macro-op that depends on the previous one.
+	template<bool commit>
+	static int scheduleMop(const MacroOp& mop, ExecutionPort::type(&portBusy)[CYCLE_MAP_SIZE][3], int cycle, int depCycle) {
+		//a dependent macro-op cannot start before its predecessor has finished
+		//(this handles the explicit dependency chain in IMUL_RCP)
+		const int startCycle = mop.isDependent() ? std::max(cycle, depCycle) : cycle;
+
+		//eliminated moves occupy no execution unit
+		if (mop.isEliminated()) {
+			if (commit && trace)
+				std::cout << "; (eliminated)" << std::endl;
+			return startCycle;
+		}
+
+		//a single uOP is scheduled directly
+		if (mop.isSimple())
+			return scheduleUop<commit>(mop.getUop1(), portBusy, startCycle);
+
+		//macro-ops with 2 uOPs are scheduled conservatively: both uOPs must execute in the same cycle
+		for (int candidate = startCycle; candidate < CYCLE_MAP_SIZE; ++candidate) {
+			const int firstUopCycle = scheduleUop<false>(mop.getUop1(), portBusy, candidate);
+			const int secondUopCycle = scheduleUop<false>(mop.getUop2(), portBusy, candidate);
+
+			if (firstUopCycle < 0 || firstUopCycle != secondUopCycle)
+				continue;
+
+			if (commit) {
+				scheduleUop<true>(mop.getUop1(), portBusy, firstUopCycle);
+				scheduleUop<true>(mop.getUop2(), portBusy, secondUopCycle);
+			}
+			return firstUopCycle;
+		}
+
+		return -1;
+	}
+
+	void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen) {
+
+		ExecutionPort::type portBusy[CYCLE_MAP_SIZE][3];
+		memset(portBusy, 0, sizeof(portBusy));
+		RegisterInfo registers[8];
+
+		const DecoderBuffer* decodeBuffer = &DecoderBuffer::Default;
+		SuperscalarInstruction currentInstruction = SuperscalarInstruction::Null;
+		int macroOpIndex = 0;
+		int codeSize = 0;
+		int macroOpCount = 0;
+		int cycle = 0;
+		int depCycle = 0;
+		int retireCycle = 0;
+		bool portsSaturated = false;
+		int programSize = 0;
+		int mulCount = 0;
+		int decodeCycle;
+		int throwAwayCount = 0;
+
+		//decode instructions for RANDOMX_SUPERSCALAR_LATENCY cycles or until an execution port is saturated.
+		//Each decode cycle decodes 16 bytes of x86 code.
+		//Since a decode cycle produces on average 3.45 macro-ops and there are only 3 ALU ports, execution ports are always
+		//saturated first. The cycle limit is present only to guarantee loop termination.
+		//Program size is limited to SuperscalarMaxSize instructions.
+		for (decodeCycle = 0; decodeCycle < RANDOMX_SUPERSCALAR_LATENCY && !portsSaturated && programSize < SuperscalarMaxSize; ++decodeCycle) {
+
+			//select a decode configuration
+			decodeBuffer = decodeBuffer->fetchNext(currentInstruction.getType(), decodeCycle, mulCount, gen);
+			if (trace) std::cout << "; ------------- fetch cycle " << cycle << " (" << decodeBuffer->getName() << ")" << std::endl;
+
+			int bufferIndex = 0;
+			
+			//fill all instruction slots in the current decode buffer
+			while (bufferIndex < decodeBuffer->getSize()) {
+				int topCycle = cycle;
+
+				//if we have issued all macro-ops for the current RandomX instruction, create a new instruction
+				if (macroOpIndex >= currentInstruction.getInfo().getSize()) {
+					if (portsSaturated || programSize >= SuperscalarMaxSize)
+						break;
+					//select an instruction so that the first macro-op fits into the current slot
+					currentInstruction.createForSlot(gen, decodeBuffer->getCounts()[bufferIndex], decodeBuffer->getIndex(), decodeBuffer->getSize() == bufferIndex + 1, bufferIndex == 0);
+					macroOpIndex = 0;
+					if (trace) std::cout << "; " << currentInstruction.getInfo().getName() << std::endl;
+				}
+				const MacroOp& mop = currentInstruction.getInfo().getOp(macroOpIndex);
+				if (trace) std::cout << mop.getName() << " ";
+
+				//calculate the earliest cycle when this macro-op (all of its uOPs) can be scheduled for execution
+				int scheduleCycle = scheduleMop<false>(mop, portBusy, cycle, depCycle);
+				if (scheduleCycle < 0) {
+					if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << cycle << ")" << std::endl;
+					//__debugbreak();
+					portsSaturated = true;
+					break;
+				}
+
+				//find a source register (if applicable) that will be ready when this instruction executes
+				if (macroOpIndex == currentInstruction.getInfo().getSrcOp()) {
+					int forward;
+					//if no suitable operand is ready, look up to LOOK_FORWARD_CYCLES forward
+					for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectSource(scheduleCycle, registers, gen); ++forward) {
+						if (trace) std::cout << "; src STALL at cycle " << cycle << std::endl;
+						++scheduleCycle;
+						++cycle;
+					}
+					//if no register was found, throw the instruction away and try another one
+					if (forward == LOOK_FORWARD_CYCLES) {
+						if (throwAwayCount < MAX_THROWAWAY_COUNT) {
+							throwAwayCount++;
+							macroOpIndex = currentInstruction.getInfo().getSize();
+							if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
+							//cycle = topCycle;
+							continue;
+						}
+						//abort this decode buffer
+						if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - source registers not available for operation " << currentInstruction.getInfo().getName() << std::endl;
+						currentInstruction = SuperscalarInstruction::Null;
+						break;
+					}
+					if (trace) std::cout << "; src = r" << currentInstruction.getSource() << std::endl;
+				}
+				//find a destination register that will be ready when this instruction executes
+				if (macroOpIndex == currentInstruction.getInfo().getDstOp()) {
+					int forward;
+					for (forward = 0; forward < LOOK_FORWARD_CYCLES && !currentInstruction.selectDestination(scheduleCycle, throwAwayCount > 0, registers, gen); ++forward) {
+						if (trace) std::cout << "; dst STALL at cycle " << cycle << std::endl;
+						++scheduleCycle;
+						++cycle;
+					}
+					if (forward == LOOK_FORWARD_CYCLES) { //throw instruction away
+						if (throwAwayCount < MAX_THROWAWAY_COUNT) {
+							throwAwayCount++;
+							macroOpIndex = currentInstruction.getInfo().getSize();
+							if (trace) std::cout << "; THROW away " << currentInstruction.getInfo().getName() << std::endl;
+							//cycle = topCycle;
+							continue;
+						}
+						//abort this decode buffer
+						if (trace) std::cout << "Aborting at cycle " << cycle << " with decode buffer " << decodeBuffer->getName() << " - destination registers not available" << std::endl;
+						currentInstruction = SuperscalarInstruction::Null;
+						break;
+					}
+					if (trace) std::cout << "; dst = r" << currentInstruction.getDestination() << std::endl;
+				}
+				throwAwayCount = 0;
+
+				//recalculate when the instruction can be scheduled for execution based on operand availability
+				scheduleCycle = scheduleMop<true>(mop, portBusy, scheduleCycle, scheduleCycle);
+
+				if (scheduleCycle < 0) {
+					if (trace) std::cout << "Unable to map operation '" << mop.getName() << "' to execution port (cycle " << scheduleCycle << ")" << std::endl;
+					portsSaturated = true;
+					break;
+				}
+
+				//calculate when the result will be ready
+				depCycle = scheduleCycle + mop.getLatency();
+
+				//if this instruction writes the result, modify register information
+				//  RegisterInfo.latency - which cycle the register will be ready
+				//  RegisterInfo.lastOpGroup - the last operation that was applied to the register
+				//  RegisterInfo.lastOpPar - the last operation source value (-1 = constant, 0-7 = register)
+				if (macroOpIndex == currentInstruction.getInfo().getResultOp()) {
+					int dst = currentInstruction.getDestination();
+					RegisterInfo& ri = registers[dst];
+					retireCycle = depCycle;
+					ri.latency = retireCycle;
+					ri.lastOpGroup = currentInstruction.getGroup();
+					ri.lastOpPar = currentInstruction.getGroupPar();
+					if (trace) std::cout << "; RETIRED at cycle " << retireCycle << std::endl;
+				}
+				codeSize += mop.getSize();
+				bufferIndex++;
+				macroOpIndex++;
+				macroOpCount++;
+
+				//terminating condition
+				if (scheduleCycle >= RANDOMX_SUPERSCALAR_LATENCY) {
+					portsSaturated = true;
+				}
+				cycle = topCycle;
+
+				//when all macro-ops of the current instruction have been issued, add the instruction into the program
+				if (macroOpIndex >= currentInstruction.getInfo().getSize()) {
+					currentInstruction.toInstr(prog(programSize++));
+					mulCount += isMultiplication(currentInstruction.getType());
+				}
+			}
+			++cycle;
+		}
+
+		double ipc = (macroOpCount / (double)retireCycle);
+
+		memset(prog.asicLatencies, 0, sizeof(prog.asicLatencies));
+
+		//Calculate ASIC latency:
+		//Assumes 1 cycle latency for all operations and unlimited parallelization.
+		for (int i = 0; i < programSize; ++i) {
+			Instruction& instr = prog(i);
+			int latDst = prog.asicLatencies[instr.dst] + 1;
+			int latSrc = instr.dst != instr.src ? prog.asicLatencies[instr.src] + 1 : 0;
+			prog.asicLatencies[instr.dst] = std::max(latDst, latSrc);
+		}
+
+		//address register is the register with the highest ASIC latency
+		int asicLatencyMax = 0;
+		int addressReg = 0;
+		for (int i = 0; i < 8; ++i) {
+			if (prog.asicLatencies[i] > asicLatencyMax) {
+				asicLatencyMax = prog.asicLatencies[i];
+				addressReg = i;
+			}
+			prog.cpuLatencies[i] = registers[i].latency;
+		}
+
+		prog.setSize(programSize);
+		prog.setAddressRegister(addressReg);
+
+		prog.cpuLatency = retireCycle;
+		prog.asicLatency = asicLatencyMax;
+		prog.codeSize = codeSize;
+		prog.macroOps = macroOpCount;
+		prog.decodeCycles = decodeCycle;
+		prog.ipc = ipc;
+		prog.mulCount = mulCount;
+		
+
+		/*if(INFO) std::cout << "; ALU port utilization:" << std::endl;
+		if (INFO) std::cout << "; (* = in use, _ = idle)" << std::endl;
+
+		int portCycles = 0;
+		for (int i = 0; i < CYCLE_MAP_SIZE; ++i) {
+			std::cout << "; " << std::setw(3) << i << " ";
+			for (int j = 0; j < 3; ++j) {
+				std::cout << (portBusy[i][j] ? '*' : '_');
+				portCycles += !!portBusy[i][j];
+			}
+			std::cout << std::endl;
+		}*/
+	}
+
+	//Interprets a superscalar program: the first prog.getSize() instructions of 'prog' are
+	//applied, in program order, to the eight integer registers in 'r'.
+	//  r           - register file, modified in place
+	//  prog        - program to execute
+	//  reciprocals - optional table of IMUL_RCP multipliers indexed by the instruction's imm32;
+	//                when null, the multiplier is obtained from randomx_reciprocal(imm32) instead
+	void executeSuperscalar(int_reg_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t> *reciprocals) {
+		for (unsigned pc = 0; pc < prog.getSize(); ++pc) {
+			Instruction& op = prog(pc);
+			//every instruction type writes its result to the destination register
+			int_reg_t& dst = r[op.dst];
+			switch ((SuperscalarInstructionType)op.opcode)
+			{
+			case SuperscalarInstructionType::ISUB_R:
+				dst -= r[op.src];
+				break;
+			case SuperscalarInstructionType::IXOR_R:
+				dst ^= r[op.src];
+				break;
+			case SuperscalarInstructionType::IADD_RS:
+				dst += r[op.src] << op.getModShift();
+				break;
+			case SuperscalarInstructionType::IMUL_R:
+				dst *= r[op.src];
+				break;
+			case SuperscalarInstructionType::IROR_C:
+				dst = rotr(dst, op.getImm32());
+				break;
+			//the C7/C8/C9 variants execute identically
+			case SuperscalarInstructionType::IADD_C7:
+			case SuperscalarInstructionType::IADD_C8:
+			case SuperscalarInstructionType::IADD_C9:
+				dst += signExtend2sCompl(op.getImm32());
+				break;
+			case SuperscalarInstructionType::IXOR_C7:
+			case SuperscalarInstructionType::IXOR_C8:
+			case SuperscalarInstructionType::IXOR_C9:
+				dst ^= signExtend2sCompl(op.getImm32());
+				break;
+			case SuperscalarInstructionType::IMULH_R:
+				dst = mulh(dst, r[op.src]);
+				break;
+			case SuperscalarInstructionType::ISMULH_R:
+				dst = smulh(dst, r[op.src]);
+				break;
+			case SuperscalarInstructionType::IMUL_RCP:
+				if (reciprocals == nullptr)
+					dst *= randomx_reciprocal(op.getImm32());
+				else
+					dst *= (*reciprocals)[op.getImm32()];
+				break;
+			default:
+				UNREACHABLE;
+			}
+		}
+	}
+}
--- a/crypto/randomx/superscalar.hpp
+++ b/crypto/randomx/superscalar.hpp
@ -0,0 +1,60 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+#include "superscalar_program.hpp"
+#include "blake2_generator.hpp"
+
+namespace randomx {
+	//Instruction types that may appear in a superscalar program. executeSuperscalar() casts
+	//Instruction::opcode to this enum and dispatches on it.
+	//The trailing columns list per-instruction reference figures for the CPU named below.
+	                                              //                  Intel Ivy Bridge reference
+	enum class SuperscalarInstructionType {       //uOPs (decode)   execution ports         latency       code size
+		ISUB_R = 0,                               //1               p015                    1               3 (sub)
+		IXOR_R = 1,                               //1               p015                    1               3 (xor)
+		IADD_RS = 2,                              //1               p01                     1               4 (lea)
+		IMUL_R = 3,                               //1               p1                      3               4 (imul)
+		IROR_C = 4,                               //1               p05                     1               4 (ror)
+		IADD_C7 = 5,                              //1               p015                    1               7 (add)
+		IXOR_C7 = 6,                              //1               p015                    1               7 (xor)
+		IADD_C8 = 7,                              //1+0             p015                    1               7+1 (add+nop)
+		IXOR_C8 = 8,                              //1+0             p015                    1               7+1 (xor+nop)
+		IADD_C9 = 9,                              //1+0             p015                    1               7+2 (add+nop)
+		IXOR_C9 = 10,                             //1+0             p015                    1               7+2 (xor+nop)
+		IMULH_R = 11,                             //1+2+1           0+(p1,p5)+0             3               3+3+3 (mov+mul+mov)
+		ISMULH_R = 12,                            //1+2+1           0+(p1,p5)+0             3               3+3+3 (mov+imul+mov)
+		IMUL_RCP = 13,                            //1+1             p015+p1                 4              10+4   (mov+imul)
+
+		COUNT = 14,                               //one past the highest instruction value listed above
+		INVALID = -1                              //NOTE(review): presumably marks "no instruction" - confirm against its users
+	};
+
+	//Fills 'prog' with a generated superscalar program, drawing its random choices from 'gen'.
+	//Besides the instructions, the program size, address register and the statistics fields
+	//of 'prog' are written.
+	void generateSuperscalar(SuperscalarProgram& prog, Blake2Generator& gen);
+	//Executes 'prog' on the eight registers in 'r' (modified in place).
+	//'reciprocals', when non-null, supplies the IMUL_RCP multipliers indexed by the instruction's
+	//imm32; when null they are computed with randomx_reciprocal().
+	void executeSuperscalar(uint64_t(&r)[8], SuperscalarProgram& prog, std::vector<uint64_t> *reciprocals = nullptr);
+}
--- a/crypto/randomx/superscalar_program.hpp
+++ b/crypto/randomx/superscalar_program.hpp
@ -0,0 +1,84 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include "instruction.hpp"
+#include "common.hpp"
+
+namespace randomx {
+
+	//Fixed-capacity buffer holding one superscalar program together with the statistics
+	//that generateSuperscalar() records about it.
+	class SuperscalarProgram {
+	public:
+		//Mutable access to the instruction at index 'pc'. The index is not range-checked.
+		Instruction& operator()(int pc) {
+			return programBuffer[pc];
+		}
+		//Read-only access to the instruction at index 'pc'. The index is not range-checked.
+		const Instruction& operator()(int pc) const {
+			return programBuffer[pc];
+		}
+		//Streams the first 'size' instructions of the program to 'os'.
+		friend std::ostream& operator<<(std::ostream& os, const SuperscalarProgram& p) {
+			p.print(os);
+			return os;
+		}
+		//Number of instructions in the program. Const-qualified so that it can also be
+		//queried through a const reference.
+		uint32_t getSize() const {
+			return size;
+		}
+		void setSize(uint32_t val) {
+			size = val;
+		}
+		//Index of the address register chosen by the generator. Const-qualified so that it
+		//can also be queried through a const reference.
+		int getAddressRegister() const {
+			return addrReg;
+		}
+		void setAddressRegister(int val) {
+			addrReg = val;
+		}
+
+		Instruction programBuffer[SuperscalarMaxSize];
+		//zero-initialized only in builds without NDEBUG; otherwise indeterminate until setSize() is called
+		uint32_t size
+#ifndef NDEBUG
+			= 0
+#endif
+			;
+		int addrReg;
+		//The fields below are statistics written by generateSuperscalar().
+		double ipc;            //macro-ops issued divided by the retire cycle
+		int codeSize;          //sum of the sizes of all issued macro-ops
+		int macroOps;          //number of issued macro-ops
+		int decodeCycles;      //number of decode cycles
+		int cpuLatency;        //cycle at which the last result retires
+		int asicLatency;       //highest value in asicLatencies
+		int mulCount;          //number of multiplication instructions
+		int cpuLatencies[8];   //per register: cycle at which the register is ready
+		int asicLatencies[8];  //per register: latency assuming 1 cycle per operation and unlimited parallelization
+	private:
+		//Writes each of the first 'size' instructions to 'os' using Instruction's stream operator.
+		void print(std::ostream& os) const {
+			for (unsigned i = 0; i < size; ++i) {
+				auto instr = programBuffer[i];
+				os << instr;
+			}
+		}
+	};
+
+}
--- a/crypto/randomx/virtual_machine.cpp
+++ b/crypto/randomx/virtual_machine.cpp
@ -0,0 +1,145 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <cstring>
+#include <iomanip>
+#include <stdexcept>
+#include "virtual_machine.hpp"
+#include "common.hpp"
+#include "aes_hash.hpp"
+#include "blake2/blake2.h"
+#include "intrin_portable.h"
+#include "allocator.hpp"
+
+//Out-of-line definition of the destructor; there is nothing to release at this level.
+randomx_vm::~randomx_vm() = default;
+
+//Resets the floating point state by delegating to rx_reset_float_state().
+void randomx_vm::resetRoundingMode() {
+	rx_reset_float_state();
+}
+
+namespace randomx {
+
+	//Assembles a 64-bit floating point bit pattern from 64 bits of entropy:
+	//the top 5 entropy bits (0..31) plus exponentBias, masked with exponentMask, become the
+	//exponent field; the entropy bits selected by mantissaMask become the mantissa field.
+	static inline uint64_t getSmallPositiveFloatBits(uint64_t entropy) {
+		const uint64_t exponentField = ((entropy >> 59) + exponentBias) & exponentMask;
+		const uint64_t mantissaField = entropy & mantissaMask;
+		return (exponentField << mantissaSize) | mantissaField;
+	}
+
+	//Returns constExponentBits combined with the top staticExponentBits bits of the entropy
+	//(placed above the dynamicExponentBits low bits), shifted into the exponent position.
+	static inline uint64_t getStaticExponent(uint64_t entropy) {
+		auto exponentField = constExponentBits;
+		const uint64_t staticBits = entropy >> (64 - staticExponentBits);
+		exponentField |= staticBits << dynamicExponentBits;
+		exponentField <<= mantissaSize;
+		return exponentField;
+	}
+
+	//Combines the low 22 bits of the entropy with the static exponent derived from the same entropy.
+	static inline uint64_t getFloatMask(uint64_t entropy) {
+		const uint64_t low22Bits = entropy & ((1ULL << 22) - 1);
+		return low22Bits | getStaticExponent(entropy);
+	}
+
+}
+
+//Derives the per-program VM state from the entropy words of the current program:
+//the 'a' registers, the memory registers ma/mx, the four read-address register indices,
+//the dataset offset and the two 'e' masks.
+void randomx_vm::initialize() {
+	//entropy words 0..7 fill a[0].lo, a[0].hi, a[1].lo, ... a[3].hi in that order
+	for (int i = 0; i < 4; ++i) {
+		store64(&reg.a[i].lo, randomx::getSmallPositiveFloatBits(program.getEntropy(2 * i)));
+		store64(&reg.a[i].hi, randomx::getSmallPositiveFloatBits(program.getEntropy(2 * i + 1)));
+	}
+
+	mem.ma = program.getEntropy(8) & randomx::CacheLineAlignMask;
+	mem.mx = program.getEntropy(10);
+
+	//bit k of entropy word 12 picks register 2k or 2k+1 as the k-th read register
+	const auto registerSelectors = program.getEntropy(12);
+	config.readReg0 = 0 + (registerSelectors & 1);
+	config.readReg1 = 2 + ((registerSelectors >> 1) & 1);
+	config.readReg2 = 4 + ((registerSelectors >> 2) & 1);
+	config.readReg3 = 6 + ((registerSelectors >> 3) & 1);
+
+	//offset is a whole number of cache lines in the range 0..DatasetExtraItems
+	const auto extraItemIndex = program.getEntropy(13) % (randomx::DatasetExtraItems + 1);
+	datasetOffset = extraItemIndex * randomx::CacheLineSize;
+
+	store64(&config.eMask[0], randomx::getFloatMask(program.getEntropy(14)));
+	store64(&config.eMask[1], randomx::getFloatMask(program.getEntropy(15)));
+}
+
+namespace randomx {
+
+	//Scratch vector used only by VmBase::allocate() to execute one AES round; volatile so that
+	//the load and store around the AES instruction are real memory accesses.
+	alignas(16) volatile static rx_vec_i128 aesDummy;
+
+	//Releases the scratchpad through the same Allocator policy that allocate() uses.
+	template<class Allocator, bool softAes>
+	VmBase<Allocator, softAes>::~VmBase() {
+		Allocator::freeMemory(scratchpad, ScratchpadSize);
+	}
+
+	//Allocates the scratchpad (ScratchpadSize bytes).
+	//Throws std::invalid_argument when datasetPtr is null.
+	//When hardware AES is selected (softAes == false, non-RISC-V builds) one AES round is
+	//executed first so that missing CPU support shows up here rather than later.
+	template<class Allocator, bool softAes>
+	void VmBase<Allocator, softAes>::allocate() {
+		if (datasetPtr == nullptr)
+			throw std::invalid_argument("Cache/Dataset not set");
+#ifndef __riscv
+		if (!softAes) { //if hardware AES is not supported, it's better to fail now than to return a ticking bomb
+			rx_vec_i128 tmp = rx_load_vec_i128((const rx_vec_i128*)&aesDummy);
+			tmp = rx_aesenc_vec_i128(tmp, tmp);
+			rx_store_vec_i128((rx_vec_i128*)&aesDummy, tmp);
+		}
+#endif
+		scratchpad = (uint8_t*)Allocator::allocMemory(ScratchpadSize);
+	}
+
+	//Hashes the scratchpad into reg.a with hashAes1Rx4, then writes the blake2b hash of the
+	//whole register file (outSize bytes) to 'out'.
+	template<class Allocator, bool softAes>
+	void VmBase<Allocator, softAes>::getFinalResult(void* out, size_t outSize) {
+		hashAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a);
+		blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
+	}
+
+	//Same output as getFinalResult(), but goes through hashAndFillAes1Rx4, which additionally
+	//receives 'fill_state'.
+	//NOTE(review): presumably the scratchpad is refilled from fill_state in the same pass - confirm in aes_hash.
+	template<class Allocator, bool softAes>
+	void VmBase<Allocator, softAes>::hashAndFill(void* out, size_t outSize, uint64_t *fill_state) {
+		hashAndFillAes1Rx4<softAes>((void*) getScratchpad(), ScratchpadSize, &reg.a, fill_state);
+		blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
+	}
+
+	//Fills the whole scratchpad using fillAes1Rx4 with 'seed' as its state argument.
+	template<class Allocator, bool softAes>
+	void VmBase<Allocator, softAes>::initScratchpad(void* seed) {
+		fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
+	}
+
+	//Fills the 'program' member (sizeof(program) bytes) using fillAes4Rx4 with 'seed' as its state argument.
+	template<class Allocator, bool softAes>
+	void VmBase<Allocator, softAes>::generateProgram(void* seed) {
+		fillAes4Rx4<softAes>(seed, sizeof(program), &program);
+	}
+
+	//Explicit instantiations: the member definitions above live in this translation unit,
+	//so every allocator / AES combination is instantiated here.
+	template class VmBase<AlignedAllocator<CacheLineSize>, false>;
+	template class VmBase<AlignedAllocator<CacheLineSize>, true>;
+	template class VmBase<LargePageAllocator, false>;
+	template class VmBase<LargePageAllocator, true>;
+}
--- a/crypto/randomx/virtual_machine.hpp
+++ b/crypto/randomx/virtual_machine.hpp
@ -0,0 +1,91 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include "common.hpp"
+#include "program.hpp"
+
+/* Global namespace for C binding */
+//Abstract base class of the virtual machine. It owns the program, the register file, the
+//program configuration and the memory registers; the pure virtual members are supplied by
+//derived classes.
+class randomx_vm {
+public:
+	//pure virtual, but still defined out of line because derived destructors call it
+	virtual ~randomx_vm() = 0;
+	virtual void allocate() = 0;
+	virtual void getFinalResult(void* out, size_t outSize) = 0;
+	virtual void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) = 0;
+	//setDataset/setCache do nothing unless overridden
+	virtual void setDataset(randomx_dataset* dataset) { }
+	virtual void setCache(randomx_cache* cache) { }
+	virtual void initScratchpad(void* seed) = 0;
+	virtual void run(void* seed) = 0;
+	void resetRoundingMode();
+	randomx::RegisterFile *getRegisterFile() {
+		return &reg;
+	}
+	const void* getScratchpad() {
+		return scratchpad;
+	}
+	const randomx::Program& getProgram()
+	{
+		return program;
+	}
+	const uint8_t* getMemory() const {
+		return mem.memory;
+	}
+protected:
+	//sets up reg.a, mem.ma/mx, config and datasetOffset from the entropy of 'program'
+	void initialize();
+	alignas(64) randomx::Program program;
+	alignas(64) randomx::RegisterFile reg;
+	alignas(16) randomx::ProgramConfiguration config;
+	randomx::MemoryRegisters mem;
+	uint8_t* scratchpad = nullptr;
+	//cachePtr and datasetPtr share the same storage; the initializer makes both start out null
+	union {
+		randomx_cache* cachePtr = nullptr;
+		randomx_dataset* datasetPtr;
+	};
+	//not initialized here; assigned by initialize()
+	uint64_t datasetOffset;
+public:
+	std::string cacheKey;
+	alignas(16) uint64_t tempHash[8]; //8 64-bit values used to store intermediate data
+};
+
+namespace randomx {
+
+	//Partial VM implementation parameterized by
+	//  Allocator - policy class whose static allocMemory/freeMemory manage the scratchpad
+	//  softAes   - selects the software (true) or hardware (false) AES code paths
+	//run(), setCache() and setDataset() are not overridden at this level.
+	template<class Allocator, bool softAes>
+	class VmBase : public randomx_vm {
+	public:
+		~VmBase() override;
+		void allocate() override;
+		void initScratchpad(void* seed) override;
+		void getFinalResult(void* out, size_t outSize) override;
+		void hashAndFill(void* out, size_t outSize, uint64_t *fill_state) override;
+	protected:
+		//fills the 'program' member from 'seed'
+		void generateProgram(void* seed);
+	};
+
+}
--- a/crypto/randomx/virtual_memory.c
+++ b/crypto/randomx/virtual_memory.c
@ -0,0 +1,243 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#include <windows.h>
+#else
+#define _GNU_SOURCE	1	/* needed for MAP_ANONYMOUS on older platforms */
+#ifdef __APPLE__
+#include <mach/vm_statistics.h>
+#include <TargetConditionals.h>
+#include <AvailabilityMacros.h>
+# if TARGET_OS_OSX
+#  define USE_PTHREAD_JIT_WP	1
+#  include <pthread.h>
+#  include <sys/utsname.h>
+#  include <stdio.h>
+# endif
+#endif
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <errno.h>
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#define PAGE_READONLY PROT_READ
+#define PAGE_READWRITE (PROT_READ | PROT_WRITE)
+#define PAGE_EXECUTE_READ (PROT_READ | PROT_EXEC)
+#define PAGE_EXECUTE_READWRITE (PROT_READ | PROT_WRITE | PROT_EXEC)
+#endif
+
+#include "virtual_memory.h"
+
+#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
+	&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
+/* Cached result of the OS version lookup: MacOSchecked is set once the lookup has been
+ * attempted, MacOSver holds (major << 8) | minor, or 0 when the lookup failed. */
+static int MacOSchecked, MacOSver;
+/* This function is used implicitly by clang's __builtin_available() checker.
+ * When cross-compiling, the library containing this function doesn't exist,
+ * and linking will fail because the symbol is unresolved. The function here
+ * is a quick and dirty hack to get close enough to identify MacOSX 11.0.
+ *
+ * Returns non-zero when the running OS version is at least major.minor.
+ * 'subminor' is accepted for signature compatibility but not compared.
+ */
+static int32_t __isOSVersionAtLeast(int32_t major, int32_t minor, int32_t subminor) {
+	(void)subminor;
+	if (!MacOSchecked) {
+		struct utsname ut;
+		int mmaj = 0, mmin = 0;
+		/* Only trust the parsed numbers when both uname() and sscanf() succeed; otherwise
+		 * MacOSver stays 0 and every version check fails, instead of comparing against
+		 * uninitialized values. */
+		if (uname(&ut) != -1 && sscanf(ut.release, "%d.%d", &mmaj, &mmin) == 2) {
+			// The utsname release version is 9 greater than the canonical OS version
+			mmaj -= 9;
+			MacOSver = (mmaj << 8) | mmin;
+		}
+		MacOSchecked = 1;
+	}
+	return MacOSver >= ((major << 8) | minor);
+}
+#endif
+
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define Fail(func)	do  {*errfunc = func; return GetLastError();} while(0)
+/* Enables or disables the named privilege on the current process token (Windows/Cygwin only).
+ *   pszPrivilege - privilege name passed to LookupPrivilegeValue
+ *   bEnable      - non-zero to enable the privilege, zero to clear its attributes
+ *   errfunc      - out: NULL on success, otherwise the name of the API call that failed
+ * Returns 0 on success, otherwise the GetLastError() code of the failing call.
+ */
+int setPrivilege(const char* pszPrivilege, BOOL bEnable, char **errfunc) {
+	HANDLE           hToken;
+	TOKEN_PRIVILEGES tp;
+	BOOL             status;
+	DWORD            error = 0;
+
+	*errfunc = NULL;
+
+	/* no token was opened yet, so this failure returns directly instead of going through 'out' */
+	if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
+		Fail("OpenProcessToken");
+
+	if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid)) {
+		*errfunc = "LookupPrivilegeValue";
+		error = GetLastError();
+		goto out;
+	}
+
+	tp.PrivilegeCount = 1;
+
+	if (bEnable)
+		tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+	else
+		tp.Privileges[0].Attributes = 0;
+
+	status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
+
+	/* read the last-error code immediately: the adjustment is treated as failed when the
+	 * call reports failure or when it leaves a code other than ERROR_SUCCESS */
+	error = GetLastError();
+	if (!status || (error != ERROR_SUCCESS)) {
+		*errfunc = "AdjustTokenPrivileges";
+		goto out;
+	}
+
+out:
+	/* a CloseHandle failure is only reported when no earlier error was recorded */
+	if (!CloseHandle(hToken)) {
+		if (*errfunc == NULL) {
+			*errfunc = "CloseHandle";
+			error = GetLastError();
+		}
+	}
+	return error;
+}
+#else
+#define Fail(func)	do  {*errfunc = func; return errno;} while(0)
+#endif
+
+/* Allocates 'bytes' of read/write memory: VirtualAlloc on Windows/Cygwin, an anonymous
+ * private mmap elsewhere. Returns NULL when the allocation fails.
+ * Platform-specific additions to the mmap call:
+ *   NetBSD                 - PROT_MPROTECT(PROT_EXEC) is added to the protection flags
+ *   USE_PTHREAD_JIT_WP set - MAP_JIT and PROT_EXEC are added, and on macOS 11.0 or later
+ *                            pthread_jit_write_protect_np(0) is called after the mapping
+ */
+void* allocMemoryPages(size_t bytes) {
+	void* mem;
+#if defined(_WIN32) || defined(__CYGWIN__)
+	mem = VirtualAlloc(NULL, bytes, MEM_COMMIT, PAGE_READWRITE);
+#else
+	#if defined(__NetBSD__)
+		#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
+	#else
+		#define RESERVED_FLAGS 0
+	#endif
+	#ifdef USE_PTHREAD_JIT_WP
+		#define MEXTRA MAP_JIT
+		#define PEXTRA	PROT_EXEC
+	#else
+		#define MEXTRA 0
+		#define PEXTRA	0
+	#endif
+	mem = mmap(NULL, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
+	/* normalize the mmap failure value to NULL for callers */
+	if (mem == MAP_FAILED)
+		mem = NULL;
+#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
+	&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
+	/* note: executed whether or not the mapping succeeded */
+	if (__builtin_available(macOS 11.0, *)) {
+		pthread_jit_write_protect_np(0);
+	}
+#endif
+#endif
+	return mem;
+}
+
+/* Applies the protection 'rules' to the range [ptr, ptr + bytes).
+ * Returns 0 on success. On failure the Fail() macro stores the name of the failing
+ * call in *errfunc and returns the platform error code (GetLastError() or errno).
+ */
+static inline int pageProtect(void* ptr, size_t bytes, int rules, char **errfunc) {
+#if defined(_WIN32) || defined(__CYGWIN__)
+	DWORD previousProtection;
+	const BOOL changed = VirtualProtect(ptr, bytes, (DWORD)rules, &previousProtection);
+	if (!changed)
+		Fail("VirtualProtect");
+#else
+	const int status = mprotect(ptr, bytes, rules);
+	if (status == -1)
+		Fail("mprotect");
+#endif
+	return 0;
+}
+
+/* Makes the range readable and writable.
+ * In builds with the macOS JIT write-protect API, when running on macOS 11.0 or later, this
+ * calls pthread_jit_write_protect_np(0) and does not use ptr/bytes; in every other case the
+ * range is set to PAGE_READWRITE through pageProtect().
+ * The status returned by pageProtect() and the reported function name are discarded.
+ */
+void setPagesRW(void* ptr, size_t bytes) {
+	char *errfunc;
+#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
+	&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
+	if (__builtin_available(macOS 11.0, *)) {
+		pthread_jit_write_protect_np(0);
+	} else {
+		pageProtect(ptr, bytes, PAGE_READWRITE, &errfunc);
+	}
+#else
+	pageProtect(ptr, bytes, PAGE_READWRITE, &errfunc);
+#endif
+}
+
+/* Makes the range readable and executable.
+ * In builds with the macOS JIT write-protect API, when running on macOS 11.0 or later, this
+ * calls pthread_jit_write_protect_np(1) and then clears the instruction cache for the range;
+ * in every other case the range is set to PAGE_EXECUTE_READ through pageProtect().
+ * The status returned by pageProtect() and the reported function name are discarded.
+ */
+void setPagesRX(void* ptr, size_t bytes) {
+	char *errfunc;
+#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
+	&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
+	if (__builtin_available(macOS 11.0, *)) {
+		pthread_jit_write_protect_np(1);
+		__builtin___clear_cache((char*)ptr, ((char*)ptr) + bytes);
+	} else {
+		pageProtect(ptr, bytes, PAGE_EXECUTE_READ, &errfunc);
+	}
+#else
+	pageProtect(ptr, bytes, PAGE_EXECUTE_READ, &errfunc);
+#endif
+}
+
+/* Requests read + write + execute protection for the range through pageProtect().
+ * The returned status and the name of a failing call are discarded.
+ */
+void setPagesRWX(void* ptr, size_t bytes) {
+	char *failedCall;
+	(void)pageProtect(ptr, bytes, PAGE_EXECUTE_READWRITE, &failedCall);
+}
+
+/* Allocates 'bytes' of read/write memory backed by large pages. Returns NULL on failure.
+ *   Windows/Cygwin  - enables SeLockMemoryPrivilege, then VirtualAlloc with MEM_LARGE_PAGES;
+ *                     the size is rounded up to a multiple of GetLargePageMinimum()
+ *   macOS           - mmap with VM_FLAGS_SUPERPAGE_SIZE_2MB passed in the fd argument
+ *   FreeBSD         - mmap with MAP_ALIGNED_SUPER
+ *   OpenBSD/NetBSD  - always fails, no mapping is attempted
+ *   other platforms - mmap with MAP_HUGETLB | MAP_POPULATE
+ * The reason stored in 'errfunc' is local to this function and is not reported to the caller.
+ */
+void* allocLargePagesMemory(size_t bytes) {
+	void* mem;
+	char *errfunc;
+#if defined(_WIN32) || defined(__CYGWIN__)
+	if (setPrivilege("SeLockMemoryPrivilege", 1, &errfunc))
+		return NULL;
+	size_t pageMinimum = GetLargePageMinimum();
+	if (!pageMinimum) {
+		errfunc = "No large pages";
+		return NULL;
+	}
+	mem = VirtualAlloc(NULL, alignSize(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
+#else
+#ifdef __APPLE__
+	mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
+#elif defined(__FreeBSD__)
+	mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
+#elif defined(__OpenBSD__) || defined(__NetBSD__)
+	mem = MAP_FAILED; // huge pages are not requested on OpenBSD and NetBSD; report failure
+#else
+	mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
+#endif
+	/* normalize the mmap failure value to NULL for callers */
+	if (mem == MAP_FAILED)
+		mem = NULL;
+#endif
+	return mem;
+}
+
+/* Releases a memory region: VirtualFree(MEM_RELEASE) on Windows/Cygwin ('bytes' is not used
+ * there), munmap of 'bytes' bytes elsewhere. A null pointer is ignored on the munmap path.
+ */
+void freePagedMemory(void* ptr, size_t bytes) {
+#if defined(_WIN32) || defined(__CYGWIN__)
+	VirtualFree(ptr, 0, MEM_RELEASE);
+#else
+	// some munmap implementations can crash on null pointer, despite what the manpage says
+	if (ptr == NULL)
+		return;
+	munmap(ptr, bytes);
+#endif
+}
--- a/crypto/randomx/virtual_memory.h
+++ b/crypto/randomx/virtual_memory.h
@ -0,0 +1,48 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/*
+ * Round `pos` up to the nearest multiple of `align`.
+ *
+ * Both arguments are parenthesized in the expansion so that expression
+ * arguments such as alignSize(n, a + b) or alignSize(c ? x : y, a) are
+ * grouped correctly. `align` is evaluated twice, so it must not have side
+ * effects. With an unsigned `pos` of 0 the subtraction wraps around;
+ * callers are expected to pass a positive size.
+ */
+#define alignSize(pos, align) (((((pos) - 1) / (align)) + 1) * (align))
+
+/* Allocate memory pages of the given size in bytes.
+   NOTE(review): presumably returns page-aligned memory — confirm against
+   the definition in virtual_memory.c. */
+void* allocMemoryPages(size_t);
+/* Change the protection of the given region (pointer, size in bytes);
+   presumably to read + write, judging by the name — confirm in
+   virtual_memory.c. */
+void setPagesRW(void*, size_t);
+/* Change the protection of the given region to read + execute. */
+void setPagesRX(void*, size_t);
+/* Change the protection of the given region to read + write + execute. */
+void setPagesRWX(void*, size_t);
+/* Allocate large-page backed memory of the given size in bytes;
+   returns NULL on failure. */
+void* allocLargePagesMemory(size_t);
+/* Release a region (pointer, size in bytes); a null pointer is tolerated. */
+void freePagedMemory(void*, size_t);
+
+#ifdef __cplusplus
+}
+#endif
--- a/crypto/randomx/vm_compiled.cpp
+++ b/crypto/randomx/vm_compiled.cpp
@ -0,0 +1,80 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "vm_compiled.hpp"
+#include "common.hpp"
+
+namespace randomx {
+
+	// Compile-time layout checks: the size of MemoryRegisters must equal two
+	// addr_t fields plus one uintptr_t-sized field, and RegisterFile must be
+	// exactly 256 bytes.
+	// NOTE(review): presumably the JIT-generated code depends on these exact
+	// layouts — confirm against the JIT compiler sources.
+	static_assert(sizeof(MemoryRegisters) == 2 * sizeof(addr_t) + sizeof(uintptr_t), "Invalid alignment of struct randomx::MemoryRegisters");
+	static_assert(sizeof(RegisterFile) == 256, "Invalid alignment of struct randomx::RegisterFile");
+
+	// Construct the VM. Secure-JIT builds leave the JIT buffer untouched here
+	// (run() toggles its permissions); otherwise the buffer is made both
+	// writable and executable once, up front.
+	template<class Allocator, bool softAes, bool secureJit>
+	CompiledVm<Allocator, softAes, secureJit>::CompiledVm() {
+		if (secureJit)
+			return;
+		compiler.enableAll();
+	}
+
+	// Remember the dataset; run() reads its memory pointer on every invocation.
+	template<class Allocator, bool softAes, bool secureJit>
+	void CompiledVm<Allocator, softAes, secureJit>::setDataset(randomx_dataset* dataset) {
+		this->datasetPtr = dataset;
+	}
+
+	// Generate a program from `seed`, JIT-compile it and execute it against
+	// the dataset previously supplied through setDataset().
+	template<class Allocator, bool softAes, bool secureJit>
+	void CompiledVm<Allocator, softAes, secureJit>::run(void* seed) {
+		VmBase<Allocator, softAes>::generateProgram(seed);
+		randomx_vm::initialize();
+
+		// With secureJit the buffer is switched to writable only for the
+		// duration of code generation and back to executable afterwards.
+		if (secureJit)
+			compiler.enableWriting();
+		compiler.generateProgram(program, config);
+		if (secureJit)
+			compiler.enableExecution();
+
+		auto datasetBase = datasetPtr->memory;
+		mem.memory = datasetBase + datasetOffset;
+		execute();
+	}
+
+	// Invoke the compiled program for RANDOMX_PROGRAM_ITERATIONS iterations.
+	// On aarch64 and RISC-V, config.eMask is first copied into reg.f.
+	template<class Allocator, bool softAes, bool secureJit>
+	void CompiledVm<Allocator, softAes, secureJit>::execute() {
+#if defined(__aarch64__) || defined(__riscv)
+		memcpy(reg.f, config.eMask, sizeof(config.eMask));
+#endif
+		auto compiledProgram = compiler.getProgramFunc();
+		compiledProgram(reg, mem, scratchpad, RANDOMX_PROGRAM_ITERATIONS);
+	}
+
+	// Explicit instantiations: the member definitions live in this .cpp file,
+	// so every combination of allocator (AlignedAllocator<CacheLineSize> or
+	// LargePageAllocator), softAes and secureJit is instantiated here.
+	template class CompiledVm<AlignedAllocator<CacheLineSize>, false, false>;
+	template class CompiledVm<AlignedAllocator<CacheLineSize>, true, false>;
+	template class CompiledVm<LargePageAllocator, false, false>;
+	template class CompiledVm<LargePageAllocator, true, false>;
+	template class CompiledVm<AlignedAllocator<CacheLineSize>, false, true>;
+	template class CompiledVm<AlignedAllocator<CacheLineSize>, true, true>;
+	template class CompiledVm<LargePageAllocator, false, true>;
+	template class CompiledVm<LargePageAllocator, true, true>;
+}
--- a/crypto/randomx/vm_compiled.hpp
+++ b/crypto/randomx/vm_compiled.hpp
@ -0,0 +1,77 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <new>
+#include <cstdint>
+#include "virtual_machine.hpp"
+#include "jit_compiler.hpp"
+#include "allocator.hpp"
+#include "dataset.hpp"
+
+namespace randomx {
+
+	// VM that executes programs through a JitCompiler.
+	//   Allocator - forwarded to VmBase
+	//   softAes   - forwarded to VmBase
+	//   secureJit - when true, run() toggles the JIT buffer between writable
+	//               and executable around code generation
+	template<class Allocator, bool softAes, bool secureJit>
+	class CompiledVm : public VmBase<Allocator, softAes> {
+	public:
+		// Instances are allocated through AlignedAllocator<CacheLineSize>;
+		// a null result from the allocator is reported as std::bad_alloc.
+		void* operator new(size_t size) {
+			if (void* storage = AlignedAllocator<CacheLineSize>::allocMemory(size))
+				return storage;
+			throw std::bad_alloc();
+		}
+		void operator delete(void* ptr) {
+			AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(CompiledVm));
+		}
+
+		CompiledVm();
+		void setDataset(randomx_dataset* dataset) override;
+		void run(void* seed) override;
+
+		// Members of the dependent base class used by this class and its subclasses.
+		using VmBase<Allocator, softAes>::mem;
+		using VmBase<Allocator, softAes>::program;
+		using VmBase<Allocator, softAes>::config;
+		using VmBase<Allocator, softAes>::reg;
+		using VmBase<Allocator, softAes>::scratchpad;
+		using VmBase<Allocator, softAes>::datasetPtr;
+		using VmBase<Allocator, softAes>::datasetOffset;
+
+	protected:
+		// Calls the compiled program; shared with CompiledLightVm.
+		void execute();
+
+		JitCompiler compiler;
+	};
+
+	// Convenience aliases. Naming scheme:
+	//   "LargePage" -> LargePageAllocator (otherwise AlignedAllocator<CacheLineSize>)
+	//   "HardAes"   -> softAes = false    (otherwise softAes = true)
+	//   "Secure"    -> secureJit = true   (otherwise secureJit = false)
+	using CompiledVmDefault = CompiledVm<AlignedAllocator<CacheLineSize>, true, false>;
+	using CompiledVmHardAes = CompiledVm<AlignedAllocator<CacheLineSize>, false, false>;
+	using CompiledVmLargePage = CompiledVm<LargePageAllocator, true, false>;
+	using CompiledVmLargePageHardAes = CompiledVm<LargePageAllocator, false, false>;
+	using CompiledVmDefaultSecure = CompiledVm<AlignedAllocator<CacheLineSize>, true, true>;
+	using CompiledVmHardAesSecure = CompiledVm<AlignedAllocator<CacheLineSize>, false, true>;
+	using CompiledVmLargePageSecure = CompiledVm<LargePageAllocator, true, true>;
+	using CompiledVmLargePageHardAesSecure = CompiledVm<LargePageAllocator, false, true>;
+}
--- a/crypto/randomx/vm_compiled_light.cpp
+++ b/crypto/randomx/vm_compiled_light.cpp
@ -0,0 +1,70 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "vm_compiled_light.hpp"
+#include "common.hpp"
+#include <stdexcept>
+
+namespace randomx {
+
+	// Attach a cache: record it, point mem.memory at its memory and compile
+	// the superscalar hash from the cache's programs and reciprocal cache.
+	template<class Allocator, bool softAes, bool secureJit>
+	void CompiledLightVm<Allocator, softAes, secureJit>::setCache(randomx_cache* cache) {
+		cachePtr = cache;
+		mem.memory = cache->memory;
+
+		// With secureJit the buffer is writable only while code is generated.
+		if (secureJit)
+			compiler.enableWriting();
+		compiler.generateSuperscalarHash(cache->programs, cache->reciprocalCache);
+		if (secureJit)
+			compiler.enableExecution();
+	}
+
+	// Generate a program from `seed`, compile its light-mode variant (which
+	// takes datasetOffset at code-generation time) and execute it.
+	template<class Allocator, bool softAes, bool secureJit>
+	void CompiledLightVm<Allocator, softAes, secureJit>::run(void* seed) {
+		VmBase<Allocator, softAes>::generateProgram(seed);
+		randomx_vm::initialize();
+
+		// With secureJit the buffer is writable only while code is generated.
+		if (secureJit)
+			compiler.enableWriting();
+		compiler.generateProgramLight(program, config, datasetOffset);
+		if (secureJit)
+			compiler.enableExecution();
+
+		CompiledVm<Allocator, softAes, secureJit>::execute();
+	}
+
+	// Explicit instantiations: the member definitions live in this .cpp file,
+	// so every combination of allocator (AlignedAllocator<CacheLineSize> or
+	// LargePageAllocator), softAes and secureJit is instantiated here.
+	template class CompiledLightVm<AlignedAllocator<CacheLineSize>, false, false>;
+	template class CompiledLightVm<AlignedAllocator<CacheLineSize>, true, false>;
+	template class CompiledLightVm<LargePageAllocator, false, false>;
+	template class CompiledLightVm<LargePageAllocator, true, false>;
+	template class CompiledLightVm<AlignedAllocator<CacheLineSize>, false, true>;
+	template class CompiledLightVm<AlignedAllocator<CacheLineSize>, true, true>;
+	template class CompiledLightVm<LargePageAllocator, false, true>;
+	template class CompiledLightVm<LargePageAllocator, true, true>;
+}
--- a/crypto/randomx/vm_compiled_light.hpp
+++ b/crypto/randomx/vm_compiled_light.hpp
@ -0,0 +1,68 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <new>
+#include "vm_compiled.hpp"
+
+namespace randomx {
+
+	// CompiledVm variant that is fed by a randomx_cache (see setCache);
+	// setDataset() is deliberately a no-op in this class.
+	template<class Allocator, bool softAes, bool secureJit>
+	class CompiledLightVm : public CompiledVm<Allocator, softAes, secureJit> {
+	public:
+		// Instances are allocated through AlignedAllocator<CacheLineSize>;
+		// a null result from the allocator is reported as std::bad_alloc.
+		void* operator new(size_t size) {
+			if (void* storage = AlignedAllocator<CacheLineSize>::allocMemory(size))
+				return storage;
+			throw std::bad_alloc();
+		}
+		void operator delete(void* ptr) {
+			AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(CompiledLightVm));
+		}
+
+		void setCache(randomx_cache* cache) override;
+		void setDataset(randomx_dataset* dataset) override { }
+		void run(void* seed) override;
+
+		// Members of the dependent base class used by the definitions in the .cpp file.
+		using CompiledVm<Allocator, softAes, secureJit>::mem;
+		using CompiledVm<Allocator, softAes, secureJit>::compiler;
+		using CompiledVm<Allocator, softAes, secureJit>::program;
+		using CompiledVm<Allocator, softAes, secureJit>::config;
+		using CompiledVm<Allocator, softAes, secureJit>::cachePtr;
+		using CompiledVm<Allocator, softAes, secureJit>::datasetOffset;
+	};
+
+	// Convenience aliases. Naming scheme:
+	//   "LargePage" -> LargePageAllocator (otherwise AlignedAllocator<CacheLineSize>)
+	//   "HardAes"   -> softAes = false    (otherwise softAes = true)
+	//   "Secure"    -> secureJit = true   (otherwise secureJit = false)
+	using CompiledLightVmDefault = CompiledLightVm<AlignedAllocator<CacheLineSize>, true, false>;
+	using CompiledLightVmHardAes = CompiledLightVm<AlignedAllocator<CacheLineSize>, false, false>;
+	using CompiledLightVmLargePage = CompiledLightVm<LargePageAllocator, true, false>;
+	using CompiledLightVmLargePageHardAes = CompiledLightVm<LargePageAllocator, false, false>;
+	using CompiledLightVmDefaultSecure = CompiledLightVm<AlignedAllocator<CacheLineSize>, true, true>;
+	using CompiledLightVmHardAesSecure = CompiledLightVm<AlignedAllocator<CacheLineSize>, false, true>;
+	using CompiledLightVmLargePageSecure = CompiledLightVm<LargePageAllocator, true, true>;
+	using CompiledLightVmLargePageHardAesSecure = CompiledLightVm<LargePageAllocator, false, true>;
+}
--- a/crypto/randomx/vm_interpreted.cpp
+++ b/crypto/randomx/vm_interpreted.cpp
@ -0,0 +1,131 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <iostream>
+#include <iomanip>
+#include <stdexcept>
+#include <sstream>
+#include <cmath>
+#include <cfloat>
+#include "vm_interpreted.hpp"
+#include "dataset.hpp"
+#include "intrin_portable.h"
+#include "reciprocal.h"
+
+namespace randomx {
+
+	// Attach a dataset: remember it and point mem.memory at its memory.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) {
+		this->datasetPtr = dataset;
+		this->mem.memory = dataset->memory;
+	}
+
+	// Generate a program from `seed`, initialize the VM state and interpret
+	// the program.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::run(void* seed) {
+		VmBase<Allocator, softAes>::generateProgram(seed);
+		randomx_vm::initialize();
+		this->execute();
+	}
+
+	// Interpret the current program for RANDOMX_PROGRAM_ITERATIONS iterations.
+	// All work happens on a local NativeRegisterFile; the r, f and e registers
+	// are written back to `reg` once the loop has finished.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::execute() {
+
+		NativeRegisterFile nreg;
+
+		// Load the "a" registers from the VM register file into the native copy.
+		for(unsigned i = 0; i < RegisterCountFlt; ++i)
+			nreg.a[i] = rx_load_vec_f128(&reg.a[i].lo);
+
+		// Translate `program` into `bytecode`.
+		// NOTE(review): presumably the bytecode operands refer into nreg, since
+		// executeBytecode below is not passed nreg — confirm in bytecode_machine.
+		compileProgram(program, bytecode, nreg);
+
+		// Initial scratchpad addresses are taken from the mx/ma memory registers.
+		uint32_t spAddr0 = mem.mx;
+		uint32_t spAddr1 = mem.ma;
+
+		for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) {
+			// Mix two integer registers into the scratchpad addresses: the low
+			// 32 bits go into spAddr0, the high 32 bits into spAddr1; both are
+			// then masked with ScratchpadL3Mask64.
+			uint64_t spMix = nreg.r[config.readReg0] ^ nreg.r[config.readReg1];
+			spAddr0 ^= spMix;
+			spAddr0 &= ScratchpadL3Mask64;
+			spAddr1 ^= spMix >> 32;
+			spAddr1 &= ScratchpadL3Mask64;
+			
+			// XOR each integer register with 8 bytes read from the scratchpad at spAddr0.
+			for (unsigned i = 0; i < RegistersCount; ++i)
+				nreg.r[i] ^= load64(scratchpad + spAddr0 + 8 * i);
+
+			// Load the f registers from consecutive 8-byte slots at spAddr1.
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				nreg.f[i] = rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * i);
+
+			// Load the e registers from the slots following those used for f and
+			// pass each value through maskRegisterExponentMantissa.
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				nreg.e[i] = maskRegisterExponentMantissa(config, rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i)));
+
+			executeBytecode(bytecode, scratchpad, config);
+
+			// Update mx from two integer registers and mask it, prefetch the
+			// dataset at mx, XOR the dataset data at ma into the integer
+			// registers, then swap mx and ma.
+			mem.mx ^= nreg.r[config.readReg2] ^ nreg.r[config.readReg3];
+			mem.mx &= CacheLineAlignMask;
+			datasetPrefetch(datasetOffset + mem.mx);
+			datasetRead(datasetOffset + mem.ma, nreg.r);
+			std::swap(mem.mx, mem.ma);
+
+			// Store the integer registers to the scratchpad at spAddr1.
+			for (unsigned i = 0; i < RegistersCount; ++i)
+				store64(scratchpad + spAddr1 + 8 * i, nreg.r[i]);
+
+			// f ^= e for every floating point register.
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				nreg.f[i] = rx_xor_vec_f128(nreg.f[i], nreg.e[i]);
+
+			// Store the f registers to the scratchpad at spAddr0, 16 bytes apart.
+			for (unsigned i = 0; i < RegisterCountFlt; ++i)
+				rx_store_vec_f128((double*)(scratchpad + spAddr0 + 16 * i), nreg.f[i]);
+
+			// Reset both addresses so that the next iteration derives them from
+			// spMix alone.
+			spAddr0 = 0;
+			spAddr1 = 0;
+		}
+
+		// Write the final r, f and e registers back to the VM register file.
+		for (unsigned i = 0; i < RegistersCount; ++i)
+			store64(&reg.r[i], nreg.r[i]);
+
+		for (unsigned i = 0; i < RegisterCountFlt; ++i)
+			rx_store_vec_f128(&reg.f[i].lo, nreg.f[i]);
+
+		for (unsigned i = 0; i < RegisterCountFlt; ++i)
+			rx_store_vec_f128(&reg.e[i].lo, nreg.e[i]);
+	}
+
+	// XOR RegistersCount consecutive 64-bit words, read from mem.memory at
+	// byte offset `address`, into the integer registers `r`.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) {
+		const uint64_t* line = reinterpret_cast<const uint64_t*>(mem.memory + address);
+		int word = 0;
+		while (word < RegistersCount) {
+			r[word] ^= line[word];
+			++word;
+		}
+	}
+
+	// Issue rx_prefetch_nta for the location at byte offset `address` within
+	// mem.memory.
+	template<class Allocator, bool softAes>
+	void InterpretedVm<Allocator, softAes>::datasetPrefetch(uint64_t address) {
+		rx_prefetch_nta(&mem.memory[address]);
+	}
+
+	// Explicit instantiations: the member definitions live in this .cpp file,
+	// so every combination of allocator (AlignedAllocator<CacheLineSize> or
+	// LargePageAllocator) and softAes is instantiated here.
+	template class InterpretedVm<AlignedAllocator<CacheLineSize>, false>;
+	template class InterpretedVm<AlignedAllocator<CacheLineSize>, true>;
+	template class InterpretedVm<LargePageAllocator, false>;
+	template class InterpretedVm<LargePageAllocator, true>;
+}
--- a/crypto/randomx/vm_interpreted.hpp
+++ b/crypto/randomx/vm_interpreted.hpp
@ -0,0 +1,75 @@
+/*
+Copyright (c) 2018-2019, tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <new>
+#include <vector>
+#include "common.hpp"
+#include "virtual_machine.hpp"
+#include "bytecode_machine.hpp"
+#include "intrin_portable.h"
+#include "allocator.hpp"
+
+namespace randomx {
+
+	// VM that interprets programs through BytecodeMachine instead of running
+	// JIT-compiled code. Dataset access goes through the virtual
+	// datasetRead/datasetPrefetch hooks so that subclasses can replace it.
+	template<class Allocator, bool softAes>
+	class InterpretedVm : public VmBase<Allocator, softAes>, public BytecodeMachine {
+	public:
+		// Members of the dependent base class used by the definitions in the .cpp file.
+		using VmBase<Allocator, softAes>::mem;
+		using VmBase<Allocator, softAes>::scratchpad;
+		using VmBase<Allocator, softAes>::program;
+		using VmBase<Allocator, softAes>::config;
+		using VmBase<Allocator, softAes>::reg;
+		using VmBase<Allocator, softAes>::datasetPtr;
+		using VmBase<Allocator, softAes>::datasetOffset;
+
+		// Instances are allocated through AlignedAllocator<CacheLineSize>;
+		// a null result from the allocator is reported as std::bad_alloc.
+		void* operator new(size_t size) {
+			if (void* storage = AlignedAllocator<CacheLineSize>::allocMemory(size))
+				return storage;
+			throw std::bad_alloc();
+		}
+		void operator delete(void* ptr) {
+			AlignedAllocator<CacheLineSize>::freeMemory(ptr, sizeof(InterpretedVm));
+		}
+
+		void run(void* seed) override;
+		void setDataset(randomx_dataset* dataset) override;
+
+	protected:
+		virtual void datasetRead(uint64_t blockNumber, int_reg_t(&r)[RegistersCount]);
+		virtual void datasetPrefetch(uint64_t blockNumber);
+
+	private:
+		// Interpreter main loop, invoked by run().
+		void execute();
+
+		InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE];
+	};
+
+	// Convenience aliases. Naming scheme:
+	//   "LargePage" -> LargePageAllocator (otherwise AlignedAllocator<CacheLineSize>)
+	//   "HardAes"   -> softAes = false    (otherwise softAes = true)
+	using InterpretedVmDefault = InterpretedVm<AlignedAllocator<CacheLineSize>, true>;
+	using InterpretedVmHardAes = InterpretedVm<AlignedAllocator<CacheLineSize>, false>;
+	using InterpretedVmLargePage = InterpretedVm<LargePageAllocator, true>;
+	using InterpretedVmLargePageHardAes = InterpretedVm<LargePageAllocator, false>;
+}
--- a/Show more
+++ b/Show more