diff --git a/CMakeLists.txt b/CMakeLists.txt
index 84b7701f1..95003034f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -488,17 +488,17 @@ if(CUDA_FOUND)
     if(CUDA_COMPILER STREQUAL "clang")
         # build device code with clang
         add_library(
-            xmrstak_cuda_backend
+            xmrstakrx_cuda_backend
             SHARED
             ${CUDASRCFILES}
         )
-        set_target_properties(xmrstak_cuda_backend PROPERTIES COMPILE_FLAGS ${CLANG_BUILD_FLAGS})
-        set_target_properties(xmrstak_cuda_backend PROPERTIES LINKER_LANGUAGE CXX)
+        set_target_properties(xmrstakrx_cuda_backend PROPERTIES COMPILE_FLAGS ${CLANG_BUILD_FLAGS})
+        set_target_properties(xmrstakrx_cuda_backend PROPERTIES LINKER_LANGUAGE CXX)
         set_source_files_properties(${CUDASRCFILES} PROPERTIES LANGUAGE CXX)
     else()
         #  build device code with nvcc
         cuda_add_library(
-            xmrstak_cuda_backend
+            xmrstakrx_cuda_backend
             SHARED
             ${CUDASRCFILES}
         )
@@ -506,8 +506,8 @@ if(CUDA_FOUND)
 
     set(CUDA_LIBRARIES ${CUDA_LIBRARIES})
     
-    target_link_libraries(xmrstak_cuda_backend ${CUDA_LIBRARIES})
-    target_link_libraries(xmrstak_cuda_backend xmr-stak-backend)
+    target_link_libraries(xmrstakrx_cuda_backend ${CUDA_LIBRARIES})
+    target_link_libraries(xmrstakrx_cuda_backend xmr-stak-backend)
 endif()
 
 # compile AMD backend
@@ -515,12 +515,12 @@ if(OpenCL_FOUND)
     file(GLOB OPENCLSRCFILES
         "xmrstak/backend/amd/amd_gpu/*.cpp"
         "xmrstak/backend/amd/*.cpp")
-    add_library(xmrstak_opencl_backend
+    add_library(xmrstakrx_opencl_backend
         SHARED
         ${OPENCLSRCFILES}
     )
-    target_link_libraries(xmrstak_opencl_backend ${OpenCL_LIBRARY} )
-    target_link_libraries(xmrstak_opencl_backend xmr-stak-backend)
+    target_link_libraries(xmrstakrx_opencl_backend ${OpenCL_LIBRARY} )
+    target_link_libraries(xmrstakrx_opencl_backend xmr-stak-backend)
 endif()
 
 # compile final binary
@@ -548,19 +548,19 @@ if( NOT CMAKE_INSTALL_PREFIX STREQUAL PROJECT_BINARY_DIR )
             RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/${EXECUTABLE_OUTPUT_PATH}")
     if(CUDA_FOUND)
         if(WIN32)
-            install(TARGETS xmrstak_cuda_backend
+            install(TARGETS xmrstakrx_cuda_backend
                 RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/${LIBRARY_OUTPUT_PATH}")
         else()
-            install(TARGETS xmrstak_cuda_backend
+            install(TARGETS xmrstakrx_cuda_backend
                 LIBRARY DESTINATION "${CMAKE_INSTALL_PREFIX}/${LIBRARY_OUTPUT_PATH}")
         endif()
     endif()
     if(OpenCL_FOUND)
         if(WIN32)
-            install(TARGETS xmrstak_opencl_backend
+            install(TARGETS xmrstakrx_opencl_backend
                 RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/${LIBRARY_OUTPUT_PATH}")
         else()
-            install(TARGETS xmrstak_opencl_backend
+            install(TARGETS xmrstakrx_opencl_backend
                 LIBRARY DESTINATION "${CMAKE_INSTALL_PREFIX}/${LIBRARY_OUTPUT_PATH}")
         endif()
     endif()
diff --git a/README.md b/README.md
index 633b4e630..96eb2c966 100644
--- a/README.md
+++ b/README.md
@@ -1,82 +1,10 @@
-###### fireice-uk's and psychocrypt's
-# XMR-Stak-RX: Free RandomX Mining Software
-
-XMR-Stak-RX is a universal free and open source stratum pool miner. This miner supports x86-64 CPUs, AMD and NVIDIA GPUs and can be used for various randomX variants.
-
-## Video guides
-[<img src="doc/_img/stak-yt-cover.jpg">](https://www.youtube.com/playlist?list=PLAhUkom29iGMFoN8pk91JA-oqvxlmJ5H8)
-###### Video by Crypto Sewer
-
-## Overview
-* [Features](#features)
-* [Supported coins and algorithms](#supported-coins-and-algorithms)
-* [Download](#download)
-* [FAQ](doc/FAQ.md)
-* [Developer Donation](#developer-donation)
-* [Developer PGP Key's](doc/pgp_keys.md)
-
-## Guides and FAQ
-To improve our support we created [Xmr-Stak forum](https://www.reddit.com/r/XmrStak). Check it out if you have a problem, or you are looking for most up to date config for your card and [guides](https://www.reddit.com/r/XmrStak/wiki/index).
-* [Usage](doc/usage.md)
-* [How to compile](doc/compile/compile.md)
-* [Fine tuning](doc/tuning.md)
-* [FAQ](doc/FAQ.md)
-* [Troubleshooting](doc/troubleshooting.md) (Fixing common problems)
-
-## Features
-
-- Supports all common backends (CPU/x86, AMD/NVIDIA GPU).
-- Supports all common OS (Linux, Windows and macOS).
-- Supports `randomx`, `randomx_loki` and `randomx_wow`
-- Easy to use and flexible in setup:
-  - guided start with easy/advanced setup option (no need to edit a config file for the first start)
-  - auto-configuration and config file creation for each backend.
-- Open source software (GPLv3)
-- TLS support.
-- [HTML statistics](doc/usage.md#html-and-json-api-report-configuraton)
-- [JSON API for monitoring](doc/usage.md#html-and-json-api-report-configuraton)
-
-## HTML reports
-  <img src="https://gist.githubusercontent.com/fireice-uk/2da301131ac01695ff79539a27b81d68/raw/4c09cdeee86f94df2e9dd86b927e64aded6184f5/xmr-stak-cpu-hashrate.png" width="260"> <img src="https://gist.githubusercontent.com/fireice-uk/2da301131ac01695ff79539a27b81d68/raw/4c09cdeee86f94df2e9dd86b927e64aded6184f5/xmr-stak-cpu-results.png" width="260"> <img src="https://gist.githubusercontent.com/fireice-uk/2da301131ac01695ff79539a27b81d68/raw/4c09cdeee86f94df2e9dd86b927e64aded6184f5/xmr-stak-cpu-connection.png" width="260">
-
-## Supported coins and algorithms
-
-Following coins can be mined using this miner:
-
-- [Loki]()
-- [Monero](https://getmonero.org)
-- [WOW]()
-
-
-**[Ryo Currency](https://ryo-currency.com)** - is a way for us to implement the ideas that we were unable to in
-Monero. See [here](https://github.com/fireice-uk/cryptonote-speedup-demo/) for details.
-
-If your preferred coin is not listed, you can choose one of the following mining algorithms:
-    
-- 1MiB scratchpad memory
-    - randomx_wow
-    
-
-- 2MiB scratchpad memory
-    - randomx
-    - randomx_loki
-    
-Please note, this list is not complete and is not an endorsement.
-
-## Download
-
-You can find the latest releases and precompiled binaries on GitHub under [Releases](https://github.com/fireice-uk/xmr-stak/releases).
-
-## Developer Donation
-
-If you want to donate directly to support further development, here are our wallets
-
-fireice-uk:
-```
-4581HhZkQHgZrZjKeCfCJxZff9E3xCgHGF25zABZz7oR71TnbbgiS7sK9jveE6Dx6uMs2LwszDuvQJgRZQotdpHt1fTdDhk
-```
-
-psychocrypt:
-```
-45tcqnJMgd3VqeTznNotiNj4G9PQoK67TGRiHyj6EYSZ31NUbAfs9XdiU5squmZb717iHJLxZv3KfEw8jCYGL5wa19yrVCn
-```
+<a href="https://github.com/fireice-uk/xmr-stak/tree/master/doc/README.md" target="_blank"><img src="doc/_img/gpu.png"></a>
+<a href="#select_coin" target="_blank"><img src="doc/_img/cpu.png"></a>
+<table>
+    <p id="select_coin">
+    <tr>
+        <td align="center"><a href=https://github.com/xmrig/xmrig><img src="doc/_img/xmrig.png"></a></td>
+        <td align="center"><a href=https://ragerx.lol><img src="doc/_img/ragerx.png"></a></td>
+        <td align="center"><a href=doc/README.md><img src="doc/_img/rx.png"></a></td>
+    </tr>
+</table>
\ No newline at end of file
diff --git a/doc/FAQ.md b/doc/FAQ.md
index b78ac15cb..c16e4adca 100644
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@@ -1,5 +1,5 @@
 # FAQ
-To improve our support we created [Xmr-Stak forum](https://www.reddit.com/r/XmrStak). Check it out if you have a problem, or you are looking for most up to date config for your card and [guides](https://www.reddit.com/r/XmrStak/wiki/index).
+To improve our support we created [Xmr-Stak-RX forum](https://www.reddit.com/r/XmrStak). Check it out if you have a problem, or you are looking for most up to date config for your card and [guides](https://www.reddit.com/r/XmrStak/wiki/index).
 
 
 ## Content Overview
@@ -10,13 +10,13 @@ To improve our support we created [Xmr-Stak forum](https://www.reddit.com/r/XmrS
 
 ### Virus Protection Alert
 Some virus protection software flags the miner binary as *malware*. This is a false positive — the software does not contain any malware (and since it is open source, you can verify that yourself!)
-If your antivirus software flags **xmr-stak**, it will likely move it to its quarantine area. You may have to whitelist **xmr-stak** in your antivirus.
+If your antivirus software flags **xmr-stak-rx**, it will likely move it to its quarantine area. You may have to whitelist **xmr-stak-rx** in your antivirus.
 
 ### Change Currency to Mine
 If the miner is compiled for Monero and Aeon than you can change
  - the value `currency` in the config *or*
  - start the miner with the [command line option](usage.md) `--currency monero` or `--currency aeon7`
- - run `xmr-stak --help` to see all supported currencies and algorithms
+ - run `xmr-stak-rx --help` to see all supported currencies and algorithms
 
 ### How can I mine Monero
 Set the value `currency` in `pools.txt` to `monero`.
diff --git a/doc/README.md b/doc/README.md
new file mode 100644
index 000000000..804668d32
--- /dev/null
+++ b/doc/README.md
@@ -0,0 +1,90 @@
+<table>
+    <tr>
+        <td align="center"><a href=#><img src="/doc/_img/xmr-stak-rx-btn.png"></a></td>
+        <td align="center"><a href=https://github.com/fireice-uk/xmr-stak/tree/master/doc/README.md><img src="/doc/_img/xmr-stak-btn.png"></a></td>
+        <td align="center"><a href=https://ragerx.lol><img src="/doc/_img/ragerx-btn.png"></a></td>
+    </tr>
+</table>
+
+<table>
+    <tr>
+        <td align="center"><a href=#features-overview><img src="/doc/_img/menu-features.png"></a></td>
+        <td align="center"><a href=#supported-coins-and-randomx-variants><img src="/doc/_img/menu-supported-coins.png"></a></td>
+        <td align="center"><a href=#donations><img src="/doc/_img/menu-donations.png"></a></td>
+        <td align="center"><a href=#get-miner><img src="/doc/_img/menu-get-miner.png"></a></td>
+        <td align="center"><a href=#support-additional-guides-and-feedback><img src="/doc/_img/menu-support.png"></a></td>
+    </tr>
+</table>
+
+<table>
+    <tr>
+        <td align="center"><a href=usage.md><img src="/doc/_img/usage.png"></a></td>
+        <td align="center"><a href=compile/compile.md><img src="/doc/_img/how-to-compile.png"></a></td>
+        <td align="center"><a href=tuning.md><img src="/doc/_img/fine-tuning.png"></a></td>
+        <td align="center"><a href=troubleshooting.md><img src="/doc/_img/troubleshooting.png"></a></td>
+        <td align="center"><a href=FAQ.md><img src="/doc/_img/faq.png"></a></td>
+    </tr>
+</table>
+
+## Features overview
+[<img src="/doc/_img/features.png">](#)
+
+
+### Supported coins and RandomX variants
+Xmr-Stak-RX supports several variants of the RandomX algorithm. To select one, set the `"currency"` parameter (either in the `pools.txt` config file or during the startup configuration) to one of the coin aliases below, and the miner will pick its variant of the RandomX algorithm for mining:
+
+| Coin name | Coin alias in config | POW type |
+| --- | --- |  --- |
+| Monero | `monero` | RandomX |
+| Loki Network | `loki` | RandomXL |
+| Wownero (Monero's testnet) | `wownero` | RandomWOW |
+
+
+## Donations
+[<img src="/doc/_img/fee.png">](#)
+
+## Get Miner
+Please note that the code is developed on the [dev branch](#). If you want to check out the latest updates before they are merged into the [main branch](#), please refer there. The master branch will always point to a version that we consider stable, so you can download the code by simply typing `git clone https://github.com/fireice-uk/xmr-stak-rx.git`  
+
+You can also find the latest releases, changelog and precompiled binaries on GitHub under the [releases](#) section.
+
+If you want to compile the miner from source files, navigate to the ["how to compile"](compile/compile.md) section of the docs or the [xmr-stak forum](https://www.reddit.com/r/XmrStak), where you will find the latest step-by-step instructions.
+
+
+## Start Mining
+The miner offers two ways to do the initial configuration: simple and advanced. The simple method prompts the user for only the minimum information. Answer the yes/no questions with y (or yes) or n (or no):
+
+#### Simple setup:
+* `Use simple setup method?` y    
+* `Please enter the currency that you want to mine:` Enter currency or mining algorithm  
+* `Enter pool address (pool address:port):` Enter pool connection address:port  
+* `Username (wallet address or pool login):` Enter wallet address
+* `Password (mostly empty or x):` press Enter  
+* `Does this pool port support TLS/SSL? Use no if unknown. (y/N):` press y or n  
+
+#### Advanced setup:
+* `Use simple setup method?` n  
+* `Do you want to use the HTTP interface? Unlike the screen display, browser interface is not affected by the GPU lag. If you don't want to use it, please enter 0, otherwise enter port number that the miner should listen on` 5656
+* `Please enter the currency that you want to mine:` Enter currency or mining algorithm
+* `Enter pool address (pool address:port):` Enter pool connection address:port 
+* `Username (wallet address or pool login):` Enter wallet address
+* `Password (mostly empty or x):` press Enter
+* `Rig identifier for pool-side statistics (needs pool support). Can be empty:` Enter rig name or press Enter
+* `Does this pool port support TLS/SSL? Use no if unknown. (y/N)` Enter y or n
+* `Do you want to use nicehash on this pool? (y/N)` n
+* `Do you want to use multiple pools? (y/N)` Enter y if you want to set up a backup pool, or n otherwise
+
+
+## Support additional guides and feedback
+[<img src="/doc/_img/YT.png">](#)
+To improve our support we created the [Xmr-Stak forum](https://www.reddit.com/r/XmrStak), which also covers Xmr-Stak-RX. Check it out if you have a problem or are looking for the most up-to-date config for your card and [guides](https://www.reddit.com/r/XmrStak/wiki/index).
+
+<table>
+    <tr>
+        <td align="center"><a href=usage.md><img src="/doc/_img/usage.png"></a></td>
+        <td align="center"><a href=compile/compile.md><img src="/doc/_img/how-to-compile.png"></a></td>
+        <td align="center"><a href=tuning.md><img src="/doc/_img/fine-tuning.png"></a></td>
+        <td align="center"><a href=troubleshooting.md><img src="/doc/_img/troubleshooting.png"></a></td>
+        <td align="center"><a href=FAQ.md><img src="/doc/_img/faq.png"></a></td>
+    </tr>
+</table>
diff --git a/doc/_img/2ragerx-btn.png b/doc/_img/2ragerx-btn.png
new file mode 100644
index 000000000..1c0edd98c
Binary files /dev/null and b/doc/_img/2ragerx-btn.png differ
diff --git a/doc/_img/2xmr-stak-btn.png b/doc/_img/2xmr-stak-btn.png
new file mode 100644
index 000000000..7626e27c1
Binary files /dev/null and b/doc/_img/2xmr-stak-btn.png differ
diff --git a/doc/_img/YT.png b/doc/_img/YT.png
new file mode 100644
index 000000000..cf7a869a2
Binary files /dev/null and b/doc/_img/YT.png differ
diff --git a/doc/_img/cpu.png b/doc/_img/cpu.png
new file mode 100644
index 000000000..6a370fbc9
Binary files /dev/null and b/doc/_img/cpu.png differ
diff --git a/doc/_img/faq-green.png b/doc/_img/faq-green.png
new file mode 100644
index 000000000..440a855b2
Binary files /dev/null and b/doc/_img/faq-green.png differ
diff --git a/doc/_img/faq.png b/doc/_img/faq.png
new file mode 100644
index 000000000..83167e3c7
Binary files /dev/null and b/doc/_img/faq.png differ
diff --git a/doc/_img/features-xmr-stak.png b/doc/_img/features-xmr-stak.png
new file mode 100644
index 000000000..ef75a3b14
Binary files /dev/null and b/doc/_img/features-xmr-stak.png differ
diff --git a/doc/_img/features.png b/doc/_img/features.png
new file mode 100644
index 000000000..37c877291
Binary files /dev/null and b/doc/_img/features.png differ
diff --git a/doc/_img/fee.png b/doc/_img/fee.png
new file mode 100644
index 000000000..cd3cdaf00
Binary files /dev/null and b/doc/_img/fee.png differ
diff --git a/doc/_img/fine-tuning-green.png b/doc/_img/fine-tuning-green.png
new file mode 100644
index 000000000..b58184bfa
Binary files /dev/null and b/doc/_img/fine-tuning-green.png differ
diff --git a/doc/_img/fine-tuning.png b/doc/_img/fine-tuning.png
new file mode 100644
index 000000000..6b817cffe
Binary files /dev/null and b/doc/_img/fine-tuning.png differ
diff --git a/doc/_img/gpu.png b/doc/_img/gpu.png
new file mode 100644
index 000000000..4d5578007
Binary files /dev/null and b/doc/_img/gpu.png differ
diff --git a/doc/_img/header.png b/doc/_img/header.png
new file mode 100644
index 000000000..8c9eeefad
Binary files /dev/null and b/doc/_img/header.png differ
diff --git a/doc/_img/how-to-compile-green.png b/doc/_img/how-to-compile-green.png
new file mode 100644
index 000000000..e82c8b693
Binary files /dev/null and b/doc/_img/how-to-compile-green.png differ
diff --git a/doc/_img/how-to-compile.png b/doc/_img/how-to-compile.png
new file mode 100644
index 000000000..a54603484
Binary files /dev/null and b/doc/_img/how-to-compile.png differ
diff --git a/doc/_img/html_reports.png b/doc/_img/html_reports.png
new file mode 100644
index 000000000..2d17bc1bf
Binary files /dev/null and b/doc/_img/html_reports.png differ
diff --git a/doc/_img/menu-donations-green.png b/doc/_img/menu-donations-green.png
new file mode 100644
index 000000000..a299980d3
Binary files /dev/null and b/doc/_img/menu-donations-green.png differ
diff --git a/doc/_img/menu-donations.png b/doc/_img/menu-donations.png
new file mode 100644
index 000000000..f73facf6f
Binary files /dev/null and b/doc/_img/menu-donations.png differ
diff --git a/doc/_img/menu-features-green.png b/doc/_img/menu-features-green.png
new file mode 100644
index 000000000..527d68d4c
Binary files /dev/null and b/doc/_img/menu-features-green.png differ
diff --git a/doc/_img/menu-features.png b/doc/_img/menu-features.png
new file mode 100644
index 000000000..bcf71064d
Binary files /dev/null and b/doc/_img/menu-features.png differ
diff --git a/doc/_img/menu-get-miner-green.png b/doc/_img/menu-get-miner-green.png
new file mode 100644
index 000000000..9e3bd5753
Binary files /dev/null and b/doc/_img/menu-get-miner-green.png differ
diff --git a/doc/_img/menu-get-miner.png b/doc/_img/menu-get-miner.png
new file mode 100644
index 000000000..891a35f16
Binary files /dev/null and b/doc/_img/menu-get-miner.png differ
diff --git a/doc/_img/menu-support-green.png b/doc/_img/menu-support-green.png
new file mode 100644
index 000000000..3db8e76ef
Binary files /dev/null and b/doc/_img/menu-support-green.png differ
diff --git a/doc/_img/menu-support.png b/doc/_img/menu-support.png
new file mode 100644
index 000000000..5cd80e42f
Binary files /dev/null and b/doc/_img/menu-support.png differ
diff --git a/doc/_img/menu-supported-coins-green.png b/doc/_img/menu-supported-coins-green.png
new file mode 100644
index 000000000..8678ea444
Binary files /dev/null and b/doc/_img/menu-supported-coins-green.png differ
diff --git a/doc/_img/menu-supported-coins.png b/doc/_img/menu-supported-coins.png
new file mode 100644
index 000000000..aabc37283
Binary files /dev/null and b/doc/_img/menu-supported-coins.png differ
diff --git a/doc/_img/ragerx-btn.png b/doc/_img/ragerx-btn.png
new file mode 100644
index 000000000..d08e245fc
Binary files /dev/null and b/doc/_img/ragerx-btn.png differ
diff --git a/doc/_img/ragerx.png b/doc/_img/ragerx.png
new file mode 100644
index 000000000..bc2453d2a
Binary files /dev/null and b/doc/_img/ragerx.png differ
diff --git a/doc/_img/rx.png b/doc/_img/rx.png
new file mode 100644
index 000000000..d9c4c3dfa
Binary files /dev/null and b/doc/_img/rx.png differ
diff --git a/doc/_img/split.png b/doc/_img/split.png
new file mode 100644
index 000000000..11a8635b9
Binary files /dev/null and b/doc/_img/split.png differ
diff --git a/doc/_img/troubleshooting-green.png b/doc/_img/troubleshooting-green.png
new file mode 100644
index 000000000..d36cec8b8
Binary files /dev/null and b/doc/_img/troubleshooting-green.png differ
diff --git a/doc/_img/troubleshooting.png b/doc/_img/troubleshooting.png
new file mode 100644
index 000000000..e57eda740
Binary files /dev/null and b/doc/_img/troubleshooting.png differ
diff --git a/doc/_img/usage-green.png b/doc/_img/usage-green.png
new file mode 100644
index 000000000..c60b9a432
Binary files /dev/null and b/doc/_img/usage-green.png differ
diff --git a/doc/_img/usage.png b/doc/_img/usage.png
new file mode 100644
index 000000000..d9421ba66
Binary files /dev/null and b/doc/_img/usage.png differ
diff --git a/doc/_img/xmr-stak-btn-active.png b/doc/_img/xmr-stak-btn-active.png
new file mode 100644
index 000000000..68520be91
Binary files /dev/null and b/doc/_img/xmr-stak-btn-active.png differ
diff --git a/doc/_img/xmr-stak-btn.png b/doc/_img/xmr-stak-btn.png
new file mode 100644
index 000000000..0356f41aa
Binary files /dev/null and b/doc/_img/xmr-stak-btn.png differ
diff --git a/doc/_img/xmr-stak-cpu-connection.png b/doc/_img/xmr-stak-cpu-connection.png
new file mode 100644
index 000000000..d07a8d0a9
Binary files /dev/null and b/doc/_img/xmr-stak-cpu-connection.png differ
diff --git a/doc/_img/xmr-stak-cpu-hashrate.png b/doc/_img/xmr-stak-cpu-hashrate.png
new file mode 100644
index 000000000..488a34825
Binary files /dev/null and b/doc/_img/xmr-stak-cpu-hashrate.png differ
diff --git a/doc/_img/xmr-stak-cpu-results.png b/doc/_img/xmr-stak-cpu-results.png
new file mode 100644
index 000000000..7244f9579
Binary files /dev/null and b/doc/_img/xmr-stak-cpu-results.png differ
diff --git a/doc/_img/xmr-stak-rx-btn-inactive.png b/doc/_img/xmr-stak-rx-btn-inactive.png
new file mode 100644
index 000000000..1644a9505
Binary files /dev/null and b/doc/_img/xmr-stak-rx-btn-inactive.png differ
diff --git a/doc/_img/xmr-stak-rx-btn.png b/doc/_img/xmr-stak-rx-btn.png
new file mode 100644
index 000000000..39f0c87f7
Binary files /dev/null and b/doc/_img/xmr-stak-rx-btn.png differ
diff --git a/doc/_img/xmrig.png b/doc/_img/xmrig.png
new file mode 100644
index 000000000..cdeaa4501
Binary files /dev/null and b/doc/_img/xmrig.png differ
diff --git a/doc/compile/compile.md b/doc/compile/compile.md
index 08801d8a0..ca06e1280 100644
--- a/doc/compile/compile.md
+++ b/doc/compile/compile.md
@@ -1,4 +1,4 @@
-# Compile xmr-stak
+# Compile Xmr-Stak-RX
 
 ## Content Overview
 * [Build System](#build-system)
@@ -17,7 +17,7 @@ The build system is CMake, if you are not familiar with CMake you can learn more
 
 By default the miner will be build with all dependencies. Each optional dependency can be disabled (this will reduce the miner features).
 
-There are two easy ways to set variables for `cmake` to configure *xmr-stak*
+There are two easy ways to set variables for `cmake` to configure *xmr-stak-rx*
 - use the ncurses GUI
   - `ccmake ..`
   - edit your options
@@ -35,8 +35,8 @@ After the configuration you need to compile the miner, follow the guide for your
 
 ## Generic Build Options
 - `CMAKE_INSTALL_PREFIX` install miner to the home folder
-  - `cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/xmr-stak`
-  - you can find the binary and the `config.txt` file after `make install` in `$HOME/xmr-stak/bin`
+  - `cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/xmr-stak-rx`
+  - you can find the binary and the `config.txt` file after `make install` in `$HOME/xmr-stak-rx/bin`
 - `CMAKE_LINK_STATIC` link libgcc and libstdc++ libraries static (default OFF)
   - disable with `cmake .. -DCMAKE_LINK_STATIC=ON`
   - if you use static compile to run the miner on another system set `-DXMR-STAK_COMPILE=generic`
@@ -124,5 +124,5 @@ on Windows Driver Release Notes
 nVidia always puts the runtime-included CUDA version in the release notes PDF for whatever driver, doesn't hurt to
 double check your specific one.
 
-For better navigation of CUDA version matching, xmr-stak will display both version numbers during CUDA detection phases
+For better navigation of CUDA version matching, xmr-stak-rx will display both version numbers during CUDA detection phases
 such as `[9.2/10.0]` which is the compiled (SDK) version and the current (driver) runtime version.
\ No newline at end of file
diff --git a/doc/compile/compile_FreeBSD.md b/doc/compile/compile_FreeBSD.md
index a4eb3414b..6015e23df 100644
--- a/doc/compile/compile_FreeBSD.md
+++ b/doc/compile/compile_FreeBSD.md
@@ -1,4 +1,4 @@
-# Compile **xmr-stak** for FreeBSD
+# Compile **Xmr-Stak-RX** for FreeBSD
 
 ## Install Dependencies
 
@@ -10,10 +10,10 @@ From the root shell, run the following commands:
 
 Type 'y' and hit enter to proceed with installing the packages.
 
-    git clone https://github.com/fireice-uk/xmr-stak.git
+    git clone https://github.com/fireice-uk/xmr-stak.git -b xmr-stak-rx
     mkdir xmr-stak/build
     cd xmr-stak/build
     cmake ..
     make install
 
-Now you have the binary located at "bin/xmr-stak" and the needed shared libraries.
+Now you have the binary located at "bin/xmr-stak-rx" and the needed shared libraries.
diff --git a/doc/compile/compile_Linux.md b/doc/compile/compile_Linux.md
index 6c80bc56a..51bae9c96 100644
--- a/doc/compile/compile_Linux.md
+++ b/doc/compile/compile_Linux.md
@@ -24,7 +24,7 @@ ROCm is not supporting old GPUs please check if your GPU is supported https://ro
 ```
     # Ubuntu / Debian
     sudo apt install libmicrohttpd-dev libssl-dev cmake build-essential libhwloc-dev
-    git clone https://github.com/fireice-uk/xmr-stak.git
+    git clone https://github.com/fireice-uk/xmr-stak.git -b xmr-stak-rx
     mkdir xmr-stak/build
     cd xmr-stak/build
     cmake ..
@@ -32,7 +32,7 @@ ROCm is not supporting old GPUs please check if your GPU is supported https://ro
 
     # Arch
     sudo pacman -S --needed base-devel hwloc openssl cmake libmicrohttpd
-    git clone https://github.com/fireice-uk/xmr-stak.git
+    git clone https://github.com/fireice-uk/xmr-stak.git -b xmr-stak-rx
     mkdir xmr-stak/build
     cd xmr-stak/build
     cmake ..
@@ -40,7 +40,7 @@ ROCm is not supporting old GPUs please check if your GPU is supported https://ro
 
     # Fedora
     sudo dnf install gcc gcc-c++ hwloc-devel libmicrohttpd-devel libstdc++-static make openssl-devel cmake
-    git clone https://github.com/fireice-uk/xmr-stak.git
+    git clone https://github.com/fireice-uk/xmr-stak.git -b xmr-stak-rx
     mkdir xmr-stak/build
     cd xmr-stak/build
     cmake ..
@@ -50,7 +50,7 @@ ROCm is not supporting old GPUs please check if your GPU is supported https://ro
     sudo yum install centos-release-scl epel-release
     sudo yum install cmake3 devtoolset-4-gcc* hwloc-devel libmicrohttpd-devel openssl-devel make
     scl enable devtoolset-4 bash
-    git clone https://github.com/fireice-uk/xmr-stak.git
+    git clone https://github.com/fireice-uk/xmr-stak.git -b xmr-stak-rx
     mkdir xmr-stak/build
     cd xmr-stak/build
     cmake3 ..
@@ -65,7 +65,7 @@ ROCm is not supporting old GPUs please check if your GPU is supported https://ro
     cd /tmp/cmake-3.4.1/ && ./configure && make && sudo make install && cd -
     sudo update-alternatives --install /usr/bin/cmake cmake /usr/local/bin/cmake 1 --force
     sudo apt install libmicrohttpd-dev libssl-dev libhwloc-dev
-    git clone https://github.com/fireice-uk/xmr-stak.git
+    git clone https://github.com/fireice-uk/xmr-stak.git -b xmr-stak-rx
     mkdir xmr-stak/build
     cd xmr-stak/build
     cmake ..
@@ -90,7 +90,7 @@ ROCm is not supporting old GPUs please check if your GPU is supported https://ro
     make
     sudo make install
     cd ..
-    git clone http://github.com/fireice-uk/xmr-stak
+    git clone http://github.com/fireice-uk/xmr-stak -b xmr-stak-rx
     cd xmr-stak
     mkdir build
     cd build
@@ -116,7 +116,5 @@ cmake -DCUDA_HOST_COMPILER=/usr/bin/gcc-5 ..
 ```
     cmake -DCMAKE_LINK_STATIC=ON -DXMR-STAK_COMPILE=generic .
     make install
-    cd bin\Release
-    copy C:\xmr-stak-dep\openssl\bin\* .
 ```
 Note - cmake caches variables, so if you want to do a dynamic build later you need to specify '-DCMAKE_LINK_STATIC=OFF'
diff --git a/doc/compile/compile_macOS.md b/doc/compile/compile_macOS.md
index 46f1d5b32..c39f7d10f 100644
--- a/doc/compile/compile_macOS.md
+++ b/doc/compile/compile_macOS.md
@@ -1,8 +1,8 @@
-# Compile **xmr-stak** for macOS
+# Compile **xmr-stak-rx** for macOS
 
 ## Dependencies
 
-Assuming you already have [Homebrew](https://brew.sh) installed, the installation of dependencies is pretty straightforward and will generate the `xmr-stak` binary in the `bin/` directory.
+Assuming you already have [Homebrew](https://brew.sh) installed, the installation of dependencies is pretty straightforward and will generate the `xmr-stak-rx` binary in the `bin/` directory.
 
 ### For NVIDIA GPUs
 
diff --git a/doc/troubleshooting.md b/doc/troubleshooting.md
index fb0dc88ce..5529f44dc 100644
--- a/doc/troubleshooting.md
+++ b/doc/troubleshooting.md
@@ -1,5 +1,5 @@
 # Troubleshooting
-To improve our support we created [Xmr-Stak forum](https://www.reddit.com/r/XmrStak). Check it out if you have a problem, or you are looking for most up to date config for your card and [guides](https://www.reddit.com/r/XmrStak/wiki/index).
+To improve our support we created [Xmr-Stak-RX forum](https://www.reddit.com/r/XmrStak). Check it out if you have a problem, or you are looking for most up to date config for your card and [guides](https://www.reddit.com/r/XmrStak/wiki/index).
 
 
 ### 1. CL_MEM_OBJECT_ALLOCATION_FAILURE when calling clEnqueue
@@ -102,7 +102,7 @@ If that happens, disable all auto-starting applications and run the miner after
 
  
 ### 12. (Ubuntu compiling) - Nvidia insufficient driver
-If you have this error after compiling xmr-stak in Ubuntu - make sure you have the latest drivers and not X.org.X Nouveau or v390. Install them manually or with [cuda package](https://www.reddit.com/r/XmrStak/wiki/guides/startup#wiki_2._ubuntu_18.10_setup_.2B_nvidia_.28compiling_from_source.29)
+If you have this error after compiling xmr-stak-rx in Ubuntu - make sure you have the latest drivers and not X.org.X Nouveau or v390. Install them manually or with [cuda package](https://www.reddit.com/r/XmrStak/wiki/guides/startup#wiki_2._ubuntu_18.10_setup_.2B_nvidia_.28compiling_from_source.29)
 
  
 
diff --git a/doc/tuning.md b/doc/tuning.md
index 7ee909a68..98f766fc5 100644
--- a/doc/tuning.md
+++ b/doc/tuning.md
@@ -162,7 +162,7 @@ After setting number of checks per intensity value, you will need to set ceiling
 **1.Enabling and configuring auto-tune**  
 Navigate to amd.txt config file in miner's folder, find (in the bottom part) parameter "auto_tune" : 0, and set it to "auto_tune" : 6, (6-10 rounds per intensity value suits most cases.)
 Set autogenerated value of "intensity" : X, for each thread in amd.txt to slightly higher level (e.g. from 890 to 1000)
-Start xmr-stak.exe
+Start xmr-stak-rx.exe
 
 **2. Reading and understanding the log**  
 Here is an example of log for 1 GPU with 2 threads (your values will vary):
diff --git a/doc/usage.md b/doc/usage.md
index 800ff6949..4c12610f9 100644
--- a/doc/usage.md
+++ b/doc/usage.md
@@ -1,4 +1,4 @@
-# HowTo Use Xmr-Stak
+# HowTo Use Xmr-Stak-RX
 
 ## Content Overview
 * [Configurations](#configurations)
@@ -20,7 +20,7 @@ The number of files depends on the available backends.
 Note: If the pool is ignoring the option `rig_id` in `pools.txt` to name your worker please check the pool documentation how a worker name can be set.
 
 ## Usage on Windows
-1) Double click the `xmr-stak.exe` file
+1) Double click the `xmr-stak-rx.exe` file
 2) Fill in the pool url settings, currency, username and password
 
 `set XMRSTAK_NOWAIT=1` disable the dialog `Press any key to exit.` for non UAC execution.
@@ -28,20 +28,20 @@ Note: If the pool is ignoring the option `rig_id` in `pools.txt` to name your wo
 
 ## Usage on Linux & macOS
 1) Open a terminal within the folder with the binary
-2) Start the miner with `./xmr-stak`
+2) Start the miner with `./xmr-stak-rx`
 
 ## Command Line Options
 
 The miner allow to overwrite some of the settings via command line options.
-Run `xmr-stak --help` to show all available command line options.
+Run `xmr-stak-rx --help` to show all available command line options.
 
 ## Use Different Backends
 
-On linux and OSX please add `./` before the binary name `xmr-stak`.
+On linux and OSX please add `./` before the binary name `xmr-stak-rx`.
 
 ### CPU Only:
 ```
-xmr-stak --noAMD --noNVIDIA
+xmr-stak-rx --noAMD --noNVIDIA
 ```
 
 ### NVIDIA/AMD Only:
@@ -49,7 +49,7 @@ xmr-stak --noAMD --noNVIDIA
 The miner will automatically detect if CUDA (for NVIDIA GPUs) or OpenCL (for AMD GPUs) is available.
 
 ```
-xmr-stak --noCPU
+xmr-stak-rx --noCPU
 ```
 
 ### NVIDIA via OpenCL
@@ -58,7 +58,7 @@ It is possible to use the OpenCl backend which is originally created for AMD GPU
 Some NVIDIA GPUs can reach better performance with this backend.
 
 ```
-xmr-stak --openCLVendor NVIDIA --noNVIDIA
+xmr-stak-rx --openCLVendor NVIDIA --noNVIDIA
 ```
 
 ## Docker image usage
@@ -66,15 +66,15 @@ xmr-stak --openCLVendor NVIDIA --noNVIDIA
 You can run the Docker image the following way:
 
 ```
-docker run --rm -it -u $(id -u):$(id -g) --name fireice-uk/xmr-stak -v "$PWD":/mnt xmr-stak
-docker stop xmr-stak
-docker run --rm -it -u $(id -u):$(id -g) --name fireice-uk/xmr-stak -v "$PWD":/mnt xmr-stak --config config.txt
+docker run --rm -it -u $(id -u):$(id -g) --name fireice-uk/xmr-stak-rx -v "$PWD":/mnt xmr-stak-rx
+docker stop xmr-stak-rx
+docker run --rm -it -u $(id -u):$(id -g) --name fireice-uk/xmr-stak-rx -v "$PWD":/mnt xmr-stak-rx --config config.txt
 ```
 
 Debug the docker image by getting inside:
 
 ```
-docker run --entrypoint=/bin/bash --rm -it -u $(id -u):$(id -g) --name fireice-uk/xmr-stak -v "$PWD":/mnt xmr-stak
+docker run --entrypoint=/bin/bash --rm -it -u $(id -u):$(id -g) --name fireice-uk/xmr-stak-rx -v "$PWD":/mnt xmr-stak-rx
 ```
 
 ## HTML and JSON API report configuration
diff --git a/xmrstak/backend/backendConnector.cpp b/xmrstak/backend/backendConnector.cpp
index 5baab8fae..1be778618 100644
--- a/xmrstak/backend/backendConnector.cpp
+++ b/xmrstak/backend/backendConnector.cpp
@@ -56,10 +56,6 @@ bool BackendConnector::self_test()
 
 std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
 {
-
-	//randomX_global_ctx::inst();
-//randomX_global_ctx::inst();
-
 	std::vector<iBackend*>* pvThreads = new std::vector<iBackend*>;
 
 #ifndef CONF_NO_OPENCL
@@ -67,7 +63,7 @@ std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
 	{
 		const std::string backendName = xmrstak::params::inst().openCLVendor;
 		plugin amdplugin;
-		amdplugin.load(backendName, "xmrstak_opencl_backend");
+		amdplugin.load(backendName, "xmrstakrx_opencl_backend");
 		std::vector<iBackend*>* amdThreads = amdplugin.startBackend(static_cast<uint32_t>(pvThreads->size()), pWork, environment::inst());
 		size_t numWorkers = 0u;
 		if(amdThreads != nullptr)
@@ -88,9 +84,9 @@ std::vector<iBackend*>* BackendConnector::thread_starter(miner_work& pWork)
 
 		plugin nvidiaplugin;
 #ifdef XMRSTAK_DEV_RELEASE
-		std::vector<std::string> libNames = {"xmrstak_cuda_backend_cuda10_0", "xmrstak_cuda_backend"};
+		std::vector<std::string> libNames = {"xmrstakrx_cuda_backend_cuda10_0", "xmrstakrx_cuda_backend"};
 #else
-		std::vector<std::string> libNames = {"xmrstak_cuda_backend"};
+		std::vector<std::string> libNames = {"xmrstakrx_cuda_backend"};
 #endif
 		size_t numWorkers = 0u;
 
diff --git a/xmrstak/backend/cpu/autoAdjust.hpp b/xmrstak/backend/cpu/autoAdjust.hpp
index cd40fd195..caa3530ac 100644
--- a/xmrstak/backend/cpu/autoAdjust.hpp
+++ b/xmrstak/backend/cpu/autoAdjust.hpp
@@ -52,7 +52,7 @@ class autoAdjust
 			if(L3KB_size < halfHashMemSizeKB || L3KB_size > (halfHashMemSizeKB * 2048))
 				printer::inst()->print_msg(L0, "Autoconf failed: L3 size sanity check failed - %u KB.", L3KB_size);
 
-			conf += std::string("    { \"low_power_mode\" : false, \"no_prefetch\" : true,  \"asm\" : \"off\", \"affine_to_cpu\" : false },\n");
+			conf += std::string("    { \"low_power_mode\" : false, \"affine_to_cpu\" : false },\n");
 			printer::inst()->print_msg(L0, "Autoconf FAILED. Create config for a single thread. Please try to add new ones until the hashrate slows down.");
 		}
 		else
@@ -76,7 +76,7 @@ class autoAdjust
 
 				conf += std::string("    { \"low_power_mode\" : ");
 				conf += std::string(double_mode ? "true" : "false");
-				conf += std::string(", \"no_prefetch\" : true, \"asm\" : \"auto\", \"affine_to_cpu\" : ");
+				conf += std::string(", \"affine_to_cpu\" : ");
 				conf += std::to_string(aff_id);
 				conf += std::string(" },\n");
 
diff --git a/xmrstak/backend/cpu/autoAdjustHwloc.hpp b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
index 10a395e7a..f06244c8a 100644
--- a/xmrstak/backend/cpu/autoAdjustHwloc.hpp
+++ b/xmrstak/backend/cpu/autoAdjustHwloc.hpp
@@ -71,7 +71,7 @@ class autoAdjustHwloc
 			{
 				conf += std::string("    { \"low_power_mode\" : ");
 				conf += std::string((id & 0x8000000) != 0 ? "true" : "false");
-				conf += std::string(", \"no_prefetch\" : true, \"asm\" : \"auto\", \"affine_to_cpu\" : ");
+				conf += std::string(", \"affine_to_cpu\" : ");
 				conf += std::to_string(id & 0x7FFFFFF);
 				conf += std::string(" },\n");
 			}
diff --git a/xmrstak/backend/cpu/crypto/randomx/aes_hash.cpp b/xmrstak/backend/cpu/crypto/randomx/aes_hash.cpp
index fe149dfec..4a400d0a8 100644
--- a/xmrstak/backend/cpu/crypto/randomx/aes_hash.cpp
+++ b/xmrstak/backend/cpu/crypto/randomx/aes_hash.cpp
@@ -212,3 +212,84 @@ void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
 
 template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
 template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
+
+template<bool softAes> // softAes is forwarded as the template argument of aesenc/aesdec
+void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state) {
+	uint8_t* scratchpadPtr = (uint8_t*)scratchpad;
+	const uint8_t* scratchpadEnd = scratchpadPtr + scratchpadSize;
+
+	// initial hash state (four 128-bit lanes); the loop below absorbs the old scratchpad contents into it while overwriting the scratchpad with generator output
+	rx_vec_i128 hash_state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
+	rx_vec_i128 hash_state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
+	rx_vec_i128 hash_state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
+	rx_vec_i128 hash_state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
+
+	const rx_vec_i128 key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
+	const rx_vec_i128 key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
+	const rx_vec_i128 key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
+	const rx_vec_i128 key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
+
+	rx_vec_i128 fill_state0 = rx_load_vec_i128((rx_vec_i128*)fill_state + 0); // fill_state is read as four 128-bit lanes (64 bytes)
+	rx_vec_i128 fill_state1 = rx_load_vec_i128((rx_vec_i128*)fill_state + 1);
+	rx_vec_i128 fill_state2 = rx_load_vec_i128((rx_vec_i128*)fill_state + 2);
+	rx_vec_i128 fill_state3 = rx_load_vec_i128((rx_vec_i128*)fill_state + 3);
+
+	constexpr int PREFETCH_DISTANCE = 4096; // bytes the prefetch pointer runs ahead of scratchpadPtr
+	const char* prefetchPtr = ((const char*)scratchpad) + PREFETCH_DISTANCE;
+	scratchpadEnd -= PREFETCH_DISTANCE; // first pass stops early so prefetchPtr never runs past the end of the scratchpad
+
+	for (int i = 0; i < 2; ++i) { // pass 0: everything except the last PREFETCH_DISTANCE bytes; pass 1: the remaining tail
+		//process 64 bytes at a time in 4 lanes
+		while (scratchpadPtr < scratchpadEnd) {
+			hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 0)); // absorb the current contents before they are overwritten below
+			hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 1));
+			hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 2));
+			hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128((rx_vec_i128*)scratchpadPtr + 3));
+
+			fill_state0 = aesdec<softAes>(fill_state0, key0); // advance the generator state by one round per lane
+			fill_state1 = aesenc<softAes>(fill_state1, key1);
+			fill_state2 = aesdec<softAes>(fill_state2, key2);
+			fill_state3 = aesenc<softAes>(fill_state3, key3);
+
+			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 0, fill_state0); // overwrite the 64 bytes that were just hashed
+			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 1, fill_state1);
+			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 2, fill_state2);
+			rx_store_vec_i128((rx_vec_i128*)scratchpadPtr + 3, fill_state3);
+
+			rx_prefetch_t0(prefetchPtr);
+
+			scratchpadPtr += 64;
+			prefetchPtr += 64;
+		}
+		prefetchPtr = (const char*) scratchpad; // during the tail pass, prefetch from the start of the scratchpad instead
+		scratchpadEnd += PREFETCH_DISTANCE; // restore the real end for the tail pass
+	}
+
+	rx_store_vec_i128((rx_vec_i128*)fill_state + 0, fill_state0); // write the advanced generator state back to the caller's buffer
+	rx_store_vec_i128((rx_vec_i128*)fill_state + 1, fill_state1);
+	rx_store_vec_i128((rx_vec_i128*)fill_state + 2, fill_state2);
+	rx_store_vec_i128((rx_vec_i128*)fill_state + 3, fill_state3);
+
+	//two extra rounds to achieve full diffusion
+	rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
+	rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
+
+	hash_state0 = aesenc<softAes>(hash_state0, xkey0);
+	hash_state1 = aesdec<softAes>(hash_state1, xkey0);
+	hash_state2 = aesenc<softAes>(hash_state2, xkey0);
+	hash_state3 = aesdec<softAes>(hash_state3, xkey0);
+
+	hash_state0 = aesenc<softAes>(hash_state0, xkey1);
+	hash_state1 = aesdec<softAes>(hash_state1, xkey1);
+	hash_state2 = aesenc<softAes>(hash_state2, xkey1);
+	hash_state3 = aesdec<softAes>(hash_state3, xkey1);
+
+	//output hash (four 128-bit lanes, 64 bytes written to `hash`)
+	rx_store_vec_i128((rx_vec_i128*)hash + 0, hash_state0);
+	rx_store_vec_i128((rx_vec_i128*)hash + 1, hash_state1);
+	rx_store_vec_i128((rx_vec_i128*)hash + 2, hash_state2);
+	rx_store_vec_i128((rx_vec_i128*)hash + 3, hash_state3);
+}
+
+template void hashAndFillAes1Rx4<false>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
+template void hashAndFillAes1Rx4<true>(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
diff --git a/xmrstak/backend/cpu/crypto/randomx/aes_hash.hpp b/xmrstak/backend/cpu/crypto/randomx/aes_hash.hpp
index b4d0e9405..9f75f73ae 100644
--- a/xmrstak/backend/cpu/crypto/randomx/aes_hash.hpp
+++ b/xmrstak/backend/cpu/crypto/randomx/aes_hash.hpp
@@ -38,3 +38,6 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
 
 template<bool softAes>
 void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
+
+template<bool softAes>
+void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, void* fill_state);
diff --git a/xmrstak/backend/cpu/crypto/randomx/intrin_portable.h b/xmrstak/backend/cpu/crypto/randomx/intrin_portable.h
index 346c433ae..1dcd3ad37 100644
--- a/xmrstak/backend/cpu/crypto/randomx/intrin_portable.h
+++ b/xmrstak/backend/cpu/crypto/randomx/intrin_portable.h
@@ -102,6 +102,7 @@ typedef __m128d rx_vec_f128;
 #define rx_aligned_alloc(a, b) _mm_malloc(a,b)
 #define rx_aligned_free(a) _mm_free(a)
 #define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
+#define rx_prefetch_t0(x) _mm_prefetch((const char *)(x), _MM_HINT_T0)
 
 #define rx_load_vec_f128 _mm_load_pd
 #define rx_store_vec_f128 _mm_store_pd
@@ -201,6 +202,7 @@ typedef union{
 #define rx_aligned_alloc(a, b) malloc(a)
 #define rx_aligned_free(a) free(a)
 #define rx_prefetch_nta(x)
+#define rx_prefetch_t0(x)
 
 /* Splat 64-bit long long to 2 64-bit long longs */
 FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
@@ -399,6 +401,10 @@ inline void rx_prefetch_nta(void* ptr) {
 	asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
 }
 
+inline void rx_prefetch_t0(const void* ptr) {
+	asm volatile ("prfm pldl1keep, [%0]\n" : : "r" (ptr)); // T0 is the temporal hint: use KEEP here; pldl1strm is the streaming variant already used by rx_prefetch_nta
+}
+
 FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
 	return vld1q_f64((const float64_t*)pd);
 }
@@ -532,6 +538,7 @@ typedef union {
 #define rx_aligned_alloc(a, b) malloc(a)
 #define rx_aligned_free(a) free(a)
 #define rx_prefetch_nta(x)
+#define rx_prefetch_t0(x)
 
 FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
 	rx_vec_f128 x;
diff --git a/xmrstak/backend/cpu/crypto/randomx/jit_compiler.hpp b/xmrstak/backend/cpu/crypto/randomx/jit_compiler.hpp
index 424b737dd..03b605085 100644
--- a/xmrstak/backend/cpu/crypto/randomx/jit_compiler.hpp
+++ b/xmrstak/backend/cpu/crypto/randomx/jit_compiler.hpp
@@ -30,6 +30,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #if defined(_M_X64) || defined(__x86_64__)
 #include "crypto/randomx/jit_compiler_x86.hpp"
+#elif defined(__aarch64__)
+#include "crypto/randomx/jit_compiler_a64.hpp"
 #else
 #include "crypto/randomx/jit_compiler_fallback.hpp"
 #endif
diff --git a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.cpp b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.cpp
index 2a3425352..bfde7d002 100644
--- a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.cpp
+++ b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.cpp
@@ -29,6 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <stdexcept>
 #include <cstring>
 #include <climits>
+#include <atomic>
 #include "crypto/randomx/jit_compiler_x86.hpp"
 #include "crypto/randomx/jit_compiler_x86_static.hpp"
 #include "crypto/randomx/superscalar.hpp"
@@ -36,6 +37,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "crypto/randomx/reciprocal.h"
 #include "crypto/randomx/virtual_memory.hpp"
 
+#ifdef _MSC_VER
+#   include <intrin.h>
+#else
+#   include <cpuid.h>
+#endif
+
 namespace randomx {
 	/*
 
@@ -108,7 +115,7 @@ namespace randomx {
 	const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch;
 	const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
 
-	const int32_t epilogueOffset = CodeSize - epilogueSize;
+	const int32_t epilogueOffset = (CodeSize - epilogueSize) & ~63;
 	constexpr int32_t superScalarHashOffset = 32768;
 
 	static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
@@ -183,6 +190,7 @@ namespace randomx {
 	static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
 	static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
 	static const uint8_t JZ[] = { 0x0f, 0x84 };
+	static const uint8_t JZ_SHORT = 0x74;
 	static const uint8_t RET = 0xc3;
 	static const uint8_t LEA_32[] = { 0x41, 0x8d };
 	static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
@@ -197,20 +205,100 @@ namespace randomx {
 	static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
 	static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
 
-//	static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
+	static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
+
+	static const uint8_t JMP_ALIGN_PREFIX[14][16] = { // row N holds exactly N padding bytes (N = 0..13); 0x2E is an x86 segment-override prefix byte, rows 9+ start with a NOP encoding
+		{},
+		{0x2E},
+		{0x2E, 0x2E},
+		{0x2E, 0x2E, 0x2E},
+		{0x2E, 0x2E, 0x2E, 0x2E},
+		{0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
+		{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
+		{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
+		{0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
+		{0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
+		{0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
+		{0x66, 0x66, 0x90, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
+		{0x0F, 0x1F, 0x40, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
+		{0x0F, 0x1F, 0x44, 0x00, 0x00, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E},
+	};
+
+	bool JitCompilerX86::BranchesWithin32B = false;
 
 	size_t JitCompilerX86::getCodeSize() {
 		return codePos < prologueSize ? 0 : codePos - prologueSize;
 	}
 
+    static inline void cpuid(uint32_t level, int32_t output[4]) // runs CPUID for leaf `level`; output receives EAX, EBX, ECX, EDX in that order
+    {
+        memset(output, 0, sizeof(int32_t) * 4); // start from zeros before the intrinsic fills the array
+
+#   ifdef _MSC_VER
+        __cpuid(output, static_cast<int>(level));
+#   else
+        __cpuid_count(level, 0, output[0], output[1], output[2], output[3]); // sub-leaf argument is fixed at 0
+#   endif
+    }
+
+    // CPU-specific tweaks: sets BranchesWithin32B when CPUID reports a "GenuineIntel" family-6 model/stepping from the list below; leaves it untouched otherwise
+	void JitCompilerX86::applyTweaks() {
+		int32_t info[4];
+		cpuid(0, info);
+
+		int32_t manufacturer[4];
+		manufacturer[0] = info[1]; // vendor string is assembled from EBX, EDX, ECX in that order
+		manufacturer[1] = info[3];
+		manufacturer[2] = info[2];
+		manufacturer[3] = 0; // NUL terminator for the strcmp below
+
+		if (strcmp((const char*)manufacturer, "GenuineIntel") == 0) {
+			struct
+			{
+				unsigned int stepping : 4;
+				unsigned int model : 4;
+				unsigned int family : 4;
+				unsigned int processor_type : 2;
+				unsigned int reserved1 : 2;
+				unsigned int ext_model : 4;
+				unsigned int ext_family : 8;
+				unsigned int reserved2 : 4;
+			} processor_info;
+
+			cpuid(1, info);
+			memcpy(&processor_info, info, sizeof(processor_info)); // copies sizeof(processor_info) bytes starting at info[0] (EAX); relies on the compiler's bit-field layout
+
+			// Intel JCC erratum mitigation
+			if (processor_info.family == 6) {
+				const uint32_t model = processor_info.model | (processor_info.ext_model << 4); // extended model supplies the high nibble
+				const uint32_t stepping = processor_info.stepping;
+
+				// Affected CPU models and stepping numbers are taken from https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
+				BranchesWithin32B =
+					((model == 0x4E) && (stepping == 0x3)) ||
+					((model == 0x55) && (stepping == 0x4)) ||
+					((model == 0x5E) && (stepping == 0x3)) ||
+					((model == 0x8E) && (stepping >= 0x9) && (stepping <= 0xC)) ||
+					((model == 0x9E) && (stepping >= 0x9) && (stepping <= 0xD)) ||
+					((model == 0xA6) && (stepping == 0x0)) ||
+					((model == 0xAE) && (stepping == 0xA));
+			}
+		}
+	}
+
+	static std::atomic<size_t> codeOffset;
+
 	JitCompilerX86::JitCompilerX86() {
-		code = (uint8_t*)allocExecutableMemory(CodeSize);
+		applyTweaks(); // may set the static BranchesWithin32B flag used by the code generators
+		allocatedCode = (uint8_t*)allocExecutableMemory(CodeSize * 2); // twice CodeSize so the shifted base below still has CodeSize bytes after it
+		// Shift code base address to improve caching - all threads will use different L2/L3 cache sets
+		code = allocatedCode + (codeOffset.fetch_add(59 * 64) % CodeSize); // offset is always < CodeSize; each new instance advances the shared counter by 59 * 64
 		memcpy(code, codePrologue, prologueSize);
 		memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
 	}
 
 	JitCompilerX86::~JitCompilerX86() {
-		freePagedMemory(code, CodeSize);
+		freePagedMemory(allocatedCode, CodeSize * 2); // size must match the CodeSize * 2 passed to allocExecutableMemory in the constructor, otherwise half of the mapping is never released
 	}
 
 	void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
@@ -307,6 +395,22 @@ namespace randomx {
 		emit(RandomX_CurrentConfig.codePrefetchScratchpadTweaked, prefetchScratchpadSize, code, codePos);
 		memcpy(code + codePos, codeLoopStore, loopStoreSize);
 		codePos += loopStoreSize;
+
+		if (BranchesWithin32B) {
+			const uint32_t branch_begin = static_cast<uint32_t>(codePos);
+			const uint32_t branch_end = static_cast<uint32_t>(branch_begin + 9);
+
+			// If the jump crosses or touches 32-byte boundary, align it
+			if ((branch_begin ^ branch_end) >= 32) {
+				uint32_t alignment_size = 32 - (branch_begin & 31);
+				if (alignment_size > 8) {
+					emit(NOPX[alignment_size - 9], alignment_size - 8, code, codePos);
+					alignment_size = 8;
+				}
+				emit(NOPX[alignment_size - 1], alignment_size, code, codePos);
+			}
+		}
+
 		emit(SUB_EBX, code, codePos);
 		emit(JNZ, code, codePos);
 		emit32(prologueSize - codePos - 4, code, codePos);
@@ -408,12 +512,13 @@ namespace randomx {
 		}
 	}
 
-	void JitCompilerX86::genAddressReg(const Instruction& instr, uint8_t* code, int& codePos, bool rax) {
-		emit(LEA_32, code, codePos);
-		emitByte(0x80 + instr.src + (rax ? 0 : 8), code, codePos);
-		if (instr.src == RegisterNeedsSib) {
-			emitByte(0x24, code, codePos);
-		}
+	template<bool rax>
+	FORCE_INLINE void JitCompilerX86::genAddressReg(const Instruction& instr, uint8_t* code, int& codePos) {
+		const uint32_t src = *((uint32_t*)&instr) & 0xFF0000;
+
+		*(uint32_t*)(code + codePos) = (rax ? 0x24808d41 : 0x24888d41) + src;
+		codePos += (src == (RegisterNeedsSib << 16)) ? 4 : 3;
+
 		emit32(instr.getImm32(), code, codePos);
 		if (rax)
 			emitByte(AND_EAX_I, code, codePos);
@@ -422,12 +527,14 @@ namespace randomx {
 		emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask, code, codePos);
 	}
 
-	void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, int& codePos) {
-		emit(LEA_32, code, codePos);
-		emitByte(0x80 + instr.dst, code, codePos);
-		if (instr.dst == RegisterNeedsSib) {
-			emitByte(0x24, code, codePos);
-		}
+	template void JitCompilerX86::genAddressReg<false>(const Instruction& instr, uint8_t* code, int& codePos);
+	template void JitCompilerX86::genAddressReg<true>(const Instruction& instr, uint8_t* code, int& codePos);
+
+	FORCE_INLINE void JitCompilerX86::genAddressRegDst(const Instruction& instr, uint8_t* code, int& codePos) {
+		const uint32_t dst = static_cast<uint32_t>(instr.dst) << 16;
+		*(uint32_t*)(code + codePos) = 0x24808d41 + dst;
+		codePos += (dst == (RegisterNeedsSib << 16)) ? 4 : 3;
+
 		emit32(instr.getImm32(), code, codePos);
 		emitByte(AND_EAX_I, code, codePos);
 		if (instr.getModCond() < StoreL3Condition) {
@@ -438,7 +545,7 @@ namespace randomx {
 		}
 	}
 
-	void JitCompilerX86::genAddressImm(const Instruction& instr, uint8_t* code, int& codePos) {
+	FORCE_INLINE void JitCompilerX86::genAddressImm(const Instruction& instr, uint8_t* code, int& codePos) {
 		emit32(instr.getImm32() & ScratchpadL3Mask, code, codePos);
 	}
 
@@ -483,7 +590,7 @@ namespace randomx {
 		int pos = codePos;
 		
 		if (instr.src != instr.dst) {
-			genAddressReg(instr, p, pos);
+			genAddressReg<true>(instr, p, pos);
 			emit32(template_IADD_M[instr.dst], p, pos);
 		}
 		else {
@@ -523,7 +630,7 @@ namespace randomx {
 		int pos = codePos;
 		
 		if (instr.src != instr.dst) {
-			genAddressReg(instr, p, pos);
+			genAddressReg<true>(instr, p, pos);
 			emit(REX_SUB_RM, p, pos);
 			emitByte(0x04 + 8 * instr.dst, p, pos);
 			emitByte(0x06, p, pos);
@@ -561,7 +668,7 @@ namespace randomx {
 		int pos = codePos;
 		
 		if (instr.src != instr.dst) {
-			genAddressReg(instr, p, pos);
+			genAddressReg<true>(instr, p, pos);
 			emit(REX_IMUL_RM, p, pos);
 			emitByte(0x04 + 8 * instr.dst, p, pos);
 			emitByte(0x06, p, pos);
@@ -596,7 +703,7 @@ namespace randomx {
 		int pos = codePos;
 		
 		if (instr.src != instr.dst) {
-			genAddressReg(instr, p, pos, false);
+			genAddressReg<false>(instr, p, pos);
 			emit(REX_MOV_RR64, p, pos);
 			emitByte(0xc0 + instr.dst, p, pos);
 			emit(REX_MUL_MEM, p, pos);
@@ -635,7 +742,7 @@ namespace randomx {
 		int pos = codePos;
 		
 		if (instr.src != instr.dst) {
-			genAddressReg(instr, p, pos, false);
+			genAddressReg<false>(instr, p, pos);
 			emit(REX_MOV_RR64, p, pos);
 			emitByte(0xc0 + instr.dst, p, pos);
 			emit(REX_IMUL_MEM, p, pos);
@@ -704,7 +811,7 @@ namespace randomx {
 		int pos = codePos;
 		
 		if (instr.src != instr.dst) {
-			genAddressReg(instr, p, pos);
+			genAddressReg<true>(instr, p, pos);
 			emit(REX_XOR_RM, p, pos);
 			emitByte(0x04 + 8 * instr.dst, p, pos);
 			emitByte(0x06, p, pos);
@@ -801,7 +908,7 @@ namespace randomx {
 		int pos = codePos;
 		
 		const uint32_t dst = instr.dst % RegisterCountFlt;
-		genAddressReg(instr, p, pos);
+		genAddressReg<true>(instr, p, pos);
 		emit(REX_CVTDQ2PD_XMM12, p, pos);
 		emit(REX_ADDPD, p, pos);
 		emitByte(0xc4 + 8 * dst, p, pos);
@@ -826,7 +933,7 @@ namespace randomx {
 		int pos = codePos;
 		
 		const uint32_t dst = instr.dst % RegisterCountFlt;
-		genAddressReg(instr, p, pos);
+		genAddressReg<true>(instr, p, pos);
 		emit(REX_CVTDQ2PD_XMM12, p, pos);
 		emit(REX_SUBPD, p, pos);
 		emitByte(0xc4 + 8 * dst, p, pos);
@@ -862,7 +969,7 @@ namespace randomx {
 		int pos = codePos;
 		
 		const uint32_t dst = instr.dst % RegisterCountFlt;
-		genAddressReg(instr, p, pos);
+		genAddressReg<true>(instr, p, pos);
 		emit(REX_CVTDQ2PD_XMM12, p, pos);
 		emit(REX_ANDPS_XMM12, p, pos);
 		emit(REX_DIVPD, p, pos);
@@ -902,19 +1009,39 @@ namespace randomx {
 		uint8_t* const p = code;
 		int pos = codePos;
 		
-		int reg = instr.dst;
+		const int reg = instr.dst;
+		int32_t jmp_offset = registerUsage[reg] - (pos + 16);
+
+		if (BranchesWithin32B) {
+			const uint32_t branch_begin = static_cast<uint32_t>(pos + 7);
+			const uint32_t branch_end = static_cast<uint32_t>(branch_begin + ((jmp_offset >= -128) ? 9 : 13));
+
+			// If the jump crosses or touches 32-byte boundary, align it
+			if ((branch_begin ^ branch_end) >= 32) {
+				const uint32_t alignment_size = 32 - (branch_begin & 31);
+				jmp_offset -= alignment_size;
+				emit(JMP_ALIGN_PREFIX[alignment_size], alignment_size, p, pos);
+			}
+		}
+
 		emit(REX_ADD_I, p, pos);
 		emitByte(0xc0 + reg, p, pos);
-		int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
-		uint32_t imm = instr.getImm32() | (1UL << shift);
-		if (RandomX_CurrentConfig.JumpOffset > 0 || shift > 0)
-			imm &= ~(1UL << (shift - 1));
+		const int shift = instr.getModCond() + RandomX_CurrentConfig.JumpOffset;
+		const uint32_t imm = (instr.getImm32() | (1UL << shift)) & ~(1UL << (shift - 1));
 		emit32(imm, p, pos);
 		emit(REX_TEST, p, pos);
 		emitByte(0xc0 + reg, p, pos);
 		emit32(RandomX_CurrentConfig.ConditionMask_Calculated << shift, p, pos);
-		emit(JZ, p, pos);
-		emit32(registerUsage[reg] - (pos + 4), p, pos);
+
+		if (jmp_offset >= -128) {
+			emitByte(JZ_SHORT, p, pos);
+			emitByte(jmp_offset, p, pos);
+		}
+		else {
+			emit(JZ, p, pos);
+			emit32(jmp_offset - 4, p, pos);
+		}
+
 		//mark all registers as used
 		uint64_t* r = (uint64_t*) registerUsage;
 		uint64_t k = pos;
diff --git a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.hpp b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.hpp
index 30b16f586..f1864018a 100644
--- a/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.hpp
+++ b/xmrstak/backend/cpu/crypto/randomx/jit_compiler_x86.hpp
@@ -67,12 +67,17 @@ namespace randomx {
 
 		static InstructionGeneratorX86 engine[256];
 		int registerUsage[RegistersCount];
+		uint8_t* allocatedCode;
 		uint8_t* code;
 		int32_t codePos;
 
+		static bool BranchesWithin32B;
+
+		static void applyTweaks();
 		void generateProgramPrologue(Program&, ProgramConfiguration&);
 		void generateProgramEpilogue(Program&, ProgramConfiguration&);
-		static void genAddressReg(const Instruction&, uint8_t* code, int& codePos, bool rax = true);
+		template<bool rax>
+		static void genAddressReg(const Instruction&, uint8_t* code, int& codePos);
 		static void genAddressRegDst(const Instruction&, uint8_t* code, int& codePos);
 		static void genAddressImm(const Instruction&, uint8_t* code, int& codePos);
 		static void genSIB(int scale, int index, int base, uint8_t* code, int& codePos);
diff --git a/xmrstak/backend/cpu/crypto/randomx/randomx.cpp b/xmrstak/backend/cpu/crypto/randomx/randomx.cpp
index 475a32183..2ee097fda 100644
--- a/xmrstak/backend/cpu/crypto/randomx/randomx.cpp
+++ b/xmrstak/backend/cpu/crypto/randomx/randomx.cpp
@@ -458,4 +458,22 @@ extern "C" {
 		machine->getFinalResult(output, RANDOMX_HASH_SIZE);
 	}
 
+	void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize) {
+		rx_blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0); // tempHash = 64-byte Blake2b digest of the first input
+		machine->initScratchpad(tempHash); // seed the scratchpad from that digest; tempHash is then carried into randomx_calculate_hash_next
+	}
+
+	void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output) {
+		machine->resetRoundingMode();
+		for (uint32_t chain = 0; chain < RandomX_CurrentConfig.ProgramCount - 1; ++chain) { // every program except the last re-seeds tempHash from the register file
+			machine->run(&tempHash);
+			rx_blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
+		}
+		machine->run(&tempHash); // last program of the chain; its register file is hashed inside hashAndFill
+
+		// Finish current hash and fill the scratchpad for the next hash at the same time
+		rx_blake2b(tempHash, sizeof(tempHash), nextInput, nextInputSize, nullptr, 0); // tempHash now holds the digest of the NEXT input
+		machine->hashAndFill(output, RANDOMX_HASH_SIZE, tempHash); // writes the current hash to output and passes tempHash as the fill state
+	}
+
 }
diff --git a/xmrstak/backend/cpu/crypto/randomx/randomx.h b/xmrstak/backend/cpu/crypto/randomx/randomx.h
index 22671156a..51c66334d 100644
--- a/xmrstak/backend/cpu/crypto/randomx/randomx.h
+++ b/xmrstak/backend/cpu/crypto/randomx/randomx.h
@@ -330,6 +330,9 @@ RANDOMX_EXPORT void randomx_destroy_vm(randomx_vm *machine);
 */
 RANDOMX_EXPORT void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output);
 
+RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, uint64_t (&tempHash)[8], const void* input, size_t inputSize);
+RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, uint64_t (&tempHash)[8], const void* nextInput, size_t nextInputSize, void* output);
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/xmrstak/backend/cpu/crypto/randomx/virtual_machine.cpp b/xmrstak/backend/cpu/crypto/randomx/virtual_machine.cpp
index 2913c7e5e..ecd187e2f 100644
--- a/xmrstak/backend/cpu/crypto/randomx/virtual_machine.cpp
+++ b/xmrstak/backend/cpu/crypto/randomx/virtual_machine.cpp
@@ -114,6 +114,12 @@ namespace randomx {
         rx_blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0);
 	}
 
+	template<bool softAes>
+	void VmBase<softAes>::hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) {
+		hashAndFillAes1Rx4<softAes>(scratchpad, ScratchpadSize, &reg.a, fill_state); // scratchpad digest is stored at reg.a while the scratchpad is refilled from fill_state
+        rx_blake2b(out, outSize, &reg, sizeof(RegisterFile), nullptr, 0); // result = Blake2b over the whole register file
+	}
+
 	template<bool softAes>
 	void VmBase<softAes>::initScratchpad(void* seed) {
 		fillAes1Rx4<softAes>(seed, ScratchpadSize, scratchpad);
diff --git a/xmrstak/backend/cpu/crypto/randomx/virtual_machine.hpp b/xmrstak/backend/cpu/crypto/randomx/virtual_machine.hpp
index c85af0097..d3718d04d 100644
--- a/xmrstak/backend/cpu/crypto/randomx/virtual_machine.hpp
+++ b/xmrstak/backend/cpu/crypto/randomx/virtual_machine.hpp
@@ -39,6 +39,7 @@ class randomx_vm
 	virtual ~randomx_vm() = 0;
 	virtual void setScratchpad(uint8_t *scratchpad) = 0;
 	virtual void getFinalResult(void* out, size_t outSize) = 0;
+	virtual void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) = 0;
 	virtual void setDataset(randomx_dataset* dataset) { }
 	virtual void setCache(randomx_cache* cache) { }
 	virtual void initScratchpad(void* seed) = 0;
@@ -82,6 +83,7 @@ namespace randomx {
 		void setScratchpad(uint8_t *scratchpad) override;
 		void initScratchpad(void* seed) override;
 		void getFinalResult(void* out, size_t outSize) override;
+		void hashAndFill(void* out, size_t outSize, uint64_t (&fill_state)[8]) override;
 
 	protected:
 		void generateProgram(void* seed);
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp
index a633494eb..eecdf63f0 100644
--- a/xmrstak/backend/cpu/minethd.cpp
+++ b/xmrstak/backend/cpu/minethd.cpp
@@ -114,7 +114,7 @@ bool minethd::thd_setaffinity(std::thread::native_handle_type h, uint64_t cpu_id
 #endif
 }
 
-minethd::minethd(miner_work& pWork, size_t iNo, int iMultiway, int64_t affinity)
+minethd::minethd(miner_work& pWork, size_t iNo, int iMultiway, int64_t affinity) : affinity(affinity)
 {
 	this->backendType = iBackend::CPU;
 	oWork = pWork;
@@ -608,14 +608,10 @@ void minethd::multiway_work_main()
 		if(on_new_job != nullptr)
 			on_new_job(oWork, ctx);
 
-		while(globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
+		constexpr uint64_t update_stat_each = 128;
+		// only check whether the job has changed once every 128 loop iterations (128*N hashes)
+		while((iCount % update_stat_each) != 0 || globalStates::inst().iGlobalJobNo.load(std::memory_order_relaxed) == iJobNo)
 		{
-			if((iCount++ & 0x7) == 0) //Store stats every 8*N hashes
-			{
-				updateStats((iCount - iLastCount) * N, oWork.iPoolId);
-				iLastCount = iCount;
-			}
-
 			nonce_ctr -= N;
 			if(nonce_ctr <= 0)
 			{
@@ -640,9 +636,13 @@ void minethd::multiway_work_main()
 							oWork.iPoolId));
 				}
 			}
-
-			std::this_thread::yield();
+			if((iCount++ % update_stat_each) == 0) //Store stats every update_stat_each*N hashes
+			{
+				updateStats((iCount - iLastCount) * N, oWork.iPoolId);
+				iLastCount = iCount;
+			}
 		}
+		std::this_thread::yield();
 
 		globalStates::inst().consume_work(oWork, iJobNo);
 		prep_multiway_work<N>(bWorkBlob, piNonce);
diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp
index cdcf82132..6930bcb9e 100644
--- a/xmrstak/version.cpp
+++ b/xmrstak/version.cpp
@@ -20,7 +20,7 @@
 #endif
 
 #define XMR_STAK_NAME "xmr-stak-rx"
-#define XMR_STAK_VERSION "1.0.1-rx"
+#define XMR_STAK_VERSION "1.0.2-rx"
 
 #if defined(_WIN32)
 #define OS_TYPE "win"