Skip to content

Commit

Permalink
Add support for Windows and ARM
Browse files Browse the repository at this point in the history
Add support for compiling using MSVC. Add support for ARM (with optional
NEON support) and other platforms (with reference implementation).
  • Loading branch information
Matoking committed Mar 20, 2019
1 parent 0703881 commit f0fd4a5
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 64 deletions.
10 changes: 8 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Added
- Add support for Windows (x86 and x86-64) using the MSVC compiler.
- Add support for Linux (ARM) using the GCC compiler.
- Add support for the NEON instruction set on ARM for better performance when generating PoW.

## [0.2] - 2019-03-07
### Added
- `Block.has_valid_work` and `Block.has_valid_signature` properties are cached to prevent redundant work.
Expand All @@ -19,5 +25,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Initial release

[0.2]: https://github.com/Matoking/nanolib/compare/0.2...0.1
[Unreleased]: https://github.com/Matoking/nanolib/compare/0.1...HEAD
[Unreleased]: https://github.com/Matoking/nanolib/compare/0.2...HEAD
[0.2]: https://github.com/Matoking/nanolib/compare/0.1...0.2
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ Features
* Create and deserialize legacy and universal blocks
* Account generation from seed using the same algorithm as the original NANO wallet and NanoVault
* Functions for converting between different NANO denominations
* High performance cryptographic operations using C extensions (signing and verifying blocks, and solving block proof-of-work)
* Proof-of-work solving supports SSE2, SSSE3, SSE4.1 and AVX instruction sets for improved performance. The best supported implementation is selected at runtime with a fallback implementation with universal compatibility.
* High performance cryptographic operations using C extensions (signing and verifying blocks, and generating block proof-of-work)
* Proof-of-work generation supports SSE2, SSSE3, SSE4.1, AVX and NEON instruction sets for improved performance. The best supported implementation is selected at runtime with a fallback implementation with universal compatibility.
* Backed by automated tests
* Compatible with Python 3.6 and up
* Licensed under the very permissive *Creative Commons Zero* license
Expand Down
136 changes: 79 additions & 57 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import glob
import io
import os
import platform
import sys
from distutils.ccompiler import get_default_compiler
from shutil import rmtree

from setuptools import Command, Extension, setup
Expand All @@ -29,60 +31,85 @@
'bitarray>=0.8.1', 'ed25519-blake2b>=1.4', 'py-cpuinfo>=4'
]

NANOCURRENCY_WORK_REF = Extension(
"nanolib._work_ref",
include_dirs=[
"src/nanolib-work-module/BLAKE2/ref",
],
sources=[
"src/nanolib-work-module/work.c",
] + glob.glob("src/nanolib-work-module/BLAKE2/ref/blake2b*.c"),
extra_compile_args=["-DWORK_REF"]
)

NANOCURRENCY_WORK_SSE2 = Extension(
"nanolib._work_sse2",
include_dirs=[
"src/nanolib-work-module/BLAKE2/sse",
],
sources=[
"src/nanolib-work-module/work.c",
] + glob.glob("src/nanolib-work-module/BLAKE2/sse/blake2b*.c"),
extra_compile_args=["-DWORK_SSE2", "-msse2"]
)
# Return the list of extra compiler arguments for one proof-of-work
# extension build.
#
# `iset` is the instruction-set key ("avx", "sse4_1", "ssse3", "sse2" or
# "neon"); None selects the flags for the reference implementation.
# Raises OSError when the lookup in FLAGS fails.
def get_compile_args(iset=None):
# Flags are keyed first by the distutils compiler type and then by
# instruction set. Each entry carries a WORK_* define plus the switch
# that enables the instruction set for that compiler.
FLAGS = {
"unix": {
"avx": ["-DWORK_AVX", "-mavx"],
"sse4_1": ["-DWORK_SSE4_1", "-msse4.1"],
"ssse3": ["-DWORK_SSSE3", "-mssse3"],
"sse2": ["-DWORK_SSE2", "-msse2"],
# NOTE(review): `-mfpu=neon` is presumably only accepted by 32-bit ARM
# GCC; confirm it does not break builds on aarch64 machines.
"neon": ["-DWORK_NEON", "-mfpu=neon"],
None: ["-DWORK_REF"]
},
# The MSVC entries additionally pass the __SSE*__ / HAVE_AVX defines
# explicitly (presumably because MSVC does not predefine them - confirm).
"msvc": {
"avx": ["/DWORK_AVX", "/arch:AVX", "/DHAVE_AVX", "/D__SSE4_1__"],
"sse4_1": ["/DWORK_SSE4_1", "/arch:SSE2", "/D__SSE4_1__"],
"ssse3": ["/DWORK_SSSE3", "/arch:SSE2", "/D__SSSE3__"],
"sse2": ["/DWORK_SSE2", "/arch:SSE2", "/D__SSE2__"],
"neon": ["/DWORK_NEON"],
None: ["/DWORK_REF"]
}
}

# NOTE(review): the NANOCURRENCY_WORK_SSSE3 and NANOCURRENCY_WORK_SSE4_1
# definitions that follow appear to be removed-side lines of the diff view
# interleaved with this function; they do not look like part of
# get_compile_args.
NANOCURRENCY_WORK_SSSE3 = Extension(
"nanolib._work_ssse3",
include_dirs=[
"src/nanolib-work-module/BLAKE2/sse",
],
sources=[
"src/nanolib-work-module/work.c",
] + glob.glob("src/nanolib-work-module/BLAKE2/sse/blake2b*.c"),
extra_compile_args=["-DWORK_SSSE3", "-mssse3"]
)
# The compiler type comes from distutils' platform default.
# NOTE(review): a compiler selected on the command line is presumably not
# reflected here - confirm.
compiler = get_default_compiler()

NANOCURRENCY_WORK_SSE4_1 = Extension(
"nanolib._work_sse4_1",
include_dirs=[
"src/nanolib-work-module/BLAKE2/sse",
],
sources=[
"src/nanolib-work-module/work.c",
] + glob.glob("src/nanolib-work-module/BLAKE2/sse/blake2b*.c"),
extra_compile_args=["-DWORK_SSE4_1", "-msse4.1"]
)
# NOTE(review): both subscripts sit inside the try, so an unknown `iset`
# also raises KeyError and is reported with the "Compiler ... not supported"
# message even though the compiler itself is known.
try:
return FLAGS[compiler][iset]
except KeyError:
raise OSError("Compiler '{}' not supported.".format(compiler))


# Directory that holds one sub-directory per BLAKE2 implementation variant.
SOURCE_ROOT = os.path.join("src", "nanolib-work-module", "BLAKE2")

# (variant directory, glob pattern) pairs; note that the "neon" variant uses
# the narrower "blake2b-*.c" pattern while the others use "blake2b*.c".
_BLAKE2B_GLOBS = (
    ("ref", "blake2b*.c"),
    ("sse", "blake2b*.c"),
    ("neon", "blake2b-*.c"),
)

# Maps each variant name to the list of C source files matched on disk.
SOURCE_FILES = {
    variant: glob.glob(os.path.join(SOURCE_ROOT, variant, pattern))
    for variant, pattern in _BLAKE2B_GLOBS
}


def create_work_extension(source_name="ref", iset=None):
    """Build the setuptools ``Extension`` for one proof-of-work module.

    ``source_name`` selects the BLAKE2 variant directory (a key of
    ``SOURCE_FILES``) that supplies the include path and the BLAKE2 sources.
    ``iset`` selects the compiler flags via ``get_compile_args`` and the
    module name suffix; a falsy ``iset`` produces ``nanolib._work_ref``.
    """
    blake2_dir = os.path.join(
        "src", "nanolib-work-module", "BLAKE2", source_name
    )
    work_source = os.path.join("src", "nanolib-work-module", "work.c")
    module_name = "nanolib._work_{}".format(iset or "ref")

    return Extension(
        module_name,
        include_dirs=[blake2_dir],
        # work.c always comes first, followed by the variant's BLAKE2 files.
        sources=[work_source, *SOURCE_FILES[source_name]],
        extra_compile_args=get_compile_args(iset)
    )


# Choose which work extensions to build from the machine type reported by
# the platform module.
_machine = platform.machine()

# https://stackoverflow.com/a/45125525
_is_arm = _machine.startswith(("arm", "aarch64"))
# Windows reports 64-bit x86 as 'AMD64'
_is_x86 = _machine.startswith("x86") or _machine in ("i386", "i686", "AMD64")

# (source directory, instruction set) pairs for the accelerated builds, in
# the order they are handed to setup().
if _is_x86:
    _accelerated = [
        ("sse", "avx"),
        ("sse", "sse4_1"),
        ("sse", "ssse3"),
        ("sse", "sse2"),
    ]
elif _is_arm:
    _accelerated = [("neon", "neon")]
else:
    _accelerated = []

EXTENSIONS_TO_BUILD = [
    create_work_extension(source_dir, iset)
    for source_dir, iset in _accelerated
]
# The reference implementation is built on every platform and comes last.
EXTENSIONS_TO_BUILD.append(create_work_extension("ref"))

NANOCURRENCY_WORK_AVX = Extension(
"nanolib._work_avx",
include_dirs=[
"src/nanolib-work-module/BLAKE2/sse",
],
sources=[
"src/nanolib-work-module/work.c",
] + glob.glob("src/nanolib-work-module/BLAKE2/sse/blake2b*.c"),
extra_compile_args=["-DWORK_AVX", "-mavx"]
)

# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
Expand Down Expand Up @@ -190,13 +217,7 @@ def run(self):
author_email=EMAIL,
python_requires=REQUIRES_PYTHON,
url=URL,
ext_modules=[
NANOCURRENCY_WORK_REF,
NANOCURRENCY_WORK_SSE2,
NANOCURRENCY_WORK_SSSE3,
NANOCURRENCY_WORK_SSE4_1,
NANOCURRENCY_WORK_AVX,
],
ext_modules=EXTENSIONS_TO_BUILD,
packages=["nanolib"],
package_data={"": ["LICENSE"]},
package_dir={"nanolib": "src/nanolib"},
Expand All @@ -214,6 +235,7 @@ def run(self):
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: Implementation :: CPython',
'Operating System :: Microsoft :: Windows',
'Operating System :: POSIX :: Linux',
'Topic :: Office/Business :: Financial',
],
Expand Down
6 changes: 6 additions & 0 deletions src/nanolib-work-module/work.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ work_module = {
"_work_sse4_1",
#elif WORK_AVX
"_work_avx",
#elif WORK_NEON
"_work_neon",
#endif
module_doc,
-1,
Expand All @@ -111,4 +113,8 @@ PyMODINIT_FUNC PyInit__work_sse4_1(void) {
PyMODINIT_FUNC PyInit__work_avx(void) {
return PyModule_Create(&work_module);
}
#elif WORK_NEON
PyMODINIT_FUNC PyInit__work_neon(void) {
return PyModule_Create(&work_module);
}
#endif
6 changes: 4 additions & 2 deletions src/nanolib/work.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@
from . import _work_avx as _work
elif "sse4_1" in _cpu_flags:
from . import _work_sse4_1 as _work
elif "ssse3" in _cpu_flags: # SSSE3
elif "ssse3" in _cpu_flags:
from . import _work_ssse3 as _work
elif "sse2" in _cpu_flags: # SSE2
elif "sse2" in _cpu_flags:
from . import _work_sse2 as _work
elif "neon" in _cpu_flags:
from . import _work_neon as _work
else:
from . import _work_ref as _work

Expand Down
2 changes: 1 addition & 1 deletion tests/performance_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def pow_solve_speed_test():
# Get supported CPU instruction sets
cpu_flags = cpuinfo.get_cpu_info()["flags"]

all_flags = ["avx", "sse4_1", "ssse3", "sse2"]
all_flags = ["avx", "sse4_1", "ssse3", "sse2", "neon"]
supported_flags = [
flag for flag in all_flags
if flag in cpu_flags
Expand Down

0 comments on commit f0fd4a5

Please sign in to comment.