Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Small optimization and static build with GNU Guix. #198

Merged
merged 12 commits into from
Aug 14, 2023
1 change: 1 addition & 0 deletions .github/workflows/small_test_on_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
g++
python3-dev
libatomic-ops-dev
gcc-multilib
autoconf
libgsl-dev
zlib1g-dev
Expand Down
41 changes: 31 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,50 @@
# Specify the minimum version for CMake

cmake_minimum_required(VERSION 3.16)
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

# Project's name
project(smoothxg)

# We build using c++17
set(CMAKE_CXX_STANDARD 17)

find_package(ZLIB REQUIRED)

enable_testing()

include(CheckIPOSupported) # adds lto
check_ipo_supported(RESULT ipo_supported OUTPUT output)
SET(CMAKE_RANLIB "gcc-ranlib") # to try lto with older runtimes

# This builds a static version of ./bin/smoothxg
option(BUILD_STATIC "Build static binary" OFF)
if (BUILD_STATIC)
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
set(BUILD_SHARED_LIBS OFF)
set(CMAKE_EXE_LINKER_FLAGS "-static")
endif()

find_package(OpenMP)
# if(OpenMP_CXX_FOUND)
# target_link_libraries(MyTarget PUBLIC OpenMP::OpenMP_CXX)
# endif()

find_package(PkgConfig REQUIRED)
find_package(ZLIB REQUIRED)
# find_package(ZSTD REQUIRED) - needs recent versions of cmake

# Preload the following libraries before running tests
set(PRELOAD "libasan.so:libjemalloc.so.2")

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING
"Choose the type of build, options are: Release Debug Generic." FORCE)
message(STATUS "Choose the type of build, options are: Release Debug Generic!")
endif()

# set(CMAKE_BUILD_TYPE Debug) -- don't uncomment this, instead run
# cmake -DCMAKE_BUILD_TYPE=Debug ..

if (${CMAKE_BUILD_TYPE} MATCHES Release)
set(EXTRA_FLAGS "-Ofast -march=native")
if(NOT EXTRA_FLAGS)
set(EXTRA_FLAGS "-Ofast -march=native -flto -fno-fat-lto-objects")
endif()
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") # reset CXX_FLAGS to replace -O3 with -Ofast

# Increase SPOA's performance
Expand Down Expand Up @@ -418,7 +438,7 @@ add_executable(smoothxg
target_link_libraries(smoothxg spoa abpoa)
target_link_libraries(smoothxg ${smoothxg_LIBS})

target_link_libraries(smoothxg ZLIB::ZLIB zstd jemalloc)
target_link_libraries(smoothxg z zstd jemalloc)

set_target_properties(smoothxg PROPERTIES OUTPUT_NAME "smoothxg")
target_include_directories(smoothxg PUBLIC ${smoothxg_INCLUDES})
Expand All @@ -442,9 +462,10 @@ file(MAKE_DIRECTORY ${CMAKE_SOURCE_DIR}/include)
execute_process(COMMAND bash ${CMAKE_SOURCE_DIR}/scripts/generate_git_version.sh ${CMAKE_SOURCE_DIR}/include)

add_test(
NAME smoothxg-test
COMMAND bin/smoothxg -t 2 -g test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.seqwish.gfa -j 5k -e 5k -l 700,900,1100 -m test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.maf -C "consensus,10,100:test/data/gi_568815592_32578768-32589835.txt:y,1000:test/data/gi_568815592_32578768-32589835.txt:n,10000" -o test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.gfa -r 12
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
NAME smoothxg-test
# 2 cores because of CI limitations
COMMAND bin/smoothxg -t 2 -g test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.seqwish.gfa -j 5k -e 5k -l 700,900,1100 -m test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.maf -C "consensus,10,100:test/data/gi_568815592_32578768-32589835.txt:y,1000:test/data/gi_568815592_32578768-32589835.txt:n,10000" -o test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.gfa -r 12
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
set_tests_properties(smoothxg-test PROPERTIES ENVIRONMENT "ASAN_OPTIONS=detect_leaks=1:symbolize=1;LSAN_OPTIONS=verbosity=0:log_threads=1")

if (APPLE)
Expand Down
35 changes: 33 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Path names should be unique.

### building from source

`smoothxg` uses cmake to build itself and its dependencies. At least GCC version 9.3.0 is required for compilation.
`smoothxg` uses cmake to build itself and its dependencies. At least GCC version 9.3.0 is required for compilation.
You can check your version via:

```
Expand All @@ -50,8 +50,31 @@ cd smoothxg
cmake -H. -Bbuild && cmake --build build -- -j 4
```

To optimize for the architecture of the build machine, use a `Release` build:

```
cmake -DCMAKE_BUILD_TYPE=Release .. && make -j 16 VERBOSE=1 && ctest . --verbose
```


`libzstd-dev` must be of version 1.4 or higher.

Run tests:

```
ctest . --verbose
```

Note that smoothxg depends on git submodules:

```
git submodule update --init --recursive
```

### Using Guix for building

In your source dir make sure git submodules are up-to-date and follow the instructions in [guix.scm](guix.scm).

#### Notes for distribution

If you need to avoid machine-specific optimizations, use the `CMAKE_BUILD_TYPE=Generic` build type:
Expand All @@ -60,6 +83,14 @@ If you need to avoid machine-specific optimizations, use the `CMAKE_BUILD_TYPE=G
cmake -H. -Bbuild -DCMAKE_BUILD_TYPE=Generic && cmake --build build -- -j 3
```

To build for a specific architecture, pass the desired compiler flags via `EXTRA_FLAGS`:

```shell
cmake -DCMAKE_BUILD_TYPE=Release -DEXTRA_FLAGS="-Ofast -march=znver1" .. && make -j 16 VERBOSE=1
```

And to make a static build add the `-DBUILD_STATIC=ON` switch.

### Bioconda

`smoothxg` recipes for Bioconda are available at https://anaconda.org/bioconda/smoothxg.
Expand Down Expand Up @@ -119,7 +150,7 @@ For more details about how to handle Guix channels, go to https://git.genenetwor

#### Notes for debugging

To make the `-S/--write-split-block-fastas` and `-B/--write-poa-block-fastas` options available, and emit a table
To make the `-S/--write-split-block-fastas` and `-B/--write-poa-block-fastas` options available, and emit a table
with POA block statistics, add the `-DPOA_DEBUG=ON` option:

```shell
Expand Down
17 changes: 11 additions & 6 deletions guix.scm
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
;;
;; guix build -f guix.scm
;;
;; (make sure you are running a recent guix and checked out all submodules)
;;
;; To do a cross compilation build for ARM64
;;
;; guix build -f guix.scm --target=aarch64-linux
Expand All @@ -17,6 +19,10 @@
;; cmake -DCMAKE_BUILD_TYPE=Debug ..
;; cmake --build . --verbose -- -j 14 && ctest . --verbose
;;
;; Or for a release, something like
;;
;; cd build && rm -rf ../build/* ; cmake -DCMAKE_BUILD_TYPE=Release .. && make -j 16 VERBOSE=1 && ctest . --verbose
;;
;; For the tests you may need /usr/bin/env. In a container create it with
;;
;; mkdir -p /usr/bin ; ln -s $GUIX_ENVIRONMENT/bin/env /usr/bin/env
Expand Down Expand Up @@ -65,22 +71,21 @@
(inputs
`(
("coreutils" ,coreutils)
; ("cpp-httplib" ,cpp-httplib) later!
("pybind11" ,pybind11) ;; see libstd++ note in remarks above
; ("intervaltree" ,intervaltree) later!
("jemalloc" ,jemalloc)
("gcc" ,gcc-11)
("gcc-lib" ,gcc-11 "lib")
("gcc-toolchain" ,gcc-toolchain)
("gcc-toolchain" , gcc-toolchain)
("gdb" ,gdb)
("git" ,git) ; pulls in perl which does not do RISC-V cross builds yet
; ("lodepng" ,lodepng) later!
("openmpi" ,openmpi)
("python" ,python)
("sdsl-lite" ,sdsl-lite)
("libdivsufsort" ,libdivsufsort)
("zlib" ,zlib)
("zstd-lib" ,zstd "lib")
("zlib-static" ,zlib "static")
("zlib" ,zlib) ; also for the static build we need the includes
("zstd-lib" ,zstd "static")
("zstd" ,zstd "lib") ; same
))
(native-inputs
`(("pkg-config" ,pkg-config)
Expand Down
56 changes: 56 additions & 0 deletions test/performance/check.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Performance testing

In this document we want to make sure we don't regress on speed.

On an `AMD Ryzen 7 3700X 8-Core Processor`:

2 cores:

```
Command being timed: "bin/smoothxg -t 2 -g test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.seqwish.gfa -j 5k -e 5k -l 700,900,1100 -m test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.maf -C consensus,10,100:test/data/gi_568815592_32578768-32589835.txt:y,1000:test/data/gi_568815592_32578768-32589835.txt:n,10000 -o test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.gfa -r 12"
User time (seconds): 5.75
System time (seconds): 1.06
Percent of CPU this job got: 26%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:25.45
```

Note that the debug build takes about 35s.

Using 8 cores does not make much difference:

```
Command being timed: "bin/smoothxg -t 8 -g test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.seqwish.gfa -j 5k -e 5k -l 700,900,1100 -m test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.maf -C consensus,10,100:test/data/gi_568815592_32578768-32589835.txt:y,1000:test/data/gi_568815592_32578768-32589835.txt:n,10000 -o test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.gfa -r 12"
User time (seconds): 12.42
System time (seconds): 4.85
Percent of CPU this job got: 73%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:23.42
```

Compiling with LTO gives a slightly faster runtime on 2 threads:

```
Command being timed: "bin/smoothxg -t 2 -g test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.seqwish.gfa -j 5k -e 5k -l 700,900,1100 -m test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.maf -C consensus,10,100:test/data/gi_568815592_32578768-32589835.txt:y,1000:test/data/gi_568815592_32578768-32589835.txt:n,10000 -o test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.gfa -r 12"
User time (seconds): 5.43
System time (seconds): 1.19
Percent of CPU this job got: 26%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:25.01

```

Honoring -Ofast gives some speedup

```
User time (seconds): 5.55
System time (seconds): 1.02
Percent of CPU this job got: 26%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:24.40
```

The static build with GNU Guix performs about the same:

```
User time (seconds): 5.35
System time (seconds): 1.12
Percent of CPU this job got: 26%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:24.46
```