Skip to content

Commit

Permalink
[MISC] Run formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
eseiler committed Sep 26, 2024
1 parent 4e2e201 commit 7f4e563
Show file tree
Hide file tree
Showing 26 changed files with 1,179 additions and 883 deletions.
4 changes: 2 additions & 2 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
# SPDX-License-Identifier: CC0-1.0

# Format all files in include/test folder, including std module, excluding contrib module
# find . \( -iname "*.cpp" -or -iname "*.hpp" \) -and -not -path "./lib/*" -and -not -path "./build/*" | xargs clang-format-18 --style=file -i
# Format all files in include/test folder
# find . \( -iname "*.cpp" -or -iname "*.hpp" \) -and -not -path "./build/*" | xargs clang-format-18 --style=file -i
# Staged files: git diff --name-only HEAD --diff-filter=ACMRT | grep -E "(\.cpp|\.hpp)$" | xargs clang-format-18 --style=file -i
---
Language: Cpp
Expand Down
12 changes: 7 additions & 5 deletions .cmake-format.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@
# SPDX-FileCopyrightText: 2016-2024, Knut Reinert & MPI für molekulare Genetik
# SPDX-License-Identifier: CC0-1.0

# find . \( -iname CMakeLists.txt -o -iname *.cmake \) -a -not -path "./lib/*" -a -not -path "./build/*" | xargs cmake-format -c .cmake-format.yaml -i
# find . \( -iname CMakeLists.txt -o -iname *.cmake \) -a -not -path "./build/*" | xargs cmake-format -c .cmake-format.yaml -i
_help_parse: Options affecting listfile parsing
parse:
_help_additional_commands:
- Specify structure for custom cmake functions
additional_commands:
declare_internal_datasource:
declare_datasource:
pargs:
nargs: '*'
flags: []
kwargs:
FILE: '*'
FILE: 1
URL: '*'
URL_HASH: '*'
CONFIGURE: '*'
URL_HASH: 1
cpmgetpackage:
pargs: 1
spelling: CPMGetPackage
Expand Down
27 changes: 13 additions & 14 deletions cmake/CPM.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,22 @@
#
# SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors

set(CPM_DOWNLOAD_VERSION 0.40.2)
set(CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d")
set (CPM_DOWNLOAD_VERSION 0.40.2)
set (CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d")

if(CPM_SOURCE_CACHE)
set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
elseif(DEFINED ENV{CPM_SOURCE_CACHE})
set(CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
else()
set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
endif()
if (CPM_SOURCE_CACHE)
set (CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
elseif (DEFINED ENV{CPM_SOURCE_CACHE})
set (CPM_DOWNLOAD_LOCATION "$ENV{CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
else ()
set (CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
endif ()

# Expand relative path. This is important if the provided path contains a tilde (~)
get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)
get_filename_component (CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE)

file(DOWNLOAD
https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
file (DOWNLOAD https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake
${CPM_DOWNLOAD_LOCATION} EXPECTED_HASH SHA256=${CPM_HASH_SUM}
)

include(${CPM_DOWNLOAD_LOCATION})
include (${CPM_DOWNLOAD_LOCATION})
5 changes: 2 additions & 3 deletions cmake/test/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ if (NOT TARGET ${PROJECT_NAME}_test)
add_library (${PROJECT_NAME}_test INTERFACE)
target_compile_options (${PROJECT_NAME}_lib PUBLIC "-pedantic" "-Wall" "-Wextra" "-Werror")


if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# Disable bogus warnings in GCC12.
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12 AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13)
Expand All @@ -36,8 +35,8 @@ if (NOT TARGET ${PROJECT_NAME}_test)

# !Workaround: Get seqan3 test include dir from seqan3 target
find_path (SEQAN3_TEST_INCLUDE_DIR
NAMES seqan3/test/tmp_directory.hpp
HINTS "${seqan3_SOURCE_DIR}/test/include"
NAMES seqan3/test/tmp_directory.hpp
HINTS "${seqan3_SOURCE_DIR}/test/include"
)
target_include_directories (${PROJECT_NAME}_test SYSTEM INTERFACE "${SEQAN3_TEST_INCLUDE_DIR}")

Expand Down
3 changes: 2 additions & 1 deletion doc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ if (${DOXYGEN_FOUND})
COMMAND ${DOXYGEN_EXECUTABLE}
WORKING_DIRECTORY ${APP_TEMPLATE_DOXYGEN_OUTPUT_DIR}
COMMENT "Generating (developer) API documentation with Doxygen."
VERBATIM)
VERBATIM
)
message (STATUS "${FontBold}You can run `make doc` to build api documentation.${FontReset}")
else ()
message (STATUS "Doxygen not found.")
Expand Down
75 changes: 49 additions & 26 deletions include/ibf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,40 @@

#pragma once

#include <filesystem>
#include <iostream>
#include <math.h>
#include <numeric>
#include <string>

#include <seqan3/alphabet/container/concatenated_sequences.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <filesystem>

#include "shared.hpp"

struct minimiser_arguments
{
std::filesystem::path include_file; // Needs to be defined when only minimisers appearing in this file should be stored
std::filesystem::path exclude_file; // Needs to be defined when minimisers appearing in this file should NOT be stored
std::vector<int> samples{}; // Can be used to indicate that sequence files belong to the same experiment
bool paired = false; // If true, then experiments are seen as paired-end experiments
std::filesystem::path
include_file; // Needs to be defined when only minimisers appearing in this file should be stored
std::filesystem::path
exclude_file; // Needs to be defined when minimisers appearing in this file should NOT be stored
std::vector<int> samples{}; // Can be used to indicate that sequence files belong to the same experiment
bool paired = false; // If true, then experiments are seen as paired-end experiments
bool experiment_names = false; // Flag, if names of experiment should be stored in a txt file
bool ram_friendly = false;
};

//!\brief Generates a pseudo-random integer strictly less than a given maximum (rand() modulo the maximum)
struct RandomGenerator {
int maxi;
RandomGenerator(int max) :
maxi(max) {
}

int operator()() {
return rand() % maxi;
}
// Function object that yields pseudo-random integers by reducing rand() modulo a fixed bound.
struct RandomGenerator
{
    // Modulus applied to every drawn value; results are in [0, maxi) when maxi is positive.
    int maxi;

    // Stores the bound that later calls reduce against.
    RandomGenerator(int max) : maxi{max}
    {}

    // Draws one value from rand() and reduces it modulo maxi.
    int operator()()
    {
        int const drawn = rand();
        return drawn % maxi;
    }
};

/*!\brief Get the concrete expression values (= median of all counts of one transcript) for given experiments.
Expand All @@ -48,8 +51,11 @@ struct RandomGenerator {
* \param genome_file A "*.genome" file constructed with the command genome.
* \param paired Flag to indicate if input data is paired or not.
*/
void count(min_arguments const & args, std::vector<std::filesystem::path> sequence_files, std::filesystem::path include_file,
std::filesystem::path genome_file, bool paired);
void count(min_arguments const & args,
std::vector<std::filesystem::path> sequence_files,
std::filesystem::path include_file,
std::filesystem::path genome_file,
bool paired);

/*!\brief Creates a set of minimizers to ignore, which should be used as an input to count.
* \param args The minimiser arguments to use (seed, shape, window size).
Expand All @@ -71,7 +77,10 @@ void read_binary(std::filesystem::path filename, robin_hood::unordered_node_map<
* \param num_of_minimisers Variable in which the number of minimisers should be stored.
* \param cutoff cutoff value.
*/
void read_binary_start(min_arguments & args, std::filesystem::path filename, uint64_t & num_of_minimisers, uint8_t & cutoff);
void read_binary_start(min_arguments & args,
std::filesystem::path filename,
uint64_t & num_of_minimisers,
uint8_t & cutoff);

/*! \brief Creates IBFs.
* \param sequence_files A vector of sequence file paths.
Expand All @@ -85,8 +94,11 @@ void read_binary_start(min_arguments & args, std::filesystem::path filename, uin
* \param num_hash The number of hash functions to use.
* \returns The expression thresholds per experiment.
*/
std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & sequence_files, estimate_ibf_arguments & ibf_args,
minimiser_arguments & minimiser_args, std::vector<double> & fpr, std::vector<uint8_t> & cutoffs,
std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & sequence_files,
estimate_ibf_arguments & ibf_args,
minimiser_arguments & minimiser_args,
std::vector<double> & fpr,
std::vector<uint8_t> & cutoffs,
std::filesystem::path const expression_by_genome_file = "",
size_t num_hash = 1);

Expand All @@ -101,7 +113,8 @@ std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & sequence_fi
* \returns The expression thresholds per experiment.
*/
std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & minimiser_files,
estimate_ibf_arguments & ibf_args, std::vector<double> & fpr,
estimate_ibf_arguments & ibf_args,
std::vector<double> & fpr,
std::filesystem::path const expression_by_genome_file = "",
size_t num_hash = 1);

Expand All @@ -111,8 +124,10 @@ std::vector<uint16_t> ibf(std::vector<std::filesystem::path> const & minimiser_f
* \param minimiser_args The minimiser specific arguments to use.
* \param cutoffs List of cutoffs.
*/
void minimiser(std::vector<std::filesystem::path> const & sequence_files, min_arguments const & args,
minimiser_arguments & minimiser_args, std::vector<uint8_t> & cutoffs);
void minimiser(std::vector<std::filesystem::path> const & sequence_files,
min_arguments const & args,
minimiser_arguments & minimiser_args,
std::vector<uint8_t> & cutoffs);

/*! \brief Insert into IBFs.
* \param sequence_files A vector of sequence file paths.
Expand All @@ -127,9 +142,12 @@ void minimiser(std::vector<std::filesystem::path> const & sequence_files, min_ar
* \returns The expression thresholds per experiment.
*/
std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & sequence_files,
estimate_ibf_arguments & ibf_args, minimiser_arguments & minimiser_args,
estimate_ibf_arguments & ibf_args,
minimiser_arguments & minimiser_args,
std::vector<uint8_t> & cutoffs,
std::filesystem::path const expression_by_genome_file, std::filesystem::path path_in, bool samplewise);
std::filesystem::path const expression_by_genome_file,
std::filesystem::path path_in,
bool samplewise);

/*! \brief Insert into IBFs based on the minimiser files
* \param minimiser_files A vector of minimiser file paths.
Expand All @@ -143,7 +161,9 @@ std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & sequence
*/
std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & minimiser_files,
estimate_ibf_arguments & ibf_args,
std::filesystem::path const expression_by_genome_file, std::filesystem::path path_in, bool samplewise);
std::filesystem::path const expression_by_genome_file,
std::filesystem::path path_in,
bool samplewise);

/*! \brief Delete bins from ibfs
* \param delete_files A vector of integers specifying the bins to delete.
Expand All @@ -152,4 +172,7 @@ std::vector<uint16_t> insert(std::vector<std::filesystem::path> const & minimise
* \param path_in Input directory.
* \param samplewise True, if expression levels were set beforehand.
*/
void delete_bin(std::vector<uint64_t> const & delete_files, estimate_ibf_arguments & ibf_args, std::filesystem::path path_in, bool samplewise);
void delete_bin(std::vector<uint64_t> const & delete_files,
estimate_ibf_arguments & ibf_args,
std::filesystem::path path_in,
bool samplewise);
14 changes: 7 additions & 7 deletions include/shared.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
#include <seqan3/search/kmer_index/shape.hpp>
#include <seqan3/search/views/minimiser_hash.hpp>

inline constexpr static uint64_t adjust_seed(uint8_t const kmer_size, uint64_t const seed = 0x8F3F73B5CF1C9ADEULL) noexcept
/*!\brief Reduces a 64-bit seed to its 2 * kmer_size most significant bits, moved to the low-order positions.
 * \param kmer_size The k-mer size; every k-mer position accounts for two bits of the seed.
 * \param seed The 64-bit seed to adjust.
 * \returns `seed >> (64 - 2 * kmer_size)` for kmer_size in [1, 32]; 0 for kmer_size == 0 (no bits kept);
 *          the unmodified seed for kmer_size > 32 (all 64 bits kept).
 */
static inline constexpr uint64_t adjust_seed(uint8_t const kmer_size,
                                             uint64_t const seed = 0x8F'3F'73'B5'CF'1C'9A'DEULL) noexcept
{
    // A shift count of 64 (kmer_size == 0) or a wrapped-around unsigned count (kmer_size > 32) would be
    // undefined behaviour, so both cases are answered without shifting.
    return kmer_size == 0u ? uint64_t{0} : (kmer_size >= 32u ? seed : seed >> (64u - 2u * kmer_size));
}
Expand All @@ -31,7 +32,7 @@ struct all_arguments
struct min_arguments : all_arguments
{
uint8_t k{20};
seqan3::seed s{0x8F3F73B5CF1C9ADEULL};
seqan3::seed s{0x8F'3F'73'B5'CF'1C'9A'DEULL};
seqan3::shape shape = seqan3::ungapped{k};
seqan3::window_size w_size{60};
};
Expand All @@ -41,10 +42,10 @@ struct estimate_ibf_arguments : min_arguments
{
bool compressed = false;
std::vector<uint16_t> expression_thresholds{}; // Expression levels which should be created
uint8_t number_expression_thresholds{}; // If set, the expression levels are determined by the program.
uint8_t number_expression_thresholds{}; // If set, the expression levels are determined by the program.
bool samplewise{false};

template<class Archive>
template <class Archive>
void save(Archive & archive) const
{
archive(k);
Expand All @@ -57,7 +58,7 @@ struct estimate_ibf_arguments : min_arguments
archive(samplewise);
}

template<class Archive>
template <class Archive>
void load(Archive & archive)
{
archive(k);
Expand Down Expand Up @@ -119,8 +120,7 @@ void load_ibf(IBFType & ibf, std::filesystem::path ipath)
* \param opath Path, where the IBF should be stored.
*/
template <class IBFType>
void store_ibf(IBFType const & ibf,
std::filesystem::path opath)
void store_ibf(IBFType const & ibf, std::filesystem::path opath)
{
std::ofstream os{opath, std::ios::binary};
cereal::BinaryOutputArchive oarchive{os};
Expand Down
6 changes: 3 additions & 3 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
cmake_minimum_required (VERSION 3.25)

find_package(OpenMP REQUIRED COMPONENTS CXX)
add_library ("${PROJECT_NAME}_lib" STATIC ibf.cpp estimate.cpp)
find_package (OpenMP REQUIRED COMPONENTS CXX)
add_library ("${PROJECT_NAME}_lib" STATIC estimate.cpp ibf.cpp)
target_link_libraries ("${PROJECT_NAME}_lib" PUBLIC seqan3::seqan3)
target_link_libraries ("${PROJECT_NAME}_lib" PUBLIC robin_hood::robin_hood)
target_link_libraries("${PROJECT_NAME}_lib" PUBLIC OpenMP::OpenMP_CXX)
target_link_libraries ("${PROJECT_NAME}_lib" PUBLIC OpenMP::OpenMP_CXX)
target_include_directories ("${PROJECT_NAME}_lib" PUBLIC ../include)

add_executable ("${PROJECT_NAME}" main.cpp)
Expand Down
Loading

0 comments on commit 7f4e563

Please sign in to comment.