From 543bfc547f7e8179a5555f8b7abf0a8c6cccb8fc Mon Sep 17 00:00:00 2001 From: Rob J Goedman Date: Wed, 7 Dec 2022 15:01:53 -0500 Subject: [PATCH] Squashed 'deps/data/bridgestan/' changes from a8cfe61..c7a425a c7a425a Allow JSON data as a string (#60) 50fe2ec Update README examples section e380e82 Reword docstrings fb513e3 Reorganize and update documentation (#57) 96805fd Compile methods: split stanc args from make args, add default include path (#58) f62bf46 Fix broken link 676db6b Bump actions/setup-python from 2 to 4 (#55) 34f10dd Remove CmdStan Dependency (#51) 821883f Prefix any C-exposed symbols with `bs_` (#54) 81129b0 Add the option for autodiff Hessian calculations (#52) git-subtree-dir: deps/data/bridgestan git-subtree-split: c7a425aac54120bafa643b722ed24b2a32111782 --- .github/dependabot.yml | 6 + .github/workflows/docs.yaml | 23 ++-- .github/workflows/main.yaml | 76 ++++++------- .gitignore | 2 + .gitmodules | 4 + CONTRIBUTING.md | 31 +---- Makefile | 78 ++++++++++--- R/R/bridgestan.R | 34 +++--- R/example.R | 16 +++ R/man/StanModel.Rd | 6 +- R/tests/testthat/test_collisions.c | 2 +- README.md | 177 ++++------------------------- c-example/Makefile | 2 +- c-example/README.md | 2 + c-example/example.c | 8 +- docs/README.md | 37 +----- docs/conf.py | 5 +- docs/getting-started.rst | 141 +++++++++++++++++++++++ docs/how-it-works.rst | 17 --- docs/index.rst | 4 +- docs/install.rst | 103 ----------------- docs/internals.rst | 17 +++ docs/internals/documentation.rst | 33 ++++++ docs/internals/ffi.rst | 87 ++++++++++++++ docs/internals/testing.rst | 70 ++++++++++++ docs/languages.rst | 9 +- docs/languages/c-api.rst | 39 +++++-- docs/languages/julia.md | 143 +++++++++++++++-------- docs/languages/python.rst | 53 ++++++++- docs/languages/r.md | 43 ++++++- julia/MCMC.jl | 49 -------- julia/docs/src/julia.md | 49 +++++++- julia/example.jl | 46 ++------ julia/src/BridgeStan.jl | 15 ++- julia/src/compile.jl | 62 ++++------ julia/src/model.jl | 53 +++++---- julia/test/compile_tests.jl | 5 +- julia/test/model_tests.jl | 8 +- python/bridgestan/__init__.py | 4 +- python/bridgestan/compile.py | 61 +++------- python/bridgestan/model.py | 46 +++++--- python/example.py | 86 ++------------ python/test/__init__.py | 1 + python/test/conftest.py | 47 ++++++++ python/test/test_compile.py | 16 +-- python/test/test_stanmodel.py | 38 ++++++- src/bridgestan.cpp | 47 ++++---- src/bridgestan.h | 52 +++++---- src/bridgestanR.cpp | 80 ++++++------- src/bridgestanR.h | 52 ++++----- src/model_rng.cpp | 90 +++++++++------ src/model_rng.hpp | 6 +- stan | 1 + 53 files changed, 1197 insertions(+), 985 deletions(-) create mode 100644 .gitmodules create mode 100644 R/example.R create mode 100644 docs/getting-started.rst delete mode 100644 docs/how-it-works.rst delete mode 100644 docs/install.rst create mode 100644 docs/internals.rst create mode 100644 docs/internals/documentation.rst create mode 100644 docs/internals/ffi.rst create mode 100644 docs/internals/testing.rst delete mode 100644 julia/MCMC.jl create mode 100644 python/test/__init__.py create mode 100644 python/test/conftest.py create mode 160000 stan diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 88b5062..e5be8b2 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -8,3 +8,9 @@ updates: schedule: # Check for updates to GitHub Actions every month interval: "monthly" + + # checks if stan-dev/stan@master has changed + - package-ecosystem: "gitsubmodule" + directory: "/" + schedule: + interval: "daily" diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index e4e8ea6..ed43eea 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -6,9 +6,6 @@ on: - 'main' workflow_dispatch: {} -env: - CMDSTAN_VERSION: "2.31.0" - jobs: build-docs: name: Publish documentation to gh-pages @@ -23,16 +20,18 @@ jobs: steps: - name: Check out github uses: actions/checkout@v3 + with: + submodules: recursive - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install dependencies (python) run: | python -m pip install --upgrade pip wheel - python -m pip install --upgrade "sphinx>5" nbsphinx ipython ipykernel ipywidgets pydata-sphinx-theme breathe myst-parser + python -m pip install --upgrade "sphinx>5" nbsphinx ipython ipykernel ipywidgets sphinx-copybutton pydata-sphinx-theme breathe myst-parser - name: Install os dependencies run: | @@ -42,18 +41,12 @@ jobs: - name: Set up Julia uses: julia-actions/setup-julia@v1 - - name: CmdStan installation cacheing - id: cmdstan-cache + - name: Stan submodule cacheing uses: actions/cache@v3 + id: stan-cache with: - path: ~/.cmdstan - key: ${{ runner.os }}-cmdstan-${{ env.CMDSTAN_VERSION }} - - - name: Install CmdStan - if: steps.cmdstan-cache.outputs.cache-hit != 'true' - run: | - python -m pip install cmdstanpy - python -m cmdstanpy.install_cmdstan --version "${{ env.CMDSTAN_VERSION }}" --cores 2 + path: ./stan/ + key: ${{ runner.os }}-stan-${{ hashFiles('stan/src/stan/version.hpp') }}-v${{ env.CACHE_VERSION }} - name: Install package run: | diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 8d28bce..d8705b4 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -8,8 +8,7 @@ on: workflow_dispatch: {} env: - CMDSTAN_VERSION: "2.31.0" - CACHE_VERSION: 0 + CACHE_VERSION: 1 jobs: build_test_models: @@ -22,28 +21,19 @@ jobs: steps: - name: Check out github uses: actions/checkout@v3 + with: + submodules: recursive - - name: CmdStan installation cacheing + - name: Stan build caching uses: actions/cache@v3 - id: cmdstan-cache + id: stan-cache with: - path: ~/.cmdstan - key: ${{ runner.os }}-cmdstan-${{ env.CMDSTAN_VERSION }} - - - name: Install CmdStan (Unix) - if: matrix.os != 'windows-latest' - run: | - pipx run --spec cmdstanpy install_cmdstan --version "${{ env.CMDSTAN_VERSION }}" --cores 2 --verbose - - - name: Install CmdStan (Windows) - if: matrix.os == 'windows-latest' - run: | - pipx run --spec cmdstanpy install_cmdstan --version "${{ env.CMDSTAN_VERSION }}" --cores 2 --verbose --compiler + path: ./stan/ + key: ${{ runner.os }}-stan-${{ hashFiles('stan/src/stan/version.hpp') }}-v${{ env.CACHE_VERSION }} - name: Build C example (Unix) if: matrix.os != 'windows-latest' run: | - export CMDSTAN=~/.cmdstan/cmdstan-${{ env.CMDSTAN_VERSION }}/ cd c-example/ make example make example_static @@ -60,20 +50,17 @@ jobs: id: test-models with: path: ./test_models/ - key: ${{ hashFiles('**/*.stan', 'src/*') }}-${{ matrix.os }}-cmdstan-${{ env.CMDSTAN_VERSION }}-v${{ env.CACHE_VERSION }} + key: ${{ hashFiles('**/*.stan', 'src/*', 'stan/src/stan/version.hpp') }}-${{ matrix.os }}-v${{ env.CACHE_VERSION }} - name: Build test models (Unix) if: matrix.os != 'windows-latest' && steps.test-models.outputs.cache-hit != 'true' run: | - export CMDSTAN=~/.cmdstan/cmdstan-${{ env.CMDSTAN_VERSION }}/ make STAN_THREADS=true O=0 test_models -j2 shell: bash - name: Build test models (Windows) if: matrix.os == 'windows-latest' && steps.test-models.outputs.cache-hit != 'true' run: | - $raw_cmdstan = "$($HOME)/.cmdstan/cmdstan-${{ env.CMDSTAN_VERSION }}/" - $env:CMDSTAN = $raw_cmdstan.replace('\', '/') mingw32-make.exe STAN_THREADS=true O=0 test_models -j2 shell: pwsh @@ -88,23 +75,27 @@ jobs: steps: - name: Check out github uses: actions/checkout@v3 + with: + submodules: recursive + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Restore CmdStan + - name: Restore Stan uses: actions/cache@v3 + id: stan-cache with: - path: ~/.cmdstan - key: ${{ runner.os }}-cmdstan-${{ env.CMDSTAN_VERSION }} + path: ./stan/ + key: ${{ runner.os }}-stan-${{ hashFiles('stan/src/stan/version.hpp') }}-v${{ env.CACHE_VERSION }} - name: Restore built models uses: actions/cache@v3 id: test-models with: path: ./test_models/ - key: ${{ hashFiles('**/*.stan', 'src/*') }}-${{ matrix.os }}-cmdstan-${{ env.CMDSTAN_VERSION }}-v${{ env.CACHE_VERSION }} + key: ${{ hashFiles('**/*.stan', 'src/*', 'stan/src/stan/version.hpp') }}-${{ matrix.os }}-v${{ env.CACHE_VERSION }} - name: Install package run: | @@ -117,6 +108,7 @@ jobs: export BRIDGESTAN=$(pwd) cd python/ pytest -v + pytest -v --run-type=ad_hessian test_julia_client: needs: [build_test_models] @@ -128,21 +120,25 @@ jobs: steps: - name: Check out github uses: actions/checkout@v3 + with: + submodules: recursive + - name: Set up Julia uses: julia-actions/setup-julia@v1 - - name: Restore CmdStan + - name: Restore Stan uses: actions/cache@v3 + id: stan-cache with: - path: ~/.cmdstan - key: ${{ runner.os }}-cmdstan-${{ env.CMDSTAN_VERSION }} + path: ./stan/ + key: ${{ runner.os }}-stan-${{ hashFiles('stan/src/stan/version.hpp') }}-v${{ env.CACHE_VERSION }} - name: Restore built models uses: actions/cache@v3 id: test-models with: path: ./test_models/ - key: ${{ hashFiles('**/*.stan', 'src/*') }}-${{ matrix.os }}-cmdstan-${{ env.CMDSTAN_VERSION }}-v${{ env.CACHE_VERSION }} + key: ${{ hashFiles('**/*.stan', 'src/*', 'stan/src/stan/version.hpp') }}-${{ matrix.os }}-v${{ env.CACHE_VERSION }} - name: Run tests run: | @@ -159,6 +155,8 @@ jobs: steps: - name: Check out github uses: actions/checkout@v3 + with: + submodules: recursive - name: Install R uses: r-lib/actions/setup-r@v2.3.1 @@ -171,18 +169,18 @@ jobs: any::testthat any::devtools - - name: Restore CmdStan + - name: Restore Stan uses: actions/cache@v3 with: - path: ~/.cmdstan - key: ${{ runner.os }}-cmdstan-${{ env.CMDSTAN_VERSION }} + path: ./stan/ + key: ${{ runner.os }}-stan-${{ hashFiles('stan/src/stan/version.hpp') }}-v${{ env.CACHE_VERSION }} - name: Restore built models uses: actions/cache@v3 id: test-models with: path: ./test_models/ - key: ${{ hashFiles('**/*.stan', 'src/*') }}-${{ matrix.os }}-cmdstan-${{ env.CMDSTAN_VERSION }}-v${{ env.CACHE_VERSION }} + key: ${{ hashFiles('**/*.stan', 'src/*', 'stan/src/stan/version.hpp') }}-${{ matrix.os }}-v${{ env.CACHE_VERSION }} - name: Run tests run: | @@ -198,16 +196,18 @@ jobs: steps: - name: Check out github uses: actions/checkout@v3 + with: + submodules: recursive - - name: Restore CmdStan + - name: Restore Stan uses: actions/cache@v3 + id: stan-cache with: - path: ~/.cmdstan - key: ${{ runner.os }}-cmdstan-${{ env.CMDSTAN_VERSION }} + path: ./stan/ + key: ${{ runner.os }}-stan-${{ hashFiles('stan/src/stan/version.hpp') }}-v${{ env.CACHE_VERSION }} - name: Setup TBB run: | - cd ~/.cmdstan/cmdstan-${{ env.CMDSTAN_VERSION }} Add-Content $env:GITHUB_PATH "$(pwd)/stan/lib/stan_math/lib/tbb" - name: Restore built models @@ -215,7 +215,7 @@ jobs: id: test-models with: path: ./test_models/ - key: ${{ hashFiles('**/*.stan', 'src/*') }}-windows-latest-cmdstan-${{ env.CMDSTAN_VERSION }}-v${{ env.CACHE_VERSION }} + key: ${{ hashFiles('**/*.stan', 'src/*', 'stan/src/stan/version.hpp') }}-windows-latest-v${{ env.CACHE_VERSION }} - name: Install R uses: r-lib/actions/setup-r@v2.3.1 diff --git a/.gitignore b/.gitignore index 21a6243..5cff5b1 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,5 @@ bridgestan.Rcheck/ build/ .DS_Store + +bin/* diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..8a8bca0 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "stan"] + path = stan + url = https://github.com/stan-dev/stan + branch = master diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e0b0f07..bb039c1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ We welcome contributions to the project in any form, including bug reports, bug fixes, new features, improved documentation, or improved test coverage. -Our next goal is a stable 1.0 release, which is mostly a matter of more thorough testing and documentation. +Developer-specific documentation is available at https://roualdes.github.io/bridgestan/internals.html ## Licensing @@ -29,26 +29,9 @@ We follow standard open source and GitHub practices: * We propose updates through [GitHub Pull Requests](https://github.com/roualdes/bridgestan/pulls) so that we can do code review. We do not push directly to the main branch. -## Documentation - -We use [Sphinx](https://www.sphinx-doc.org/en/master/) to generate documentation, with the goal of publishing on [Read the Docs](https://readthedocs.org) for the first release. The docs are currently hosted on the [GitHub pages](https://roualdes.github.io/bridgestan/) for this repository. - -We use the following developer-specific tools for documentation. - -* [Sphinx 5.0 or above](https://www.sphinx-doc.org/en/master/) -* [nbsphinx](https://nbsphinx.readthedocs.io/en/0.8.9/) -* [pydata-sphinx-theme](https://pydata-sphinx-theme.readthedocs.io/en/stable/) -* [MySt-Parser](https://myst-parser.readthedocs.io/en/latest/) - -If you wish to build the C++ portions of the documentation, you should also have: - -* [Doxygen](https://doxygen.nl/) -* [Breathe](https://breathe.readthedocs.io/en/stable/index.html) - - ## Builds -We use [Gnu make](https://www.gnu.org/software/make/) for builds. If you have installed [CmdStan](https://mc-stan.org/users/interfaces/cmdstan) as required for the source to work, then you will already have Gnu make and a sufficient C++ compiler. +We use [Gnu make](https://www.gnu.org/software/make/) for builds. If you have previously used [CmdStan](https://mc-stan.org/users/interfaces/cmdstan), then you will already have Gnu make and a sufficient C++ compiler. ## Language specific practices @@ -71,13 +54,8 @@ We use [Gnu make](https://www.gnu.org/software/make/) for builds. If you have i ### Python development * Python development relies on the external dependencies: - * [pytest](https://docs.pytest.org/en/7.1.x/) * [NumPy](https://numpy.org/) -* We integrate a C wrapper of Stan's model class using the [ctypes](https://docs.python.org/3/library/ctypes.html) foreign function interface. - -* We use the [numpy.testing](https://numpy.org/doc/stable/reference/routines.testing.html) unit testing framework and run the tests by calling [pytest](https://docs.pytest.org/en/7.1.x/) from the top-level Python directory `bridgestan/python`. - * We autoformat code with [black](https://black.readthedocs.io/en/stable/). * We recommend but do not require checking the code style and semantics with [PyLint](https://www.pylint.org) using [BridgeStan .pylintrc](https://github.com/roualdes/bridgestan/blob/main/.pylintrc). @@ -86,9 +64,6 @@ We use [Gnu make](https://www.gnu.org/software/make/) for builds. If you have i * R dependencies beyond those included in this repo: * [R6 package](https://cran.r-project.org/web/packages/R6/index.html) for reference classes - * [testthat](https://testthat.r-lib.org) for unit testing - -* We use the most basic [.C interface](https://www.biostat.jhsph.edu/~rpeng/docs/interface.pdf) for calling C from R. ### Julia development @@ -96,7 +71,7 @@ We use [Gnu make](https://www.gnu.org/software/make/) for builds. If you have i * [Test](https://docs.julialang.org/en/v1/stdlib/Test/) * [LinearAlgebra](https://docs.julialang.org/en/v1/stdlib/LinearAlgebra/) -* We use Julia's C foreign function interface, which is documented for [base C](https://docs.julialang.org/en/v1/base/c/) and the [C standard library](https://docs.julialang.org/en/v1/stdlib/Libdl/). +* Julia code is formatted using [JuliaFormatter](https://github.com/domluna/JuliaFormatter.jl). ## Proposing a new interface language diff --git a/Makefile b/Makefile index 615523b..4df150e 100644 --- a/Makefile +++ b/Makefile @@ -1,21 +1,16 @@ ## include paths -GITHUB ?= $(HOME)/github/ -CMDSTAN ?= $(GITHUB)stan-dev/cmdstan/ -STANC ?= $(CMDSTAN)bin/stanc$(EXE) -STAN ?= $(CMDSTAN)stan/ +BS_ROOT ?= . +SRC ?= $(BS_ROOT)/src/ +STAN ?= $(BS_ROOT)/stan/ +STANC ?= $(BS_ROOT)/bin/stanc$(EXE) MATH ?= $(STAN)lib/stan_math/ RAPIDJSON ?= $(STAN)lib/rapidjson_1.1.0/ ## required C++ includes INC_FIRST ?= -I $(STAN)src -I $(RAPIDJSON) -## set CXX = g++ and CX_TYPE to gcc to run under linux -OS ?= $(shell uname -s) -CXX ?= clang++ -CXX_TYPE ?= clang - ## makefiles needed for math library --include $(CMDSTAN)/make/local +-include $(BS_ROOT)/make/local -include $(MATH)make/compiler_flags -include $(MATH)make/dependencies -include $(MATH)make/libraries @@ -32,17 +27,17 @@ ifdef STAN_THREADS else STAN_FLAG_THREADS= endif -STAN_FLAGS=$(STAN_FLAG_THREADS)$(STAN_FLAG_OPENCL) +ifdef BRIDGESTAN_AD_HESSIAN + CXXFLAGS+=-DSTAN_MODEL_FVAR_VAR -DBRIDGESTAN_AD_HESSIAN + STAN_FLAG_HESS=_adhessian +else + STAN_FLAG_HESS= +endif +STAN_FLAGS=$(STAN_FLAG_THREADS)$(STAN_FLAG_OPENCL)$(STAN_FLAG_HESS) -SRC ?= src/ BRIDGE_DEPS = $(SRC)bridgestan.cpp $(SRC)bridgestan.h $(SRC)model_rng.cpp $(SRC)model_rng.hpp $(SRC)bridgestanR.cpp $(SRC)bridgestanR.h BRIDGE_O = $(patsubst %.cpp,%$(STAN_FLAGS).o,$(SRC)bridgestan.cpp) -$(STANC): - @echo 'stanc could not be found. Make sure CmdStan is installed and built, and that the path specificied is correct:' - @echo '$(CMDSTAN)' - exit 1 - $(BRIDGE_O) : $(BRIDGE_DEPS) @echo '' @echo '--- Compiling Stan bridge C++ code ---' @@ -79,6 +74,8 @@ clean: $(RM) $(SRC)/*.o $(RM) test_models/**/*.so $(RM) test_models/**/*.hpp + $(RM) bin/stanc$(EXE) + # build all test models at once TEST_MODEL_LIBS = test_models/throw_tp/throw_tp_model.so test_models/throw_gq/throw_gq_model.so test_models/throw_lp/throw_lp_model.so test_models/throw_data/throw_data_model.so test_models/jacobian/jacobian_model.so test_models/matrix/matrix_model.so test_models/simplex/simplex_model.so test_models/full/full_model.so test_models/stdnormal/stdnormal_model.so test_models/bernoulli/bernoulli_model.so test_models/gaussian/gaussian_model.so test_models/fr_gaussian/fr_gaussian_model.so test_models/simple/simple_model.so test_models/multi/multi_model.so @@ -86,6 +83,13 @@ TEST_MODEL_LIBS = test_models/throw_tp/throw_tp_model.so test_models/throw_gq/th .PHONY: test_models test_models: $(TEST_MODEL_LIBS) +.PHONY: stan-update stan-update-version +stan-update: + git submodule update --init --recursive + +stan-update-remote: + git submodule update --remote --init --recursive + # print compilation command line config .PHONY: compile_info compile_info: @@ -94,3 +98,43 @@ compile_info: ## print value of makefile variable (e.g., make print-TBB_TARGETS) .PHONY: print-% print-% : ; @echo $* = $($*) ; + +# handles downloading of stanc +STANC_DL_RETRY = 5 +STANC_DL_DELAY = 10 +STANC3_TEST_BIN_URL ?= +STANC3_VERSION ?= v2.31.0 + +ifeq ($(OS),Windows_NT) + OS_TAG := windows +else ifeq ($(OS),Darwin) + OS_TAG := mac +else ifeq ($(OS),Linux) + OS_TAG := linux + ifeq ($(shell uname -m),mips64) + ARCH_TAG := -mips64el + else ifeq ($(shell uname -m),ppc64le) + ARCH_TAG := -ppc64el + else ifeq ($(shell uname -m),s390x) + ARCH_TAG := -s390x + else ifeq ($(shell uname -m),aarch64) + ARCH_TAG := -arm64 + else ifeq ($(shell uname -m),armv7l) + ifeq ($(shell readelf -A /usr/bin/file | grep Tag_ABI_VFP_args),) + ARCH_TAG := -armel + else + ARCH_TAG := -armhf + endif + endif +endif + +ifeq ($(OS_TAG),windows) +$(STANC): + @mkdir -p $(dir $@) + $(shell echo "curl -L https://github.com/stan-dev/stanc3/releases/download/$(STANC3_VERSION)/$(OS_TAG)-stanc -o $(STANC) --retry $(STANC_DL_RETRY) --retry-delay $(STANC_DL_DELAY)") +else +$(STANC): + @mkdir -p $(dir $@) + curl -L https://github.com/stan-dev/stanc3/releases/download/$(STANC3_VERSION)/$(OS_TAG)$(ARCH_TAG)-stanc -o $(STANC) --retry $(STANC_DL_RETRY) --retry-delay $(STANC_DL_DELAY) + chmod +x $(STANC) +endif diff --git a/R/R/bridgestan.R b/R/R/bridgestan.R index 51ee75f..7dc9a0b 100755 --- a/R/R/bridgestan.R +++ b/R/R/bridgestan.R @@ -10,7 +10,7 @@ StanModel <- R6::R6Class("StanModel", #' @description #' Create a Stan Model instace. #' @param lib A path to a compiled BridgeStan Shared Object file. - #' @param data A path to a JSON data file for the model. + #' @param data Either a string representation of a JSON object or a path to a data file in JSON format ending in ".json". #' @param rng_seed Seed for the RNG in the model object. #' @param chain_id Used to offset the RNG by a fixed amount. #' @return A new StanModel. @@ -32,7 +32,7 @@ StanModel <- R6::R6Class("StanModel", } dyn.load(private$lib, PACKAGE = private$lib_name) - .C("construct_R", + .C("bs_construct_R", as.character(data), as.integer(rng_seed), as.integer(chain_id), ptr_out = raw(8), PACKAGE = private$lib_name @@ -46,7 +46,7 @@ StanModel <- R6::R6Class("StanModel", #' Get the name of this StanModel #' @return A character vector of the name. name = function() { - .C("name_R", as.raw(private$model), + .C("bs_name_R", as.raw(private$model), name_out = as.character(""), PACKAGE = private$lib_name )$name_out @@ -55,7 +55,7 @@ StanModel <- R6::R6Class("StanModel", #' Get compile information about this Stan model. #' @return A character vector of the Stan version and important flags. model_info = function() { - .C("model_info_R", as.raw(private$model), + .C("bs_model_info_R", as.raw(private$model), info_out = as.character(""), PACKAGE = private$lib_name )$info_out @@ -71,7 +71,7 @@ StanModel <- R6::R6Class("StanModel", #' @param include_gq Whether to include variables from generated quantities. #' @return A list of character vectors of the names. param_names = function(include_tp = FALSE, include_gq = FALSE) { - .C("param_names_R", as.raw(private$model), + .C("bs_param_names_R", as.raw(private$model), as.logical(include_tp), as.logical(include_gq), names_out = as.character(""), PACKAGE = private$lib_name @@ -87,7 +87,7 @@ StanModel <- R6::R6Class("StanModel", #' order of the output is column major and more generally last-index major for containers. #' @return A list of character vectors of the names. param_unc_names = function() { - .C("param_unc_names_R", as.raw(private$model), + .C("bs_param_unc_names_R", as.raw(private$model), names_out = as.character(""), PACKAGE = private$lib_name )$names_out -> names @@ -99,7 +99,7 @@ StanModel <- R6::R6Class("StanModel", #' @param include_gq Whether to include variables from generated quantities. #' @return The number of parameters in the model. param_num = function(include_tp = FALSE, include_gq = FALSE) { - .C("param_num_R", as.raw(private$model), + .C("bs_param_num_R", as.raw(private$model), as.logical(include_tp), as.logical(include_gq), num = as.integer(0), PACKAGE = private$lib_name @@ -112,20 +112,20 @@ StanModel <- R6::R6Class("StanModel", #' For example, `simplex[5]` has a constrained size of 5, but an unconstrained size of 4. #' @return The number of parameters in the model. param_unc_num = function() { - .C("param_unc_num_R", as.raw(private$model), + .C("bs_param_unc_num_R", as.raw(private$model), num = as.integer(0), PACKAGE = private$lib_name )$num }, #' @description - #' This turns a vector of unconstrained params into constrained parameters + #' Returns a vector of constrained params give the unconstrained parameters. #' See also `StanModel$param_unconstrain()`, the inverse of this function. #' @param theta_unc The vector of unconstrained parameters #' @param include_tp Whether to also output the transformed parameters of the model. #' @param include_gq Whether to also output the generated quantities of the model. #' @return The constrained parameters of the model. param_constrain = function(theta_unc, include_tp = FALSE, include_gq = FALSE) { - vars <- .C("param_constrain_R", as.raw(private$model), + vars <- .C("bs_param_constrain_R", as.raw(private$model), as.logical(include_tp), as.logical(include_gq), as.numeric(theta_unc), theta = double(self$param_num(include_tp = include_tp, include_gq = include_gq)), return_code = as.integer(0), @@ -137,7 +137,7 @@ StanModel <- R6::R6Class("StanModel", vars$theta }, #' @description - #' This turns a vector of constrained params into unconstrained parameters. + #' Returns a vector of unconstrained params give the constrained parameters. #' #' It is assumed that these will be in the same order as internally represented by #' the model (e.g., in the same order as `StanModel$param_names()`). @@ -146,7 +146,7 @@ StanModel <- R6::R6Class("StanModel", #' @param theta The vector of constrained parameters #' @return The unconstrained parameters of the model. param_unconstrain = function(theta) { - vars <- .C("param_unconstrain_R", as.raw(private$model), + vars <- .C("bs_param_unconstrain_R", as.raw(private$model), as.numeric(theta), theta_unc = double(self$param_unc_num()), return_code = as.integer(0), @@ -164,7 +164,7 @@ StanModel <- R6::R6Class("StanModel", #' @param json Character vector containing a string representation of JSON data. #' @return The unconstrained parameters of the model. param_unconstrain_json = function(json) { - vars <- .C("param_unconstrain_json_R", as.raw(private$model), + vars <- .C("bs_param_unconstrain_json_R", as.raw(private$model), as.character(json), theta_unc = double(self$param_unc_num()), return_code = as.integer(0), @@ -183,7 +183,7 @@ StanModel <- R6::R6Class("StanModel", #' @param jacobian If `TRUE`, include change of variables terms for constrained parameters. #' @return The log density. log_density = function(theta, propto = TRUE, jacobian = TRUE) { - vars <- .C("log_density_R", as.raw(private$model), + vars <- .C("bs_log_density_R", as.raw(private$model), as.logical(propto), as.logical(jacobian), as.numeric(theta), val = double(1), return_code = as.integer(0), @@ -203,7 +203,7 @@ StanModel <- R6::R6Class("StanModel", #' @return List containing entries `val` (the log density) and `gradient` (the gradient). log_density_gradient = function(theta, propto = TRUE, jacobian = TRUE) { dims <- self$param_unc_num() - vars <- .C("log_density_gradient_R", as.raw(private$model), + vars <- .C("bs_log_density_gradient_R", as.raw(private$model), as.logical(propto), as.logical(jacobian), as.numeric(theta), val = double(1), gradient = double(dims), return_code = as.integer(0), @@ -223,7 +223,7 @@ StanModel <- R6::R6Class("StanModel", #' @return List containing entries `val` (the log density), `gradient` (the gradient), and `hessian` (the Hessian). log_density_hessian = function(theta, propto = TRUE, jacobian = TRUE) { dims <- self$param_unc_num() - vars <- .C("log_density_hessian_R", as.raw(private$model), + vars <- .C("bs_log_density_hessian_R", as.raw(private$model), as.logical(propto), as.logical(jacobian), as.numeric(theta), val = double(1), gradient = double(dims), hess = double(dims * dims), return_code = as.integer(0), @@ -240,7 +240,7 @@ StanModel <- R6::R6Class("StanModel", lib_name = NA, model = NA, finalize = function() { - .C("destruct_R", + .C("bs_destruct_R", as.raw(private$model), return_code = as.integer(0), PACKAGE = private$lib_name diff --git a/R/example.R b/R/example.R new file mode 100644 index 0000000..371648f --- /dev/null +++ b/R/example.R @@ -0,0 +1,16 @@ +library(bridgestan) + +model <- StanModel$new("../test_models/bernoulli/bernoulli_model.so", "../test_models/bernoulli/bernoulli.data.json", 1234, 0) + +print(paste0("This model's name is ", model$name(), ".")) +print(paste0("This model has ", model$param_num(), " parameters.")) + + +x <- runif(model$param_unc_num()) +q <- log(x / (1 - x)) + +res <- model$log_density_gradient(q, jacobian = FALSE) + +print(paste0("log_density and gradient of Bernoulli model: ", + res$val, ", ", res$gradient)) + diff --git a/R/man/StanModel.Rd b/R/man/StanModel.Rd index 63e185a..aeee6f9 100644 --- a/R/man/StanModel.Rd +++ b/R/man/StanModel.Rd @@ -46,7 +46,7 @@ Create a Stan Model instace. \describe{ \item{\code{lib}}{A path to a compiled BridgeStan Shared Object file.} -\item{\code{data}}{A path to a JSON data file for the model.} +\item{\code{data}}{Either a string representation of a JSON object or a path to a data file in JSON format ending in ".json".} \item{\code{rng_seed}}{Seed for the RNG in the model object.} @@ -171,7 +171,7 @@ The number of parameters in the model. \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-StanModel-param_constrain}{}}} \subsection{Method \code{param_constrain()}}{ -This turns a vector of unconstrained params into constrained parameters +Returns a vector of constrained params give the unconstrained parameters. See also \code{StanModel$param_unconstrain()}, the inverse of this function. \subsection{Usage}{ \if{html}{\out{
}}\preformatted{StanModel$param_constrain(theta_unc, include_tp = FALSE, include_gq = FALSE)}\if{html}{\out{
}} @@ -196,7 +196,7 @@ The constrained parameters of the model. \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-StanModel-param_unconstrain}{}}} \subsection{Method \code{param_unconstrain()}}{ -This turns a vector of constrained params into unconstrained parameters. +Returns a vector of unconstrained params give the constrained parameters. It is assumed that these will be in the same order as internally represented by the model (e.g., in the same order as \code{StanModel$param_names()}). diff --git a/R/tests/testthat/test_collisions.c b/R/tests/testthat/test_collisions.c index eea339f..9e923fe 100644 --- a/R/tests/testthat/test_collisions.c +++ b/R/tests/testthat/test_collisions.c @@ -14,7 +14,7 @@ by BridgeStan. const char *out = "test me"; -void name_R(void **unused, char const **name_out) +void bs_name_R(void **unused, char const **name_out) { *name_out = out; } diff --git a/README.md b/README.md index ec7d80d..c0f654e 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,9 @@ Stan is a probabilistic programming language for coding statistical models. For an introduction to what can be coded in Stan, see the [*Stan User's Guide*](https://mc-stan.org/docs/stan-users-guide/index.html). +BridgeStan is currently shipping with Stan version 2.31.0 -More documentation is available at https://roualdes.github.io/bridgestan/ +Documentation is available at https://roualdes.github.io/bridgestan/ #### Compatibility @@ -26,188 +27,48 @@ compiler combinations. ## Installing BridgeStan -### Step 1: Install C++, make, and CmdStan - -See the section [Prerequisites](#prerequisites) for instructions on -installing a C++ toolchain and CmdStan. Please verify CmdStan is -working before proceeding. - -### Step 2: Download BridgeStan - -To download BridgeStan into directory ``, use the following. +Installing the core of BridgeStan is as simple as +[installing a C++ toolchain](https://mc-stan.org/docs/cmdstan-guide/cmdstan-installation.html#cpp-toolchain) +(libraries, compiler, and the `make` command), and downloading this +repository. To download the latest development version, you can run ```shell -$ cd -$ git clone https://github.com/roualdes/bridgestan.git +git clone --recurse-submodules https://github.com/roualdes/bridgestan.git ``` -There is no need to build anything up front. - +For a full guide on installing, configuring, and using BridgeStan, consult the +[documentation](https://roualdes.github.io/bridgestan/getting-started.html) ## Using BridgeStan -There is a built-in example that can be used to verify that BridgeStan -is working. - -### Compiling the Stan program +### Compiling a Stan program To compile the Stan model in `test_models/multi/multi.stan` to a binary shared object (`.so` file), use the following. ``` $ cd bridgestan -$ CMDSTAN=/path/to/cmdstan/ make test_models/multi/multi_model.so +$ make test_models/multi/multi_model.so ``` -The forward slash (`/`) at the end of `cmdstan/` is necessary. +This will require internet access the first time you run it in order +to download the appropriate Stan compiler for your platform into +`/bin/stanc[.exe]` -### Using BridgeStan with Python or Julia +### Example programs This repository includes examples of calling Stan through BridgeStan -in Python or Julia. +in Python, Julia, R, and C. * From Python: [`example.py`](julia/example.py) * From Julia: [`example.jl`](python/example.jl) -Additional examples can be found in the `test` folder for each interface. - -## Custom build instructions - -### Custom C++ flags - -By default, BridgeStan uses the default compiler flags set from -CmdStan's `makefile`. To override the defaults or add new flags, create -or edit the file - -* local make commands: `/make/local`. - -To help get started, CmdStan includes an example -`/make/local.example` that can be copied to -`/make/local`. - - -For example, setting the contents of `make/local` to the following -includes compiler flags for optimization level and architecture. - -``` -# Adding other arbitrary C++ compiler flags -CXXFLAGS+= -O3 -march=native -``` - -### Custom stanc flags - -The Stan Blog post [Options for improving Stan sampling -speed](https://blog.mc-stan.org/2022/08/03/options-for-improving-stan-sampling-speed/) -discusses how Stan speed can be improved with the following Stan -compiler flags. - -``` -# pedantic mode and level 1 optimization -STANCFLAGS+= --warn-pedantic --O1 -``` - -#### Enabling parallel calls of Stan programs - -In order for Python or Julia to be able to call a single Stan model -concurrently from multiple threads or for a Stan model to execute its -own code in parallel, the following flag must be set in `make/local` -or on the command line. - -``` -# Enable threading -STAN_THREADS=true -``` - -Note that this flag changes a lot of the internals of the Stan library -and as such, **all models used in the same process** should have the same -setting. Mixing models which had `STAN_THREADS` enabled with those that didn't -will most likely lead to segmentation faults or other crashes. - - -## Tips - -### Windows paths - -On Windows, BridgeStan requires *forward slashes* in the path to -CmdStan, as in the following example. - -```shell -mingw32-make.exe CMDSTAN="C:/path/to/cmdstan/" ./test_models/multi/multi_model.so -``` - -### Sizes and `param_constrain()` and `param_unconstrain()` - -For a given vector `q` of unconstrained parameters, the function -`param_constrain()` can return an array with length longer than the -length of `q`. This happens, for instance, with a `cov_matrix[K]` -parameter. A covariance matrix has $K \times K$ elements, -but there are only $K + \binom{K}{2}$ parameters in the unconstrained -parameterization (i.e., a Cholesky factor). - -### Parameter ordering - -Parameters are ordered for I/O in the same order they are declared in -the underlying Stan program. The `param_names()` and `param_unc_names()` -functions give the canonical orderings for constrained and unconstrained -parameters respectively. - -## Prerequisites - -### Prereq: Julia, Python, *or* R - -To use BridgeStan from Julia 1.8+, Python 3.9+, or R 3+, you will need to have those -languages installed. - -The interfaces may depend on different packages in their respective languages. - -### Prereq: C++ toolchain - -Stan requires a C++ tool chain consisting of - -* A C++11 compiler -* The Gnu `make` utility for \*nix *or* mingw for Windows - -Here are complete instructions by platform for installing both. - -* [C++ tool chain installation](https://mc-stan.org/docs/cmdstan-guide/cmdstan-installation.html#cpp-toolchain) - -### Prereq: CmdStan - -To install CmdStan, follow the instructions in the guide, +* From R: [`example.r`](R/example.R) -* [Getting Started with CmdStan](https://mc-stan.org/docs/cmdstan-guide/cmdstan-installation.html) - - -### Ensuring tools are built/imported - -At this point, everything should be in place to build and execute a -Stan program. We will assume - -* CmdStan is installed in ``, and -* BridgeStan is checked out in ``. - -To verify the compiler chain is installed correctly, the following -should run without errors. - -```shell -cd -make /test_models/multi/multi -``` - -The second command brings in the latest version of Stan. - -This will require internet access the first time you run it in order -to download the appropriate Stan compiler for your platform into -`/bin/stanc[.exe]` - -You can verify CmdStan works end-to-end by using the resulting -executable to fit some data. - -```shell -.//test_models/multi/multi sample data file=/test_models/multi/multi.data.json -``` +* From C: [`example.c`](c-example/example.c) +Examples of other functionality can be found in the `test` folder for each interface. ## Acknowledgements diff --git a/c-example/Makefile b/c-example/Makefile index ef9f735..4d123e0 100644 --- a/c-example/Makefile +++ b/c-example/Makefile @@ -1,5 +1,5 @@ # Tell the top-level makefile which relative path to use -SRC=../src/ +BS_ROOT=.. include ../Makefile MODEL?=full diff --git a/c-example/README.md b/c-example/README.md index fd61d40..339b504 100644 --- a/c-example/README.md +++ b/c-example/README.md @@ -1,5 +1,7 @@ # BridgeStan from C +[View the C API documentation online](https://roualdes.github.io/bridgestan/languages/c-api.html). + This shows how one could write a C program which calls BridgeStan. Any compiled language with a C foreign function interface and diff --git a/c-example/example.c b/c-example/example.c index eade5a4..d4c7ae8 100644 --- a/c-example/example.c +++ b/c-example/example.c @@ -8,11 +8,11 @@ int main(int argc, char** argv) { } else { data = ""; } - model_rng* model = construct(data, 123, 0); + bs_model_rng* model = bs_construct(data, 123, 0); if (!model) { return 1; } - printf("This model's name is %s.\n", name(model)); - printf("It has %d parameters.\n", param_num(model, 0, 0)); - return destruct(model); + printf("This model's name is %s.\n", bs_name(model)); + printf("It has %d parameters.\n", bs_param_num(model, 0, 0)); + return bs_destruct(model); } diff --git a/docs/README.md b/docs/README.md index cac2dd4..8822263 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,44 +7,9 @@ These docs are available online at ## Building -Documentation is generated with [Sphinx](https://www.sphinx-doc.org/en/master/). +Building the documentation is documented [here](https://roualdes.github.io/bridgestan/internals/documentation.html). The main documentation can be build by running `make docs` in the top-level repository. `make` can also be run in this folder to produce a specific format, for example `make latexpdf` will produce a PDF manual of the documentation, assuming a LaTeX toolchain is available. - -### Dependencies - -We use the following developer-specific tools for documentation. - -* [Sphinx 5.0 or above](https://www.sphinx-doc.org/en/master/) -* [nbsphinx](https://nbsphinx.readthedocs.io/en/0.8.9/) -* [pydata-sphinx-theme](https://pydata-sphinx-theme.readthedocs.io/en/stable/) -* [MySt-Parser](https://myst-parser.readthedocs.io/en/latest/) - -If you wish to build the C++ portions of the documentation, you should also have: - -* [Doxygen](https://doxygen.nl/) -* [Breathe](https://breathe.readthedocs.io/en/stable/index.html) - -If you wish to build the Julia API docs, you should also have -[Julia](https://julialang.org/) installed. **Note**: the Julia -API doc sources live in `julia/docs/src/` and are merely -copied to this folder during build. If the API doc needs changing -(for example, to add a new function to the list of items to be documented) -update it **there**. - - -## Documentation structure - -* Top-level documentation - - language-agnostic documentation - - `users/` for user-facing language-agnostic documentation - - `devs/` for dev-facing language-agnostic documentation - -* Language-specific documentation: `L/` - - top-level doc for language `L` with pointers to dev and user doc - - `L/users` for user-facing doc for language `L` - - `L/devs` for dev-facing doc for language `L` - - `L/api` for the API spec for language `L` diff --git a/docs/conf.py b/docs/conf.py index ebbb578..d46ac07 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,6 +20,7 @@ "sphinx.ext.napoleon", "sphinx.ext.autodoc", "sphinx.ext.githubpages", + "sphinx_copybutton", "nbsphinx", "sphinx.ext.mathjax", "myst_parser", @@ -75,9 +76,7 @@ breathe_projects = {"bridgestan": "./_build/cppxml/"} -breathe_projects_source = { - "bridgestan": ("../src/", ["bridgestan.h", "bridgestanR.h"]) -} +breathe_projects_source = {"bridgestan": ("../src/", ["bridgestan.h", "bridgestanR.h"])} breathe_default_project = "bridgestan" diff --git a/docs/getting-started.rst b/docs/getting-started.rst new file mode 100644 index 0000000..ed52429 --- /dev/null +++ b/docs/getting-started.rst @@ -0,0 +1,141 @@ + +Getting Started +=============== + +Requirement: C++ toolchain +-------------------------- + +Stan requires a C++ tool chain consisting of + +* A C++14 compiler. On Windows, MSCV is *not* supported, so something like MinGW GCC is required. +* The Gnu ``make`` utility for \*nix *or* ``mingw32-make`` for Windows + +Here are complete instructions by platform for installing both, from the CmdStan installation instructions. + +* `C++ tool chain installation - CmdStan User's Guide `__ + +Downloading BridgeStan +---------------------- + +Installing BridgeStan is as simple as ensuring that the above requirements are installed and then downloading +the source repository. + +Installing with ``git`` +_______________________ + +If you have ``git`` installed, you may download BridgeStan by navigating to the folder you'd like +BridgeStan to be in and running + +.. code-block:: shell + + git clone --recurse-submodules https://github.com/roualdes/bridgestan.git + +If you clone without the ``--recurse-submodules`` argument, you can download the required +submodules with ``make stan-update``. + + +Testing the Installation +________________________ + +After this, BridgeStan is installed. You can test a basic compilation by opening +a terminal in your BridgeStan folder and running + +.. code-block:: shell + + # MacOS and Linux + make test_models/multi/multi_model.so + # Windows + mingw32-make.exe test_models/multi/multi_model.so + +This will compile the file ``test_models/multi/multi.stan`` into a shared library object for use with BridgeStan. +This will require internet access the first time you run it in order +to download the appropriate Stan compiler for your platform into +``/bin/stanc[.exe]`` + +Installing an Interface +----------------------- + +To see instructions for installing the BridgeStan client package in your language of +choice, see the :doc:`Language Interfaces page `. + +Optional: Customizing BridgeStan +-------------------------------- + +BridgeStan has many compiler flags and options set by default. Many of these defaults +are the same as those used by the CmdStan interface to Stan. +You can override the defaults or add new flags +on the command line when invoking ``make``, or make them persistent by +creating or editing the file ``/make/local``. + +For example, setting the contents of ``make/local`` to the following +includes compiler flags for optimization level and architecture. + +.. code-block:: Makefile + + # By default we use -O3, this sets a less aggressive C++ optimization level + O=2 + # Adding other arbitrary C++ compiler flags + CXXFLAGS+= -march=native + +Flags for ``stanc3`` can also be set here + +.. code-block:: Makefile + + # pedantic mode and level 1 optimization + STANCFLAGS+= --warn-pedantic --O1 + +Enabling Parallel Calls of Stan Programs +________________________________________ + +In order for Python or Julia to be able to call a single Stan model +concurrently from multiple threads or for a Stan model to execute its +own code in parallel, the following flag must be set in ``make/local`` +or on the command line. + +.. code-block:: Makefile + + # Enable threading + STAN_THREADS=true + +Note that this flag changes a lot of the internals of the Stan library +and as such, **all models used in the same process should have the same +setting**. Mixing models which have ``STAN_THREADS`` enabled with those that do not +will most likely lead to segmentation faults or other crashes. + +Additional flags, such as those for MPI and OpenCL, are covered in the +`CmdStan User's Guide page on Parallelization `__. + +Faster Hessian calculations +___________________________ + +By default, Hessians in BridgeStan are calculated using central finite differences. +This is because not all Stan models support the nested autodiff required for Hessians +to be computed directly, particularly models which use implicit functions like the ``algebra_solver`` +or ODE integrators. + +If your Stan model does not use these features, you can enable autodiff Hessians by +setting the compile-time flag ``BRIDGESTAN_AD_HESSIAN=true`` in the invocation to ``make``. +This can be set in ``make/local`` if you wish to use it by default. + +This value is reported by the ``model_info`` function if you would like to check at run time +whether Hessians are computed with nested autodiff or with finite differences. Similar to +``STAN_THREADS``, it is not advised to mix models which use autodiff Hessians with those that +do not in the same program. + +Using Custom Stan Versions +__________________________ + +If you wish to use BridgeStan for an older released version, all you need to do is + +1. Set ``STANC3_VERSION`` in ``make/local`` to your desired version, e.g. ``v2.26.0`` +2. Go into the ``stan`` submodule and run ``git checkout release/VERSION``, e.g. ``release/v2.26.0`` +3. Also in the ``stan`` submodule, run ``make math-update`` +4. In the top level BridgeStan directory, run ``make clean`` + +To return to the version of Stan currently used by BridgeStan, you can run ``make stan-update`` from the top level directory +and remove ``STANC3_VERSION`` from your ``make/local`` file, before running ``make clean`` again. + + +If you wish to use BridgeStan with a custom fork or branch, the best thing to do is to check out that branch in the ``stan`` submodule, +or, if the fork is of stan-math, in ``stan/libs/stan_math``. The easiest way to use a custom stanc3 is to place the built executable at +``bin/stanc[.exe]``. diff --git a/docs/how-it-works.rst b/docs/how-it-works.rst deleted file mode 100644 index 00cb3c2..0000000 --- a/docs/how-it-works.rst +++ /dev/null @@ -1,17 +0,0 @@ - -How It Works -============ - -BridgeStan works by wrapping the Stan Model class -generated by the Stan compiler in a simple, C-compatible -interface which exposes the desired functionality. BridgeStan does this by -using the ``extern "C"`` linkage available in C++ to expose functions -which are callable from C and C-compatible sources. - -This allows other languages with C foreign function interfaces -to bind to the compiled "bridged" model and call them. This -is a fairly portable solution, since the subset of the C language -needed is small, and most major platforms employ one of the same -C calling conventions, namely the "cdecl" convention. This -allows the programs, even when they are compiled by different compilers, -to talk to each other in an agreeable way. diff --git a/docs/index.rst b/docs/index.rst index bbf1815..f18654d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,6 +20,6 @@ higher-level languages for arbitrary Stan models, such as those in :maxdepth: 2 :caption: Contents: - install + getting-started languages - how-it-works + internals diff --git a/docs/install.rst b/docs/install.rst deleted file mode 100644 index ef8e41c..0000000 --- a/docs/install.rst +++ /dev/null @@ -1,103 +0,0 @@ - -Installation -============ - -Requirement: C++ toolchain and CmdStan --------------------------------------- -BridgeStan relies on `CmdStan `__ and -the following assume you have a working installation for your system. -Please make sure you are able to compile and run the CmdStan example program **before continuing**. - -For more information, -see the `CmdStan installation instructions `__. - -Download --------- - -Installing BridgeStan is as simple as ensuring that the above pre-requisites are installed and then downloading -the source repository. - -If you have ``git`` installed, you may do this by navigating to the folder you'd like -BridgeStan to be in and running ``git clone git@github.com:roualdes/bridgestan.git``. - -If you do not have ``git`` installed, you can download a `.zip` file containing the repository -`here `__. Unzip this -file into the folder you would like BridgeStan to be in. - -After this, BridgeStan is installed. You can test a basic compilation by opening -a terminal in your BridgeStan folder and running - -.. code-block:: shell - - # MacOS and Linux - make CMDSTAN=/path/to/cmdstan/here/ test_models/multi/multi_model.so - # Windows - mingw32-make.exe CMDSTAN=C:/path/to/cmdstan/with/forward/slashes/ test_models/multi/multi_model.so - -This will compile the file ``test_models/multi/multi.stan`` into a shared library object for use with BridgeStan. - - - -Interface: Python ------------------ - -If you would like to install the :doc:`Python interface `, -you can either install it directly from Github with - -.. code-block:: shell - - pip install "git+https://github.com/roualdes/bridgestan.git#egg=bridgestan&subdirectory=python" - -Or, since you have already downloaded the repository, you can run - -.. code-block:: shell - - pip install python/ - -from the BridgeStan folder. - -Note that the Python package depends on Python 3.9+ and NumPy, and will install -NumPy if it is not already installed. - - -Interface: Julia ----------------- - -If you would like to install the :doc:`Julia interface `, -you can either install it directly from Github with - -.. code-block:: julia - - ] add https://github.com/roualdes/bridgestan.git:julia - -Or, since you have already downloaded the repository, you can run - -.. code-block:: julia - - ] dev julia/ - -from the BridgeStan folder. - -Note that the Julia package depends on Julia 1.8+. - - -Interface: R ----------------- - -If you would like to install the :doc:`R interface `, -you can either install it directly from Github with - -.. code-block:: R - - devtools::install_github("https://github.com/roualdes/bridgestan", subdir="R") - -Or, since you have already downloaded the repository, you can run - -.. code-block:: R - - install.packages(file.path(getwd(),"R"), repos=NULL, type="source") - -from the BridgeStan folder. - -Note that the R package depends on R 3+ and R6, and will install R6 if it is not -already installed. diff --git a/docs/internals.rst b/docs/internals.rst new file mode 100644 index 0000000..93d95a7 --- /dev/null +++ b/docs/internals.rst @@ -0,0 +1,17 @@ + +How It Works +============ + +This section of the documentation is primarily intended for developers +interested in contributing to BridgeStan or building their own client +interface, but it may be interesting for users who are curious what is +going on "behind the scenes". + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + internals/ffi + internals/testing + internals/documentation + diff --git a/docs/internals/documentation.rst b/docs/internals/documentation.rst new file mode 100644 index 0000000..826a6bb --- /dev/null +++ b/docs/internals/documentation.rst @@ -0,0 +1,33 @@ +Documentation +============= + + +Documenting a mixed-language project such as BridgeStan can be tricky. + +We attempt to keep documentation in the natural place for each language: C/C++ +code is documented with doxygen-style comments in the source, Python documentation +appears in docstrings, etc. With the exception of the R interface, our documentation +generation (powered by `Sphinx `__) automatically +combines these together into the documentation you are reading now. + +To build the documentation, you can run ``make docs`` in the top-level directory. +This places the files in ``docs/_build/html``. At a minimum, the following must be installed: + +* The Python interface to BridgeStan +* `Sphinx 5.0 or above `__ +* `nbsphinx `__ +* `sphinx-copybutton `__ +* `pydata-sphinx-theme `__ +* `MySt-Parser `__ + +There are also optional dependencies depending on which part of the documentation +you are updating. +If you wish to build the C++ portions of the documentation, you should also have: + +* `Doxygen `__ +* `Breathe `__ + +Similarly, the Julia documentation will only update if Julia is installed. Julia +documentation is written in ``julia/docs/src/julia.md``. We then build +this with `DocumenterMarkdown.jl `__, +and the output is placed in ``docs/languages/julia.md``. diff --git a/docs/internals/ffi.rst b/docs/internals/ffi.rst new file mode 100644 index 0000000..9b244c4 --- /dev/null +++ b/docs/internals/ffi.rst @@ -0,0 +1,87 @@ + +Foreign Function Interface Overview +=================================== + +Overview +--------- + +BridgeStan works by wrapping the Stan Model class +generated by the Stan compiler in a :doc:`C-compatible interface <../languages/c-api>` +which exposes the desired functionality. BridgeStan does this by +using the ``extern "C"`` `linkage `__ +available in C++ to expose functions which are callable from C and C-compatible sources. + +BridgeStan clients are then built around their language's +`Foreign Function Interface `__ (FFI). + +This is a fairly portable solution, since the subset of the C language +used in the bindings is small, and most major platforms employ one of the same +C `calling conventions `__, +namely the `"cdecl" `__ convention. +This allows the programs, even when they are compiled by different compilers, +to talk to each other in an agreeable way, placing arguments and return values +in standard locations for communication between languages. + +Each of the BridgeStan clients is built around the C-compatible FFI provided by the host language. +By sticking to a simple, C-level API, we can avoid writing language specific code required +by higher level FFIs such as `pybind `__ or `Rcpp `__. +These provide additional functionality and somewhat "nicer" bindings, but at the cost of making +the source code specific to the language you want to interface with. + + +Language-specific Notes +----------------------- + +Python +______ + +The Python FFI documented in the standard library module :py:mod:`ctypes`. + +Our usage is standard with one exception, which is we use the ``CDLL`` interface +on all platforms, even Windows, due to the fact that BridgeStan models are compiled +with MingGW's gcc. + +The NumPy module :py:mod:`numpy.ctypeslib` is also used for compatibility. + +Julia +_____ + +Julia's FFI is documented in the `Julia manual `__. + +R +_ + +R features several built-in forms of foreign function interface. We use the most basic one, called ``.C``, +as this is the least dependent on R's internals. + +Documentation on the ``.C`` interface can be found by calling ``help(.C)`` in an R session. Equivalent +documentation is available +`online `__, +as well as an extended +`walk-through `__. + +Note: One quirk of the ``.C`` interface is the requirement that all inputs and +return values are passed by pointers. This is the reason for the ``bridgestan_R`` files in the source. + + +General Problems +---------------- + +Allocated Memory +________________ + +Generally speaking, memory allocated on one side of a language barrier +must also be freed on that side. This means special consideration is +needed to pass strings back and forth between the languages, +and inspired some of the design decisions behind ideas like returning +the parameter names as a comma separated list, rather than the more "natural" +array of strings. + +Output Streams +______________ + +Printed output from the C++ code cannot easily be captured in the higher-level language. +This is particularly relevant for error messaging, which is printed to the standard +error output ``stderr`` from C++. This does *not*, for example, correspond to the +``sys.stderr`` stream available from Python. + diff --git a/docs/internals/testing.rst b/docs/internals/testing.rst new file mode 100644 index 0000000..0697019 --- /dev/null +++ b/docs/internals/testing.rst @@ -0,0 +1,70 @@ +Testing +======= + +Testing for BridgeStan is primarily done through the higher-level :doc:`interfaces <../languages>`. + +All tests are based around the same set of test models (in the ``test_models/`` folder). + +You can build all of the test models at once with + +.. code-block:: shell + + make STAN_THREADS=true test_models -j + +Note: The additional functionality provided by +``STAN_THREADS`` is only tested by the Julia tests, +but in order to facilitate the same built models being used in +all tests we use it regardless of interface. + +Tooling +------- + +Python +______ + +In Python we use `pytest `__ to run tests. Tests +are written using basic ``assert`` statements and helper code from :py:mod:`numpy.testing`. + +The Python test suite has the ability to run mutually exclusive groups of code. This is to allow +testing of features such as the ``BRIDGESTAN_AD_HESSIAN`` flag which change underlying code and +therefore cannot be loaded at the same time as models compiled without it. + +Running + +.. code-block:: shell + + cd python/ + pytest -v + +Will run the "default" grouping. To run the other group(s), run + +.. code-block:: shell + + cd python/ + pytest --run-type=ad_hessian -v + +The set up for this can be seen in ``tests/conftest.py`` and is based on the +`Pytest documentation examples `__. + +Julia +_____ + +Julia tests are written using the built in +`unit testing library `__. + +.. code-block:: shell + + julia --project=./julia -t 2 -e "using Pkg; Pkg.test()" + + +R +_ + +R tests are written using `testthat `__. + +.. code-block:: shell + + cd R/ + Rscript -e "devtools::test()" + +The R unit tests are much more basic than the Python or Julia tests. diff --git a/docs/languages.rst b/docs/languages.rst index 065042b..3dc0c64 100644 --- a/docs/languages.rst +++ b/docs/languages.rst @@ -2,12 +2,17 @@ Language Interfaces =================== -BridgeStan currently has clients in three languages: +BridgeStan currently has clients in three languages, a public C API +which underlies all the clients, and an example of a standalone program +written in C. .. toctree:: - :titlesonly: + :maxdepth: 2 languages/python languages/julia languages/r languages/c-api + + + diff --git a/docs/languages/c-api.rst b/docs/languages/c-api.rst index bbf4b0b..f7e3e6b 100644 --- a/docs/languages/c-api.rst +++ b/docs/languages/c-api.rst @@ -1,17 +1,42 @@ C API -======= +===== ---- -Core API --------- +Installation +------------ + +Please follow the :doc:`Getting Started guide <../getting-started>` to install +BridgeStan's pre-requisites and downloaded a copy of the BridgeStan source code. + + +Example Program +--------------- + +An example program is provided alongside the BridgeStan source in ``c-example/``. +Details for building the example can be found in ``c-example/Makefile``. + +.. raw:: html + +
+ Show example.c + + +.. literalinclude:: ../../c-example/example.c + :language: c + +.. raw:: html + +
+ + +API Reference +------------- The following are the C functions exposed by the BridgeStan library in ``bridgestan.h``. -These are wrapped in the various high-level interfaces. An example calling -these functions from pure-C can be found in the ``c-example/`` subdirectory -of the repository. +These are wrapped in the various high-level interfaces. -These functions are actually implemented in C++, see :doc:`../how-it-works` for more details. +These functions are implemented in C++, see :doc:`../internals` for more details. .. autodoxygenfile:: bridgestan.h :project: bridgestan diff --git a/docs/languages/julia.md b/docs/languages/julia.md index 3e9e8b3..3e0667e 100644 --- a/docs/languages/julia.md +++ b/docs/languages/julia.md @@ -1,9 +1,9 @@ - + - + -# Julia Interface: BridgeStan.jl +# Julia Interface % NB: If you are reading this file in python/docs/languages, you are reading a generated output! @@ -15,11 +15,72 @@ --- + + + + +## Installation + + +This assumes you have followed the [Getting Started guide](../getting-started.rst) to install BridgeStan's pre-requisites and downloaded a copy of the BridgeStan source code. + + +To install the Julia interface, you can either install it directly from Github by running the following inside a Julia REPL + + +```julia +] add https://github.com/roualdes/bridgestan.git:julia +``` + + +Or, since you have already downloaded the repository, you can run + + +```julia +] dev julia/ +``` + + +from the BridgeStan folder. + + +Note that the Julia package depends on Julia 1.8+. + + + + + + +## Example Program + + +An example program is provided alongside the Julia interface code in `example.jl`: + + +
+Show example.jl + + +```{literalinclude} ../../julia/example.jl +:language: julia +``` + + +
+ + + + + + +## API Reference + + -## StanModel interface +### StanModel interface # **`BridgeStan.StanModel`** — *Type*. @@ -32,7 +93,7 @@ StanModel(lib, datafile="", seed=204, chain_id=0) A StanModel instance encapsulates a Stan model instantiated with data. -The constructor a Stan model from the supplied library file path and data file path. If seed or chain_id are supplied, these are used to initialize the RNG used by the model. +The constructor a Stan model from the supplied library file path and data. Data should either be a string containing a JSON object or a path to a data file ending in `.json`. If seed or chain_id are supplied, these are used to initialize the RNG used by the model. ``` StanModel(;stan_file, data="", seed=204, chain_id=0) @@ -43,7 +104,7 @@ Construct a StanModel instance from a `.stan` file, compiling if necessary. This is equivalent to calling `compile_model` and then the original constructor of StanModel. -source
+source
# **`BridgeStan.log_density`** — *Function*. @@ -59,7 +120,7 @@ Return the log density of the specified unconstrained parameters. This calculation drops constant terms that do not depend on the parameters if `propto` is `true` and includes change of variables terms for constrained parameters if `jacobian` is `true`. -source
+source
# **`BridgeStan.log_density_gradient`** — *Function*. @@ -77,7 +138,7 @@ This calculation drops constant terms that do not depend on the parameters if `p This allocates new memory for the gradient output each call. See `log_density_gradient!` for a version which allows re-using existing memory. -source
+source
# **`BridgeStan.log_density_hessian`** — *Function*. @@ -95,7 +156,7 @@ This calculation drops constant terms that do not depend on the parameters if `p This allocates new memory for the gradient and Hessian output each call. See `log_density_gradient!` for a version which allows re-using existing memory. -source
+source
# **`BridgeStan.param_constrain`** — *Function*. @@ -106,14 +167,14 @@ This allocates new memory for the gradient and Hessian output each call. See `lo param_constrain(sm, theta_unc, out; include_tp=false, include_gq=false) ``` -This turns a vector of unconstrained params into constrained parameters and (if `include_tp` and `include_gq` are set, respectively) transformed parameters and generated quantities. +Returns a vector constrained parameters given unconstrained parameters. Additionally (if `include_tp` and `include_gq` are set, respectively) returns transformed parameters and generated quantities. This allocates new memory for the output each call. See `param_constrain!` for a version which allows re-using existing memory. This is the inverse of `param_unconstrain`. -source
+source
# **`BridgeStan.param_unconstrain`** — *Function*. @@ -124,7 +185,7 @@ This is the inverse of `param_unconstrain`. param_unconstrain(sm, theta) ``` -This turns a vector of constrained params into unconstrained parameters. +Returns a vector of unconstrained params give the constrained parameters. It is assumed that these will be in the same order as internally represented by the model (e.g., in the same order as `param_unc_names(sm)`). If structured input is needed, use `param_unconstrain_json` @@ -133,7 +194,7 @@ This allocates new memory for the output each call. See `param_unconstrain!` for This is the inverse of `param_constrain`. -source
+source
# **`BridgeStan.param_unconstrain_json`** — *Function*. @@ -151,7 +212,7 @@ The JSON is expected to be in the [JSON Format for CmdStan](https://mc-stan.org/ This allocates new memory for the output each call. See `param_unconstrain_json!` for a version which allows re-using existing memory. -source
+source
# **`BridgeStan.name`** — *Function*. @@ -165,7 +226,7 @@ name(sm) Return the name of the model `sm` -source
+source
# **`BridgeStan.model_info`** — *Function*. @@ -181,7 +242,7 @@ Return information about the model `sm`. This includes the Stan version and important compiler flags. -source
+source
# **`BridgeStan.param_num`** — *Function*. @@ -197,7 +258,7 @@ Return the number of (constrained) parameters in the model. This is the total of all the sizes of items declared in the `parameters` block of the model. If `include_tp` or `include_gq` are true, items declared in the `transformed parameters` and `generate quantities` blocks are included, respectively. -source
+source
# **`BridgeStan.param_unc_num`** — *Function*. @@ -213,7 +274,7 @@ Return the number of unconstrained parameters in the model. This function is mainly different from `param_num` when variables are declared with constraints. For example, `simplex[5]` has a constrained size of 5, but an unconstrained size of 4. -source
+source
# **`BridgeStan.param_names`** — *Function*. @@ -231,7 +292,7 @@ For containers, indexes are separated by periods (.). For example, the scalar `a` has indexed name `"a"`, the vector entry `a[1]` has indexed name `"a.1"` and the matrix entry `a[2, 3]` has indexed names `"a.2.3"`. Parameter order of the output is column major and more generally last-index major for containers. -source
+source
# **`BridgeStan.param_unc_names`** — *Function*. @@ -247,7 +308,7 @@ Return the indexed names of the unconstrained parameters. For example, a scalar unconstrained parameter `b` has indexed name `b` and a vector entry `b[3]` has indexed name `b.3`. -source
+source
# **`BridgeStan.log_density_gradient!`** — *Function*. @@ -265,7 +326,7 @@ This calculation drops constant terms that do not depend on the parameters if `p The gradient is stored in the vector `out`, and a reference is returned. See `log_density_gradient` for a version which allocates fresh memory. -source
+source
# **`BridgeStan.log_density_hessian!`** — *Function*. @@ -283,7 +344,7 @@ This calculation drops constant terms that do not depend on the parameters if `p The gradient is stored in the vector `out_grad` and the Hessian is stored in `out_hess` and references are returned. See `log_density_hessian` for a version which allocates fresh memory. -source
+source
# **`BridgeStan.param_constrain!`** — *Function*. @@ -294,14 +355,14 @@ The gradient is stored in the vector `out_grad` and the Hessian is stored in `ou param_constrain!(sm, theta_unc, out; include_tp=false, include_gq=false) ``` -This turns a vector of unconstrained params into constrained parameters and (if `include_tp` and `include_gq` are set, respectively) transformed parameters and generated quantities. +Returns a vector constrained parameters given unconstrained parameters. Additionally (if `include_tp` and `include_gq` are set, respectively) returns transformed parameters and generated quantities. The result is stored in the vector `out`, and a reference is returned. See `param_constrain` for a version which allocates fresh memory. This is the inverse of `param_unconstrain!`. -source
+source
# **`BridgeStan.param_unconstrain!`** — *Function*. @@ -312,7 +373,7 @@ This is the inverse of `param_unconstrain!`. param_unconstrain!(sm, theta, out) ``` -This turns a vector of constrained params into unconstrained parameters. +Returns a vector of unconstrained params give the constrained parameters. It is assumed that these will be in the same order as internally represented by the model (e.g., in the same order as `param_names(sm)`). If structured input is needed, use `param_unconstrain_json!` @@ -321,7 +382,7 @@ The result is stored in the vector `out`, and a reference is returned. See `para This is the inverse of `param_constrain!`. -source
+source
# **`BridgeStan.param_unconstrain_json!`** — *Function*. @@ -339,14 +400,14 @@ The JSON is expected to be in the [JSON Format for CmdStan](https://mc-stan.org/ The result is stored in the vector `out`, and a reference is returned. See `param_unconstrain_json` for a version which allocates fresh memory. -source
+source
-## Compilation utilities +### Compilation utilities # **`BridgeStan.compile_model`** — *Function*. @@ -354,15 +415,15 @@ The result is stored in the vector `out`, and a reference is returned. See `para ```julia -compile_model(stan_file, args=[]) +compile_model(stan_file; stanc_args=[], make_args=[]) ``` -Run BridgeStan’s Makefile on a `.stan` file, creating the `.so` used by StanModel and return a path to the compiled library. Additional arguments to `make` can be passed as a vector, for example `["STAN_THREADS=true"]` enables the model's threading capabilities. +Run BridgeStan’s Makefile on a `.stan` file, creating the `.so` used by StanModel and return a path to the compiled library. Arguments to `stanc3` can be passed as a vector, for example `["--O1"]` enables level 1 compiler optimizations. Additional arguments to `make` can be passed as a vector, for example `["STAN_THREADS=true"]` enables the model's threading capabilities. If the same flags are defined in `make/local`, the versions passed here will take precedent. -This function assumes that the paths to BridgeStan and CmdStan are both valid. These can be set with `set_bridgestan_path!()` and `set_cmdstan_path!()` if their default values do not match your system configuration. +This function assumes that the path to BridgeStan is valid. This can be set with `set_bridgestan_path!()`. -source
+source
# **`BridgeStan.set_bridgestan_path!`** — *Function*. @@ -378,21 +439,5 @@ Set the path BridgeStan. By default this is set to the value of the environment variable `BRIDGESTAN`. -source
- -# -**`BridgeStan.set_cmdstan_path!`** — *Function*. - - - -```julia -set_cmdstan_path!(path) -``` - -Set the path to CmdStan used by BridgeStan. - -By default this is set to the value of the environment variable `CMDSTAN`, or to the newest installation available in `~/.cmdstan/`. - - -source
+source
diff --git a/docs/languages/python.rst b/docs/languages/python.rst index 601ed48..9e8cbc9 100644 --- a/docs/languages/python.rst +++ b/docs/languages/python.rst @@ -1,20 +1,63 @@ .. py:currentmodule:: bridgestan -Python Interface: bridgestan.py -=============================== +Python Interface +================ ---- +Installation +------------ + +This assumes you have followed the :doc:`Getting Started guide <../getting-started>` to install +BridgeStan's pre-requisites and downloaded a copy of the BridgeStan source code. + +To install the Python interface, you can either install it directly from Github with + +.. code-block:: shell + + pip install "git+https://github.com/roualdes/bridgestan.git#egg=bridgestan&subdirectory=python" + +Or, since you have already downloaded the repository, you can run + +.. code-block:: shell + + pip install python/ + +from the BridgeStan folder. + +Note that the Python package depends on Python 3.9+ and NumPy, and will install +NumPy if it is not already installed. + +Example Program +--------------- + +An example program is provided alongside the Python interface code in ``example.py``: + +.. raw:: html + +
+ Show example.py + + +.. literalinclude:: ../../python/example.py + :language: python + +.. raw:: html + +
+ +API Reference +------------- + StanModel interface -------------------- +___________________ .. autoclass:: bridgestan.StanModel :members: Compilation utilities ---------------------- +_____________________ .. autofunction:: bridgestan.compile_model .. autofunction:: bridgestan.set_bridgestan_path -.. autofunction:: bridgestan.set_cmdstan_path diff --git a/docs/languages/r.md b/docs/languages/r.md index 13ee85a..68b7af1 100644 --- a/docs/languages/r.md +++ b/docs/languages/r.md @@ -1,4 +1,4 @@ -# R Interface: bridgestan.R +# R Interface % The skeleton of this page was made using Roxygen2 and tools::Rd2txt, but it is not automated. % Maybe it will be one day @@ -10,6 +10,40 @@ --- +## Installation + +This assumes you have followed the [Getting Started guide](../getting-started.rst) +to install BridgeStan's pre-requisites and downloaded a copy of the BridgeStan source code. + + + +```R +devtools::install_github("https://github.com/roualdes/bridgestan", subdir="R") +``` + +Or, since you have already downloaded the repository, you can run + +```R +install.packages(file.path(getwd(),"R"), repos=NULL, type="source") +``` +from the BridgeStan folder. + +Note that the R package depends on R 3+ and R6, and will install R6 if it is not +already installed. + +## Example Program + +An example program is provided alongside the R interface code in `example.R`: + +
+Show example.R + +```{literalinclude} ../../R/example.R +:language: R +``` + +
+ ## API Reference ### *R6::R6Class* `StanModel` @@ -36,7 +70,7 @@ _Arguments_ - `lib` A path to a compiled BridgeStan Shared Object file. - - `data` A path to a JSON data file for the model. + - `data` Either a string representation of a JSON object or a path to a data file in JSON format ending in ".json". - `rng_seed` Seed for the RNG in the model object. @@ -179,7 +213,7 @@ _Returns_ **Method** `param_constrain()`: -This turns a vector of unconstrained params into constrained +Returns a vector of constrained params give the unconstrained parameters. parameters See also `StanModel$param_unconstrain()`, the inverse of this function. @@ -208,8 +242,7 @@ _Returns_ **Method** `param_unconstrain()`: -This turns a vector of constrained params into unconstrained -parameters. +Returns a vector of unconstrained params give the constrained parameters. It is assumed that these will be in the same order as internally represented by the model (e.g., in the same order as diff --git a/julia/MCMC.jl b/julia/MCMC.jl deleted file mode 100644 index 84d40e7..0000000 --- a/julia/MCMC.jl +++ /dev/null @@ -1,49 +0,0 @@ -struct HMCDiag - model::BridgeStan.StanModel - stepsize::Float64 - steps::Int64 - metric::Vector{Float64} - theta::Vector{Float64} -end - -function HMCDiag(model, stepsize, steps) - return HMCDiag( - model, - stepsize, - steps, - ones(BridgeStan.param_unc_num(model)), - randn(BridgeStan.param_unc_num(model)), - ) -end - -function joint_logp(hmc::HMCDiag, theta, rho) - logp, _ = BridgeStan.log_density_gradient(hmc.model, theta) - return logp - 0.5 * rho' * (hmc.metric .* rho) -end - -function leapfrog(hmc::HMCDiag, theta, rho) - e = hmc.stepsize .* hmc.metric - lp, grad = BridgeStan.log_density_gradient(hmc.model, theta) - rho_p = rho + 0.5 * hmc.stepsize .* grad - for n = 1:hmc.steps - theta .+= e .* rho_p - lp, grad = BridgeStan.log_density_gradient(hmc.model, theta) - if n != hmc.steps - rho_p .+= e .* grad - end - end - rho_p .+= 0.5 .* e .* grad - return theta, rho_p -end - -function sample(hmc::HMCDiag) - rho = randn(BridgeStan.param_unc_num(model)) - logp = joint_logp(hmc, hmc.theta, rho) - theta_prop, rho_prop = leapfrog(hmc, hmc.theta, rho) - logp_prop = joint_logp(hmc, theta_prop, rho_prop) - if log(rand()) < logp_prop - logp - hmc.theta .= theta_prop - return hmc.theta - end - return hmc.theta -end diff --git a/julia/docs/src/julia.md b/julia/docs/src/julia.md index 41979ab..5f153aa 100644 --- a/julia/docs/src/julia.md +++ b/julia/docs/src/julia.md @@ -1,4 +1,4 @@ -# Julia Interface: BridgeStan.jl +# Julia Interface ```@raw html % NB: If you are reading this file in python/docs/languages, you are reading a generated output! @@ -9,7 +9,49 @@ --- -## StanModel interface +## Installation + +This assumes you have followed the [Getting Started guide](../getting-started.rst) +to install BridgeStan's pre-requisites and downloaded a copy of the BridgeStan source code. + +To install the Julia interface, you can either install it directly from Github by running +the following inside a Julia REPL + +```julia +] add https://github.com/roualdes/bridgestan.git:julia +``` + +Or, since you have already downloaded the repository, you can run + +```julia +] dev julia/ +``` + +from the BridgeStan folder. + +Note that the Julia package depends on Julia 1.8+. + +## Example Program + +An example program is provided alongside the Julia interface code in `example.jl`: + + +```@raw html +
+Show example.jl +``` + +```{literalinclude} ../../julia/example.jl +:language: julia +``` + +```@raw html +
+``` + +## API Reference + +### StanModel interface ```@docs StanModel @@ -32,9 +74,8 @@ param_unconstrain! param_unconstrain_json! ``` -## Compilation utilities +### Compilation utilities ```@docs compile_model set_bridgestan_path! -set_cmdstan_path! ``` diff --git a/julia/example.jl b/julia/example.jl index 80683d6..4c0752a 100644 --- a/julia/example.jl +++ b/julia/example.jl @@ -2,49 +2,19 @@ using BridgeStan const BS = BridgeStan +BS.set_bridgestan_path!("../") + bernoulli_stan = joinpath(@__DIR__, "../test_models/bernoulli/bernoulli.stan") bernoulli_data = joinpath(@__DIR__, "../test_models/bernoulli/bernoulli.data.json") smb = BS.StanModel(stan_file = bernoulli_stan, data = bernoulli_data); -x = rand(BS.param_unc_num(smb)); -q = @. log(x / (1 - x)); # unconstrained scale - -lp, grad = BS.log_density_gradient(smb, q, jacobian = 0) - -println() -println("log_density and gradient of Bernoulli model:") -println((lp, grad)) -println() - - -multi_stan = joinpath(@__DIR__, "../test_models/multi/multi.stan") -multi_data = joinpath(@__DIR__, "../test_models/multi/multi.data.json") -smm = BS.StanModel(stan_file = multi_stan, data = multi_data); -x = randn(BS.param_unc_num(smm)); +println("This model's name is $(BS.name(smb)).") +println("It has $(BS.param_num(smb)) parameters.") -lp, grad = BS.log_density_gradient(smm, x) - -println("log_density and gradient of Multivariate Gaussian model:") -println((lp, grad)) -println() - - -# HMC -include("./MCMC.jl") -using Statistics - -model = BS.StanModel(stan_file = multi_stan, data = multi_data); - -stepsize = 0.25 -steps = 10 -hmcd = HMCDiag(model, stepsize, steps); +x = rand(BS.param_unc_num(smb)); +q = @. log(x / (1 - x)); # unconstrained scale -M = 10_000 -theta = zeros(M, BS.param_unc_num(model)) -for m = 1:M - theta[m, :] .= sample(hmcd) -end +lp, grad = BS.log_density_gradient(smb, q, jacobian = false) -println("Empirical mean: $(round.(mean(theta, dims = 1), digits = 3))") -println("Empirical std: $(round.(std(theta, dims = 1), digits = 3))") +println("log_density and gradient of Bernoulli model: $((lp, grad))") diff --git a/julia/src/BridgeStan.jl b/julia/src/BridgeStan.jl index 02dd1a1..2e411f4 100644 --- a/julia/src/BridgeStan.jl +++ b/julia/src/BridgeStan.jl @@ -18,7 +18,6 @@ export StanModel, log_density, log_density_gradient, log_density_hessian, - set_cmdstan_path!, set_bridgestan_path!, compile_model @@ -26,13 +25,19 @@ include("model.jl") include("compile.jl") """ - StanModel(;stan_file, data="", seed=204, chain_id=0) + StanModel(;stan_file, stanc_args=[], make_args=[], data="", seed=204, chain_id=0) Construct a StanModel instance from a `.stan` file, compiling if necessary. This is equivalent to calling `compile_model` and then the original constructor of StanModel. """ -StanModel(; stan_file::String, data::String = "", seed = 204, chain_id = 0) = - StanModel(compile_model(stan_file), data, seed, chain_id) - +StanModel(; + stan_file::String, + stanc_args::AbstractVector{String} = String[], + make_args::AbstractVector{String} = String[], + data::String = "", + seed = 204, + chain_id = 0, +) = StanModel(compile_model(stan_file; stanc_args, make_args), data, seed, chain_id) + end diff --git a/julia/src/compile.jl b/julia/src/compile.jl index f88f92e..651d45e 100644 --- a/julia/src/compile.jl +++ b/julia/src/compile.jl @@ -6,29 +6,13 @@ end function get_bridgestan() path = get(ENV, "BRIDGESTAN", "") if path == "" - error("BridgeStan path was not set, compilation will not work until you call `set_bridgestan_path!()`") + error( + "BridgeStan path was not set, compilation will not work until you call `set_bridgestan_path!()`", + ) end return path end -function get_cmdstan() - cmdstan = get(ENV, "CMDSTAN", "") - if cmdstan == "" - try - cmdstan = readdir( - joinpath(Base.Filesystem.homedir(), ".cmdstan/"), - join = true, - sort = true, - )[1] - catch - end - end - if cmdstan == "" - error("CmdStan path was not set, compilation will not work until you call `set_cmdstan_path!()`") - end - return cmdstan -end - function validate_stan_dir(path::AbstractString) if !isdir(path) error("Path does not exist!\n$path") @@ -40,21 +24,6 @@ function validate_stan_dir(path::AbstractString) end end -""" - set_cmdstan_path!(path) - -Set the path to CmdStan used by BridgeStan. - -By default this is set to the value of the environment variable -`CMDSTAN`, or to the newest installation available in `~/.cmdstan/`. -""" -function set_cmdstan_path!(path::AbstractString) - if !isdir(path) - error("Path does not exist!\n$path") - end - ENV["CMDSTAN"] = path -end - """ set_bridgestan_path!(path) @@ -71,18 +40,24 @@ end """ - compile_model(stan_file, args=[]) + compile_model(stan_file; stanc_args=[], make_args=[]) Run BridgeStan’s Makefile on a `.stan` file, creating the `.so` used by StanModel and return a path to the compiled library. +Arguments to `stanc3` can be passed as a vector, for example `["--O1"]` enables level 1 compiler +optimizations. Additional arguments to `make` can be passed as a vector, for example `["STAN_THREADS=true"]` -enables the model's threading capabilities. +enables the model's threading capabilities. If the same flags are defined in `make/local`, +the versions passed here will take precedent. -This function assumes that the paths to BridgeStan and CmdStan are both valid. -These can be set with `set_bridgestan_path!()` and `set_cmdstan_path!()` if their default -values do not match your system configuration. +This function assumes that the path to BridgeStan is valid. +This can be set with `set_bridgestan_path!()`. """ -function compile_model(stan_file::AbstractString, args::AbstractVector{String} = String[]) +function compile_model( + stan_file::AbstractString; + stanc_args::AbstractVector{String} = String[], + make_args::AbstractVector{String} = String[], +) bridgestan = get_bridgestan() validate_stan_dir(bridgestan) @@ -95,10 +70,11 @@ function compile_model(stan_file::AbstractString, args::AbstractVector{String} = absolute_path = abspath(stan_file) output_file = splitext(absolute_path)[1] * "_model.so" - cmdstan = replace(abspath(get_cmdstan()), "\\" => "/") * "/" - cmd = - Cmd(`$(get_make()) CMDSTAN=$cmdstan $args $output_file`, dir = abspath(bridgestan)) + cmd = Cmd( + `$(get_make()) $make_args "STANCFLAGS=--include-paths=. $stanc_args" $output_file`, + dir = abspath(bridgestan), + ) out = IOBuffer() err = IOBuffer() is_ok = success(pipeline(cmd; stdout = out, stderr = err)) diff --git a/julia/src/model.jl b/julia/src/model.jl index a197871..a740ff2 100644 --- a/julia/src/model.jl +++ b/julia/src/model.jl @@ -6,7 +6,8 @@ mutable struct StanModelStruct end A StanModel instance encapsulates a Stan model instantiated with data. -The constructor a Stan model from the supplied library file path and data file path. +The constructor a Stan model from the supplied library file path and data. Data +should either be a string containing a JSON object or a path to a data file ending in `.json`. If seed or chain_id are supplied, these are used to initialize the RNG used by the model. StanModel(;stan_file, data="", seed=204, chain_id=0) @@ -31,18 +32,20 @@ mutable struct StanModel end if in(abspath(lib), Libc.Libdl.dllist()) - @warn "Loading a shared object '" * lib * "' which is already loaded.\n" * + @warn "Loading a shared object '" * + lib * + "' which is already loaded.\n" * "If the file has changed since the last time it was loaded, this load may not update the library!" end - if data != "" && !isfile(data) + if data != "" && endswith(data, ".json") && !isfile(data) throw(SystemError("Data file not found")) end lib = Libc.Libdl.dlopen(lib) stanmodel = ccall( - Libc.Libdl.dlsym(lib, "construct"), + Libc.Libdl.dlsym(lib, "bs_construct"), Ptr{StanModelStruct}, (Cstring, UInt32, UInt32), data, @@ -57,7 +60,7 @@ mutable struct StanModel function f(sm) ccall( - Libc.Libdl.dlsym(sm.lib, "destruct"), + Libc.Libdl.dlsym(sm.lib, "bs_destruct"), UInt32, (Ptr{StanModelStruct},), sm.stanmodel, @@ -75,7 +78,7 @@ Return the name of the model `sm` """ function name(sm::StanModel) cstr = ccall( - Libc.Libdl.dlsym(sm.lib, "name"), + Libc.Libdl.dlsym(sm.lib, "bs_name"), Cstring, (Ptr{StanModelStruct},), sm.stanmodel, @@ -93,7 +96,7 @@ compiler flags. """ function model_info(sm::StanModel) cstr = ccall( - Libc.Libdl.dlsym(sm.lib, "model_info"), + Libc.Libdl.dlsym(sm.lib, "bs_model_info"), Cstring, (Ptr{StanModelStruct},), sm.stanmodel, @@ -113,7 +116,7 @@ respectively. """ function param_num(sm::StanModel; include_tp = false, include_gq = false) ccall( - Libc.Libdl.dlsym(sm.lib, "param_num"), + Libc.Libdl.dlsym(sm.lib, "bs_param_num"), Cint, (Ptr{StanModelStruct}, Cint, Cint), sm.stanmodel, @@ -134,7 +137,7 @@ when variables are declared with constraints. For example, """ function param_unc_num(sm::StanModel) ccall( - Libc.Libdl.dlsym(sm.lib, "param_unc_num"), + Libc.Libdl.dlsym(sm.lib, "bs_param_unc_num"), Cint, (Ptr{StanModelStruct},), sm.stanmodel, @@ -155,7 +158,7 @@ Parameter order of the output is column major and more generally last-index majo """ function param_names(sm::StanModel; include_tp = false, include_gq = false) cstr = ccall( - Libc.Libdl.dlsym(sm.lib, "param_names"), + Libc.Libdl.dlsym(sm.lib, "bs_param_names"), Cstring, (Ptr{StanModelStruct}, Cint, Cint), sm.stanmodel, @@ -175,7 +178,7 @@ and a vector entry `b[3]` has indexed name `b.3`. """ function param_unc_names(sm::StanModel) cstr = ccall( - Libc.Libdl.dlsym(sm.lib, "param_unc_names"), + Libc.Libdl.dlsym(sm.lib, "bs_param_unc_names"), Cstring, (Ptr{StanModelStruct},), sm.stanmodel, @@ -186,9 +189,9 @@ end """ param_constrain!(sm, theta_unc, out; include_tp=false, include_gq=false) -This turns a vector of unconstrained params into constrained parameters -and (if `include_tp` and `include_gq` are set, respectively) transformed parameters -and generated quantities. +Returns a vector constrained parameters given unconstrained parameters. +Additionally (if `include_tp` and `include_gq` are set, respectively) +returns transformed parameters and generated quantities. The result is stored in the vector `out`, and a reference is returned. See `param_constrain` for a version which allocates fresh memory. @@ -209,7 +212,7 @@ function param_constrain!( ) end rc = ccall( - Libc.Libdl.dlsym(sm.lib, "param_constrain"), + Libc.Libdl.dlsym(sm.lib, "bs_param_constrain"), Cint, (Ptr{StanModelStruct}, Cint, Cint, Ref{Cdouble}, Ref{Cdouble}), sm.stanmodel, @@ -227,9 +230,9 @@ end """ param_constrain(sm, theta_unc, out; include_tp=false, include_gq=false) -This turns a vector of unconstrained params into constrained parameters -and (if `include_tp` and `include_gq` are set, respectively) -transformed parameters and generated quantities. +Returns a vector constrained parameters given unconstrained parameters. +Additionally (if `include_tp` and `include_gq` are set, respectively) +returns transformed parameters and generated quantities. This allocates new memory for the output each call. See `param_constrain!` for a version which allows @@ -250,7 +253,7 @@ end """ param_unconstrain!(sm, theta, out) -This turns a vector of constrained params into unconstrained parameters. +Returns a vector of unconstrained params give the constrained parameters. It is assumed that these will be in the same order as internally represented by the model (e.g., in the same order as `param_names(sm)`). If structured input is needed, use `param_unconstrain_json!` @@ -271,7 +274,7 @@ function param_unconstrain!(sm::StanModel, theta::Vector{Float64}, out::Vector{F end rc = ccall( - Libc.Libdl.dlsym(sm.lib, "param_unconstrain"), + Libc.Libdl.dlsym(sm.lib, "bs_param_unconstrain"), Cint, (Ptr{StanModelStruct}, Ref{Cdouble}, Ref{Cdouble}), sm.stanmodel, @@ -287,7 +290,7 @@ end """ param_unconstrain(sm, theta) -This turns a vector of constrained params into unconstrained parameters. +Returns a vector of unconstrained params give the constrained parameters. It is assumed that these will be in the same order as internally represented by the model (e.g., in the same order as `param_unc_names(sm)`). If structured input is needed, use `param_unconstrain_json` @@ -324,7 +327,7 @@ function param_unconstrain_json!(sm::StanModel, theta::String, out::Vector{Float end rc = ccall( - Libc.Libdl.dlsym(sm.lib, "param_unconstrain_json"), + Libc.Libdl.dlsym(sm.lib, "bs_param_unconstrain_json"), Cint, (Ptr{StanModelStruct}, Cstring, Ref{Cdouble}), sm.stanmodel, @@ -364,7 +367,7 @@ and includes change of variables terms for constrained parameters if `jacobian` function log_density(sm::StanModel, q::Vector{Float64}; propto = true, jacobian = true) lp = Ref(0.0) rc = ccall( - Libc.Libdl.dlsym(sm.lib, "log_density"), + Libc.Libdl.dlsym(sm.lib, "bs_log_density"), Cint, (Ptr{StanModelStruct}, Cint, Cint, Ref{Cdouble}, Ref{Cdouble}), sm.stanmodel, @@ -409,7 +412,7 @@ function log_density_gradient!( end rc = ccall( - Libc.Libdl.dlsym(sm.lib, "log_density_gradient"), + Libc.Libdl.dlsym(sm.lib, "bs_log_density_gradient"), Cint, (Ptr{StanModelStruct}, Cint, Cint, Ref{Cdouble}, Ref{Cdouble}, Ref{Cdouble}), sm.stanmodel, @@ -485,7 +488,7 @@ function log_density_hessian!( end rc = ccall( - Libc.Libdl.dlsym(sm.lib, "log_density_hessian"), + Libc.Libdl.dlsym(sm.lib, "bs_log_density_hessian"), Cint, ( Ptr{StanModelStruct}, diff --git a/julia/test/compile_tests.jl b/julia/test/compile_tests.jl index f3db24d..1430bed 100644 --- a/julia/test/compile_tests.jl +++ b/julia/test/compile_tests.jl @@ -10,11 +10,11 @@ models = joinpath(@__DIR__, "../../test_models/") stanfile = joinpath(models, "multi", "multi.stan") lib = splitext(stanfile)[1] * "_model.so" rm(lib, force = true) - res = BridgeStan.compile_model(stanfile) + res = BridgeStan.compile_model(stanfile; stanc_args = ["--O1"]) @test Base.samefile(lib, res) rm(lib) - res = BridgeStan.compile_model(stanfile, ["STAN_THREADS=true"]) + res = BridgeStan.compile_model(stanfile; make_args = ["STAN_THREADS=true"]) @test Base.samefile(lib, res) end @@ -33,7 +33,6 @@ end @testset "bad paths" begin - @test_throws ErrorException BridgeStan.set_cmdstan_path!("dummy") @test_throws ErrorException BridgeStan.set_bridgestan_path!("dummy") @test_throws ErrorException BridgeStan.set_bridgestan_path!(models) end diff --git a/julia/test/model_tests.jl b/julia/test/model_tests.jl index 23f3d60..6607ea5 100644 --- a/julia/test/model_tests.jl +++ b/julia/test/model_tests.jl @@ -431,7 +431,7 @@ end @testset "bernoulli" begin # Bernoulli - # CMDSTAN=/path/to/cmdstan/ make test_models/bernoulli/bernoulli_model.so + # make test_models/bernoulli/bernoulli_model.so model = load_test_model("bernoulli") @@ -463,7 +463,7 @@ end @testset "threaded model: multi" begin # Multivariate Gaussian - # CMDSTAN=/path/to/cmdstan/ make test_models/multi/multi_model.so + # make test_models/multi/multi_model.so function gaussian(x) return -0.5 * x' * x @@ -497,7 +497,7 @@ end @testset "gaussian" begin # Guassian with positive constrained standard deviation - # CMDSTAN=/path/to/cmdstan/ make test_models/gaussian/gaussian_model.so + # make test_models/gaussian/gaussian_model.so model = load_test_model("gaussian") @@ -519,7 +519,7 @@ end @testset "fr_gaussian" begin # Full rank Gaussian - # CMDSTAN=/path/to/cmdstan/ make test_models/fr_gaussian/fr_gaussian_model.so + # make test_models/fr_gaussian/fr_gaussian_model.so model = load_test_model("fr_gaussian") diff --git a/python/bridgestan/__init__.py b/python/bridgestan/__init__.py index 1a7ad88..60e0791 100644 --- a/python/bridgestan/__init__.py +++ b/python/bridgestan/__init__.py @@ -1,4 +1,4 @@ -from .compile import compile_model, set_bridgestan_path, set_cmdstan_path +from .compile import compile_model, set_bridgestan_path from .model import StanModel -__all__ = ["StanModel", "set_bridgestan_path", "set_cmdstan_path", "compile_model"] +__all__ = ["StanModel", "set_bridgestan_path", "compile_model"] diff --git a/python/bridgestan/compile.py b/python/bridgestan/compile.py index c088ad2..cefadf9 100644 --- a/python/bridgestan/compile.py +++ b/python/bridgestan/compile.py @@ -31,30 +31,6 @@ def verify_bridgestan_path(path: str) -> None: ) BRIDGESTAN_PATH = os.getenv("BRIDGESTAN", str(PYTHON_FOLDER.parent)) -CMDSTAN_PATH = os.getenv("CMDSTAN", "") -if not CMDSTAN_PATH: - try: - import cmdstanpy - - CMDSTAN_PATH = cmdstanpy.cmdstan_path() - except: - try: - CMDSTAN_PATH = str( - sorted((filter(Path.is_dir, (Path.home() / ".cmdstan").iterdir())))[0] - ) - except: - pass - - -def set_cmdstan_path(path: str) -> None: - """Set the path to CmdStan used by BridgeStan. - - By default this is set to the value of the environment - variable ``CMDSTAN``, or to the newest installation available - in ``~/.cmdstan/``. - """ - global CMDSTAN_PATH - CMDSTAN_PATH = path def set_bridgestan_path(path: str) -> None: @@ -77,20 +53,23 @@ def generate_so_name(model: Path): return model.with_stem(f"{name}_model").with_suffix(".so") -def compile_model(stan_file: str, args: List[str] = []) -> Path: +def compile_model( + stan_file: str, *, stanc_args: List[str] = [], make_args: List[str] = [] +) -> Path: """ Run BridgeStan's Makefile on a ``.stan`` file, creating the ``.so`` used by the StanModel class. - This function assumes that the paths to BridgeStan and CmdStan - are both valid. These can be set with :func:`set_bridgestan_path` - and :func:`set_cmdstan_path` if their default values do not - match your system configuration. + This function assumes that the path to BridgeStan is valid. + This can be set with :func:`set_bridgestan_path`. :param stan_file: A path to a Stan model file. - :param args: A list of additional arguments to pass to Make. + :param stanc_args: A list of arguments to pass to stanc3. + For example, ``["--O1"]`` will enable compiler optimization level 1. + :param make_args: A list of additional arguments to pass to Make. For example, ``["STAN_THREADS=True"]`` will enable - threading for the compiled model. + threading for the compiled model. If the same flags are defined + in ``make/local``, the versions passed here will take precedent. :raises FileNotFoundError or PermissionError: If `stan_file` does not exist or is not readable. :raises ValueError: If BridgeStan cannot be located. @@ -98,20 +77,19 @@ def compile_model(stan_file: str, args: List[str] = []) -> Path: """ verify_bridgestan_path(BRIDGESTAN_PATH) - if not CMDSTAN_PATH: - raise RuntimeError( - "Unable to locate CmdStan, you will need to call " - "'set_cmdstan_path()' before using compilation features" - ) - file_path = Path(stan_file).resolve() validate_readable(str(file_path)) if file_path.suffix != ".stan": raise ValueError(f"File '{stan_file}' does not end in .stan") + stanc_args output = generate_so_name(file_path) - cmdstan = str(Path(CMDSTAN_PATH).resolve()).replace("\\", "/") - cmd = [MAKE, f"CMDSTAN={cmdstan}/"] + args + [str(output)] + cmd = ( + [MAKE] + + make_args + + ["STANCFLAGS=" + " ".join(["--include-paths=."] + stanc_args)] + + [str(output)] + ) proc = subprocess.run( cmd, cwd=BRIDGESTAN_PATH, capture_output=True, text=True, check=False ) @@ -121,11 +99,6 @@ def compile_model(stan_file: str, args: List[str] = []) -> Path: f"Command {' '.join(cmd)} failed with code {proc.returncode}.\n" f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}" ) - if "stanc" in error: - error += ( - "\nIf CmdStan is already installed, you may " - "need to set the location with set_cmdstan_path()" - ) raise RuntimeError(error) return output diff --git a/python/bridgestan/model.py b/python/bridgestan/model.py index ccf5fbd..03d1a3c 100644 --- a/python/bridgestan/model.py +++ b/python/bridgestan/model.py @@ -22,7 +22,8 @@ class StanModel: return values. The constructor arguments are :param model_lib: A path to a compiled shared object. - :param model_data: A path to data in JSON format. + :param model_data: Either a string representation of a JSON object or a + path to a data file in JSON format ending in ``.json``. :param seed: A pseudo random number generator seed. :param chain_id: A unique identifier for concurrent chains of pseudorandom numbers. @@ -44,7 +45,8 @@ def __init__( constructor arguments. :param model_lib: A system path to compiled shared object. - :param model_data: A system path to a JSON data file. + :param model_data: Either a string representation of a JSON object or a + system path to a data file in JSON format ending in ``.json``. :param seed: A pseudo random number generator seed. :param chain_id: A unique identifier for concurrent chains of pseudorandom numbers. @@ -54,7 +56,7 @@ def __init__( model from C++. """ validate_readable(model_lib) - if not model_data is None: + if not model_data is None and model_data.endswith('.json'): validate_readable(model_data) self.lib_path = model_lib self.stanlib = ctypes.CDLL(self.lib_path) @@ -62,7 +64,7 @@ def __init__( self.seed = seed self.chain_id = chain_id - self._construct = self.stanlib.construct + self._construct = self.stanlib.bs_construct self._construct.restype = ctypes.c_void_p self._construct.argtypes = [ctypes.c_char_p, ctypes.c_uint, ctypes.c_uint] @@ -73,23 +75,23 @@ def __init__( if not self.model_rng: raise RuntimeError("could not construct model RNG") - self._name = self.stanlib.name + self._name = self.stanlib.bs_name self._name.restype = ctypes.c_char_p self._name.argtypes = [ctypes.c_void_p] - self._model_info = self.stanlib.model_info + self._model_info = self.stanlib.bs_model_info self._model_info.restype = ctypes.c_char_p self._model_info.argtypes = [ctypes.c_void_p] - self._param_num = self.stanlib.param_num + self._param_num = self.stanlib.bs_param_num self._param_num.restype = ctypes.c_int self._param_num.argtypes = [ctypes.c_void_p, ctypes.c_int, ctypes.c_int] - self._param_unc_num = self.stanlib.param_unc_num + self._param_unc_num = self.stanlib.bs_param_unc_num self._param_unc_num.restype = ctypes.c_int self._param_unc_num.argtypes = [ctypes.c_void_p] - self._param_names = self.stanlib.param_names + self._param_names = self.stanlib.bs_param_names self._param_names.restype = ctypes.c_char_p self._param_names.argtypes = [ ctypes.c_void_p, @@ -97,11 +99,11 @@ def __init__( ctypes.c_int, ] - self._param_unc_names = self.stanlib.param_unc_names + self._param_unc_names = self.stanlib.bs_param_unc_names self._param_unc_names.restype = ctypes.c_char_p self._param_unc_names.argtypes = [ctypes.c_void_p] - self._param_constrain = self.stanlib.param_constrain + self._param_constrain = self.stanlib.bs_param_constrain self._param_constrain.restype = ctypes.c_int self._param_constrain.argtypes = [ ctypes.c_void_p, @@ -111,11 +113,11 @@ def __init__( double_array, ] - self._param_unconstrain = self.stanlib.param_unconstrain + self._param_unconstrain = self.stanlib.bs_param_unconstrain self._param_unconstrain.restype = ctypes.c_int self._param_unconstrain.argtypes = [ctypes.c_void_p, double_array, double_array] - self._param_unconstrain_json = self.stanlib.param_unconstrain_json + self._param_unconstrain_json = self.stanlib.bs_param_unconstrain_json self._param_unconstrain_json.restype = ctypes.c_int self._param_unconstrain_json.argtypes = [ ctypes.c_void_p, @@ -123,7 +125,7 @@ def __init__( double_array, ] - self._log_density = self.stanlib.log_density + self._log_density = self.stanlib.bs_log_density self._log_density.restype = ctypes.c_int self._log_density.argtypes = [ ctypes.c_void_p, @@ -133,7 +135,7 @@ def __init__( ctypes.POINTER(ctypes.c_double), ] - self._log_density_gradient = self.stanlib.log_density_gradient + self._log_density_gradient = self.stanlib.bs_log_density_gradient self._log_density_gradient.restype = ctypes.c_int self._log_density_gradient.argtypes = [ ctypes.c_void_p, @@ -144,7 +146,7 @@ def __init__( double_array, ] - self._log_density_hessian = self.stanlib.log_density_hessian + self._log_density_hessian = self.stanlib.bs_log_density_hessian self._log_density_hessian.restype = ctypes.c_int self._log_density_hessian.argtypes = [ ctypes.c_void_p, @@ -156,7 +158,7 @@ def __init__( double_array, ] - self._destruct = self.stanlib.destruct + self._destruct = self.stanlib.bs_destruct self._destruct.restype = ctypes.c_int self._destruct.argtypes = [ctypes.c_void_p] @@ -166,6 +168,8 @@ def from_stan_file( stan_file: str, model_data: Optional[str] = None, *, + stanc_args: List[str] = [], + make_args: List[str] = [], seed: int = 1234, chain_id: int = 0, ): @@ -177,6 +181,12 @@ def from_stan_file( :param stan_file: A path to a Stan model file. :param model_data: A path to data in JSON format. + :param stanc_args: A list of arguments to pass to stanc3. + For example, ``["--O1"]`` will enable compiler optimization level 1. + :param make_args: A list of additional arguments to pass to Make. + For example, ``["STAN_THREADS=True"]`` will enable + threading for the compiled model. If the same flags are defined + in ``make/local``, the versions passed here will take precedent. :param seed: A pseudo random number generator seed. :param chain_id: A unique identifier for concurrent chains of pseudorandom numbers. @@ -185,7 +195,7 @@ def from_stan_file( :raises ValueError: If BridgeStan cannot be located. :raises RuntimeError: If compilation fails. """ - result = compile_model(stan_file) + result = compile_model(stan_file, stanc_args=stanc_args, make_args=make_args) return cls(str(result), model_data, seed=seed, chain_id=chain_id) def __del__(self) -> None: diff --git a/python/example.py b/python/example.py index ffff380..fe450e2 100644 --- a/python/example.py +++ b/python/example.py @@ -1,84 +1,14 @@ - -# REQUIRED IMPORTS import bridgestan as bs import numpy as np -# COMPILE AND CONSTRUCT MODEL -stan = "../test_models/regression/regression.stan" -data = "../test_models/regression/regression.data.json" +stan = "../test_models/bernoulli/bernoulli.stan" +data = "../test_models/bernoulli/bernoulli.data.json" model = bs.StanModel.from_stan_file(stan, data) -print("MODEL NAME: name") -name = model.name() -print(f"model name = {name}\n") - -print("NUMBER OF PARAMETERS: param_num") -for tp in [True, False]: - for gq in [True, False]: - D = model.param_num(include_tp=tp, include_gq=gq) - print(f"tp = {tp:b}, gq = {gq:b}, number of parameters = {D}\n") - -print("PARAMETER NAMES: param_names") -for tp in [True, False]: - for gq in [True, False]: - names = model.param_names(include_tp=tp, include_gq=gq) - print(f"tp = {tp:b}, gq = {gq:b}, parameter names = {names}\n") - -print("NUMBER OF UNCONSTRAINED PARAMETERS: param_unc_num") -D = model.param_unc_num() -print(f"number of unconstrained paramerers = {D}\n") - -print("UNCONSTRAINED PARAMETER NAMES: param_unc_names") -names = model.param_unc_names() -print(f"unconstrained parameter names = {names}\n") - -print("SET UNCONSTRAINED PARAMETERS TO TEST") -alpha = 0.2 -beta = 0.9 -log_sigma = np.log(0.25) -theta_unc = np.array([alpha, beta, log_sigma]) -print(f"theta_unc = {theta_unc}\n") - -print("LOG DENSITY: log_density") -for propto in [True, False]: - for jacobian in [True, False]: - log_p = model.log_density(theta_unc, propto=propto, jacobian=jacobian) - print(f"propto = {propto:b}, jacobian = {jacobian:b}, log density = {log_p}\n") - -print("LOG DENSITY & GRADIENT: log_density_gradient") -for propto in [True, False]: - for jacobian in [True, False]: - log_p, grad = model.log_density_gradient( - theta_unc, propto=propto, jacobian=jacobian - ) - print(f"propto = {propto:b}; jacobian = {jacobian:b}; log density = {log_p}") - print(f" gradient = {grad}\n") - -print("LOG DENSITY & GRADIENT & HESSIAN: log_density_hessian") -for propto in [True, False]: - for jacobian in [True, False]: - log_p, grad, hess = model.log_density_hessian( - theta_unc, propto=propto, jacobian=jacobian - ) - print(f"propto = {propto:b}; jacobian = {jacobian:b}; log density = {log_p}") - print(f" gradient = {grad}") - print(f" hessian = {hess}\n") - -print( - "CONSTRAINING TRANSFORM & TRANSFORMED PARAMETERS & GENERATED QUANTITIES: param_constrain" -) -for tp in [True, False]: - for gq in [True, False]: - # each eval generates random generated quantities - theta = model.param_constrain(theta_unc, include_tp=tp, include_gq=gq) - print(f"tp = {tp:b}, gq = {gq:b}, params = {theta}\n") - -print("UNCONSTRAINING TRANSFORM: param_unconstrain") -theta = model.param_constrain(theta_unc, include_tp=False, include_gq=False) -theta_unc_roundtrip = model.param_unconstrain(theta) -print(f"unconstrained parameters = {theta_unc_roundtrip}\n") +print(f"This model's name is {model.name()}.") +print(f"It has {model.param_num()} parameters.") -print("UNCONSTRAINING TRANSFORM FROM JSON: param_unconstrain_json") -theta_json = '{ "alpha": 0.2, "beta": 0.9, "sigma": 0.25 }' -theta_unc_roundtrip_json = model.param_unconstrain_json(theta_json) -print(f"unconstrained parameters from JSON = {theta_unc_roundtrip_json}\n") +x = np.random.random(model.param_unc_num()) +q = np.log(x / (1 - x)) # unconstrained scale +lp, grad = model.log_density_gradient(q, jacobian=False) +print(f"log_density and gradient of Bernoulli model: {(lp, grad)}") diff --git a/python/test/__init__.py b/python/test/__init__.py new file mode 100644 index 0000000..85b590c --- /dev/null +++ b/python/test/__init__.py @@ -0,0 +1 @@ +# this file intentionally blank but exists for pytest detection diff --git a/python/test/conftest.py b/python/test/conftest.py new file mode 100644 index 0000000..cd89932 --- /dev/null +++ b/python/test/conftest.py @@ -0,0 +1,47 @@ +"""The global configuration for the test suite + +This test primarily handles the process by which we can run tests in +different runtimes. This is important, because DLLs compiled with different +flags cannot be loaded into the same Python instances. + +This sets up a command line argument to pytest called --run-type. By +default, all tests have the "default" type, but some, such as the BRIDGESTAN_AD +tests, have a different type. These types are mutually exclusive - +one is selected each run, and any others are marked to be skipped. + +To create a new runtime environment, add it to the list of types below +and then mark any tests with `@pytest.mark.TYPE_NAME_HERE`. +""" + +import pytest + +types = ("default", "ad_hessian") + +def pytest_addoption(parser): + parser.addoption( + "--run-type", + action="store", + default="default", + help="Some tests cannot be run in the same Python runtime", + choices=types, + ) + + +def pytest_configure(config): + for type in types[1:]: + config.addinivalue_line("markers", f"{type}: mark test as needing a seperate process in group {type}") + + + +def pytest_collection_modifyitems(config, items): + type_to_run = config.getoption("--run-type") + skip_type = pytest.mark.skip(reason="--run-type has selected other tests") + if type_to_run == "default": + for type in types[1:]: + for item in items: + if type in item.keywords: + item.add_marker(skip_type) + else: + for item in items: + if type_to_run not in item.keywords: + item.add_marker(skip_type) diff --git a/python/test/test_compile.py b/python/test/test_compile.py index d4ebb1f..8c0a750 100644 --- a/python/test/test_compile.py +++ b/python/test/test_compile.py @@ -11,10 +11,10 @@ def test_compile_good(): stanfile = STAN_FOLDER / "multi" / "multi.stan" lib = bs.compile.generate_so_name(stanfile) lib.unlink(missing_ok=True) - res = bs.compile_model(stanfile) + res = bs.compile_model(stanfile, stanc_args=["--O1"]) assert lib.samefile(res) lib.unlink() - res = bs.compile_model(stanfile, args=["STAN_THREADS=true"]) + res = bs.compile_model(stanfile, make_args=["STAN_THREADS=true"]) assert lib.samefile(res) @@ -36,18 +36,6 @@ def test_compile_syntax_error(): bs.compile_model(stanfile) -def test_compile_bad_cmdstan(): - stanfile = STAN_FOLDER / "multi" / "multi.stan" - old_path = bs.compile.CMDSTAN_PATH - bs.compile.set_cmdstan_path("") - with pytest.raises(RuntimeError, match=r"set_cmdstan_path"): - bs.compile_model(stanfile) - bs.compile.set_cmdstan_path("dummy") - with pytest.raises(RuntimeError, match=r"Make sure CmdStan is installed"): - bs.compile_model(stanfile) - bs.compile.set_cmdstan_path(old_path) - - def test_compile_bad_bridgestan(): old_path = bs.compile.BRIDGESTAN_PATH with pytest.raises(ValueError, match=r"does not exist"): diff --git a/python/test/test_stanmodel.py b/python/test/test_stanmodel.py index 72ed12a..cb3fa35 100644 --- a/python/test/test_stanmodel.py +++ b/python/test/test_stanmodel.py @@ -1,6 +1,7 @@ from pathlib import Path import numpy as np +import pytest import bridgestan as bs @@ -22,13 +23,19 @@ def test_constructor(): b2 = bs.StanModel(bernoulli_so, bernoulli_data) np.testing.assert_allclose(bool(b2), True) + bernoulli_data_string = ( + STAN_FOLDER / "bernoulli" / "bernoulli.data.json" + ).read_text() + b3 = bs.StanModel(bernoulli_so, bernoulli_data_string) + np.testing.assert_allclose(bool(b3), True) + # test missing so file with np.testing.assert_raises(FileNotFoundError): bs.StanModel("nope, not going to find it") # test missing data file with np.testing.assert_raises(FileNotFoundError): - bs.StanModel(bernoulli_so, "nope, not going to find it") + bs.StanModel(bernoulli_so, "nope, not going to find it.json") # test data load exception throw_data_so = str(STAN_FOLDER / "throw_data" / "throw_data_model.so") @@ -49,7 +56,7 @@ def test_model_info(): std_so = str(STAN_FOLDER / "stdnormal" / "stdnormal_model.so") b = bs.StanModel(std_so) assert "STAN_OPENCL" in b.model_info() - + def test_param_num(): full_so = str(STAN_FOLDER / "full" / "full_model.so") @@ -629,6 +636,33 @@ def test_fr_gaussian(): pos += 1 +@pytest.fixture +def recompile_simple(): + """Recompile simple_model with autodiff hessian enable, then clean-up/restore it after test""" + + stanfile = STAN_FOLDER / "simple" / "simple.stan" + lib = bs.compile.generate_so_name(stanfile) + lib.unlink(missing_ok=True) + res = bs.compile_model(stanfile, make_args=["BRIDGESTAN_AD_HESSIAN=true"]) + + yield res + + lib.unlink(missing_ok=True) + bs.compile_model(stanfile, make_args=["STAN_THREADS=true"]) + + +@pytest.mark.ad_hessian +def test_hessian_autodiff(recompile_simple): + simple_data = str(STAN_FOLDER / "simple" / "simple.data.json") + model = bs.StanModel(recompile_simple, simple_data) + assert "BRIDGESTAN_AD_HESSIAN=true" in model.model_info() + D = 5 + y = np.random.uniform(size=D) + lp, grad, hess = model.log_density_hessian(y) + np.testing.assert_allclose(-y, grad) + np.testing.assert_allclose(-np.identity(D), hess) + + if __name__ == "__main__": print("") print("TESTING BrigeStan Python API") diff --git a/src/bridgestan.cpp b/src/bridgestan.cpp index 997dba1..6de9dfa 100644 --- a/src/bridgestan.cpp +++ b/src/bridgestan.cpp @@ -2,10 +2,10 @@ #include "model_rng.cpp" #include "bridgestanR.cpp" -model_rng* construct(char* data_file, unsigned int seed, - unsigned int chain_id) { +bs_model_rng* bs_construct(char* data_file, unsigned int seed, + unsigned int chain_id) { try { - return new model_rng(data_file, seed, chain_id); + return new bs_model_rng(data_file, seed, chain_id); } catch (const std::exception& e) { std::cerr << "construct(" << data_file << ", " << seed << ", " << chain_id << ")" @@ -18,7 +18,7 @@ model_rng* construct(char* data_file, unsigned int seed, return nullptr; } -int destruct(model_rng* mr) { +int bs_destruct(bs_model_rng* mr) { try { delete (mr); return 0; @@ -28,24 +28,26 @@ int destruct(model_rng* mr) { return -1; } -const char* name(model_rng* mr) { return mr->name(); } +const char* bs_name(bs_model_rng* mr) { return mr->name(); } -const char* model_info(model_rng* mr) { return mr->model_info(); } +const char* bs_model_info(bs_model_rng* mr) { return mr->model_info(); } -const char* param_names(model_rng* mr, bool include_tp, bool include_gq) { +const char* bs_param_names(bs_model_rng* mr, bool include_tp, bool include_gq) { return mr->param_names(include_tp, include_gq); } -const char* param_unc_names(model_rng* mr) { return mr->param_unc_names(); } +const char* bs_param_unc_names(bs_model_rng* mr) { + return mr->param_unc_names(); +} -int param_num(model_rng* mr, bool include_tp, bool include_gq) { +int bs_param_num(bs_model_rng* mr, bool include_tp, bool include_gq) { return mr->param_num(include_tp, include_gq); } -int param_unc_num(model_rng* mr) { return mr->param_unc_num(); } +int bs_param_unc_num(bs_model_rng* mr) { return mr->param_unc_num(); } -int param_constrain(model_rng* mr, bool include_tp, bool include_gq, - const double* theta_unc, double* theta) { +int bs_param_constrain(bs_model_rng* mr, bool include_tp, bool include_gq, + const double* theta_unc, double* theta) { try { mr->param_constrain(include_tp, include_gq, theta_unc, theta); return 0; @@ -57,7 +59,8 @@ int param_constrain(model_rng* mr, bool include_tp, bool include_gq, return 1; } -int param_unconstrain(model_rng* mr, const double* theta, double* theta_unc) { +int bs_param_unconstrain(bs_model_rng* mr, const double* theta, + double* theta_unc) { try { mr->param_unconstrain(theta, theta_unc); return 0; @@ -69,7 +72,8 @@ int param_unconstrain(model_rng* mr, const double* theta, double* theta_unc) { return -1; } -int param_unconstrain_json(model_rng* mr, const char* json, double* theta_unc) { +int bs_param_unconstrain_json(bs_model_rng* mr, const char* json, + double* theta_unc) { try { mr->param_unconstrain_json(json, theta_unc); return 0; @@ -81,8 +85,8 @@ int param_unconstrain_json(model_rng* mr, const char* json, double* theta_unc) { return -1; } -int log_density(model_rng* mr, bool propto, bool jacobian, - const double* theta_unc, double* val) { +int bs_log_density(bs_model_rng* mr, bool propto, bool jacobian, + const double* theta_unc, double* val) { try { mr->log_density(propto, jacobian, theta_unc, val); return 0; @@ -94,8 +98,9 @@ int log_density(model_rng* mr, bool propto, bool jacobian, return -1; } -int log_density_gradient(model_rng* mr, bool propto, bool jacobian, - const double* theta_unc, double* val, double* grad) { +int bs_log_density_gradient(bs_model_rng* mr, bool propto, bool jacobian, + const double* theta_unc, double* val, + double* grad) { try { mr->log_density_gradient(propto, jacobian, theta_unc, val, grad); return 0; @@ -108,9 +113,9 @@ int log_density_gradient(model_rng* mr, bool propto, bool jacobian, return -1; } -int log_density_hessian(model_rng* mr, bool propto, bool jacobian, - const double* theta_unc, double* val, double* grad, - double* hessian) { +int bs_log_density_hessian(bs_model_rng* mr, bool propto, bool jacobian, + const double* theta_unc, double* val, double* grad, + double* hessian) { try { mr->log_density_hessian(propto, jacobian, theta_unc, val, grad, hessian); return 0; diff --git a/src/bridgestan.h b/src/bridgestan.h index c5c9cdb..96b84d6 100644 --- a/src/bridgestan.h +++ b/src/bridgestan.h @@ -5,7 +5,7 @@ #include "model_rng.hpp" extern "C" { #else -typedef struct model_rng model_rng; +typedef struct bs_model_rng bs_model_rng; typedef int bool; #endif /** @@ -13,14 +13,17 @@ typedef int bool; * generator (PRNG) wrapper. Data must be encoded in JSON as * indicated in the *CmdStan Reference Manual*. * - * @param[in] data_file C-style path to JSON-encoded data file + * @param[in] data_file C-style string. This is either a + * path to JSON-encoded data file (must end with ".json"), or + * a string representation of a JSON object. * @param[in] seed seed for PRNG * @param[in] chain_id identifier for concurrent sequence of PRNG * draws * @return pointer to constructed model or `nullptr` if construction * fails */ -model_rng* construct(char* data_file, unsigned int seed, unsigned int chain_id); +bs_model_rng* bs_construct(char* data_file, unsigned int seed, + unsigned int chain_id); /** * Destroy the model and return 0 for success and -1 if there is an @@ -30,7 +33,7 @@ model_rng* construct(char* data_file, unsigned int seed, unsigned int chain_id); * @return 0 for success and -1 if there is an exception freeing one * of the model components. */ -int destruct(model_rng* mr); +int bs_destruct(bs_model_rng* mr); /** * Return the name of the specified model as a C-style string. @@ -41,7 +44,7 @@ int destruct(model_rng* mr); * @param[in] mr pointer to model and RNG structure * @return name of model */ -const char* name(model_rng* mr); +const char* bs_name(bs_model_rng* mr); /** * Return information about the compiled model as a C-style string. @@ -53,7 +56,7 @@ const char* name(model_rng* mr); * @return Information about the model including Stan version, Stan defines, and * compiler flags. */ -const char* model_info(model_rng* mr); +const char* bs_model_info(bs_model_rng* mr); /** * Return a comma-separated sequence of indexed parameter names, @@ -74,7 +77,7 @@ const char* model_info(model_rng* mr); * @param[in] include_gq `true` to include generated quantities * @return CSV-separated, indexed, parameter names */ -const char* param_names(model_rng* mr, bool include_tp, bool include_gq); +const char* bs_param_names(bs_model_rng* mr, bool include_tp, bool include_gq); /** * Return a comma-separated sequence of unconstrained parameters. @@ -93,7 +96,7 @@ const char* param_names(model_rng* mr, bool include_tp, bool include_gq); * @param[in] mr pointer to model and RNG structure * @return CSV-separated, indexed, unconstrained parameter names */ -const char* param_unc_names(model_rng* mr); +const char* bs_param_unc_names(bs_model_rng* mr); /** * Return the number of scalar parameters, optionally including the @@ -105,7 +108,7 @@ const char* param_unc_names(model_rng* mr); * @param[in] include_gq `true` to include generated quantities * @return number of parameters */ -int param_num(model_rng* mr, bool include_tp, bool include_gq); +int bs_param_num(bs_model_rng* mr, bool include_tp, bool include_gq); /** * Return the number of unconstrained parameters. The number of @@ -116,7 +119,7 @@ int param_num(model_rng* mr, bool include_tp, bool include_gq); * @param[in] mr pointer to model and RNG structure * @return number of unconstrained parameters */ -int param_unc_num(model_rng* mr); +int bs_param_unc_num(bs_model_rng* mr); /** * Set the sequence of constrained parameters based on the specified @@ -134,8 +137,8 @@ int param_unc_num(model_rng* mr); * @return code 0 if successful and code -1 if there is an exception * in the underlying Stan code */ -int param_constrain(model_rng* mr, bool include_tp, bool include_gq, - const double* theta_unc, double* theta); +int bs_param_constrain(bs_model_rng* mr, bool include_tp, bool include_gq, + const double* theta_unc, double* theta); /** * Set the sequence of unconstrained parameters based on the @@ -150,23 +153,25 @@ int param_constrain(model_rng* mr, bool include_tp, bool include_gq, * @return code 0 if successful and code -1 if there is an exception * in the underlying Stan code */ -int param_unconstrain(model_rng* mr, const double* theta, double* theta_unc); +int bs_param_unconstrain(bs_model_rng* mr, const double* theta, + double* theta_unc); /** * Set the sequence of unconstrained parameters based on the JSON * specification of the constrained parameters, and return a return * code of 0 for success and -1 for failure. Parameter order is as * declared in the Stan program, with multivariate parameters given - * in last-index-major order. The JSON schema assumed is fully + * in last-index-major order. The JSON schema assumed is fully * defined in the *CmdStan Reference Manual*. * * @param[in] mr pointer to model and RNG structure - * @param[in] json json-encoded constrained parameters + * @param[in] json JSON-encoded constrained parameters * @param[out] theta_unc sequence of unconstrained parameters * @return code 0 if successful and code -1 if there is an exception * in the underlying Stan code */ -int param_unconstrain_json(model_rng* mr, const char* json, double* theta_unc); +int bs_param_unconstrain_json(bs_model_rng* mr, const char* json, + double* theta_unc); /** * Set the log density of the specified parameters, dropping @@ -183,8 +188,8 @@ int param_unconstrain_json(model_rng* mr, const char* json, double* theta_unc); * @return code 0 if successful and code -1 if there is an exception * in the underlying Stan code */ -int log_density(model_rng* mr, bool propto, bool jacobian, const double* theta, - double* lp); +int bs_log_density(bs_model_rng* mr, bool propto, bool jacobian, + const double* theta, double* lp); /** * Set the log density and gradient of the specified parameters, @@ -205,8 +210,8 @@ int log_density(model_rng* mr, bool propto, bool jacobian, const double* theta, * @return code 0 if successful and code -1 if there is an exception * in the underlying Stan code */ -int log_density_gradient(model_rng* mr, bool propto, bool jacobian, - const double* theta, double* val, double* grad); +int bs_log_density_gradient(bs_model_rng* mr, bool propto, bool jacobian, + const double* theta, double* val, double* grad); /** * Set the log density, gradient, and Hessian of the specified parameters, @@ -226,12 +231,13 @@ int log_density_gradient(model_rng* mr, bool propto, bool jacobian, * @param[in] theta unconstrained parameters * @param[out] val log density to be set * @param[out] grad gradient to set + * @param[out] hessian hessian to set * @return code 0 if successful and code -1 if there is an exception * in the underlying Stan code */ -int log_density_hessian(model_rng* mr, bool propto, bool jacobian, - const double* theta, double* val, double* grad, - double* hessian); +int bs_log_density_hessian(bs_model_rng* mr, bool propto, bool jacobian, + const double* theta, double* val, double* grad, + double* hessian); #ifdef __cplusplus } diff --git a/src/bridgestanR.cpp b/src/bridgestanR.cpp index 72a850d..6f6796d 100644 --- a/src/bridgestanR.cpp +++ b/src/bridgestanR.cpp @@ -1,58 +1,58 @@ #include "bridgestanR.h" -void construct_R(char** data, int* rng, int* chain, model_rng** ptr_out) { - *ptr_out = construct(*data, *rng, *chain); +void bs_construct_R(char** data, int* rng, int* chain, bs_model_rng** ptr_out) { + *ptr_out = bs_construct(*data, *rng, *chain); } -void destruct_R(model_rng** model, int* return_code) { - *return_code = destruct(*model); +void bs_destruct_R(bs_model_rng** model, int* return_code) { + *return_code = bs_destruct(*model); } -void name_R(model_rng** model, char const** name_out) { - *name_out = name(*model); +void bs_name_R(bs_model_rng** model, char const** name_out) { + *name_out = bs_name(*model); } -void model_info_R(model_rng** model, char const** info_out){ - *info_out = model_info(*model); +void bs_model_info_R(bs_model_rng** model, char const** info_out) { + *info_out = bs_model_info(*model); } -void param_names_R(model_rng** model, int* include_tp, int* include_gq, - char const** names_out) { - *names_out = param_names(*model, *include_tp, *include_gq); +void bs_param_names_R(bs_model_rng** model, int* include_tp, int* include_gq, + char const** names_out) { + *names_out = bs_param_names(*model, *include_tp, *include_gq); } -void param_unc_names_R(model_rng** model, char const** names_out) { - *names_out = param_unc_names(*model); +void bs_param_unc_names_R(bs_model_rng** model, char const** names_out) { + *names_out = bs_param_unc_names(*model); } -void param_num_R(model_rng** model, int* include_tp, int* include_gq, - int* num_out) { - *num_out = param_num(*model, *include_tp, *include_gq); +void bs_param_num_R(bs_model_rng** model, int* include_tp, int* include_gq, + int* num_out) { + *num_out = bs_param_num(*model, *include_tp, *include_gq); } -void param_unc_num_R(model_rng** model, int* num_out) { - *num_out = param_unc_num(*model); +void bs_param_unc_num_R(bs_model_rng** model, int* num_out) { + *num_out = bs_param_unc_num(*model); } -void param_constrain_R(model_rng** model, int* include_tp, int* include_gq, - const double* theta_unc, double* theta, - int* return_code) { +void bs_param_constrain_R(bs_model_rng** model, int* include_tp, + int* include_gq, const double* theta_unc, + double* theta, int* return_code) { *return_code - = param_constrain(*model, *include_tp, *include_gq, theta_unc, theta); + = bs_param_constrain(*model, *include_tp, *include_gq, theta_unc, theta); } -void param_unconstrain_R(model_rng** model, const double* theta, - double* theta_unc, int* return_code) { - *return_code = param_unconstrain(*model, theta, theta_unc); +void bs_param_unconstrain_R(bs_model_rng** model, const double* theta, + double* theta_unc, int* return_code) { + *return_code = bs_param_unconstrain(*model, theta, theta_unc); } -void param_unconstrain_json_R(model_rng** model, char const** json, - double* theta_unc, int* return_code) { - *return_code = param_unconstrain_json(*model, *json, theta_unc); +void bs_param_unconstrain_json_R(bs_model_rng** model, char const** json, + double* theta_unc, int* return_code) { + *return_code = bs_param_unconstrain_json(*model, *json, theta_unc); } -void log_density_R(model_rng** model, int* propto, int* jacobian, - const double* theta, double* val, int* return_code) { - *return_code = log_density(*model, *propto, *jacobian, theta, val); +void bs_log_density_R(bs_model_rng** model, int* propto, int* jacobian, + const double* theta, double* val, int* return_code) { + *return_code = bs_log_density(*model, *propto, *jacobian, theta, val); } -void log_density_gradient_R(model_rng** model, int* propto, int* jacobian, - const double* theta, double* val, double* grad, - int* return_code) { +void bs_log_density_gradient_R(bs_model_rng** model, int* propto, int* jacobian, + const double* theta, double* val, double* grad, + int* return_code) { *return_code - = log_density_gradient(*model, *propto, *jacobian, theta, val, grad); + = bs_log_density_gradient(*model, *propto, *jacobian, theta, val, grad); } -void log_density_hessian_R(model_rng** model, int* propto, int* jacobian, - const double* theta, double* val, double* grad, - double* hess, int* return_code) { - *return_code - = log_density_hessian(*model, *propto, *jacobian, theta, val, grad, hess); +void bs_log_density_hessian_R(bs_model_rng** model, int* propto, int* jacobian, + const double* theta, double* val, double* grad, + double* hess, int* return_code) { + *return_code = bs_log_density_hessian(*model, *propto, *jacobian, theta, val, + grad, hess); } diff --git a/src/bridgestanR.h b/src/bridgestanR.h index 086c36f..f3ee1d4 100644 --- a/src/bridgestanR.h +++ b/src/bridgestanR.h @@ -5,50 +5,50 @@ #include "model_rng.hpp" extern "C" { #else -typedef struct model_rng model_rng; +typedef struct bs_model_rng bs_model_rng; typedef int bool; #endif // Shim to convert to R interface requirement of void with pointer args // All calls directly delegated to versions without _R suffix -void construct_R(char** data, int* rng, int* chain, model_rng** ptr_out); +void bs_construct_R(char** data, int* rng, int* chain, bs_model_rng** ptr_out); -void destruct_R(model_rng** model, int* return_code); +void bs_destruct_R(bs_model_rng** model, int* return_code); -void name_R(model_rng** model, char const** name_out); +void bs_name_R(bs_model_rng** model, char const** name_out); -void model_info_R(model_rng** model, char const** info_out); +void bs_model_info_R(bs_model_rng** model, char const** info_out); -void param_names_R(model_rng** model, int* include_tp, int* include_gq, - char const** name_out); +void bs_param_names_R(bs_model_rng** model, int* include_tp, int* include_gq, + char const** name_out); -void param_unc_names_R(model_rng** model, char const** name_out); +void bs_param_unc_names_R(bs_model_rng** model, char const** name_out); -void param_num_R(model_rng** model, int* include_tp, int* include_gq, - int* num_out); +void bs_param_num_R(bs_model_rng** model, int* include_tp, int* include_gq, + int* num_out); -void param_unc_num_R(model_rng** model, int* num_out); +void bs_param_unc_num_R(bs_model_rng** model, int* num_out); -void param_constrain_R(model_rng** model, int* include_tp, int* include_gq, - const double* theta_unc, double* theta, - int* return_code); +void bs_param_constrain_R(bs_model_rng** model, int* include_tp, + int* include_gq, const double* theta_unc, + double* theta, int* return_code); -void param_unconstrain_R(model_rng** model, const double* theta, - double* theta_unc, int* return_code); +void bs_param_unconstrain_R(bs_model_rng** model, const double* theta, + double* theta_unc, int* return_code); -void param_unconstrain_json_R(model_rng** model, char const** json, - double* theta_unc, int* return_code); +void bs_param_unconstrain_json_R(bs_model_rng** model, char const** json, + double* theta_unc, int* return_code); -void log_density_R(model_rng** model, int* propto, int* jacobian, - const double* theta, double* val, int* return_code); +void bs_log_density_R(bs_model_rng** model, int* propto, int* jacobian, + const double* theta, double* val, int* return_code); -void log_density_gradient_R(model_rng** model, int* propto, int* jacobian, - const double* theta, double* val, double* grad, - int* return_code); +void bs_log_density_gradient_R(bs_model_rng** model, int* propto, int* jacobian, + const double* theta, double* val, double* grad, + int* return_code); -void log_density_hessian_R(model_rng** model, int* propto, int* jacobian, - const double* theta, double* val, double* grad, - double* hess, int* return_code); +void bs_log_density_hessian_R(bs_model_rng** model, int* propto, int* jacobian, + const double* theta, double* val, double* grad, + double* hess, int* return_code); #ifdef __cplusplus } diff --git a/src/model_rng.cpp b/src/model_rng.cpp index 9b4e882..c8fc488 100644 --- a/src/model_rng.cpp +++ b/src/model_rng.cpp @@ -1,11 +1,16 @@ #include "model_rng.hpp" +#include #include #include #include #include #include #include +#include #include +#ifdef BRIDGESTAN_AD_HESSIAN +#include +#endif #include #include #include @@ -51,19 +56,25 @@ char* to_csv(const std::vector& names) { return strdup(s_c); } -model_rng::model_rng(const char* data_file, unsigned int seed, - unsigned int chain_id) { +bs_model_rng::bs_model_rng(const char* data_file, unsigned int seed, + unsigned int chain_id) { std::string data(data_file); if (data.empty()) { auto data_context = stan::io::empty_var_context(); model_ = &new_model(data_context, seed, &std::cerr); } else { - std::ifstream in(data); - if (!in.good()) - throw std::runtime_error("Cannot read input file: " + data); - auto data_context = stan::json::json_data(in); - in.close(); - model_ = &new_model(data_context, seed, &std::cerr); + if (stan::io::ends_with(".json", data)) { + std::ifstream in(data); + if (!in.good()) + throw std::runtime_error("Cannot read input file: " + data); + auto data_context = stan::json::json_data(in); + in.close(); + model_ = &new_model(data_context, seed, &std::cerr); + } else { + std::istringstream json(data); + auto data_context = stan::json::json_data(json); + model_ = &new_model(data_context, seed, &std::cerr); + } } boost::ecuyer1988 rng(seed); rng.discard(chain_id * 1000000000000L); @@ -103,6 +114,11 @@ model_rng::model_rng(const char* data_file, unsigned int seed, #else info << "\tSTAN_CPP_OPTIMS=false" << std::endl; #endif +#ifdef BRIDGESTAN_AD_HESSIAN + info << "\tBRIDGESTAN_AD_HESSIAN=true" << std::endl; +#else + info << "\tBRIDGESTAN_AD_HESSIAN=false" << std::endl; +#endif info << "Stan Compiler Details:" << std::endl; for (auto s : model_->model_compile_info()) { @@ -137,7 +153,7 @@ model_rng::model_rng(const char* data_file, unsigned int seed, param_tp_gq_num_ = names.size(); } -model_rng::~model_rng() { +bs_model_rng::~bs_model_rng() { delete (model_); free(name_); free(model_info_); @@ -148,11 +164,11 @@ model_rng::~model_rng() { free(param_tp_gq_names_); } -const char* model_rng::name() { return name_; } +const char* bs_model_rng::name() { return name_; } -const char* model_rng::model_info() { return model_info_; } +const char* bs_model_rng::model_info() { return model_info_; } -const char* model_rng::param_names(bool include_tp, bool include_gq) { +const char* bs_model_rng::param_names(bool include_tp, bool include_gq) { if (include_tp && include_gq) return param_tp_gq_names_; if (include_tp) @@ -162,11 +178,11 @@ const char* model_rng::param_names(bool include_tp, bool include_gq) { return param_names_; } -const char* model_rng::param_unc_names() { return param_unc_names_; } +const char* bs_model_rng::param_unc_names() { return param_unc_names_; } -int model_rng::param_unc_num() { return param_unc_num_; } +int bs_model_rng::param_unc_num() { return param_unc_num_; } -int model_rng::param_num(bool include_tp, bool include_gq) { +int bs_model_rng::param_num(bool include_tp, bool include_gq) { if (include_tp && include_gq) return param_tp_gq_num_; if (include_tp) @@ -176,7 +192,7 @@ int model_rng::param_num(bool include_tp, bool include_gq) { return param_num_; } -void model_rng::param_unconstrain(const double* theta, double* theta_unc) { +void bs_model_rng::param_unconstrain(const double* theta, double* theta_unc) { using std::set; using std::string; using std::vector; @@ -209,7 +225,7 @@ void model_rng::param_unconstrain(const double* theta, double* theta_unc) { Eigen::VectorXd::Map(theta_unc, unc_params.size()) = unc_params; } -void model_rng::param_unconstrain_json(const char* json, double* theta_unc) { +void bs_model_rng::param_unconstrain_json(const char* json, double* theta_unc) { std::stringstream in(json); stan::json::json_data inits_context(in); Eigen::VectorXd params_unc; @@ -217,8 +233,8 @@ void model_rng::param_unconstrain_json(const char* json, double* theta_unc) { Eigen::VectorXd::Map(theta_unc, params_unc.size()) = params_unc; } -void model_rng::param_constrain(bool include_tp, bool include_gq, - const double* theta_unc, double* theta) { +void bs_model_rng::param_constrain(bool include_tp, bool include_gq, + const double* theta_unc, double* theta) { using Eigen::VectorXd; VectorXd params_unc = VectorXd::Map(theta_unc, param_unc_num_); Eigen::VectorXd params; @@ -227,26 +243,30 @@ void model_rng::param_constrain(bool include_tp, bool include_gq, Eigen::VectorXd::Map(theta, params.size()) = params; } -auto model_rng::make_model_lambda(bool propto, bool jacobian) { +auto bs_model_rng::make_model_lambda(bool propto, bool jacobian) { return [model = this->model_, propto, jacobian](auto& x) { + // log_prob() requires non-const but doesn't modify its argument + auto& params + = const_cast, -1, 1>&>( + x); if (propto) { if (jacobian) { - return model->log_prob_propto_jacobian(x, &std::cerr); + return model->log_prob_propto_jacobian(params, &std::cerr); } else { - return model->log_prob_propto(x, &std::cerr); + return model->log_prob_propto(params, &std::cerr); } } else { if (jacobian) { - return model->log_prob_jacobian(x, &std::cerr); + return model->log_prob_jacobian(params, &std::cerr); } else { - return model->log_prob(x, &std::cerr); + return model->log_prob(params, &std::cerr); } } }; } -void model_rng::log_density(bool propto, bool jacobian, const double* theta_unc, - double* val) { +void bs_model_rng::log_density(bool propto, bool jacobian, + const double* theta_unc, double* val) { int N = param_unc_num_; if (propto) { Eigen::Map params_unc(theta_unc, N); @@ -264,9 +284,9 @@ void model_rng::log_density(bool propto, bool jacobian, const double* theta_unc, } } -void model_rng::log_density_gradient(bool propto, bool jacobian, - const double* theta_unc, double* val, - double* grad) { +void bs_model_rng::log_density_gradient(bool propto, bool jacobian, + const double* theta_unc, double* val, + double* grad) { static thread_local stan::math::ChainableStack thread_instance; auto logp = make_model_lambda(propto, jacobian); int N = param_unc_num_; @@ -274,17 +294,23 @@ void model_rng::log_density_gradient(bool propto, bool jacobian, stan::math::gradient(logp, params_unc, *val, grad, grad + N); } -void model_rng::log_density_hessian(bool propto, bool jacobian, - const double* theta_unc, double* val, - double* grad, double* hessian) { +void bs_model_rng::log_density_hessian(bool propto, bool jacobian, + const double* theta_unc, double* val, + double* grad, double* hessian) { static thread_local stan::math::ChainableStack thread_instance; auto logp = make_model_lambda(propto, jacobian); int N = param_unc_num_; Eigen::Map params_unc(theta_unc, N); Eigen::VectorXd grad_vec(N); Eigen::MatrixXd hess_mat(N, N); + +#ifdef BRIDGESTAN_AD_HESSIAN + stan::math::hessian(logp, params_unc, *val, grad_vec, hess_mat); +#else stan::math::internal::finite_diff_hessian_auto(logp, params_unc, *val, grad_vec, hess_mat); +#endif + Eigen::VectorXd::Map(grad, N) = grad_vec; Eigen::MatrixXd::Map(hessian, N, N) = hess_mat; } diff --git a/src/model_rng.hpp b/src/model_rng.hpp index 0b70302..6dfad65 100644 --- a/src/model_rng.hpp +++ b/src/model_rng.hpp @@ -12,7 +12,7 @@ * CSV format. Instances can be constructed with the C function * `construct()` and destroyed with the C function `destruct()`. */ -class model_rng { +class bs_model_rng { public: /** * Construct a model and random number generator with cached @@ -23,12 +23,12 @@ class model_rng { * @param[in] chain_id number of gaps to skip in the pseudorandom * number generator for concurrent computations */ - model_rng(const char* data_file, unsigned int seed, unsigned int chain_id); + bs_model_rng(const char* data_file, unsigned int seed, unsigned int chain_id); /** * Destroy this object and free all of the memory allocated for it. */ - ~model_rng(); + ~bs_model_rng(); /** * Return the name of the model. This class manages the memory, diff --git a/stan b/stan new file mode 160000 index 0000000..a3ee52e --- /dev/null +++ b/stan @@ -0,0 +1 @@ +Subproject commit a3ee52eee6a294638445f2db5fed37214fae3142