Module laplace.baselaplace
Classes
-------

class BaseLaplace(model: nn.Module, likelihood: Likelihood | str, sigma_noise: float | torch.Tensor = 1.0, prior_precision: float | torch.Tensor = 1.0, prior_mean: float | torch.Tensor = 0.0, temperature: float = 1.0, enable_backprop: bool = False, dict_key_x: str = 'input_ids', dict_key_y: str = 'labels', backend: type[CurvatureInterface] | None = None, backend_kwargs: dict[str, Any] | None = None, asdl_fisher_kwargs: dict[str, Any] | None = None)
Base class for all Laplace approximations in this library.
Parameters
----------
model : torch.nn.Module
likelihood : Likelihood or str in {'classification', 'regression', 'reward_modeling'}
    determines the log likelihood Hessian approximation.
    In the case of 'reward_modeling', it fits Laplace using the classification
    likelihood, then does prediction as in the regression likelihood. The model
    needs to be defined accordingly: the forward pass during training takes
    x.shape == (batch_size, 2, dim) with y.shape == (batch_size,), while during
    evaluation x.shape == (batch_size, dim).
    Note that 'reward_modeling' only supports KronLaplace and DiagLaplace.
sigma_noise : torch.Tensor or float, default=1
    observation noise for the regression setting; must be 1 for classification.
prior_precision : torch.Tensor or float, default=1
    prior precision of a Gaussian prior (= weight decay);
    can be scalar, per-layer, or diagonal in the most general case.
prior_mean : torch.Tensor or float, default=0
    prior mean of a Gaussian prior, useful for continual learning.
temperature : float, default=1
    temperature of the likelihood; a lower temperature leads to a more
    concentrated posterior and vice versa.
enable_backprop : bool, default=False
    whether to enable backprop to the input x through the Laplace predictive.
    Useful for e.g. Bayesian optimization.
dict_key_x : str, default='input_ids'
    the dictionary key under which the input tensor x is stored. Only has an
    effect when the model takes a MutableMapping as the input. Useful for
    Huggingface LLM models.
dict_key_y : str, default='labels'
    the dictionary key under which the target tensor y is stored. Only has an
    effect when the model takes a MutableMapping as the input. Useful for
    Huggingface LLM models.
backend : subclasses of CurvatureInterface
    backend for access to curvature/Hessian approximations. Defaults to
    CurvlinopsGGN if None.
backend_kwargs : dict, default=None
    arguments passed to the backend on initialization, for example to set the
    number of MC samples for stochastic approximations.
asdl_fisher_kwargs : dict, default=None
    arguments passed to the ASDL backend specifically on initialization.
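For orientation, a minimal end-to-end sketch of this interface (the toy model, data, and the top-level Laplace factory are illustrative assumptions; only the fit, optimize_prior_precision, and predictive calls are documented in this section):

    import torch
    from torch import nn
    from torch.utils.data import DataLoader, TensorDataset
    from laplace import Laplace  # convenience factory dispatching to the subclasses below

    # Toy regression model and data; any (pre-)trained nn.Module works.
    model = nn.Sequential(nn.Linear(2, 32), nn.Tanh(), nn.Linear(32, 1))
    X, y = torch.randn(256, 2), torch.randn(256, 1)
    train_loader = DataLoader(TensorDataset(X, y), batch_size=32)

    # Post-hoc Laplace over all weights with a Kronecker-factored Hessian.
    la = Laplace(model, 'regression', subset_of_weights='all', hessian_structure='kron')
    la.fit(train_loader)
    la.optimize_prior_precision(pred_type='glm', method='marglik')

    # Posterior predictive mean and variance at new inputs.
    f_mu, f_var = la(torch.randn(4, 2), pred_type='glm')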
Subclasses

Instance variables

prop backend : CurvatureInterface
prop log_likelihood : torch.Tensor

Compute the log likelihood on the training data after .fit() has been called.
The log likelihood is computed on-demand based on the loss and, for example,
the observation noise, which makes it differentiable in the latter for
iterative updates.

Returns
-------
log_likelihood : torch.Tensor
prop prior_precision_diag : torch.Tensor

Obtain the diagonal prior precision p_0 constructed from either a scalar,
layer-wise, or diagonal prior precision.

Returns
-------
prior_precision_diag : torch.Tensor

prop prior_mean : torch.Tensor

prop prior_precision : torch.Tensor

prop sigma_noise : torch.Tensor
Methods

def fit(self, train_loader: DataLoader) -> None

def log_marginal_likelihood(self, prior_precision: torch.Tensor | None = None, sigma_noise: torch.Tensor | None = None)

def predictive(self, x: torch.Tensor, pred_type: PredType | str, link_approx: LinkApprox | str, n_samples: int)

def optimize_prior_precision(self, pred_type: PredType | str, method: TuningMethod | str = TuningMethod.MARGLIK, n_steps: int = 100, lr: float = 0.1, init_prior_prec: float | torch.Tensor = 1.0, prior_structure: PriorStructure | str = PriorStructure.DIAG, val_loader: DataLoader | None = None, loss: torchmetrics.Metric | Callable[[torch.Tensor], torch.Tensor | float] | None = None, log_prior_prec_min: float = -4, log_prior_prec_max: float = 4, grid_size: int = 100, link_approx: LinkApprox | str = LinkApprox.PROBIT, n_samples: int = 100, verbose: bool = False, progress_bar: bool = False)
Optimize the prior precision post-hoc using the method specified by the user.

Parameters
----------
pred_type : PredType or str in {'glm', 'nn'}
    type of posterior predictive, linearized GLM predictive or neural network
    sampling predictive. The GLM predictive is consistent with the curvature
    approximations used here.
method : TuningMethod or str in {'marglik', 'gridsearch'}, default=TuningMethod.MARGLIK
    specifies how the prior precision should be optimized.
n_steps : int, default=100
    the number of gradient descent steps to take.
lr : float, default=1e-1
    the learning rate to use for gradient descent.
init_prior_prec : float or tensor, default=1.0
    initial prior precision before the first optimization step.
prior_structure : PriorStructure or str in {'scalar', 'layerwise', 'diag'}, default=PriorStructure.DIAG
    if init_prior_prec is scalar, the prior precision is optimized with this
    structure; otherwise, the structure of init_prior_prec is maintained.
val_loader : torch.data.utils.DataLoader, default=None
    DataLoader for the validation set; each iterate is a training batch (X, y).
loss : callable or torchmetrics.Metric, default=None
    loss function to use for CV. If callable, the loss is computed offline
    (memory intensive). If a torchmetrics.Metric, a running loss is computed
    (efficient). The default depends on the likelihood: RunningNLLMetric() for
    classification and reward modeling, running MeanSquaredError() for
    regression.
log_prior_prec_min : float, default=-4
    lower bound of the gridsearch interval.
log_prior_prec_max : float, default=4
    upper bound of the gridsearch interval.
grid_size : int, default=100
    number of values to consider inside the gridsearch interval.
link_approx : LinkApprox or str in {'mc', 'probit', 'bridge'}, default=LinkApprox.PROBIT
    how to approximate the classification link function for the 'glm'
    predictive. For pred_type='nn', only 'mc' is possible.
n_samples : int, default=100
    number of samples for link_approx='mc'.
verbose : bool, default=False
    if true, the optimized prior precision will be printed
    (can be a large tensor if the prior has a diagonal covariance).
progress_bar : bool, default=False
    whether to show a progress bar; updated at every batch-Hessian
    computation. Useful for very large models and large amounts of data, esp.
    when subset_of_weights='all'.
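For example, tuning by grid search on held-out data (a sketch; assumes a fitted instance la and a validation loader val_loader of (X, y) batches):

    # Grid search over the log prior precision interval documented above.
    la.optimize_prior_precision(
        pred_type='glm',
        method='gridsearch',
        val_loader=val_loader,   # assumed to exist
        log_prior_prec_min=-4,
        log_prior_prec_max=4,
        grid_size=100,
        link_approx='probit',
    )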
class ParametricLaplace(model: nn.Module, likelihood: Likelihood | str, sigma_noise: float | torch.Tensor = 1.0, prior_precision: float | torch.Tensor = 1.0, prior_mean: float | torch.Tensor = 0.0, temperature: float = 1.0, enable_backprop: bool = False, dict_key_x: str = 'input_ids', dict_key_y: str = 'labels', backend: type[CurvatureInterface] | None = None, backend_kwargs: dict[str, Any] | None = None, asdl_fisher_kwargs: dict[str, Any] | None = None)

Parametric Laplace class.

Subclasses need to specify how the Hessian approximation is initialized, how
to add up curvature over training data, how to sample from the Laplace
approximation, and how to compute the functional variance.

A Laplace approximation is represented by a MAP estimate, given by the model
parameters, and a posterior precision or covariance specifying a Gaussian
distribution \mathcal{N}(\theta_{MAP}, P^{-1}). The goal of this class is to
compute the posterior precision P, which sums as

    P = \sum_{n=1}^N \nabla^2_\theta \log p(\mathcal{D}_n \mid \theta)
        \vert_{\theta_{MAP}} + \nabla^2_\theta \log p(\theta) \vert_{\theta_{MAP}}.

Every subclass implements different approximations to the log likelihood
Hessians, for example, a diagonal one. The prior is assumed to be Gaussian and
therefore we have a simple form for
\nabla^2_\theta \log p(\theta) \vert_{\theta_{MAP}} = P_0.
In particular, we assume a scalar, layer-wise, or diagonal prior precision so
that in all cases P_0 = \textrm{diag}(p_0), and the structure of p_0 can be
varied.
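As a concrete sketch of this composition in the diagonal case (tensor names here are illustrative assumptions, not the class's actual attributes):

    # Diagonal P = accumulated log likelihood Hessian plus diagonal prior P_0.
    # H_lik   : (n_params,) diagonal Hessian approximation summed over the data
    # p0_diag : (n_params,) diagonal prior precision p_0
    posterior_precision = H_lik + p0_diag            # diagonal of P
    posterior_variance = 1.0 / posterior_precision   # diagonal of P^{-1}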
Ancestors
    BaseLaplace
Subclasses

Instance variables
prop scatter : torch.Tensor

Computes the scatter, a term of the log marginal likelihood that corresponds
to L2 regularization:
scatter = (\theta_{MAP} - \mu_0)^\top P_0 (\theta_{MAP} - \mu_0).

Returns
-------
scatter : torch.Tensor
prop log_det_prior_precision : torch.Tensor

Compute the log determinant of the prior precision, \log \det P_0.

Returns
-------
log_det : torch.Tensor

prop log_det_posterior_precision : torch.Tensor

Compute the log determinant of the posterior precision, \log \det P, which
depends on the structure the subclass uses for the Hessian approximation.

Returns
-------
log_det : torch.Tensor

prop log_det_ratio : torch.Tensor

Compute the log determinant ratio, a part of the log marginal likelihood:

    \log \frac{\det P}{\det P_0} = \log \det P - \log \det P_0

Returns
-------
log_det_ratio : torch.Tensor

prop posterior_precision : torch.Tensor

Compute or return the posterior precision P.

Returns
-------
posterior_prec : torch.Tensor
Methods

def fit(self, train_loader: DataLoader, override: bool = True, progress_bar: bool = False) -> None

Fit the local Laplace approximation at the parameters of the model.

Parameters
----------
train_loader : torch.data.utils.DataLoader
    each iterate is a training batch, either (X, y) tensors or a dict-like
    object containing keys as expressed by self.dict_key_x and
    self.dict_key_y. train_loader.dataset needs to be set to access N, the
    size of the data set.
override : bool, default=True
    whether to initialize H, loss, and n_data again; setting to False is
    useful for online learning settings to accumulate a sequential posterior
    approximation.
progress_bar : bool, default=False
    whether to show a progress bar; updated at every batch-Hessian
    computation. Useful for very large models and large amounts of data, esp.
    when subset_of_weights='all'.
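A sketch of the online setting that override enables (the two task loaders are assumed to exist):

    la.fit(task1_loader)                  # posterior from the first task
    la.fit(task2_loader, override=False)  # second task's curvature is accumulated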
def square_norm(self, value) -> torch.Tensor

Compute the square norm under the posterior precision with value - self.mean
as \Delta:

    \Delta^\top P \Delta

Returns
-------
square_form
def log_prob(self, value: torch.Tensor, normalized: bool = True) -> torch.Tensor

Compute the log probability under the (current) Laplace approximation.

Parameters
----------
value : torch.Tensor
normalized : bool, default=True
    whether to return the log of a properly normalized Gaussian or just the
    terms that depend on value.

Returns
-------
log_prob : torch.Tensor
def log_marginal_likelihood(self, prior_precision: torch.Tensor | None = None, sigma_noise: torch.Tensor | None = None)

Compute the Laplace approximation to the log marginal likelihood, subject to
the specific Hessian approximations that subclasses implement. Requires that
the Laplace approximation has been fit before. The resulting torch.Tensor is
differentiable in prior_precision and sigma_noise if these have gradients
enabled. By passing prior_precision or sigma_noise, the current value is
overwritten. This is useful for iterating on the log marginal likelihood.

Parameters
----------
prior_precision : torch.Tensor, optional
    prior precision if should be changed from current prior_precision value
sigma_noise : torch.Tensor, optional
    observation noise standard deviation if should be changed

Returns
-------
log_marglik : torch.Tensor
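Because the result is differentiable, hyperparameters can be tuned by gradient descent on the negative log marginal likelihood; a minimal sketch (assumes a fitted regression instance la):

    import torch

    # Optimize prior precision and observation noise in log space so the
    # constrained quantities stay positive.
    log_prior, log_sigma = torch.ones(1, requires_grad=True), torch.ones(1, requires_grad=True)
    hyper_optimizer = torch.optim.Adam([log_prior, log_sigma], lr=1e-1)
    for _ in range(100):
        hyper_optimizer.zero_grad()
        neg_marglik = -la.log_marginal_likelihood(log_prior.exp(), log_sigma.exp())
        neg_marglik.backward()
        hyper_optimizer.step()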
def predictive_samples(self, x: torch.Tensor | MutableMapping[str, torch.Tensor | Any], pred_type: PredType | str = PredType.GLM, n_samples: int = 100, diagonal_output: bool = False, generator: torch.Generator | None = None)

Sample from the posterior predictive on input data x. Can be used, for
example, for Thompson sampling.

Parameters
----------
x : torch.Tensor or MutableMapping
    input data (batch_size, input_shape)
pred_type : {'glm', 'nn'}, default='glm'
    type of posterior predictive, linearized GLM predictive or neural network
    sampling predictive. The GLM predictive is consistent with the curvature
    approximations used here.
n_samples : int
    number of samples
diagonal_output : bool
    whether to use a diagonalized glm posterior predictive on the outputs.
    Only applies when pred_type='glm'.
generator : torch.Generator, optional
    random number generator to control the samples (if sampling is used)

Returns
-------
samples : torch.Tensor
    samples (n_samples, batch_size, output_shape)
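A Thompson-sampling-flavored usage sketch (the candidate pool X_cand is an assumption):

    # One joint function draw over the candidates; pick the argmax arm.
    fs = la.predictive_samples(X_cand, pred_type='glm', n_samples=1)  # (1, batch, out)
    best_idx = fs[0].squeeze(-1).argmax()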
def functional_variance(self, Js: torch.Tensor) -> torch.Tensor

Compute the functional variance for the 'glm' predictive:
f_var[i] = Js[i] @ P.inv() @ Js[i].T, which is an output x output predictive
covariance matrix. Mathematically, we have for a single Jacobian
\mathcal{J} = \nabla_\theta f(x;\theta) \vert_{\theta_{MAP}}
the output covariance matrix \mathcal{J} P^{-1} \mathcal{J}^\top.

Parameters
----------
Js : torch.Tensor
    Jacobians of model output wrt parameters (batch, outputs, parameters)

Returns
-------
f_var : torch.Tensor
    output covariance (batch, outputs, outputs)
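A shape-level sketch of this computation (a dense inverse precision P_inv is assumed here; the actual subclasses exploit their Hessian structure instead of materializing it):

    # f_var[b] = Js[b] @ P_inv @ Js[b].T for every batch element.
    # Js: (batch, outputs, parameters), P_inv: (parameters, parameters)
    f_var = torch.einsum('bop,pq,brq->bor', Js, P_inv, Js)  # (batch, outputs, outputs)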
def functional_covariance(self, Js: torch.Tensor) -> torch.Tensor

Compute the functional covariance for the 'glm' predictive:
f_cov = Js @ P.inv() @ Js.T, which is a (batch*outputs) x (batch*outputs)
predictive covariance matrix.

This emulates the GP posterior covariance
\mathcal{N}([f(x_1), \ldots, f(x_m)], \mathrm{Cov}[f(x_1), \ldots, f(x_m)]).
Useful for joint predictions, such as in batched Bayesian optimization.

Parameters
----------
Js : torch.Tensor
    Jacobians of model output wrt parameters (batch*outputs, parameters)

Returns
-------
f_cov : torch.Tensor
    output covariance (batch*outputs, batch*outputs)
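The joint analogue of the per-point sketch above (again with an assumed dense P_inv):

    # Flatten batch and output dimensions, then form the joint covariance.
    B, O, P_dim = Js.shape
    Js_flat = Js.reshape(B * O, P_dim)   # (batch*outputs, parameters)
    f_cov = Js_flat @ P_inv @ Js_flat.T  # (batch*outputs, batch*outputs)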
def sample(self, n_samples: int = 100, generator: torch.Generator | None = None)

Sample from the Laplace posterior approximation, i.e.,
\theta \sim \mathcal{N}(\theta_{MAP}, P^{-1}).

Parameters
----------
n_samples : int, default=100
    number of samples
generator : torch.Generator, optional
    random number generator to control the samples

Returns
-------
samples : torch.Tensor
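For reproducible draws (usage sketch):

    g = torch.Generator().manual_seed(0)
    thetas = la.sample(n_samples=10, generator=g)  # (10, n_params) weight samples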
def state_dict(self) -> dict[str, Any]

def load_state_dict(self, state_dict: dict[str, Any]) -> None

Inherited members
class FunctionalLaplace(model: nn.Module, likelihood: Likelihood | str, n_subset: int, sigma_noise: float | torch.Tensor = 1.0, prior_precision: float | torch.Tensor = 1.0, prior_mean: float | torch.Tensor = 0.0, temperature: float = 1.0, enable_backprop: bool = False, dict_key_x='input_ids', dict_key_y='labels', backend: type[CurvatureInterface] | None = laplace.curvature.backpack.BackPackGGN, backend_kwargs: dict[str, Any] | None = None, independent_outputs: bool = False, seed: int = 0)

Applying the GGN (generalized Gauss-Newton) approximation for the Hessian in
the Laplace approximation of the posterior turns the underlying probabilistic
model from a BNN into a GLM (generalized linear model). This GLM (in weight
space) is equivalent to a GP (in function space); see "Approximate Inference
Turns Deep Networks into Gaussian Processes" (Khan et al., 2019).

This class implements the (approximate) GP inference through which we obtain
the desired quantities (posterior predictive, marginal log-likelihood). See
"Improving predictions of Bayesian neural nets via local linearization"
(Immer et al., 2021) for more details.

Note that for likelihood='classification', we approximate L_{NN} with a
diagonal matrix (L_{NN} is a block-diagonal matrix, where blocks represent
Hessians of the per-data-point log likelihood w.r.t. the neural network
output f; see Appendix A.2.1 for the exact definition). We resort to such an
approximation because of the (possible) errors found in the Laplace
approximation for multiclass GP classification in Chapter 3.5 of the R&W 2006
GP book. Alternatively, one could also resort to one-vs-one or one-vs-rest
implementations for multiclass classification; however, that is not (yet)
supported here.

Parameters
----------
n_subset : int
    number of data points for Subset-of-Data (SOD) approximate GP inference.
independent_outputs : bool
    the GP kernel here is a product of Jacobians, which results in a C x C
    matrix where C is the output dimension. If independent_outputs=True, only
    the diagonal of the GP kernel is used. This is (somewhat) equivalent to
    assuming independent GPs across output channels.

See BaseLaplace class for the full interface.
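A minimal usage sketch (assumes FunctionalLaplace is importable from the top-level laplace package and reuses the toy regression setup from the BaseLaplace example; the la(x) predictive call is inherited from BaseLaplace):

    from laplace import FunctionalLaplace

    # GP-equivalent Laplace with a 64-point subset for SOD inference.
    la = FunctionalLaplace(model, 'regression', n_subset=64)
    la.fit(train_loader)
    f_mu, f_var = la(torch.randn(4, 2))  # GP posterior predictive mean/variance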
Ancestors
    BaseLaplace

Subclasses

Instance variables
prop gp_kernel_prior_variance

prop log_det_ratio : torch.Tensor

Computes the log determinant term in the GP marginal likelihood.

For classification we use eq. (3.44) from Chapter 3.5 of the R&W 2006 GP book
(note that we always use the diagonal approximation D of the Hessian of the
log likelihood w.r.t. f):

    log determinant term := \log | I + D^{1/2} K D^{1/2} |

For regression, we use the "standard" GP marginal likelihood:

    log determinant term := \log | K + \sigma^2 I |

prop scatter : torch.Tensor

Compute the scatter term in the GP log marginal likelihood.

For classification we use eq. (3.44) from Chapter 3.5 of the R&W 2006 GP book
with \hat{f} = f:

    scatter term := f K^{-1} f^\top

For regression, we use the "standard" GP marginal likelihood:

    scatter term := (y - m) K^{-1} (y - m)^\top,

where m is the mean of the GP prior, which in our case corresponds to
m := f + J (\theta - \theta_{MAP}).

prop prior_precision
Methods

def fit(self, train_loader: DataLoader | MutableMapping, progress_bar: bool = False)

Fit the Laplace approximation of a GP posterior.

Parameters
----------
train_loader : torch.data.utils.DataLoader
    train_loader.dataset needs to be set to access N, the size of the data
    set, and train_loader.batch_size needs to be set to access the batch size.
progress_bar : bool
    whether to show a progress bar during the fitting process.
def predictive_samples(self, x: torch.Tensor | MutableMapping[str, torch.Tensor | Any], pred_type: PredType | str = PredType.GLM, n_samples: int = 100, diagonal_output: bool = False, generator: torch.Generator | None = None)

Sample from the posterior predictive on input data x. Can be used, for
example, for Thompson sampling.

Parameters
----------
x : torch.Tensor or MutableMapping
    input data (batch_size, input_shape)
pred_type : {'glm'}, default='glm'
    type of posterior predictive, linearized GLM predictive.
n_samples : int
    number of samples
diagonal_output : bool
    whether to use a diagonalized glm posterior predictive on the outputs.
    Only applies when pred_type='glm'.
generator : torch.Generator, optional
    random number generator to control the samples (if sampling is used)

Returns
-------
samples : torch.Tensor
    samples (n_samples, batch_size, output_shape)
def functional_variance(self, Js_star: torch.Tensor) -> torch.Tensor

GP posterior variance:

    k_{**} - K_{*M} (K_{MM} + L_{MM}^{-1})^{-1} K_{M*}

Parameters
----------
Js_star : torch.Tensor of shape (N*, C, P)
    Jacobians of test data points

Returns
-------
f_var : torch.Tensor of shape (N*, C, C)
    Contains the posterior variances of N* testing points.
def functional_covariance(self, Js_star: torch.Tensor) -> torch.Tensor

GP posterior covariance:

    k_{**} - K_{*M} (K_{MM} + L_{MM}^{-1})^{-1} K_{M*}

Parameters
----------
Js_star : torch.Tensor of shape (N*, C, P)
    Jacobians of test data points

Returns
-------
f_cov : torch.Tensor of shape (N* x C, N* x C)
    Contains the posterior covariances of N* testing points.
def optimize_prior_precision(self, pred_type: PredType | str = PredType.GP, method: TuningMethod | str = TuningMethod.MARGLIK, n_steps: int = 100, lr: float = 0.1, init_prior_prec: float | torch.Tensor = 1.0, prior_structure: PriorStructure | str = PriorStructure.SCALAR, val_loader: DataLoader | None = None, loss: torchmetrics.Metric | Callable[[torch.Tensor], torch.Tensor | float] | None = None, log_prior_prec_min: float = -4, log_prior_prec_max: float = 4, grid_size: int = 100, link_approx: LinkApprox | str = LinkApprox.PROBIT, n_samples: int = 100, verbose: bool = False, progress_bar: bool = False)

optimize_prior_precision_base from BaseLaplace with pred_type='gp'.
def log_marginal_likelihood(self, prior_precision: torch.Tensor | None = None, sigma_noise: torch.Tensor | None = None)

Compute the Laplace approximation to the log marginal likelihood. Requires
that the Laplace approximation has been fit before. The resulting
torch.Tensor is differentiable in prior_precision and sigma_noise if these
have gradients enabled. By passing prior_precision or sigma_noise, the
current value is overwritten. This is useful for iterating on the log
marginal likelihood.

Parameters
----------
prior_precision : torch.Tensor, optional
    prior precision if should be changed from current prior_precision value
sigma_noise : torch.Tensor, optional
    observation noise standard deviation if should be changed

Returns
-------
log_marglik : torch.Tensor
def state_dict(self) -> dict

def load_state_dict(self, state_dict: dict)

Inherited members
    BaseLaplace
class FullLaplace(model: nn.Module, likelihood: Likelihood | str, sigma_noise: float | torch.Tensor = 1.0, prior_precision: float | torch.Tensor = 1.0, prior_mean: float | torch.Tensor = 0.0, temperature: float = 1.0, enable_backprop: bool = False, dict_key_x: str = 'input_ids', dict_key_y: str = 'labels', backend: type[CurvatureInterface] | None = None, backend_kwargs: dict[str, Any] | None = None)

Laplace approximation with a full, i.e., dense, log likelihood Hessian
approximation and hence posterior precision. Based on the chosen backend
parameter, the full approximation can be, for example, a generalized
Gauss-Newton matrix. Mathematically, we have P \in \mathbb{R}^{P \times P}.
See BaseLaplace for the full interface.

Ancestors
    ParametricLaplace, BaseLaplace

Subclasses

Instance variables
prop posterior_scale : torch.Tensor

Posterior scale (square root of the covariance), i.e., P^{-1/2}.

Returns
-------
scale : torch.tensor
    (parameters, parameters)

prop posterior_covariance : torch.Tensor

Posterior covariance, i.e., P^{-1}.

Returns
-------
covariance : torch.tensor
    (parameters, parameters)

prop posterior_precision : torch.Tensor

Posterior precision P.

Returns
-------
precision : torch.tensor
    (parameters, parameters)

Inherited members
class KronLaplace(model: nn.Module, likelihood: Likelihood | str, sigma_noise: float | torch.Tensor = 1.0, prior_precision: float | torch.Tensor = 1.0, prior_mean: float | torch.Tensor = 0.0, temperature: float = 1.0, enable_backprop: bool = False, dict_key_x: str = 'input_ids', dict_key_y: str = 'labels', backend: type[CurvatureInterface] | None = None, damping: bool = False, backend_kwargs: dict[str, Any] | None = None, asdl_fisher_kwargs: dict[str, Any] | None = None)

Laplace approximation with a Kronecker factored log likelihood Hessian
approximation and hence posterior precision. Mathematically, we have for each
parameter group, e.g., torch.nn.Module, that P \approx Q \otimes H.
See BaseLaplace for the full interface, and see Kron and KronDecomposed for
the structure of the Kronecker factors. Kron is used to aggregate factors by
summing up, and KronDecomposed is used to add the prior, a Hessian factor
(e.g. temperature), and to compute posterior covariances, the marginal
likelihood, etc. Damping can be enabled by setting damping=True.

Ancestors
    ParametricLaplace, BaseLaplace

Subclasses

Instance variables
prop posterior_precision : KronDecomposed

prop prior_precision : torch.Tensor

Methods

def state_dict(self) -> dict[str, Any]

def load_state_dict(self, state_dict: dict[str, Any])

Inherited members
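A quick construction sketch (reuses the toy model and loader from the BaseLaplace example; KronLaplace is importable from the top-level laplace package):

    from laplace import KronLaplace

    la = KronLaplace(model, 'regression', damping=True)  # damped Kronecker factors
    la.fit(train_loader)
    print(la.log_marginal_likelihood())  # uses the KronDecomposed posterior precision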
class DiagLaplace(model: nn.Module, likelihood: Likelihood | str, sigma_noise: float | torch.Tensor = 1.0, prior_precision: float | torch.Tensor = 1.0, prior_mean: float | torch.Tensor = 0.0, temperature: float = 1.0, enable_backprop: bool = False, dict_key_x: str = 'input_ids', dict_key_y: str = 'labels', backend: type[CurvatureInterface] | None = None, backend_kwargs: dict[str, Any] | None = None, asdl_fisher_kwargs: dict[str, Any] | None = None)

Laplace approximation with a diagonal log likelihood Hessian approximation
and hence posterior precision. Mathematically, we have
P \approx \textrm{diag}(P). See BaseLaplace for the full interface.

Ancestors
    ParametricLaplace, BaseLaplace

Subclasses

Instance variables
prop posterior_precision : torch.Tensor

Diagonal posterior precision p.

Returns
-------
precision : torch.tensor
    (parameters)

prop posterior_scale : torch.Tensor

Diagonal posterior scale \sqrt{p^{-1}}.

Returns
-------
scale : torch.tensor
    (parameters)

prop posterior_variance : torch.Tensor

Diagonal posterior variance p^{-1}.

Returns
-------
variance : torch.tensor
    (parameters)

Inherited members
class LowRankLaplace(model: nn.Module, likelihood: Likelihood | str, backend: type[CurvatureInterface] = laplace.curvature.curvature.CurvatureInterface, sigma_noise: float | torch.Tensor = 1, prior_precision: float | torch.Tensor = 1, prior_mean: float | torch.Tensor = 0, temperature: float = 1, enable_backprop: bool = False, dict_key_x: str = 'input_ids', dict_key_y: str = 'labels', backend_kwargs: dict[str, Any] | None = None)

Laplace approximation with a low-rank log likelihood Hessian (approximation).
The low-rank matrix is represented by an eigendecomposition (vecs, values).
Based on the chosen backend, either a true Hessian or, for example, a GGN
approximation could be used. The posterior precision is computed as

    P = V diag(l) V^T + P_0.

To sample, compute the functional variance, and compute the log determinant,
algebraic tricks are used to reduce the cost of inversion to that of a
K \times K matrix if we have a rank of K.

Note that only the AsdfghjklHessian backend is supported. Install it via:
pip install git+https://git@github.com/wiseodd/asdl@asdfghjkl

See BaseLaplace for the full interface.

Ancestors
    ParametricLaplace, BaseLaplace
Instance variables

prop V : torch.Tensor

prop Kinv : torch.Tensor

prop posterior_precision : tuple[tuple[torch.Tensor, torch.Tensor], torch.Tensor]

Return the correctly scaled posterior precision that would be constructed as
H[0] @ diag(H[1]) @ H[0].T + self.prior_precision_diag.

Returns
-------
H : tuple(eigenvectors, eigenvalues)
    scaled self.H with temperature and loss factors.
prior_precision_diag : torch.Tensor
    diagonal prior precision of shape (parameters,) to be added to H.

Inherited members