From 7380088604e3d4b89d1f2c303b4abd3bf5b7db90 Mon Sep 17 00:00:00 2001 From: Rishi Kulkarni Date: Sat, 5 Jun 2021 16:34:58 -0700 Subject: [PATCH] updated documentation for 1.0.0 release --- README.md | 2 +- docs/_templates/custom-class-template.rst | 33 + docs/_templates/custom-module-template.rst | 66 ++ docs/conf.py | 10 +- .../hierarch.power.DataSimulator.rst | 25 + .../reference/_autosummary/hierarch.power.rst | 32 + .../hierarch.resampling.Bootstrapper.rst | 25 + .../hierarch.resampling.Permuter.rst | 25 + .../_autosummary/hierarch.resampling.rst | 33 + .../hierarch.stats.confidence_interval.rst | 6 + .../hierarch.stats.linear_regression_test.rst | 6 + .../hierarch.stats.multi_sample_test.rst | 6 + .../hierarch.stats.preprocess_data.rst | 6 + .../reference/_autosummary/hierarch.stats.rst | 37 + .../hierarch.stats.studentized_covariance.rst | 6 + .../hierarch.stats.two_sample_test.rst | 6 + .../hierarch.stats.welch_statistic.rst | 6 + docs/reference/index.rst | 13 +- docs/reference/power.rst | 5 - docs/reference/resampling.rst | 5 - docs/reference/stats.rst | 5 - docs/user/confidence.rst | 291 ++++++++ docs/user/hypothesis.rst | 428 ++++++++++++ docs/user/importing.rst | 9 + docs/user/power.rst | 196 ++++++ docs/user/usage.rst | 632 +----------------- hierarch/internal_functions.py | 44 -- hierarch/power.py | 7 - hierarch/resampling.py | 22 +- hierarch/stats.py | 45 +- setup.py | 2 +- 31 files changed, 1315 insertions(+), 719 deletions(-) create mode 100644 docs/_templates/custom-class-template.rst create mode 100644 docs/_templates/custom-module-template.rst create mode 100644 docs/reference/_autosummary/hierarch.power.DataSimulator.rst create mode 100644 docs/reference/_autosummary/hierarch.power.rst create mode 100644 docs/reference/_autosummary/hierarch.resampling.Bootstrapper.rst create mode 100644 docs/reference/_autosummary/hierarch.resampling.Permuter.rst create mode 100644 docs/reference/_autosummary/hierarch.resampling.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.confidence_interval.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.linear_regression_test.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.multi_sample_test.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.preprocess_data.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.studentized_covariance.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.two_sample_test.rst create mode 100644 docs/reference/_autosummary/hierarch.stats.welch_statistic.rst delete mode 100644 docs/reference/power.rst delete mode 100644 docs/reference/resampling.rst delete mode 100644 docs/reference/stats.rst create mode 100644 docs/user/confidence.rst create mode 100644 docs/user/hypothesis.rst create mode 100644 docs/user/importing.rst create mode 100644 docs/user/power.rst diff --git a/README.md b/README.md index 45f44de..8765fe5 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## A Hierarchical Resampling Package for Python -Version 0.4 +Version 1.0 hierarch is a package for hierarchical resampling (bootstrapping, permutation) of datasets in Python. Because for loops are ultimately intrinsic to cluster-aware resampling, hierarch uses Numba to accelerate many of its key functions. 
diff --git a/docs/_templates/custom-class-template.rst b/docs/_templates/custom-class-template.rst new file mode 100644 index 0000000..d248858 --- /dev/null +++ b/docs/_templates/custom-class-template.rst @@ -0,0 +1,33 @@ +{{ fullname | escape | underline}} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + {% block methods %} + {% if methods %} + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + {% for item in methods %} + {%- if not item.startswith('_') %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + .. autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} \ No newline at end of file diff --git a/docs/_templates/custom-module-template.rst b/docs/_templates/custom-module-template.rst new file mode 100644 index 0000000..dd90e32 --- /dev/null +++ b/docs/_templates/custom-module-template.rst @@ -0,0 +1,66 @@ +{{ fullname | escape | underline}} + +.. automodule:: {{ fullname }} + + {% block attributes %} + {% if attributes %} + .. rubric:: Module attributes + + .. autosummary:: + :toctree: + {% for item in attributes %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block functions %} + {% if functions %} + .. rubric:: {{ _('Functions') }} + + .. autosummary:: + :toctree: + :nosignatures: + {% for item in functions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block classes %} + {% if classes %} + .. rubric:: {{ _('Classes') }} + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + :nosignatures: + {% for item in classes %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block exceptions %} + {% if exceptions %} + .. rubric:: {{ _('Exceptions') }} + + .. autosummary:: + :toctree: + {% for item in exceptions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + +{% block modules %} +{% if modules %} +.. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: +{% for item in modules %} + {{ item }} +{%- endfor %} +{% endif %} +{% endblock %} \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index a74bc6c..1f5c2b2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -33,7 +33,15 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx_rtd_theme", "numpydoc", "sphinx.ext.autodoc"] +extensions = [ + "sphinx_rtd_theme", + "numpydoc", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", +] + +autosummary_generate = True # Turn on sphinx.ext.autosummary +numpydoc_show_class_members = False # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/docs/reference/_autosummary/hierarch.power.DataSimulator.rst b/docs/reference/_autosummary/hierarch.power.DataSimulator.rst new file mode 100644 index 0000000..973d923 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.power.DataSimulator.rst @@ -0,0 +1,25 @@ +hierarch.power.DataSimulator +============================ + +.. currentmodule:: hierarch.power + +.. autoclass:: DataSimulator + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. 
autosummary:: + + ~DataSimulator.fit + ~DataSimulator.generate + + + + + + \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.power.rst b/docs/reference/_autosummary/hierarch.power.rst new file mode 100644 index 0000000..90a4b72 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.power.rst @@ -0,0 +1,32 @@ +hierarch.power +============== + +.. automodule:: hierarch.power + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + :nosignatures: + + DataSimulator + + + + + + + + + diff --git a/docs/reference/_autosummary/hierarch.resampling.Bootstrapper.rst b/docs/reference/_autosummary/hierarch.resampling.Bootstrapper.rst new file mode 100644 index 0000000..dbab531 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.resampling.Bootstrapper.rst @@ -0,0 +1,25 @@ +hierarch.resampling.Bootstrapper +================================ + +.. currentmodule:: hierarch.resampling + +.. autoclass:: Bootstrapper + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~Bootstrapper.fit + ~Bootstrapper.transform + + + + + + \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.resampling.Permuter.rst b/docs/reference/_autosummary/hierarch.resampling.Permuter.rst new file mode 100644 index 0000000..045a098 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.resampling.Permuter.rst @@ -0,0 +1,25 @@ +hierarch.resampling.Permuter +============================ + +.. currentmodule:: hierarch.resampling + +.. autoclass:: Permuter + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~Permuter.fit + ~Permuter.transform + + + + + + \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.resampling.rst b/docs/reference/_autosummary/hierarch.resampling.rst new file mode 100644 index 0000000..17cb063 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.resampling.rst @@ -0,0 +1,33 @@ +hierarch.resampling +=================== + +.. automodule:: hierarch.resampling + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + :nosignatures: + + Bootstrapper + Permuter + + + + + + + + + diff --git a/docs/reference/_autosummary/hierarch.stats.confidence_interval.rst b/docs/reference/_autosummary/hierarch.stats.confidence_interval.rst new file mode 100644 index 0000000..5be2f74 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.confidence_interval.rst @@ -0,0 +1,6 @@ +hierarch.stats.confidence\_interval +=================================== + +.. currentmodule:: hierarch.stats + +.. autofunction:: confidence_interval \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.linear_regression_test.rst b/docs/reference/_autosummary/hierarch.stats.linear_regression_test.rst new file mode 100644 index 0000000..f31efb5 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.linear_regression_test.rst @@ -0,0 +1,6 @@ +hierarch.stats.linear\_regression\_test +======================================= + +.. currentmodule:: hierarch.stats + +.. 
autofunction:: linear_regression_test \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.multi_sample_test.rst b/docs/reference/_autosummary/hierarch.stats.multi_sample_test.rst new file mode 100644 index 0000000..0704b62 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.multi_sample_test.rst @@ -0,0 +1,6 @@ +hierarch.stats.multi\_sample\_test +================================== + +.. currentmodule:: hierarch.stats + +.. autofunction:: multi_sample_test \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.preprocess_data.rst b/docs/reference/_autosummary/hierarch.stats.preprocess_data.rst new file mode 100644 index 0000000..3f53556 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.preprocess_data.rst @@ -0,0 +1,6 @@ +hierarch.stats.preprocess\_data +=============================== + +.. currentmodule:: hierarch.stats + +.. autofunction:: preprocess_data \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.rst b/docs/reference/_autosummary/hierarch.stats.rst new file mode 100644 index 0000000..f787b18 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.rst @@ -0,0 +1,37 @@ +hierarch.stats +============== + +.. automodule:: hierarch.stats + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + :nosignatures: + + confidence_interval + linear_regression_test + multi_sample_test + preprocess_data + studentized_covariance + two_sample_test + welch_statistic + + + + + + + + + + + + + diff --git a/docs/reference/_autosummary/hierarch.stats.studentized_covariance.rst b/docs/reference/_autosummary/hierarch.stats.studentized_covariance.rst new file mode 100644 index 0000000..6f3b081 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.studentized_covariance.rst @@ -0,0 +1,6 @@ +hierarch.stats.studentized\_covariance +====================================== + +.. currentmodule:: hierarch.stats + +.. autofunction:: studentized_covariance \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.two_sample_test.rst b/docs/reference/_autosummary/hierarch.stats.two_sample_test.rst new file mode 100644 index 0000000..e97b3c9 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.two_sample_test.rst @@ -0,0 +1,6 @@ +hierarch.stats.two\_sample\_test +================================ + +.. currentmodule:: hierarch.stats + +.. autofunction:: two_sample_test \ No newline at end of file diff --git a/docs/reference/_autosummary/hierarch.stats.welch_statistic.rst b/docs/reference/_autosummary/hierarch.stats.welch_statistic.rst new file mode 100644 index 0000000..1c605d6 --- /dev/null +++ b/docs/reference/_autosummary/hierarch.stats.welch_statistic.rst @@ -0,0 +1,6 @@ +hierarch.stats.welch\_statistic +=============================== + +.. currentmodule:: hierarch.stats + +.. autofunction:: welch_statistic \ No newline at end of file diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 2da9fa4..0e33e3d 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -1,8 +1,11 @@ Reference ========= -.. toctree:: - - stats.rst - power.rst - resampling.rst \ No newline at end of file +.. 
autosummary::
+   :toctree: _autosummary
+   :template: custom-module-template.rst
+   :recursive:
+
+   hierarch.stats
+   hierarch.power
+   hierarch.resampling
\ No newline at end of file
diff --git a/docs/reference/power.rst b/docs/reference/power.rst
deleted file mode 100644
index 7b0d085..0000000
--- a/docs/reference/power.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-hierarch.power
-==============
-
-.. automodule:: hierarch.power
-   :members:
\ No newline at end of file
diff --git a/docs/reference/resampling.rst b/docs/reference/resampling.rst
deleted file mode 100644
index c94f9db..0000000
--- a/docs/reference/resampling.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-hierarch.resampling
-===================
-
-.. automodule:: hierarch.resampling
-   :members:
\ No newline at end of file
diff --git a/docs/reference/stats.rst b/docs/reference/stats.rst
deleted file mode 100644
index 3afd251..0000000
--- a/docs/reference/stats.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-hierarch.stats
-==============
-
-.. automodule:: hierarch.stats
-   :members:
\ No newline at end of file
diff --git a/docs/user/confidence.rst b/docs/user/confidence.rst
new file mode 100644
index 0000000..2f1b5ed
--- /dev/null
+++ b/docs/user/confidence.rst
@@ -0,0 +1,291 @@
+Confidence Intervals
+====================
+
+Two-Sample Effect Sizes
+-----------------------
+Researchers can use hierarch to compute confidence intervals for effect sizes.
+These intervals are computed via test inversion and, as a result, essentially
+always achieve the nominal coverage.
+
+To put it another way, hierarch computes a 95% confidence interval by performing a
+permutation test against the null hypothesis that the true effect size is exactly equal
+to the observed effect size. The bounds of the acceptance region at alpha = 0.05 are
+then the bounds of the confidence interval. The sketch below illustrates the idea;
+afterwards, let's revisit the dataset from the hypothesis testing section.
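+
+As a rough illustration of test inversion (this is not hierarch's actual
+implementation, which inverts the permutation test far more efficiently), you can
+picture scanning candidate effect sizes and keeping every value that the test
+fails to reject. A minimal sketch, where ``pvalue_at`` stands in for any function
+returning a permutation p-value for a hypothesized effect size::
+
+    # Conceptual sketch only - not hierarch's implementation.
+    # pvalue_at(candidate) returns the p-value for the null hypothesis
+    # that the true effect size equals `candidate`. Assumes `candidates`
+    # is a grid fine enough to contain accepted values.
+    def invert_test(pvalue_at, candidates, alpha=0.05):
+        accepted = [c for c in candidates if pvalue_at(c) > alpha]
+        return min(accepted), max(accepted)
+
+Now, back to the dataset itself. You can paste it in from the clipboard as before. ::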
+
+    import pandas as pd
+    import numpy as np
+    import hierarch as ha
+
+    data = pd.read_clipboard()
+
+    print(data)
+
++------------+------+-------------+----------+
+| Condition  | Well | Measurement | Values   |
++============+======+=============+==========+
+| None       | 1    | 1           | 5.202258 |
++------------+------+-------------+----------+
+| None       | 1    | 2           | 5.136128 |
++------------+------+-------------+----------+
+| None       | 1    | 3           | 5.231401 |
++------------+------+-------------+----------+
+| None       | 2    | 1           | 5.336643 |
++------------+------+-------------+----------+
+| None       | 2    | 2           | 5.287973 |
++------------+------+-------------+----------+
+| None       | 2    | 3           | 5.375359 |
++------------+------+-------------+----------+
+| None       | 3    | 1           | 5.350692 |
++------------+------+-------------+----------+
+| None       | 3    | 2           | 5.465206 |
++------------+------+-------------+----------+
+| None       | 3    | 3           | 5.422602 |
++------------+------+-------------+----------+
+| +Treatment | 4    | 1           | 5.695427 |
++------------+------+-------------+----------+
+| +Treatment | 4    | 2           | 5.668457 |
++------------+------+-------------+----------+
+| +Treatment | 4    | 3           | 5.752592 |
++------------+------+-------------+----------+
+| +Treatment | 5    | 1           | 5.583562 |
++------------+------+-------------+----------+
+| +Treatment | 5    | 2           | 5.647895 |
++------------+------+-------------+----------+
+| +Treatment | 5    | 3           | 5.618315 |
++------------+------+-------------+----------+
+| +Treatment | 6    | 1           | 5.642983 |
++------------+------+-------------+----------+
+| +Treatment | 6    | 2           | 5.47072  |
++------------+------+-------------+----------+
+| +Treatment | 6    | 3           | 5.686654 |
++------------+------+-------------+----------+
+
+You can use the confidence_interval function in hierarch.stats to compute the
+confidence interval. ::
+
+    from hierarch.stats import confidence_interval
+
+    confidence_interval(
+        data,
+        treatment_col=0,
+        compare='means',
+        interval=95,
+        bootstraps=500,
+        permutations="all",
+        random_state=1,
+    )
+
+    (-0.5373088054909549, -0.12010079984237881)
+
+This interval does not cross 0, so it is consistent with significance at the
+alpha = 0.05 level.
+
+Because confidence_interval is based on a hypothesis test, it requires the same
+input parameters as two_sample_test or linear_regression_test. The additional
+**interval** parameter determines the width of the interval. ::
+
+    confidence_interval(
+        data,
+        treatment_col=0,
+        compare='means',
+        interval=99,
+        bootstraps=500,
+        permutations="all",
+        random_state=1,
+    )
+
+    (-0.9086402840632387, 0.25123067872990457)
+
+    confidence_interval(
+        data,
+        treatment_col=0,
+        compare='means',
+        interval=68,
+        bootstraps=500,
+        permutations="all",
+        random_state=1,
+    )
+
+    (-0.40676489798778065, -0.25064470734555316)
+
+The 99% confidence interval does indeed cross 0, so we could not reject the null
+hypothesis at the alpha = 0.01 level.
+
+To build confidence in this method, you can perform a simulation analysis to check
+that the confidence interval achieves the nominal coverage. You can set up a
+DataSimulator using the functions in hierarch.power as follows. ::
+
+    import scipy.stats as stats
+    from hierarch.power import DataSimulator
+
+    parameters = [[0, 1.525], #difference in means due to treatment
+                  [stats.norm, 0, 1], #column 1 distribution - stats.norm(loc=0, scale=1)
+                  [stats.lognorm, 0.75]] #column 2 distribution - stats.lognorm(s=0.75)
+
+    sim = DataSimulator(parameters, random_state=1)
+
+    hierarchy = [2, #treatments
+                 3, #samples
+                 3] #within-sample measurements
+
+    sim.fit(hierarchy)
+
+The "true" difference between the two samples is 1.525 according to the simulation
+parameters, so 95% of the 95% confidence intervals that hierarch calculates should
+contain this value. You can test this with the following code. ::
+
+    true_difference = 1.525
+    coverage = 0
+    loops = 1000
+
+    for i in range(loops):
+        data = sim.generate()
+        lower, upper = confidence_interval(data, 0, interval=95, bootstraps=100, permutations='all')
+        if lower <= true_difference <= upper:
+            coverage += 1
+
+    print("Coverage:", coverage/loops)
+
+    Coverage: 0.946
+
+This is within the simulation's Monte Carlo error (+/- 0.7%) of the nominal 95%,
+so we can feel confident in this method of interval computation.
+
+Regression Coefficient Confidence Intervals
+-------------------------------------------
+The confidence_interval function can also be used on many-sample datasets that represent
+a hypothesized linear relationship. Let's generate a dataset with a "true" slope of
+2/3. ::
+
+    paramlist = [[0, 2/3, 4/3, 2], [stats.norm], [stats.norm]]
+    hierarchy = [4, 2, 3]
+    datagen = DataSimulator(paramlist, random_state=2)
+    datagen.fit(hierarchy)
+    data = datagen.generate()
+    data
+
++---+---+---+----------+
+| 0 | 1 | 2 | 3        |
++===+===+===+==========+
+| 1 | 1 | 1 | 0.470264 |
++---+---+---+----------+
+| 1 | 1 | 2 | -0.36477 |
++---+---+---+----------+
+| 1 | 1 | 3 | 1.166621 |
++---+---+---+----------+
+| 1 | 2 | 1 | -0.8333  |
++---+---+---+----------+
+| 1 | 2 | 2 | -0.85157 |
++---+---+---+----------+
+| 1 | 2 | 3 | -1.3149  |
++---+---+---+----------+
+| 2 | 1 | 1 | 0.708561 |
++---+---+---+----------+
+| 2 | 1 | 2 | 0.154405 |
++---+---+---+----------+
+| 2 | 1 | 3 | 0.798892 |
++---+---+---+----------+
+| 2 | 2 | 1 | -2.38199 |
++---+---+---+----------+
+| 2 | 2 | 2 | -1.64797 |
++---+---+---+----------+
+| 2 | 2 | 3 | -2.66707 |
++---+---+---+----------+
+| 3 | 1 | 1 | 3.974506 |
++---+---+---+----------+
+| 3 | 1 | 2 | 3.321076 |
++---+---+---+----------+
+| 3 | 1 | 3 | 3.463612 |
++---+---+---+----------+
+| 3 | 2 | 1 | 2.888003 |
++---+---+---+----------+
+| 3 | 2 | 2 | 1.466742 |
++---+---+---+----------+
+| 3 | 2 | 3 | 3.26068  |
++---+---+---+----------+
+| 4 | 1 | 1 | 3.73128  |
++---+---+---+----------+
+| 4 | 1 | 2 | 0.036135 |
++---+---+---+----------+
+| 4 | 1 | 3 | -0.05483 |
++---+---+---+----------+
+| 4 | 2 | 1 | 1.268975 |
++---+---+---+----------+
+| 4 | 2 | 2 | 3.615265 |
++---+---+---+----------+
+| 4 | 2 | 3 | 2.902522 |
++---+---+---+----------+
+
+You can compute a confidence interval in the same manner as above. This time, set the
+**compare** keyword argument to "corr" for clarity, though "corr" is also the default
+setting for **compare** when computing a confidence interval. ::
+
+    confidence_interval(
+        data,
+        treatment_col=0,
+        compare='corr',
+        interval=95,
+        bootstraps=500,
+        permutations="all",
+        random_state=1,
+    )
+
+    (0.3410887712843298, 1.7540918236455125)
+
+This confidence interval corresponds to the slope in a linear model.
+You can check this by computing the slope coefficient via Ordinary Least Squares. ::
+
+    import scipy.stats as stats
+    from hierarch.internal_functions import GroupbyMean
+
+    grouper = GroupbyMean()
+    test = grouper.fit_transform(data)
+    stats.linregress(test[:,0], test[:,-1])
+
+    LinregressResult(slope=1.0515132531203024, intercept=-1.6658194480556106,
+    rvalue=0.6444075548383587, pvalue=0.08456152533094284,
+    stderr=0.5094006523081002, intercept_stderr=1.3950511403849626)
+
+The slope, 1.0515, is indeed near the center of our computed interval.
+
+Again, it is worthwhile to check that confidence_interval is performing adequately. You can
+set up a simulation as above to check the coverage of the 95% confidence interval. ::
+
+    true_difference = 2/3
+    coverage = 0
+    loops = 1000
+
+    for i in range(loops):
+        data = datagen.generate()
+        lower, upper = confidence_interval(data, 0, interval=95, bootstraps=100, permutations='all')
+        if lower <= true_difference <= upper:
+            coverage += 1
+
+    print(coverage/loops)
+
+    0.956
+
+This is within the simulation's Monte Carlo error (+/- 0.7%) of the nominal 95% and
+therefore acceptable. You can check the coverage of other intervals by changing the
+**interval** keyword argument, though be aware that Monte Carlo error depends on the
+probability of the event of interest. ::
+
+    true_difference = 2/3
+    coverage = 0
+    loops = 1000
+
+    for i in range(loops):
+        data = datagen.generate()
+        lower, upper = confidence_interval(data, 0, interval=99, bootstraps=100, permutations='all')
+        if lower <= true_difference <= upper:
+            coverage += 1
+
+    print(coverage/loops)
+
+    0.99
+
+Using the confidence_interval function, researchers can rapidly calculate confidence intervals
+for effect sizes that maintain nominal coverage, without worrying about distributional
+assumptions.
\ No newline at end of file
diff --git a/docs/user/hypothesis.rst b/docs/user/hypothesis.rst
new file mode 100644
index 0000000..0357108
--- /dev/null
+++ b/docs/user/hypothesis.rst
@@ -0,0 +1,428 @@
+Hypothesis Testing
+==================
+
+Two-Sample Hypothesis Tests
+---------------------------
+Performing a hierarchical permutation test for difference of means is simple.
+Consider an imaging experiment with two treatment groups, three coverslips in
+each group, and three images (fields of view) within each coverslip. If you have
+the data stored in an Excel file, you can use pandas to either directly read the
+file or copy it in from the clipboard, as below.
:: + + import pandas as pd + import numpy as np + import hierarch as ha + + data = pd.read_clipboard() + + print(data) + ++------------+------+-------------+----------+ +| Condition | Well | Measurement | Values | ++============+======+=============+==========+ +| None | 1 | 1 | 5.202258 | ++------------+------+-------------+----------+ +| None | 1 | 2 | 5.136128 | ++------------+------+-------------+----------+ +| None | 1 | 3 | 5.231401 | ++------------+------+-------------+----------+ +| None | 2 | 1 | 5.336643 | ++------------+------+-------------+----------+ +| None | 2 | 2 | 5.287973 | ++------------+------+-------------+----------+ +| None | 2 | 3 | 5.375359 | ++------------+------+-------------+----------+ +| None | 3 | 1 | 5.350692 | ++------------+------+-------------+----------+ +| None | 3 | 2 | 5.465206 | ++------------+------+-------------+----------+ +| None | 3 | 3 | 5.422602 | ++------------+------+-------------+----------+ +| +Treatment | 4 | 1 | 5.695427 | ++------------+------+-------------+----------+ +| +Treatment | 4 | 2 | 5.668457 | ++------------+------+-------------+----------+ +| +Treatment | 4 | 3 | 5.752592 | ++------------+------+-------------+----------+ +| +Treatment | 5 | 1 | 5.583562 | ++------------+------+-------------+----------+ +| +Treatment | 5 | 2 | 5.647895 | ++------------+------+-------------+----------+ +| +Treatment | 5 | 3 | 5.618315 | ++------------+------+-------------+----------+ +| +Treatment | 6 | 1 | 5.642983 | ++------------+------+-------------+----------+ +| +Treatment | 6 | 2 | 5.47072 | ++------------+------+-------------+----------+ +| +Treatment | 6 | 3 | 5.686654 | ++------------+------+-------------+----------+ + +It is important to note that the ordering of the columns from left to right +reflects the experimental design scheme. This is necessary for hierarch +to infer the clustering within your dataset. In case your data is not +ordered properly, pandas makes it easy enough to index your data in the +correct order. :: + + + columns = ['Condition', 'Coverslip', 'Field of View', 'Mean Fluorescence'] + + data[columns] + +Next, you can call two_sample_test from hierarch's stats module, which will +calculate the p-value. You have to specify what column is the treatment +column - in this case, "Condition." Indexing starts at 0, so you input +treatment_col=0. In this case, there are only 6c3 = 20 ways to permute the +treatment labels, so you should specify "all" permutations be used. :: + + p_val = ha.stats.two_sample_test(data, treatment_col=0, + bootstraps=500, permutations='all', + random_state=1) + + print('p-value =', p_val) + + #out: p-value = 0.0406 + +There are a number of parameters that can be used to modify two_sample_test. :: + + ha.stats.two_sample_test(data_array, + treatment_col, + compare="means", + skip=None, + bootstraps=100, + permutations=1000, + kind='weights', + return_null=False, + random_state=None) + +**compare**: The default "means" assumes that you are testing for a difference in means, so it uses the Welch t-statistic. For flexibility, two_sample_test can take a test statistic function as an argument. + +**skip**: indicates the indices of columns that should be skipped in the bootstrapping procedure. + +A simple rule of thumb is that columns should be resampled with replacement only if they were originally sampled with replacement (or effectively sampled with replacement). For example, consider an imaging experiment in which you image several fields of view in a well, which each contain several cells. 
While you can consider the cells sampled with replacement from the well (there are so many more cells in the population that this assumption is fair), the cells are not sampled with replacement from the field of view (as you are measuring ALL cells in each field of view). The 10% condition is a reasonable rule of thumb here - if your sample represents less than 10% of the population, you can treat it as sampled with replacement.
+
+**bootstraps**: indicates the number of bootstrapped samples to be drawn from data_array.
+
+Generally, as the number of possible permutations of your data increases, the number of bootstraps should decrease. If the goal of bootstrapping is to include the standard error of the biological samples in the null distribution, 50-100 bootstraps are sufficient given a large enough set of possible permutations.
+
+**permutations**: indicates the number of permutations of the treatment label PER bootstrapped sample.
+
+Inputting "all" will enumerate all of the possible permutations and iterate through them one by one. This is done using a generator, so the permutations are not stored in memory, but it can still be prohibitively time-consuming for large datasets.
+
+**kind**: "weights" or "indexes" or "bayesian" specifies the bootstrapping algorithm. "weights" returns an array the same size as the input array, but with the data reweighted according to the Efron bootstrap procedure. "indexes" uses the same algorithm, but returns a reindexed array. "bayesian" also returns a reweighted array, but the weights are allowed to be any positive real number rather than just integers.
+
+**return_null**: setting this to True will also return the empirical null distribution as a list.
+
+**random_state**: allows you to specify a random seed for reproducibility.
+
+Many-Sample Hypothesis Tests - Several Hypotheses
+-------------------------------------------------
+Researchers may want to perform a series of hypothesis tests to determine
+whether there are significant differences in some parameter between three
+or more unrelated groups. This is similar to the goal of one-way ANOVA. To
+this end, hierarch includes the multi_sample_test function, which performs
+multiple two-sample tests in the vein of post-hoc tests after ANOVA. The
+researcher can also choose to make a multiple-comparison correction in the
+form of the Benjamini-Hochberg procedure, which controls for False Discovery
+Rate.
+
+Consider an experiment with four treatment groups. We can simulate a dataset
+as follows. ::
+
+    from hierarch.power import DataSimulator
+    import scipy.stats as stats
+
+    paramlist = [[0, 1, 4, 0], [stats.norm], [stats.norm]]
+    hierarchy = [4, 3, 3]
+
+    datagen = DataSimulator(paramlist, random_state=1)
+    datagen.fit(hierarchy)
+    data = datagen.generate()
+    data
+
++---+---+---+----------+
+| 0 | 1 | 2 | 3        |
++===+===+===+==========+
+| 1 | 1 | 1 | -0.39087 |
++---+---+---+----------+
+| 1 | 1 | 2 | 0.182674 |
++---+---+---+----------+
+| 1 | 1 | 3 | -0.13654 |
++---+---+---+----------+
+| 1 | 2 | 1 | 1.420464 |
++---+---+---+----------+
+| 1 | 2 | 2 | 0.86134  |
++---+---+---+----------+
+| 1 | 2 | 3 | 0.529161 |
++---+---+---+----------+
+| 1 | 3 | 1 | -0.45147 |
++---+---+---+----------+
+| 1 | 3 | 2 | 0.073245 |
++---+---+---+----------+
+| 1 | 3 | 3 | 0.338579 |
++---+---+---+----------+
+| 2 | 1 | 1 | -0.57876 |
++---+---+---+----------+
+| 2 | 1 | 2 | 0.990907 |
++---+---+---+----------+
+| 2 | 1 | 3 | 0.703567 |
++---+---+---+----------+
+| 2 | 2 | 1 | -0.80581 |
++---+---+---+----------+
+| 2 | 2 | 2 | 0.016343 |
++---+---+---+----------+
+| 2 | 2 | 3 | 1.730584 |
++---+---+---+----------+
+| 2 | 3 | 1 | 1.024184 |
++---+---+---+----------+
+| 2 | 3 | 2 | 1.660018 |
++---+---+---+----------+
+| 2 | 3 | 3 | 1.663697 |
++---+---+---+----------+
+| 3 | 1 | 1 | 5.580886 |
++---+---+---+----------+
+| 3 | 1 | 2 | 2.351026 |
++---+---+---+----------+
+| 3 | 1 | 3 | 3.085442 |
++---+---+---+----------+
+| 3 | 2 | 1 | 6.62389  |
++---+---+---+----------+
+| 3 | 2 | 2 | 5.227821 |
++---+---+---+----------+
+| 3 | 2 | 3 | 5.244181 |
++---+---+---+----------+
+| 3 | 3 | 1 | 3.850566 |
++---+---+---+----------+
+| 3 | 3 | 2 | 2.716497 |
++---+---+---+----------+
+| 3 | 3 | 3 | 4.532037 |
++---+---+---+----------+
+| 4 | 1 | 1 | 0.403147 |
++---+---+---+----------+
+| 4 | 1 | 2 | -0.93322 |
++---+---+---+----------+
+| 4 | 1 | 3 | -0.38909 |
++---+---+---+----------+
+| 4 | 2 | 1 | -0.04362 |
++---+---+---+----------+
+| 4 | 2 | 2 | -0.91633 |
++---+---+---+----------+
+| 4 | 2 | 3 | -0.06985 |
++---+---+---+----------+
+| 4 | 3 | 1 | 0.642196 |
++---+---+---+----------+
+| 4 | 3 | 2 | 0.582299 |
++---+---+---+----------+
+| 4 | 3 | 3 | 0.040421 |
++---+---+---+----------+
+
+This dataset has been generated such that treatments 1 and 4 have the same mean, while
+treatment 2 represents a slight difference and treatment 3 represents a large difference.
+There are six total comparisons that can be made, which can be performed automatically
+using multi_sample_test as follows. ::
+
+    from hierarch.stats import multi_sample_test
+
+    multi_sample_test(data, treatment_col=0, hypotheses="all",
+                      correction=None, bootstraps=1000,
+                      permutations="all", random_state=111)
+
+    array([[2.0, 3.0, 0.0355],
+           [1.0, 3.0, 0.0394],
+           [3.0, 4.0, 0.0407],
+           [2.0, 4.0, 0.1477],
+           [1.0, 2.0, 0.4022],
+           [1.0, 4.0, 0.4559]], dtype=object)
+
+The first two columns indicate the conditions being compared, while the last column indicates
+the uncorrected p-value. Because several hypotheses are being tested, it is advisable
+to make a multiple-comparisons correction. Currently, hierarch can automatically perform the
+Benjamini-Hochberg procedure, which controls False Discovery Rate. By indicating the "fdr"
+correction, the output array has an additional column showing the q-values, or adjusted
+p-values.
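+
+For intuition, you can reproduce Benjamini-Hochberg q-values by hand from the
+uncorrected p-values above. A minimal sketch of the standard step-up procedure
+(written here for illustration; it is not necessarily hierarch's internal code)::
+
+    import numpy as np
+
+    def bh_qvalues(pvals):
+        # Benjamini-Hochberg: q_(i) = min over j >= i of p_(j) * n / j,
+        # where p_(1) <= ... <= p_(n) are the sorted p-values.
+        pvals = np.asarray(pvals)
+        n = len(pvals)
+        order = np.argsort(pvals)
+        scaled = pvals[order] * n / np.arange(1, n + 1)
+        # enforce monotonicity from the largest p-value downward
+        qvals = np.minimum.accumulate(scaled[::-1])[::-1]
+        out = np.empty(n)
+        out[order] = qvals
+        return out
+
+    bh_qvalues([0.0355, 0.0394, 0.0407, 0.1477, 0.4022, 0.4559])
+    # array([0.0814, 0.0814, 0.0814, 0.22155, 0.4559, 0.4559])
+
+hierarch applies this correction for you when you pass correction='fdr'.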
:: + + multi_sample_test(data, treatment_col=0, hypotheses="all", + correction='fdr', bootstraps=1000, + permutations="all", random_state=111) + array([[2.0, 3.0, 0.0355, 0.0814], + [1.0, 3.0, 0.0394, 0.0814], + [3.0, 4.0, 0.0407, 0.0814], + [2.0, 4.0, 0.1477, 0.22155], + [1.0, 2.0, 0.4022, 0.4559], + [1.0, 4.0, 0.4559, 0.4559]], dtype=object) + +Testing more hypotheses necessarily lowers the p-value required to call a result significant. However, +we are not always interested in performing every comparison - perhaps condition 2 is a control that all +other conditions are meant to be compared to. The comparisons of interest can be specified using a list. :: + + tests = [[2.0, 1.0], [2.0, 3.0], [2.0, 4.0]] + multi_sample_test(data, treatment_col=0, hypotheses=tests, + correction='fdr', bootstraps=1000, + permutations="all", random_state=222) + array([[2.0, 3.0, 0.036, 0.108], + [2.0, 4.0, 0.1506, 0.2259], + [2.0, 1.0, 0.4036, 0.4036]], dtype=object) + +Many-Sample Hypothesis Tests - Single Hypothesis +------------------------------------------------ +One-way ANOVA and similar tests (like multi_sample_test) are inappropriate when +you have several samples meant to test a single hypothesis. For example, perhaps +you have several samples with different concentrations of the same drug treatment. +In this case, hierarch provides linear_regression_test, which is equivalent to +performing a hypothesis test on a linear model against the null hypothesis that +the slope coefficient is equal to 0. + +This hypothesis test uses a studentized covariance test statistic - essentially, +the sample covariance divided by the standard error of the sample covariance. This +test statistic is approximately normally distributed and in the two-sample case, +this test gives the same result as two_sample_test. + +First, consider a dataset with two treatment groups, four samples each, and three +measurements on each sample. 
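+
+Before generating the data, it may help to see the test statistic in miniature.
+The following is only a rough sketch of the idea - a covariance divided by an
+estimate of its standard error - and ignores the hierarchical structure that
+hierarch.stats.studentized_covariance actually accounts for::
+
+    import numpy as np
+
+    def studentized_covariance_sketch(x, y):
+        # per-observation contributions to the sample covariance
+        d = (x - x.mean()) * (y - y.mean())
+        cov = d.sum() / (len(d) - 1)
+        # crude standard error of the covariance estimate
+        se = d.std(ddof=1) / np.sqrt(len(d))
+        return cov / se
+
+Dividing by the standard error is what makes the statistic approximately
+normally distributed, so its null distribution is comparable across datasets.
+The dataset itself can be simulated as before. ::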
+
+    from hierarch.power import DataSimulator
+    import scipy.stats as stats
+
+    paramlist = [[0, 2], [stats.norm], [stats.norm]]
+    hierarchy = [2, 4, 3]
+
+    datagen = DataSimulator(paramlist, random_state=2)
+    datagen.fit(hierarchy)
+    data = datagen.generate()
+    data
+
++---+---+---+----------+
+| 0 | 1 | 2 | 3        |
++===+===+===+==========+
+| 1 | 1 | 1 | 0.470264 |
++---+---+---+----------+
+| 1 | 1 | 2 | -0.36477 |
++---+---+---+----------+
+| 1 | 1 | 3 | 1.166621 |
++---+---+---+----------+
+| 1 | 2 | 1 | -0.8333  |
++---+---+---+----------+
+| 1 | 2 | 2 | -0.85157 |
++---+---+---+----------+
+| 1 | 2 | 3 | -1.3149  |
++---+---+---+----------+
+| 1 | 3 | 1 | 0.041895 |
++---+---+---+----------+
+| 1 | 3 | 2 | -0.51226 |
++---+---+---+----------+
+| 1 | 3 | 3 | 0.132225 |
++---+---+---+----------+
+| 1 | 4 | 1 | -3.04865 |
++---+---+---+----------+
+| 1 | 4 | 2 | -2.31464 |
++---+---+---+----------+
+| 1 | 4 | 3 | -3.33374 |
++---+---+---+----------+
+| 2 | 1 | 1 | 4.641172 |
++---+---+---+----------+
+| 2 | 1 | 2 | 3.987742 |
++---+---+---+----------+
+| 2 | 1 | 3 | 4.130278 |
++---+---+---+----------+
+| 2 | 2 | 1 | 3.55467  |
++---+---+---+----------+
+| 2 | 2 | 2 | 2.133408 |
++---+---+---+----------+
+| 2 | 2 | 3 | 3.927347 |
++---+---+---+----------+
+| 2 | 3 | 1 | 3.73128  |
++---+---+---+----------+
+| 2 | 3 | 2 | 0.036135 |
++---+---+---+----------+
+| 2 | 3 | 3 | -0.05483 |
++---+---+---+----------+
+| 2 | 4 | 1 | 1.268975 |
++---+---+---+----------+
+| 2 | 4 | 2 | 3.615265 |
++---+---+---+----------+
+| 2 | 4 | 3 | 2.902522 |
++---+---+---+----------+
+
+Performing linear_regression_test and two_sample_test on this dataset should
+give very similar p-values. ::
+
+    from hierarch.stats import linear_regression_test, two_sample_test
+
+    linear_regression_test(data, treatment_col=0,
+                           bootstraps=1000, permutations='all',
+                           random_state=1)
+    0.013714285714285714
+
+    two_sample_test(data, treatment_col=0,
+                    bootstraps=1000, permutations='all',
+                    random_state=1)
+    0.013714285714285714
+
+However, unlike two_sample_test, this test can handle any number of conditions. Consider instead
+a dataset with four treatment conditions that have a linear relationship.
:: + + paramlist = [[0, 2/3, 4/3, 2], [stats.norm], [stats.norm]] + hierarchy = [4, 2, 3] + datagen = DataSimulator(paramlist, random_state=2) + datagen.fit(hierarchy) + data = datagen.generate() + data + ++---+---+---+----------+ +| 0 | 1 | 2 | 3 | ++===+===+===+==========+ +| 1 | 1 | 1 | 0.470264 | ++---+---+---+----------+ +| 1 | 1 | 2 | -0.36477 | ++---+---+---+----------+ +| 1 | 1 | 3 | 1.166621 | ++---+---+---+----------+ +| 1 | 2 | 1 | -0.8333 | ++---+---+---+----------+ +| 1 | 2 | 2 | -0.85157 | ++---+---+---+----------+ +| 1 | 2 | 3 | -1.3149 | ++---+---+---+----------+ +| 2 | 1 | 1 | 0.708561 | ++---+---+---+----------+ +| 2 | 1 | 2 | 0.154405 | ++---+---+---+----------+ +| 2 | 1 | 3 | 0.798892 | ++---+---+---+----------+ +| 2 | 2 | 1 | -2.38199 | ++---+---+---+----------+ +| 2 | 2 | 2 | -1.64797 | ++---+---+---+----------+ +| 2 | 2 | 3 | -2.66707 | ++---+---+---+----------+ +| 3 | 1 | 1 | 3.974506 | ++---+---+---+----------+ +| 3 | 1 | 2 | 3.321076 | ++---+---+---+----------+ +| 3 | 1 | 3 | 3.463612 | ++---+---+---+----------+ +| 3 | 2 | 1 | 2.888003 | ++---+---+---+----------+ +| 3 | 2 | 2 | 1.466742 | ++---+---+---+----------+ +| 3 | 2 | 3 | 3.26068 | ++---+---+---+----------+ +| 4 | 1 | 1 | 3.73128 | ++---+---+---+----------+ +| 4 | 1 | 2 | 0.036135 | ++---+---+---+----------+ +| 4 | 1 | 3 | -0.05483 | ++---+---+---+----------+ +| 4 | 2 | 1 | 1.268975 | ++---+---+---+----------+ +| 4 | 2 | 2 | 3.615265 | ++---+---+---+----------+ +| 4 | 2 | 3 | 2.902522 | ++---+---+---+----------+ + +For this dataset, there are 8! / (2!^4) = 2,520 total permutations. We will choose a random +subset of them to compute the p-value. :: + + linear_regression_test(data, treatment_col=0, + bootstraps=100, permutations=1000, + random_state=1) + 0.00767 + +Between these three tests, researchers can address a large variety of experimental designs. Unfortunately, +interaction effects are outside the scope of permutation tests - it is not possible to construct an +exact test for interaction effects in general. However, an asymptotic test for interaction effects +may be implemented in the future. diff --git a/docs/user/importing.rst b/docs/user/importing.rst new file mode 100644 index 0000000..2e852c6 --- /dev/null +++ b/docs/user/importing.rst @@ -0,0 +1,9 @@ +Importing Data +============== + +Hierarch is compatible with pandas DataFrames and numpy arrays. +Pandas is capable of conveniently importing data from a wide variety +of formats, including Excel files. :: + + import pandas as pd + data = pd.read_excel(filepath) \ No newline at end of file diff --git a/docs/user/power.rst b/docs/user/power.rst new file mode 100644 index 0000000..fc0ad68 --- /dev/null +++ b/docs/user/power.rst @@ -0,0 +1,196 @@ +Power Analysis +============== + +Researchers can also use hierarch to determine the appropriate sample size +for a future experiment. hierarch.power provides a class, DataSimulator, +to assist in power analyses. DataSimulator is initialized with a list +specifying the probability distributions generating the data and an optional +random_state for reproducibility. + +In this case, consider an experiment similar to the one above - two treatment +conditions, but the sample size at each level of hierarchy is yet to be +determined. First, you must posit a data-generating process for the analysis. + +Suppose you assume that the column 1 values are normally distributed with +mean 0 and variance 1. 
From past experience, you believe that the column 2 values follow a right-tailed distribution, so you choose to model it as a lognormal distribution with a shape parameter of 0.75. Finally, you decide that you want to achieve 80% power for a mean difference equal to one standard deviation. You calculate that the summed standard deviation of the two distributions you specified is 1.525 and input that as a parameter, as well. ::
+
+    import numpy as np
+    import scipy.stats as stats
+
+    import hierarch as ha
+    from hierarch.power import DataSimulator
+
+    parameters = [[0, 1.525], #difference in means due to treatment
+                  [stats.norm, 0, 1], #column 1 distribution - stats.norm(loc=0, scale=1)
+                  [stats.lognorm, 0.75]] #column 2 distribution - stats.lognorm(s=0.75)
+
+    sim = DataSimulator(parameters, random_state=1)
+
+Next, you choose an experimental design to simulate. Perhaps, like above, you
+decide to start with three samples per treatment condition and three measurements
+within each sample. Calling the .fit() function will ready the DataSimulator to
+produce randomly-generated data according to this experimental scheme. ::
+
+    hierarchy = [2, #treatments
+                 3, #samples
+                 3] #within-sample measurements
+
+    sim.fit(hierarchy)
+
+By calling the .generate() function, DataSimulator uses the prespecified
+parameters to generate a simulated dataset. ::
+
+    print(sim.generate())
+
++---+---+---+----------+
+| 0 | 1 | 2 | 3        |
++===+===+===+==========+
+| 1 | 1 | 1 | 1.014087 |
++---+---+---+----------+
+| 1 | 1 | 2 | 1.891843 |
++---+---+---+----------+
+| 1 | 1 | 3 | 1.660049 |
++---+---+---+----------+
+| 1 | 2 | 1 | 2.068442 |
++---+---+---+----------+
+| 1 | 2 | 2 | 1.843164 |
++---+---+---+----------+
+| 1 | 2 | 3 | 2.328488 |
++---+---+---+----------+
+| 1 | 3 | 1 | 0.906038 |
++---+---+---+----------+
+| 1 | 3 | 2 | 1.215424 |
++---+---+---+----------+
+| 1 | 3 | 3 | 1.027005 |
++---+---+---+----------+
+| 2 | 1 | 1 | 1.788798 |
++---+---+---+----------+
+| 2 | 1 | 2 | 1.252083 |
++---+---+---+----------+
+| 2 | 1 | 3 | 1.024889 |
++---+---+---+----------+
+| 2 | 2 | 1 | 2.986665 |
++---+---+---+----------+
+| 2 | 2 | 2 | 3.254925 |
++---+---+---+----------+
+| 2 | 2 | 3 | 3.436481 |
++---+---+---+----------+
+| 2 | 3 | 1 | 2.784636 |
++---+---+---+----------+
+| 2 | 3 | 2 | 4.610765 |
++---+---+---+----------+
+| 2 | 3 | 3 | 4.099078 |
++---+---+---+----------+
+
+You can use this to set up a simple power analysis. The following
+code performs a hierarchical permutation test with 50,000 total
+permutations (though this is overkill in the 2, 3, 3 case) on each
+of 100 simulated datasets and prints the fraction of them that return
+a significant result, assuming a p-value cutoff of 0.05. ::
+
+    pvalues = []
+    loops = 100
+    for i in range(loops):
+        data = sim.generate()
+        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
+
+    print(np.less(pvalues, 0.05).sum() / loops)
+
+    #out: 0.29
+
+The targeted power is 0.8, so you can fit the DataSimulator with a larger sample
+size. You can run the following code block with different sample sizes until
+you determine the column 1 sample size that achieves at least 80% power.
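+
+Since you will be rerunning this loop for several candidate designs, it may be
+convenient to wrap it in a small helper first. A sketch using the same calls as
+above (the helper name is ours, not part of hierarch)::
+
+    def estimate_power(sim, loops=100, alpha=0.05):
+        # fraction of simulated datasets that yield p < alpha
+        pvalues = [
+            ha.stats.two_sample_test(sim.generate(), 0, bootstraps=500, permutations=100)
+            for _ in range(loops)
+        ]
+        return np.less(pvalues, alpha).sum() / loops
+
+Written out explicitly, with ten column 1 samples per treatment, the loop looks
+like this. ::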
+
+    sim.fit([2,10,3])
+
+    pvalues = []
+    loops = 100
+    for i in range(loops):
+        data = sim.generate()
+        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
+
+    print(np.less(pvalues, 0.05).sum() / loops)
+
+    #out: 0.81
+
+You note, however, that increasing the number of column 1 samples is much
+more laborious than increasing the number of column 2 samples. For example,
+perhaps the column 1 samples represent mice, while column 2 represents
+multiple measurements of some feature from each mouse's cells. You have
+posited that slightly more than half of your observed variance comes from the
+column 2 samples - indeed, in biological samples, within-sample variance
+can be equal to or greater than between-sample variance. After all, that
+is why we make multiple measurements within the same biological sample!
+Given that this is a reasonable assumption, perhaps 80% power can be
+achieved with an experimental design that makes more column 2 measurements. ::
+
+    sim.fit([2,8,30])
+
+    pvalues = []
+    loops = 100
+    for i in range(loops):
+        data = sim.generate()
+        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
+
+    print(np.less(pvalues, 0.05).sum() / loops)
+
+    #out: 0.84
+
+Of course, adding column 2 samples has a much more limited
+influence on power compared to adding column 1 samples - with infinite
+column 2 samples, the standard error for the difference of means is
+still dependent on the variance of the column 1 data-generating process.
+This is illustrated with an excessive example of 300 column 2 samples
+per column 1 sample, which shows no improvement in power over using
+only 30 column 2 samples. ::
+
+    sim.fit([2,8,300])
+
+    pvalues = []
+    loops = 100
+    for i in range(loops):
+        data = sim.generate()
+        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
+
+    print(np.less(pvalues, 0.05).sum() / loops)
+
+    #out: 0.83
+
+On the other hand, adding only four column 1 samples to each treatment group
+(rather than 270 column 2 samples to each column 1 sample) brings the power to 97%.
+
+Finally, to ensure that hierarchical permutation is valid for the posited
+data-generating process, you can do another power analysis under the null
+hypothesis - that there is no difference between groups. To compensate for
+Monte Carlo error, you should increase the number of loops - at 100 loops,
+the error for an event that happens with 5% probability is +/- 2%, but at
+1000 loops, it is only +/- 0.7%. ::
+
+    parameters = [[0, 0], #no difference in means because we are sampling under the null hypothesis
+                  [stats.norm, 0, 1], #column 1 probability distribution
+                  [stats.lognorm, 0.75]] #column 2 probability distribution
+    sim = ha.power.DataSimulator(parameters, random_state=1)
+    sim.fit([2,12,30])
+
+    pvalues = []
+    loops = 1000
+    for i in range(loops):
+        data = sim.generate()
+        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
+
+    print(np.less(pvalues, 0.05).sum() / loops)
+
+    #out: 0.05
+
+Hierarchical permutation experiences no size distortion for this experimental
+design and is therefore a valid test.
+
+Note: these power calculations are subject to Monte Carlo error, so you should
+consider upping the number of loops if the precise value for power is of extreme
+importance. In nonclinical settings, however, small-scale power analyses are
+sufficient and can be a valuable guide for choosing the sample size for your study.
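+
+As an aside, the Monte Carlo error bars quoted above are just binomial standard
+errors, sqrt(p * (1 - p) / loops), assuming each simulated test is an independent
+draw. You can verify the quoted figures directly::
+
+    import numpy as np
+
+    # standard error of an estimated proportion p = 0.05 over `loops` trials
+    for loops in (100, 1000):
+        print(loops, np.sqrt(0.05 * 0.95 / loops))
+
+    # 100 loops: ~0.022 (about 2%); 1000 loops: ~0.0069 (about 0.7%)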
\ No newline at end of file diff --git a/docs/user/usage.rst b/docs/user/usage.rst index 5a8f89f..843e6ee 100644 --- a/docs/user/usage.rst +++ b/docs/user/usage.rst @@ -1,633 +1,11 @@ Usage ===== -Importing Data --------------- -Hierarch is compatible with pandas DataFrames and numpy arrays. -Pandas is capable of conveniently importing data from a wide variety -of formats, including Excel files. :: +.. toctree:: - import pandas as pd - data = pd.read_excel(filepath) + importing.rst + hypothesis.rst + confidence.rst + power.rst -Two-Sample Hypothesis Tests ---------------------------- -Performing a hierarchical permutation test for difference of means is simple. -Consider an imaging experiment with two treatment groups, three coverslips in -each group, and three images (fields of view) within each coverslip. If you have -the data stored in an Excel file, you can use pandas to either directly read the -file or copy it in from the clipboard, as below. :: - import pandas as pd - import numpy as np - import hierarch as ha - - data = pd.read_clipboard() - - print(data) - -+------------+------+-------------+----------+ -| Condition | Well | Measurement | Values | -+============+======+=============+==========+ -| None | 1 | 1 | 5.202258 | -+------------+------+-------------+----------+ -| None | 1 | 2 | 5.136128 | -+------------+------+-------------+----------+ -| None | 1 | 3 | 5.231401 | -+------------+------+-------------+----------+ -| None | 2 | 1 | 5.336643 | -+------------+------+-------------+----------+ -| None | 2 | 2 | 5.287973 | -+------------+------+-------------+----------+ -| None | 2 | 3 | 5.375359 | -+------------+------+-------------+----------+ -| None | 3 | 1 | 5.350692 | -+------------+------+-------------+----------+ -| None | 3 | 2 | 5.465206 | -+------------+------+-------------+----------+ -| None | 3 | 3 | 5.422602 | -+------------+------+-------------+----------+ -| +Treatment | 4 | 1 | 5.695427 | -+------------+------+-------------+----------+ -| +Treatment | 4 | 2 | 5.668457 | -+------------+------+-------------+----------+ -| +Treatment | 4 | 3 | 5.752592 | -+------------+------+-------------+----------+ -| +Treatment | 5 | 1 | 5.583562 | -+------------+------+-------------+----------+ -| +Treatment | 5 | 2 | 5.647895 | -+------------+------+-------------+----------+ -| +Treatment | 5 | 3 | 5.618315 | -+------------+------+-------------+----------+ -| +Treatment | 6 | 1 | 5.642983 | -+------------+------+-------------+----------+ -| +Treatment | 6 | 2 | 5.47072 | -+------------+------+-------------+----------+ -| +Treatment | 6 | 3 | 5.686654 | -+------------+------+-------------+----------+ - -It is important to note that the ordering of the columns from left to right -reflects the experimental design scheme. This is necessary for hierarch -to infer the clustering within your dataset. In case your data is not -ordered properly, pandas makes it easy enough to index your data in the -correct order. :: - - - columns = ['Condition', 'Coverslip', 'Field of View', 'Mean Fluorescence'] - - data[columns] - -Next, you can call two_sample_test from hierarch's stats module, which will -calculate the p-value. You have to specify what column is the treatment -column - in this case, "Condition." Indexing starts at 0, so you input -treatment_col=0. In this case, there are only 6c3 = 20 ways to permute the -treatment labels, so you should specify "all" permutations be used. 
:: - - p_val = ha.stats.two_sample_test(data, treatment_col=0, - bootstraps=500, permutations='all', - random_state=1) - - print('p-value =', p_val) - - #out: p-value = 0.0406 - -There are a number of parameters that can be used to modify two_sample_test. :: - - ha.stats.two_sample_test(data_array, - treatment_col, - compare="means", - skip=None, - bootstraps=100, - permutations=1000, - kind='weights', - return_null=False, - random_state=None) - -**compare**: The default "means" assumes that you are testing for a difference in means, so it uses the Welch t-statistic. For flexibility, two_sample_test can take a test statistic function as an argument. - -**skip**: indicates the indices of columns that should be skipped in the bootstrapping procedure. - -A simple rule of thumb is that columns should be resampled with replacement only if they were originally sampled with replacement (or effectively sampled with replacement). For example, consider an imaging experiment in which you image several fields of view in a well, which each contain several cells. While you can consider the cells sampled with replacement from the well (there are so many more cells in the population that this assumption is fair), the cells are not sampled with replacement from the field of view (as you are measuring ALL cells in each field of view). The 10% condition is a reasonable rule of thumb here - if your sample represents less than 10% of the population, you can treat it as sampled with replacement. - -**bootstraps**: indicates the number of bootstrapped samples to be drawn from data_array. - -Generally, as the number of possible permutations of your data increases, the number of bootstraps should decrease. If the goal of bootstrapping is to include the standard error in the biological samples in the null distribution, only 50-100 bootstraps is sufficient given a large enough set of possible permutations. - -**permutations**: indicates the number of permutations of the treatment label PER bootstrapped sample. - -Inputting "all" will enumerate all of the possible permutations and iterate through them one by one. This is done using a generator, so the permutations are not stored in memory, but is still excessively time consuming for large datasets. - -**kind**: "weights" or "indexes" or "bayesian" specifies the bootstrapping algorithm. "weights" returns an array the same size as the input array, but with the data reweighted according to the Efron bootstrap procedure. "indexes" uses the same algorithm, but returns a reindexed array. "bayesian" also returns a reweighted array, but the weights are allowed to be any real number rather than just integers. - -**return_null**: setting this to True will also return the empirical null distribution as a list. - -**seed**: allows you to specify a random seed for reproducibility. - -Many-Sample Hypothesis Tests - Several Hypotheses -------------------------------------------------- -Researchers may want to perform a series of hypothesis tests to determine -whether there are significant differences between some parameter in three -or more unrelated groups. This is similar to the goal of one-way ANOVA. To -this end, hierarch includes the multi_sample_test function, which performs -multiple two-sample tests in the vein of post-hoc tests after ANOVA. The -researcher can also choose to make a multiple-comparison correction in the -form of the Benjamini-Hochberg procedure, which controls for False Discovery -Rate. - -Consider an experiment with four treatment groups. 
We can simulate a dataset -as follows. :: - - from hierarch.power import DataSimulator - import scipy.stats as stats - - paramlist = [[0, 1, 4, 0], [stats.norm], [stats.norm]] - hierarchy = [4, 3, 3] - - datagen = DataSimulator(paramlist, random_state=1) - datagen.fit(hierarchy) - data = datagen.generate() - data - -+---+---+---+----------+ -| 0 | 1 | 2 | 3 | -+===+===+===+==========+ -| 1 | 1 | 1 | -0.39087 | -+---+---+---+----------+ -| 1 | 1 | 2 | 0.182674 | -+---+---+---+----------+ -| 1 | 1 | 3 | -0.13654 | -+---+---+---+----------+ -| 1 | 2 | 1 | 1.420464 | -+---+---+---+----------+ -| 1 | 2 | 2 | 0.86134 | -+---+---+---+----------+ -| 1 | 2 | 3 | 0.529161 | -+---+---+---+----------+ -| 1 | 3 | 1 | -0.45147 | -+---+---+---+----------+ -| 1 | 3 | 2 | 0.073245 | -+---+---+---+----------+ -| 1 | 3 | 3 | 0.338579 | -+---+---+---+----------+ -| 2 | 1 | 1 | -0.57876 | -+---+---+---+----------+ -| 2 | 1 | 2 | 0.990907 | -+---+---+---+----------+ -| 2 | 1 | 3 | 0.703567 | -+---+---+---+----------+ -| 2 | 2 | 1 | -0.80581 | -+---+---+---+----------+ -| 2 | 2 | 2 | 0.016343 | -+---+---+---+----------+ -| 2 | 2 | 3 | 1.730584 | -+---+---+---+----------+ -| 2 | 3 | 1 | 1.024184 | -+---+---+---+----------+ -| 2 | 3 | 2 | 1.660018 | -+---+---+---+----------+ -| 2 | 3 | 3 | 1.663697 | -+---+---+---+----------+ -| 3 | 1 | 1 | 5.580886 | -+---+---+---+----------+ -| 3 | 1 | 2 | 2.351026 | -+---+---+---+----------+ -| 3 | 1 | 3 | 3.085442 | -+---+---+---+----------+ -| 3 | 2 | 1 | 6.62389 | -+---+---+---+----------+ -| 3 | 2 | 2 | 5.227821 | -+---+---+---+----------+ -| 3 | 2 | 3 | 5.244181 | -+---+---+---+----------+ -| 3 | 3 | 1 | 3.850566 | -+---+---+---+----------+ -| 3 | 3 | 2 | 2.716497 | -+---+---+---+----------+ -| 3 | 3 | 3 | 4.532037 | -+---+---+---+----------+ -| 4 | 1 | 1 | 0.403147 | -+---+---+---+----------+ -| 4 | 1 | 2 | -0.93322 | -+---+---+---+----------+ -| 4 | 1 | 3 | -0.38909 | -+---+---+---+----------+ -| 4 | 2 | 1 | -0.04362 | -+---+---+---+----------+ -| 4 | 2 | 2 | -0.91633 | -+---+---+---+----------+ -| 4 | 2 | 3 | -0.06985 | -+---+---+---+----------+ -| 4 | 3 | 1 | 0.642196 | -+---+---+---+----------+ -| 4 | 3 | 2 | 0.582299 | -+---+---+---+----------+ -| 4 | 3 | 3 | 0.040421 | -+---+---+---+----------+ - -This dataset has been generated such that treatments 1 and 4 have the same mean, while -treatment 2 represents a slight difference and treatment 4 represents a large difference. -There are six total comparisons that can be made, which can be performed automatically -using multi_sample_test as follows. :: - - multi_sample_test(data, treatment_col=0, hypotheses="all", - correction=None, bootstraps=1000, - permutations="all", random_state=111) - - array([[2.0, 3.0, 0.0355], - [1.0, 3.0, 0.0394], - [3.0, 4.0, 0.0407], - [2.0, 4.0, 0.1477], - [1.0, 2.0, 0.4022], - [1.0, 4.0, 0.4559]], dtype=object) - -The first two columns indicate the conditions being compared, while the last column indicates -the uncorrected p-value. Because there are several hypotheses being tested, it is advisable -to make a multiple comparisons correction. Currently, hierarch can automatically perform the -Benjamini-Hochberg procedure, which controls False Discovery Rate. By indicating the "fdr" -correction, the output array has an additional column showing the q-values, or adjusted p-values. 
::
-
-    ha.stats.multi_sample_test(data, treatment_col=0, hypotheses="all",
-                               correction='fdr', bootstraps=1000,
-                               permutations="all", random_state=111)
-    array([[2.0, 3.0, 0.0355, 0.0814],
-           [1.0, 3.0, 0.0394, 0.0814],
-           [3.0, 4.0, 0.0407, 0.0814],
-           [2.0, 4.0, 0.1477, 0.22155],
-           [1.0, 2.0, 0.4022, 0.4559],
-           [1.0, 4.0, 0.4559, 0.4559]], dtype=object)
-
-Testing more hypotheses necessarily lowers the p-value threshold required to
-call a result significant. However, we are not always interested in performing
-every comparison - perhaps condition 2 is a control that all other conditions
-are meant to be compared against. The comparisons of interest can be specified
-using a list. ::
-
-    tests = [[2.0, 1.0], [2.0, 3.0], [2.0, 4.0]]
-    ha.stats.multi_sample_test(data, treatment_col=0, hypotheses=tests,
-                               correction='fdr', bootstraps=1000,
-                               permutations="all", random_state=222)
-    array([[2.0, 3.0, 0.036, 0.108],
-           [2.0, 4.0, 0.1506, 0.2259],
-           [2.0, 1.0, 0.4036, 0.4036]], dtype=object)
-
-Many-Sample Hypothesis Tests - Single Hypothesis
-------------------------------------------------
-One-way ANOVA and similar tests (like multi_sample_test) are inappropriate
-when you have several treatment groups meant to test a single hypothesis. For
-example, perhaps you have several samples with different concentrations of the
-same drug treatment. In this case, hierarch provides linear_regression_test,
-which is equivalent to performing a hypothesis test on a linear model against
-the null hypothesis that the slope coefficient is equal to 0.
-
-This hypothesis test uses a studentized covariance test statistic -
-essentially, the sample covariance divided by the standard error of the
-sample covariance. This test statistic is approximately normally distributed,
-and in the two-sample case, this test gives the same result as
-two_sample_test.
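-
-To make that statistic concrete, here is a rough NumPy sketch of the idea,
-where x holds the (numeric) treatment values and y the measurements. This is a
-simplified illustration using the large-sample variance of a sample
-covariance, not hierarch's numba-accelerated studentized_covariance, which
-also handles the hierarchy's bookkeeping. ::
-
-    import numpy as np
-
-    def studentized_covariance_sketch(x, y):
-        n = x.size
-        # biased (1/n) sample covariance between treatment values and response
-        cov = ((x - x.mean()) * (y - y.mean())).mean()
-        # large-sample variance of the sample covariance:
-        # (E[(x - mx)^2 (y - my)^2] - cov^2) / n
-        m22 = (((x - x.mean()) ** 2) * ((y - y.mean()) ** 2)).mean()
-        return cov / np.sqrt((m22 - cov ** 2) / n)
-
-Permuting the treatment labels over bootstrapped samples and recomputing this
-statistic builds the empirical null distribution, exactly as in
-two_sample_test.
-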
First, consider a dataset with two treatment groups, four samples each, and
-three measurements on each sample. ::
-
-    from hierarch.power import DataSimulator
-    import scipy.stats as stats
-
-    paramlist = [[0, 2], [stats.norm], [stats.norm]]
-    hierarchy = [2, 4, 3]
-
-    datagen = DataSimulator(paramlist, random_state=2)
-    datagen.fit(hierarchy)
-    data = datagen.generate()
-    data
-
-+---+---+---+----------+
-| 0 | 1 | 2 | 3        |
-+===+===+===+==========+
-| 1 | 1 | 1 | 0.470264 |
-+---+---+---+----------+
-| 1 | 1 | 2 | -0.36477 |
-+---+---+---+----------+
-| 1 | 1 | 3 | 1.166621 |
-+---+---+---+----------+
-| 1 | 2 | 1 | -0.8333  |
-+---+---+---+----------+
-| 1 | 2 | 2 | -0.85157 |
-+---+---+---+----------+
-| 1 | 2 | 3 | -1.3149  |
-+---+---+---+----------+
-| 1 | 3 | 1 | 0.041895 |
-+---+---+---+----------+
-| 1 | 3 | 2 | -0.51226 |
-+---+---+---+----------+
-| 1 | 3 | 3 | 0.132225 |
-+---+---+---+----------+
-| 1 | 4 | 1 | -3.04865 |
-+---+---+---+----------+
-| 1 | 4 | 2 | -2.31464 |
-+---+---+---+----------+
-| 1 | 4 | 3 | -3.33374 |
-+---+---+---+----------+
-| 2 | 1 | 1 | 4.641172 |
-+---+---+---+----------+
-| 2 | 1 | 2 | 3.987742 |
-+---+---+---+----------+
-| 2 | 1 | 3 | 4.130278 |
-+---+---+---+----------+
-| 2 | 2 | 1 | 3.55467  |
-+---+---+---+----------+
-| 2 | 2 | 2 | 2.133408 |
-+---+---+---+----------+
-| 2 | 2 | 3 | 3.927347 |
-+---+---+---+----------+
-| 2 | 3 | 1 | 3.73128  |
-+---+---+---+----------+
-| 2 | 3 | 2 | 0.036135 |
-+---+---+---+----------+
-| 2 | 3 | 3 | -0.05483 |
-+---+---+---+----------+
-| 2 | 4 | 1 | 1.268975 |
-+---+---+---+----------+
-| 2 | 4 | 2 | 3.615265 |
-+---+---+---+----------+
-| 2 | 4 | 3 | 2.902522 |
-+---+---+---+----------+
-
-Performing linear_regression_test and two_sample_test on this dataset should
-give very similar p-values. ::
-
-    ha.stats.linear_regression_test(data, treatment_col=0,
-                                    bootstraps=1000, permutations='all',
-                                    random_state=1)
-    0.013714285714285714
-
-    ha.stats.two_sample_test(data, treatment_col=0,
-                             bootstraps=1000, permutations='all',
-                             random_state=1)
-    0.013714285714285714
-
-However, unlike two_sample_test, this test can handle any number of
-conditions. Consider instead a dataset with four treatment conditions that
-have a linear relationship.
::
-
-    paramlist = [[0, 2/3, 4/3, 2], [stats.norm], [stats.norm]]
-    hierarchy = [4, 2, 3]
-    datagen = DataSimulator(paramlist, random_state=2)
-    datagen.fit(hierarchy)
-    data = datagen.generate()
-    data
-
-+---+---+---+----------+
-| 0 | 1 | 2 | 3        |
-+===+===+===+==========+
-| 1 | 1 | 1 | 0.470264 |
-+---+---+---+----------+
-| 1 | 1 | 2 | -0.36477 |
-+---+---+---+----------+
-| 1 | 1 | 3 | 1.166621 |
-+---+---+---+----------+
-| 1 | 2 | 1 | -0.8333  |
-+---+---+---+----------+
-| 1 | 2 | 2 | -0.85157 |
-+---+---+---+----------+
-| 1 | 2 | 3 | -1.3149  |
-+---+---+---+----------+
-| 2 | 1 | 1 | 0.708561 |
-+---+---+---+----------+
-| 2 | 1 | 2 | 0.154405 |
-+---+---+---+----------+
-| 2 | 1 | 3 | 0.798892 |
-+---+---+---+----------+
-| 2 | 2 | 1 | -2.38199 |
-+---+---+---+----------+
-| 2 | 2 | 2 | -1.64797 |
-+---+---+---+----------+
-| 2 | 2 | 3 | -2.66707 |
-+---+---+---+----------+
-| 3 | 1 | 1 | 3.974506 |
-+---+---+---+----------+
-| 3 | 1 | 2 | 3.321076 |
-+---+---+---+----------+
-| 3 | 1 | 3 | 3.463612 |
-+---+---+---+----------+
-| 3 | 2 | 1 | 2.888003 |
-+---+---+---+----------+
-| 3 | 2 | 2 | 1.466742 |
-+---+---+---+----------+
-| 3 | 2 | 3 | 3.26068  |
-+---+---+---+----------+
-| 4 | 1 | 1 | 3.73128  |
-+---+---+---+----------+
-| 4 | 1 | 2 | 0.036135 |
-+---+---+---+----------+
-| 4 | 1 | 3 | -0.05483 |
-+---+---+---+----------+
-| 4 | 2 | 1 | 1.268975 |
-+---+---+---+----------+
-| 4 | 2 | 2 | 3.615265 |
-+---+---+---+----------+
-| 4 | 2 | 3 | 2.902522 |
-+---+---+---+----------+
-
-For this dataset, there are 8! / (2!^4) = 2,520 total permutations of the
-treatment labels among the eight column 1 samples. We will choose a random
-subset of them to compute the p-value. ::
-
-    ha.stats.linear_regression_test(data, treatment_col=0,
-                                    bootstraps=100, permutations=1000,
-                                    random_state=1)
-    0.00767
-
-Between these three tests, researchers can address a large variety of
-experimental designs. Unfortunately, interaction effects are outside the scope
-of permutation tests - it is not possible to construct an exact test for
-interaction effects in general. However, an asymptotic test for interaction
-effects may be implemented in the future.
-
-Power Analysis
---------------
-Researchers can also use hierarch to determine the appropriate sample size
-for a future experiment. hierarch.power provides a class, DataSimulator,
-to assist in power analyses. DataSimulator is initialized with a list
-specifying the probability distributions generating the data and an optional
-random_state for reproducibility.
-
-In this case, consider an experiment similar to the one above - two treatment
-conditions, but with the sample size at each level of the hierarchy yet to be
-determined. First, you must posit a data-generating process for the analysis.
-
-Suppose you assume that the column 1 values are normally distributed with
-mean 0 and variance 1. From past experience, you believe that the column 2
-values follow a right-tailed distribution, so you choose to model them as a
-lognormal distribution with a scale parameter of 0.75. Finally, you decide
-that you want to achieve 80% power for a mean difference equal to one
-standard deviation. The combined standard deviation of the two noise
-distributions you specified - the square root of their summed variances - is
-about 1.525, so you input that as the effect-size parameter.
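-
-That 1.525 figure can be checked directly with scipy, using the same two
-distributions specified below. ::
-
-    import numpy as np
-    import scipy.stats as stats
-
-    # SD of the sum of the two noise sources = sqrt of their summed variances
-    total_sd = np.sqrt(stats.norm(0, 1).var() + stats.lognorm(0.75).var())
-
-    print(round(total_sd, 3))
-
-    #out: 1.525
-
-With that checked, the simulator can be set up:
-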
::
-
-    from hierarch.power import DataSimulator
-    import scipy.stats as stats
-
-    parameters = [[0, 1.525], #difference in means due to treatment
-                  [stats.norm, 0, 1], #column 1 distribution - stats.norm(loc=0, scale=1)
-                  [stats.lognorm, 0.75]] #column 2 distribution - stats.lognorm(s = 0.75)
-
-    sim = DataSimulator(parameters, random_state=1)
-
-Next, you choose an experimental design to simulate. Perhaps, like above, you
-decide to start with three samples per treatment condition and three
-measurements within each sample. Calling the .fit() function readies the
-DataSimulator to produce randomly-generated data according to this
-experimental scheme. ::
-
-    hierarchy = [2, #treatments
-                 3, #samples
-                 3] #within-sample measurements
-
-    sim.fit(hierarchy)
-
-By calling the .generate() function, DataSimulator uses the prespecified
-parameters to generate a simulated dataset. ::
-
-    print(sim.generate())
-
-+---+---+---+----------+
-| 0 | 1 | 2 | 3        |
-+===+===+===+==========+
-| 1 | 1 | 1 | 1.014087 |
-+---+---+---+----------+
-| 1 | 1 | 2 | 1.891843 |
-+---+---+---+----------+
-| 1 | 1 | 3 | 1.660049 |
-+---+---+---+----------+
-| 1 | 2 | 1 | 2.068442 |
-+---+---+---+----------+
-| 1 | 2 | 2 | 1.843164 |
-+---+---+---+----------+
-| 1 | 2 | 3 | 2.328488 |
-+---+---+---+----------+
-| 1 | 3 | 1 | 0.906038 |
-+---+---+---+----------+
-| 1 | 3 | 2 | 1.215424 |
-+---+---+---+----------+
-| 1 | 3 | 3 | 1.027005 |
-+---+---+---+----------+
-| 2 | 1 | 1 | 1.788798 |
-+---+---+---+----------+
-| 2 | 1 | 2 | 1.252083 |
-+---+---+---+----------+
-| 2 | 1 | 3 | 1.024889 |
-+---+---+---+----------+
-| 2 | 2 | 1 | 2.986665 |
-+---+---+---+----------+
-| 2 | 2 | 2 | 3.254925 |
-+---+---+---+----------+
-| 2 | 2 | 3 | 3.436481 |
-+---+---+---+----------+
-| 2 | 3 | 1 | 2.784636 |
-+---+---+---+----------+
-| 2 | 3 | 2 | 4.610765 |
-+---+---+---+----------+
-| 2 | 3 | 3 | 4.099078 |
-+---+---+---+----------+
-
-You can use this to set up a simple power analysis. The following code
-performs a hierarchical permutation test with 50,000 total permutations
-(500 bootstraps x 100 permutations each - overkill in the 2, 3, 3 case) on
-each of 100 simulated datasets and prints the fraction of them that return a
-significant result, assuming a p-value cutoff of 0.05. ::
-
-    pvalues = []
-    loops = 100
-    for i in range(loops):
-        data = sim.generate()
-        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
-
-    print(np.less(pvalues, 0.05).sum() / loops)
-
-    #out: 0.29
-
-The targeted power is 0.8, so you can fit the DataSimulator with a larger
-sample size. You can run the following code block with different sample sizes
-until you determine the column 1 sample size that achieves at least 80%
-power. ::
-
-    sim.fit([2,10,3])
-
-    pvalues = []
-    loops = 100
-    for i in range(loops):
-        data = sim.generate()
-        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
-
-    print(np.less(pvalues, 0.05).sum() / loops)
-
-    #out: 0.81
-
-You note, however, that increasing the number of column 1 samples is much
-more laborious than increasing the number of column 2 samples. For example,
-perhaps the column 1 samples represent mice, while column 2 represents
-multiple measurements of some feature of each mouse's cells. You have
-posited that the slight majority of your observed variance comes from the
-column 2 samples - indeed, in biological samples, within-sample variance
-can be equal to or greater than between-sample variance. After all, that
-is why we make multiple measurements within the same biological sample!
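-
-(As an aside: the simulate-and-test loop above recurs in each scenario below,
-so you may prefer to wrap it in a small helper. This is a convenience sketch
-using the same ha and np aliases as the surrounding examples, not part of
-hierarch's API; the remaining examples keep the explicit loop for clarity.) ::
-
-    def simulated_power(sim, loops=100, alpha=0.05):
-        # fraction of simulated datasets that reject the null at level alpha
-        pvalues = [ha.stats.two_sample_test(sim.generate(), 0,
-                                            bootstraps=500, permutations=100)
-                   for _ in range(loops)]
-        return np.less(pvalues, alpha).sum() / loops
-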
Given that the variance assumption above is reasonable, perhaps 80% power can
-be achieved with an experimental design that makes more column 2
-measurements. ::
-
-    sim.fit([2,8,30])
-
-    pvalues = []
-    loops = 100
-    for i in range(loops):
-        data = sim.generate()
-        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
-
-    print(np.less(pvalues, 0.05).sum() / loops)
-
-    #out: 0.84
-
-Of course, adding column 2 samples has a much more limited influence on power
-than adding column 1 samples - even with infinitely many column 2 samples,
-the standard error for the difference of means still depends on the variance
-of the column 1 data-generating process. This is illustrated with an
-excessive example of 300 column 2 samples per column 1 sample, which shows no
-improvement in power over using only 30 column 2 samples. ::
-
-    sim.fit([2,8,300])
-
-    pvalues = []
-    loops = 100
-    for i in range(loops):
-        data = sim.generate()
-        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
-
-    print(np.less(pvalues, 0.05).sum() / loops)
-
-    #out: 0.83
-
-On the other hand, adding only four column 1 samples to each treatment group
-(rather than 270 column 2 samples to each column 1 sample) brings the power
-to 97%.
-
-Finally, to ensure that hierarchical permutation is valid for the posited
-data-generating process, you can do another power analysis under the null
-hypothesis - that there is no difference between groups. To compensate for
-Monte Carlo error, you should increase the number of loops - at 100 loops,
-the error for an event that happens with 5% probability is +/- 2%, but at
-1000 loops, it is only +/- 0.7%. ::
-
-    parameters = [[0, 0], #no difference in means under the null hypothesis
-                  [stats.norm, 0, 1], #column 1 probability distribution
-                  [stats.lognorm, 0.75]] #column 2 probability distribution
-    sim = ha.power.DataSimulator(parameters, random_state=1)
-    sim.fit([2,12,30])
-
-    pvalues = []
-    loops = 1000
-    for i in range(loops):
-        data = sim.generate()
-        pvalues.append(ha.stats.two_sample_test(data, 0, bootstraps=500, permutations=100))
-
-    print(np.less(pvalues, 0.05).sum() / loops)
-
-    #out: 0.05
-
-Hierarchical permutation experiences no size distortion for this experimental
-design and is therefore a valid test.
-
-Note: these power calculations are subject to Monte Carlo error, so you
-should consider increasing the number of loops if a precise value for power
-is important. In nonclinical settings, however, small-scale power analyses
-are sufficient and can be a valuable guide for choosing the sample size for
-your study.
\ No newline at end of file
diff --git a/hierarch/internal_functions.py b/hierarch/internal_functions.py
index 2bdbb86..60bd396 100644
--- a/hierarch/internal_functions.py
+++ b/hierarch/internal_functions.py
@@ -432,50 +432,6 @@ def visit(head):
                 yield visit(head)
 
 
-def preprocess_data(data):
-    """Performs label encoding without overwriting numerical variables.
-
-    Parameters
-    ----------
-    data : 2D array or pandas DataFrame
-        Data to be encoded.
-
-    Returns
-    -------
-    2D array of float64s
-        An array identical to data, but all elements that cannot be cast
-        to np.float64s replaced with integer values.
- """ - # don't want to overwrite data - if isinstance(data, np.ndarray): - - encoded = data.copy() - - # coerce dataframe to numpy array - elif isinstance(data, pd.DataFrame): - - encoded = data.to_numpy() - - for idx, v in enumerate(encoded.T): - # attempt to cast array as floats - try: - encoded = encoded.astype(np.float64) - # if we can cast the array as floats, encoding is complete - break - - except ValueError: - # if we can't, attempt to cast one column as floats - try: - encoded[:, idx] = encoded[:, idx].astype(np.float64) - # if we still can't, encode that column - except ValueError: - encoded[:, idx] = np.unique(v, return_inverse=True)[1] - # stable sort sort the output by row - encoded = np.unique(encoded, axis=0) - - return encoded - - class GroupbyMean: """Class for performing groupby reductions on numpy arrays. diff --git a/hierarch/power.py b/hierarch/power.py index 9bb375f..b3455c3 100644 --- a/hierarch/power.py +++ b/hierarch/power.py @@ -12,13 +12,6 @@ class DataSimulator: random_state : int or numpy.random.Generator instance, optional Seedable for reproducibility, by default None - Methods - ------- - fit: - Fit the class to a hierarchical structure. - generate: - Generate a simulated dataset. - Examples -------- Each sublist in paramlist can either be an integer or a scipy.stats diff --git a/hierarch/resampling.py b/hierarch/resampling.py index 2641d1a..a34745b 100644 --- a/hierarch/resampling.py +++ b/hierarch/resampling.py @@ -37,13 +37,6 @@ class Bootstrapper: "indexes" generates a set of new indexes for the dataset. Mathematically, this is equivalent to demanding integer weights. - Methods - ------- - fit: - Fit the class to a dataset. - transform: - Generate a bootstrapped sample. - Notes ----- These approaches have different outputs - "weights" and "bayesian" @@ -213,7 +206,9 @@ class Bootstrapper: """ - BOOTSTRAP_ALGORITHMS = tuple(["weights", "indexes", "bayesian"]) + #: ("weights", "indexes", "bayesian) The three possible arguments that + # can be provided to the "kind" keyword argument. + _BOOTSTRAP_ALGORITHMS = tuple(["weights", "indexes", "bayesian"]) def __init__(self, random_state=None, kind="weights"): @@ -222,7 +217,7 @@ def __init__(self, random_state=None, kind="weights"): # this makes it both reproducible and thread-safe enough nb_seed = self.random_generator.integers(low=2 ** 32 - 1) set_numba_random_state(nb_seed) - if kind in self.BOOTSTRAP_ALGORITHMS: + if kind in self._BOOTSTRAP_ALGORITHMS: self.kind = kind else: raise KeyError("Invalid 'kind' argument.") @@ -332,14 +327,7 @@ class Permuter: ---------- random_state : int or numpy.random.Generator instance, optional Seedable for reproducibility, by default None - - Methods - ------- - fit: - Fit the class to a dataset. - transform: - Generate a permuted sample. - + Examples -------- When the column to resample is the first column, Permuter performs an diff --git a/hierarch/stats.py b/hierarch/stats.py index d2068de..58b282b 100644 --- a/hierarch/stats.py +++ b/hierarch/stats.py @@ -5,7 +5,6 @@ import pandas as pd from hierarch.internal_functions import ( nb_data_grabber, - preprocess_data, GroupbyMean, _compute_interval, bivar_central_moment, @@ -14,6 +13,50 @@ from warnings import warn, simplefilter +def preprocess_data(data): + """Performs label encoding without overwriting numerical variables. + + Parameters + ---------- + data : 2D array or pandas DataFrame + Data to be encoded. 
+
+    Returns
+    -------
+    2D array of float64s
+        An array identical to data, but with all elements that cannot be
+        cast to np.float64 replaced with integer codes.
+    """
+    # don't want to overwrite data
+    if isinstance(data, np.ndarray):
+
+        encoded = data.copy()
+
+    # coerce dataframe to numpy array
+    elif isinstance(data, pd.DataFrame):
+
+        encoded = data.to_numpy()
+
+    for idx, v in enumerate(encoded.T):
+        # attempt to cast the whole array as floats
+        try:
+            encoded = encoded.astype(np.float64)
+            # if we can cast the array as floats, encoding is complete
+            break
+
+        except ValueError:
+            # if we can't, attempt to cast one column as floats
+            try:
+                encoded[:, idx] = encoded[:, idx].astype(np.float64)
+            # if we still can't, encode that column
+            except ValueError:
+                encoded[:, idx] = np.unique(v, return_inverse=True)[1]
+    # stable sort the output by row
+    encoded = np.unique(encoded, axis=0)
+
+    return encoded
+
+
 @jit(nopython=True, cache=True)
 def studentized_covariance(x, y):
     """Studentized sample covariance between two variables.
diff --git a/setup.py b/setup.py
index 1b0bc43..d2e8f6c 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 setuptools.setup(
     name="hierarch",
-    version="0.4.0",
+    version="1.0.0",
     author="Rishi Kulkarni",
     author_email="rkulk@stanford.edu",
     description="Hierarchical hypothesis testing library",