diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 000000000..d92839944 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,42 @@ +name: Performance Benchmarks + +on: + push: + branches: + - master + - develop + +jobs: + benchmark: + name: ${{ matrix.os }} x ${{ matrix.python }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ ubuntu-latest ] #, macos-latest, windows-latest ] + python: ['3.8'] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python }} + - name: Run benchmark + run: | + pip install --upgrade pip setuptools wheel + pip install -r requirements.txt + pip install -r requirements-test.txt + - run: make install + - run: pytest tests/benchmarks/bench.py --benchmark-min-rounds 10 --benchmark-warmup "on" --benchmark-json benchmark.json + - name: Store benchmark result + uses: rhysd/github-action-benchmark@v1 + with: + name: Pandas Profiling Benchmarks + tool: 'pytest' + output-file-path: benchmark.json + github-token: ${{ secrets.GITHUB_TOKEN }} + auto-push: true + + comment-on-alert: true + alert-comment-cc-users: '@sbrugman' diff --git a/.github/workflows/commit.yml b/.github/workflows/commit.yml new file mode 100644 index 000000000..818987e0f --- /dev/null +++ b/.github/workflows/commit.yml @@ -0,0 +1,11 @@ +name: Lint Commit Messages +on: [pull_request] + +jobs: + commitlint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: wagoid/commitlint-github-action@v3 \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/release.yml similarity index 99% rename from .github/workflows/ci.yml rename to .github/workflows/release.yml index 48e1aa31b..e286aa439 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/release.yml @@ -1,4 +1,4 @@ -name: CI +name: Release CI on: push: diff --git a/.github/workflows/ci_test.yml b/.github/workflows/tests.yml similarity index 56% rename from .github/workflows/ci_test.yml rename to .github/workflows/tests.yml index cd797af42..95fbbbc21 100644 --- a/.github/workflows/ci_test.yml +++ b/.github/workflows/tests.yml @@ -1,9 +1,9 @@ -name: Tests and Coverage +name: CI on: push jobs: - build: + test: runs-on: ${{ matrix.os }} strategy: matrix: @@ -33,7 +33,53 @@ jobs: pandas: "pandas>1.1" numpy: "numpy" - name: python ${{ matrix.python-version }}, ${{ matrix.os }}, ${{ matrix.pandas }}, ${{ matrix.numpy }} + name: Tests | python ${{ matrix.python-version }}, ${{ matrix.os }}, ${{ matrix.pandas }}, ${{ matrix.numpy }} + steps: + - uses: actions/checkout@v2 + - name: Setup python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + - uses: actions/cache@v2 + if: startsWith(runner.os, 'Linux') + with: + path: ~/.cache/pip + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + + - uses: actions/cache@v2 + if: startsWith(runner.os, 'macOS') + with: + path: ~/Library/Caches/pip + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-${{ matrix.pandas }}-pip- + + - uses: actions/cache@v2 + if: startsWith(runner.os, 'Windows') + with: + path: ~\AppData\Local\pip\Cache + key: ${{ runner.os }}-${{ matrix.pandas }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ 
runner.os }}-${{ matrix.pandas }}-pip- + - run: | + pip install --upgrade pip setuptools wheel + pip install -r requirements.txt "${{ matrix.pandas }}" "${{ matrix.numpy }}" + pip install -r requirements-test.txt + - run: make install + - run: make test + coverage: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ ubuntu-latest ] + python-version: [ 3.8 ] + pandas: [ "pandas>1.1"] + numpy: ["numpy"] + + name: Coverage | python ${{ matrix.python-version }}, ${{ matrix.os }}, ${{ matrix.pandas }}, ${{ matrix.numpy }} steps: - uses: actions/checkout@v2 - name: Setup python diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 20305ac6d..a81c863b9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,7 +5,7 @@ repos: - id: black language_version: python3.8 - repo: https://github.com/nbQA-dev/nbQA - rev: 0.5.9 + rev: 0.7.0 hooks: - id: nbqa-black additional_dependencies: [ black==20.8b1 ] @@ -17,12 +17,12 @@ repos: additional_dependencies: [ pyupgrade==2.7.3 ] args: [ --nbqa-mutate, --py36-plus ] - repo: https://github.com/asottile/pyupgrade - rev: v2.10.0 + rev: v2.12.0 hooks: - id: pyupgrade args: ['--py36-plus','--exit-zero-even-if-changed'] - repo: https://github.com/pycqa/isort - rev: 5.7.0 + rev: 5.8.0 hooks: - id: isort files: '.*' @@ -31,8 +31,8 @@ repos: rev: "0.46" hooks: - id: check-manifest -- repo: https://gitlab.com/pycqa/flake8 - rev: "3.8.4" +- repo: https://github.com/PyCQA/flake8 + rev: "3.9.1" hooks: - id: flake8 args: [ "--select=E9,F63,F7,F82"] #,T001 diff --git a/Makefile b/Makefile index bbcd539e9..3a8e2d836 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,9 @@ test: pytest tests/issues/ pytest --nbval tests/notebooks/ flake8 . --select=E9,F63,F7,F82 --show-source --statistics - + pandas_profiling -h + make typing + test_cov: pytest --cov=. tests/unit/ pytest --cov=. --cov-append tests/issues/ diff --git a/README.md b/README.md index b1838fe88..de9e02497 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@
  Documentation |
- Slack
+ Slack | Stack Overflow
@@ -79,6 +79,7 @@ The following examples can give you an impression of what the package can do: * [Vektis](https://pandas-profiling.github.io/pandas-profiling/examples/master/vektis/vektis_report.html) (Vektis Dutch Healthcare data) * [Colors](https://pandas-profiling.github.io/pandas-profiling/examples/master/colors/colors_report.html) (a simple colors dataset) * [UCI Bank Dataset](https://pandas-profiling.github.io/pandas-profiling/examples/master/cbank_marketing_data/uci_bank_marketing_report.html) (banking marketing dataset) +* [RDW](https://pandas-profiling.github.io/pandas-profiling/examples/master/rdw/rdw.html) (RDW, the Dutch DMV's vehicle registration 10 million rows, 71 features) Specific features: @@ -211,7 +212,7 @@ profile.to_file("your_report.json") Version 2.4 introduces minimal mode. -This is a default configuration that disables expensive computations (such as correlations and dynamic binning). +This is a default configuration that disables expensive computations (such as correlations and duplicate row detection). Use the following syntax: @@ -220,6 +221,8 @@ profile = ProfileReport(large_dataset, minimal=True) profile.to_file("output.html") ``` +Benchmarks are available [here](https://pandas-profiling.github.io/pandas-profiling/dev/bench/). + ### Command line usage For standard formatted CSV files that can be read immediately by pandas, you can use the `pandas_profiling` executable. @@ -239,7 +242,7 @@ A set of options is available in order to adapt the report generated. * `progress_bar` (`bool`): If True, `pandas-profiling` will display a progress bar. * `infer_dtypes` (`bool`): When `True` (default) the `dtype` of variables are inferred using `visions` using the typeset logic (for instance a column that has integers stored as string will be analyzed as if being numeric). -More settings can be found in the [default configuration file](https://github.com/pandas-profiling/pandas-profiling/blob/master/src/pandas_profiling/config_default.yaml), [minimal configuration file](https://github.com/pandas-profiling/pandas-profiling/blob/master/src/pandas_profiling/config_minimal.yaml) and [dark themed configuration file](https://github.com/pandas-profiling/pandas-profiling/blob/master/src/pandas_profiling/config_dark.yaml). +More settings can be found in the [default configuration file](https://github.com/pandas-profiling/pandas-profiling/blob/master/src/pandas_profiling/config_default.yaml) and [minimal configuration file](https://github.com/pandas-profiling/pandas-profiling/blob/master/src/pandas_profiling/config_minimal.yaml). You find the configuration docs on the advanced usage page [here](https://pandas-profiling.github.io/pandas-profiling/docs/master/rtd/pages/advanced_usage.html) @@ -306,14 +309,15 @@ Types are a powerful abstraction for effective data analysis, that goes beyond t `pandas-profiling` currently, recognizes the following types: _Boolean, Numerical, Date, Categorical, URL, Path, File_ and _Image_. We have developed a type system for Python, tailored for data analysis: [visions](https://github.com/dylan-profiler/visions). -Selecting the right typeset drastically reduces the complexity the code of your analysis. -Future versions of `pandas-profiling` will have extended type support through `visions`! +Choosing an appropriate typeset can both improve the overall expressiveness and reduce the complexity of your analysis/code. 
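For illustration, here is a small sketch (not part of the diff) showing how the inferred types can be inspected. It relies only on the `progress_bar` and `infer_dtypes` options documented above and on the `get_description()` accessor exercised by the tests in this PR; the `"type"` entry on each per-variable summary is an assumption based on `model/summary.py` in this diff.

```python
# Hedged sketch: inspect which type pandas-profiling assigns to each column.
# With the default configuration (infer_dtypes: True), string-encoded integers
# are expected to be analysed as numeric. The "type" key on each variable
# summary is an assumption based on model/summary.py in this diff.
import pandas as pd

from pandas_profiling import ProfileReport

df = pd.DataFrame(
    {
        "ints_as_str": ["1", "2", "3", "4", "5", "6", "7", "8"],
        "category": list("ababcdcd"),
        "flag": [True, False] * 4,
    }
)

profile = ProfileReport(df, title="Typeset sketch", progress_bar=False)
variables = profile.get_description()["variables"]

for column, summary in variables.items():
    print(f"{column}: {summary['type']}")
```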
+To learn more about `pandas-profiling`'s type system, check out the default implementation [here](https://github.com/pandas-profiling/pandas-profiling/blob/develop/src/pandas_profiling/model/typeset.py). +In the meantime, user customized summarizations and type definitions are now fully supported - if you have a specific use-case please reach out with ideas or a PR! ## Contributing Read on getting involved in the [Contribution Guide](https://pandas-profiling.github.io/pandas-profiling/docs/master/rtd/pages/contribution_guidelines.html). -A low threshold place to ask questions or start contributing is by reaching out on the pandas-profiling Slack. [Join the Slack community](https://join.slack.com/t/pandas-profiling/shared_invite/zt-hfy3iwp2-qEJSItye5QBZf8YGFMaMnQ). +A low threshold place to ask questions or start contributing is by reaching out on the pandas-profiling Slack. [Join the Slack community](https://join.slack.com/t/pandas-profiling/shared_invite/zt-oe5ol4yc-YtbOxNBGUCb~v73TamRLuA). ## Editor integration diff --git a/docsrc/source/pages/advanced_usage.rst b/docsrc/source/pages/advanced_usage.rst index 8f7099ef6..ba39f6bbb 100644 --- a/docsrc/source/pages/advanced_usage.rst +++ b/docsrc/source/pages/advanced_usage.rst @@ -165,3 +165,75 @@ It's possible to disable certain groups of features through configuration shorth r.set_variable("correlations", None) r.set_variable("missing_diagrams", None) r.set_variable("interactions", None) + + + + +Customise plots +--------------- + +A way how to pass arguments to the underlying matplotlib is to use the ``plot`` argument. It is possible to change the default format of images to png (default svg) using the key-pair ``image_format: "png"`` and also the resolution of the image using ``dpi: 800``. + +An example would be: + +.. code-block:: python + + profile = ProfileReport(planets, title='Pandas Profiling Report', explorative=True, + plot={ + 'dpi':200, + 'image_format': 'png' + }) + + +Furthermore, it is possible to change the default values of histograms, the options for that are the following: + + histogram: + x_axis_labels: True + + # Number of bins (set to 0 to automatically detect the bin size) + bins: 50 + + # Maximum number of bins (when bins=0) + max_bins: 250 + + + + + +Customise correlation matrix +----------------------------- + +It's possible to directly access the correlation matrix as well. That is done with the ``plot`` argument and then with the `correlation` key. It is possible to customise the palett, one can use the following list used in seaborn or create [their own custom matplotlib palette](https://matplotlib.org/stable/gallery/color/custom_cmap.html). 
Supported values are + +``` +'Accent', 'Accent_r', 'Blues', 'Blues_r', 'BrBG', 'BrBG_r', 'BuGn', 'BuGn_r', 'BuPu', 'BuPu_r', 'CMRmap', 'CMRmap_r', 'Dark2', 'Dark2_r', 'GnBu', 'GnBu_r', 'Greens', 'Greens_r', 'Greys', 'Greys_r', 'OrRd', 'OrRd_r', 'Oranges', 'Oranges_r', 'PRGn', 'PRGn_r', 'Paired', 'Paired_r', 'Pastel1', 'Pastel1_r', 'Pastel2', 'Pastel2_r', 'PiYG', 'PiYG_r', 'PuBu', 'PuBuGn', 'PuBuGn_r', 'PuBu_r', 'PuOr', 'PuOr_r', 'PuRd', 'PuRd_r', 'Purples', 'Purples_r', 'RdBu', 'RdBu_r', 'RdGy', 'RdGy_r', 'RdPu', 'RdPu_r', 'RdYlBu', 'RdYlBu_r', 'RdYlGn', 'RdYlGn_r', 'Reds', 'Reds_r', 'Set1', 'Set1_r', 'Set2', 'Set2_r', 'Set3', 'Set3_r', 'Spectral', 'Spectral_r', 'Wistia', 'Wistia_r', 'YlGn', 'YlGnBu', 'YlGnBu_r', 'YlGn_r', 'YlOrBr', 'YlOrBr_r', 'YlOrRd', 'YlOrRd_r', 'afmhot', 'afmhot_r', 'autumn', 'autumn_r', 'binary', 'binary_r', 'bone', 'bone_r', 'brg', 'brg_r', 'bwr', 'bwr_r', 'cividis', 'cividis_r', 'cool', 'cool_r', 'coolwarm', 'coolwarm_r', 'copper', 'copper_r', 'crest', 'crest_r', 'cubehelix', 'cubehelix_r', 'flag', 'flag_r', 'flare', 'flare_r', 'gist_earth', 'gist_earth_r', 'gist_gray', 'gist_gray_r', 'gist_heat', 'gist_heat_r', 'gist_ncar', 'gist_ncar_r', 'gist_rainbow', 'gist_rainbow_r', 'gist_stern', 'gist_stern_r', 'gist_yarg', 'gist_yarg_r', 'gnuplot', 'gnuplot2', 'gnuplot2_r', 'gnuplot_r', 'gray', 'gray_r', 'hot', 'hot_r', 'hsv', 'hsv_r', 'icefire', 'icefire_r', 'inferno', 'inferno_r', 'jet', 'jet_r', 'magma', 'magma_r', 'mako', 'mako_r', 'nipy_spectral', 'nipy_spectral_r', 'ocean', 'ocean_r', 'pink', 'pink_r', 'plasma', 'plasma_r', 'prism', 'prism_r', 'rainbow', 'rainbow_r', 'rocket', 'rocket_r', 'seismic', 'seismic_r', 'spring', 'spring_r', 'summer', 'summer_r', 'tab10', 'tab10_r', 'tab20', 'tab20_r', 'tab20b', 'tab20b_r', 'tab20c', 'tab20c_r', 'terrain', 'terrain_r', 'turbo', 'turbo_r', 'twilight', 'twilight_r', 'twilight_shifted', 'twilight_shifted_r', 'viridis', 'viridis_r', 'vlag', 'vlag_r', 'winter', 'winter_r' +``` + +An example can be: + +.. code-block:: python + + from pandas_profiling import ProfileReport + + profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True, + plot={ + 'correlation':{ + 'cmap': 'RdBu_r', + 'bad': '#000000'}} + ) + + +Similarly, one can change the palette for *Missing values* using the ``missing`` argument, eg: + +.. code-block:: python + + from pandas_profiling import ProfileReport + + profile = ProfileReport(df, title='Pandas Profiling Report', explorative=True, + plot={ + 'missing':{ + 'cmap': 'RdBu_r'}} + ) + + + diff --git a/docsrc/source/pages/changelog/v2_12_0.rst b/docsrc/source/pages/changelog/v2_12_0.rst index 2b6b6a5e8..02d35bb4a 100644 --- a/docsrc/source/pages/changelog/v2_12_0.rst +++ b/docsrc/source/pages/changelog/v2_12_0.rst @@ -3,14 +3,27 @@ Changelog v2.12.0 🎉 Features ^^^^^^^^^^^ -- Add the number and the percentage of negative values for numerical variables `[695] `- (contributed by @gverbock). 
+- Add the number and the percentage of negative values for numerical variables `[695] `_ (contributed by @gverbock) - Enable setting of typeset/summarizer (contributed by @ieaves) +- Allow empty data frames `[678] `_ (contributed by @spbail, @fwd2020-c) + +🐛 Bug fixes +^^^^^^^^^^^^ +- Patch args for great_expectations datetime profiler `[727] `_ (contributed by @jstammers) +- Negative exponent formatting `[723] `_ (reported by @rdpapworth) 📖 Documentation ^^^^^^^^^^^^^^^^ - Fix link syntax (contributed by @ChrisCarini) +👷‍♂️ Internal Improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Several performance improvements (minimal mode, duplicates, frequency table sorting) +- Introduce ``pytest-benchmark`` in CI to monitor commit performance impact +- Introduce ``commitlint`` in CI to start automating the changelog generation + ⬆️ Dependencies ^^^^^^^^^^^^^^^^^^ -- The `ipywidgets` dependency was moved to the `[notebook]` extra, so most of Jupyter will not be installed alongside this package by default (contributed by @akx). -- Replaced the (testing only) `fastparquet` dependency with `pyarrow` (default pandas parquet engine, contributed by @kurosch). \ No newline at end of file +- The ``ipywidgets`` dependency was moved to the ``[notebook]`` extra, so most of Jupyter will not be installed alongside this package by default (contributed by @akx) +- Replaced the (testing only) ``fastparquet`` dependency with ``pyarrow`` (default pandas parquet engine, contributed by @kurosch) +- Upgrade ``phik``. This drops the hard dependency on numba (contributed by @akx) diff --git a/docsrc/source/pages/changelog/v2_13_0.rst b/docsrc/source/pages/changelog/v2_13_0.rst index f2d44616a..d8b8eb1c3 100644 --- a/docsrc/source/pages/changelog/v2_13_0.rst +++ b/docsrc/source/pages/changelog/v2_13_0.rst @@ -1,5 +1,5 @@ -Changelog vx.y.z ----------------- +Changelog v2.13.0 +----------------- 🎉 Features ^^^^^^^^^^^ diff --git a/docsrc/source/pages/contribution_guidelines.rst b/docsrc/source/pages/contribution_guidelines.rst index e0d1b4bc3..2d0b80b42 100644 --- a/docsrc/source/pages/contribution_guidelines.rst +++ b/docsrc/source/pages/contribution_guidelines.rst @@ -9,6 +9,10 @@ Contributing a new feature * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable. + +Slack community +--------------- +A low threshold place to ask questions or start contributing is by reaching out on the pandas-profiling Slack. `Join the Slack community `_. Developer tools --------------- @@ -61,4 +65,4 @@ Read Github's `open source legal guide `_ on Github. \ No newline at end of file +Read more on getting involved in the `Contribution Guide `_ on Github. diff --git a/docsrc/source/pages/resources.rst b/docsrc/source/pages/resources.rst index 16096af8d..81f85fff1 100644 --- a/docsrc/source/pages/resources.rst +++ b/docsrc/source/pages/resources.rst @@ -14,7 +14,7 @@ Notebooks Articles -------- - +- `Bringing Customization to Pandas Profiling `_ (Ian Eaves, March 5, 2021) - `Beginner Friendly Data Science Projects Accepting Contributions `_ (Adam Ross Nelson, January 18, 2021) - `Pandas profiling and exploratory data analysis with line one of code! 
`_ (Magdalena Konkiewicz, Jun 10, 2020) - `The Covid 19 health issue `_ (Concillier Kitungulu, April 20, 2020) diff --git a/docsrc/source/pages/support.rst b/docsrc/source/pages/support.rst index 46ed2e1e4..3f35ac3bd 100644 --- a/docsrc/source/pages/support.rst +++ b/docsrc/source/pages/support.rst @@ -35,6 +35,10 @@ Users with a request for help on how to use `pandas-profiling` should consider a :alt: Questions: Stackoverflow "pandas-profiling" :target: https://stackoverflow.com/questions/tagged/pandas-profiling +Slack community +--------------- + +`Join the Slack community `_ and come into contact with other users and developers, that might be able to answer your questions. Reporting a bug --------------- diff --git a/examples/bank_marketing_data/banking_data.py b/examples/bank_marketing_data/banking_data.py index 9d5eb285c..139c5e964 100644 --- a/examples/bank_marketing_data/banking_data.py +++ b/examples/bank_marketing_data/banking_data.py @@ -5,12 +5,12 @@ import pandas as pd from pandas_profiling import ProfileReport -from pandas_profiling.utils.cache import cache_file +from pandas_profiling.utils.cache import cache_zipped_file if __name__ == "__main__": - file_name = cache_file( + file_name = cache_zipped_file( "bank-full.csv", - "https://storage.googleapis.com/erwinh-public-data/bankingdata/bank-full.csv", + "https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank.zip", ) # Download the UCI Bank Marketing Dataset diff --git a/examples/rdw/rdw.py b/examples/rdw/rdw.py new file mode 100644 index 000000000..3c500882c --- /dev/null +++ b/examples/rdw/rdw.py @@ -0,0 +1,14 @@ +import pandas as pd + +from pandas_profiling import ProfileReport +from pandas_profiling.utils.cache import cache_file + +if __name__ == "__main__": + file_name = cache_file( + "rdw.parquet", + "https://raw.githubusercontent.com/pandas-profiling/pandas-profiling-data/master/data/rdw.parquet", + ) + data = pd.read_parquet(file_name) + + profile = ProfileReport(data, title="RDW Dataset", minimal=True) + profile.to_file("rdw.html") diff --git a/requirements-test.txt b/requirements-test.txt index 89c4b5df1..e92c82343 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -3,6 +3,7 @@ coverage<5 codecov pytest-mypy pytest-cov +pytest-benchmark~=3.4.1 nbval pyarrow flake8 diff --git a/requirements.txt b/requirements.txt index 56149d813..e290cb734 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ htmlmin>=0.1.12 # Missing values missingno>=0.4.2 # Correlations -phik>=0.10.0 +phik>=0.11.1 # Text analysis tangled-up-in-unicode>=0.0.6 # Examples diff --git a/src/pandas_profiling/config_default.yaml b/src/pandas_profiling/config_default.yaml index 42c1b1368..fa7009003 100644 --- a/src/pandas_profiling/config_default.yaml +++ b/src/pandas_profiling/config_default.yaml @@ -48,6 +48,7 @@ vars: chi_squared_threshold: 0.999 coerce_str_to_date: False redact: False + histogram_largest: 50 bool: n_obs: 3 # string to boolean mappings pairs (true, false) @@ -150,6 +151,7 @@ memory_deep: False # Configuration related to the duplicates duplicates: head: 10 + key: "# duplicates" # Configuration related to the samples area samples: diff --git a/src/pandas_profiling/config_minimal.yaml b/src/pandas_profiling/config_minimal.yaml index e1aacad3d..16076c90f 100644 --- a/src/pandas_profiling/config_minimal.yaml +++ b/src/pandas_profiling/config_minimal.yaml @@ -14,7 +14,7 @@ variables: descriptions: {} # infer dtypes -infer_dtypes: True +infer_dtypes: False # Show the description at each variable (in 
addition to the overview tab) show_variable_description: True @@ -48,6 +48,7 @@ vars: chi_squared_threshold: 0.0 coerce_str_to_date: False redact: False + histogram_largest: 10 bool: n_obs: 3 # string to boolean mappings pairs (true, false) @@ -151,6 +152,7 @@ memory_deep: False # Configuration related to the duplicates duplicates: head: 0 + key: "# duplicates" # Configuration related to the samples area samples: diff --git a/src/pandas_profiling/model/correlations.py b/src/pandas_profiling/model/correlations.py index 99756ed71..ee64edc0d 100644 --- a/src/pandas_profiling/model/correlations.py +++ b/src/pandas_profiling/model/correlations.py @@ -155,6 +155,9 @@ def calculate_correlation( The correlation matrices for the given correlation measures. Return None if correlation is empty. """ + if len(df) == 0: + return None + correlation_measures = { "pearson": Pearson, "spearman": Spearman, diff --git a/src/pandas_profiling/model/describe.py b/src/pandas_profiling/model/describe.py index 80b53d253..22d8b1f39 100644 --- a/src/pandas_profiling/model/describe.py +++ b/src/pandas_profiling/model/describe.py @@ -47,9 +47,6 @@ def describe( if not isinstance(df, pd.DataFrame): warnings.warn("df is not of type pandas.DataFrame") - if df.empty: - raise ValueError("df can not be empty") - disable_progress_bar = not config["progress_bar"].get(bool) date_start = datetime.utcnow() @@ -134,7 +131,8 @@ def describe( # Duplicates pbar.set_postfix_str("Locating duplicates") - duplicates = get_duplicates(df, supported_columns) + metrics, duplicates = get_duplicates(df, supported_columns) + table_stats.update(metrics) pbar.update() # Messages diff --git a/src/pandas_profiling/model/duplicates.py b/src/pandas_profiling/model/duplicates.py index b81fc232b..90b1ad0fe 100644 --- a/src/pandas_profiling/model/duplicates.py +++ b/src/pandas_profiling/model/duplicates.py @@ -1,11 +1,13 @@ -from typing import Optional +from typing import Any, Dict, Optional, Tuple import pandas as pd from pandas_profiling.config import config -def get_duplicates(df: pd.DataFrame, supported_columns) -> Optional[pd.DataFrame]: +def get_duplicates( + df: pd.DataFrame, supported_columns +) -> Tuple[Dict[str, Any], Optional[pd.DataFrame]]: """Obtain the most occurring duplicate rows in the DataFrame. Args: @@ -17,12 +19,34 @@ def get_duplicates(df: pd.DataFrame, supported_columns) -> Optional[pd.DataFrame """ n_head = config["duplicates"]["head"].get(int) - if n_head > 0 and supported_columns: - return ( - df[df.duplicated(subset=supported_columns, keep=False)] - .groupby(supported_columns) - .size() - .reset_index(name="count") - .nlargest(n_head, "count") - ) - return None + metrics: Dict[str, Any] = {} + if n_head > 0: + if supported_columns and len(df) > 0: + duplicates_key = config["duplicates"]["key"].get(str) + if duplicates_key in df.columns: + raise ValueError( + f"Duplicates key ({duplicates_key}) may not be part of the DataFrame. Either change the " + f" column name in the DataFrame or change the 'duplicates.key' parameter." 
+ ) + + duplicated_rows = df.duplicated(subset=supported_columns, keep=False) + duplicated_rows = ( + df[duplicated_rows] + .groupby(supported_columns) + .size() + .reset_index(name=duplicates_key) + ) + + metrics["n_duplicates"] = len(duplicated_rows[duplicates_key]) + metrics["p_duplicates"] = metrics["n_duplicates"] / len(df) + + return ( + metrics, + duplicated_rows.nlargest(n_head, duplicates_key), + ) + else: + metrics["n_duplicates"] = 0 + metrics["p_duplicates"] = 0.0 + return metrics, None + else: + return metrics, None diff --git a/src/pandas_profiling/model/expectation_algorithms.py b/src/pandas_profiling/model/expectation_algorithms.py index 83e748a36..efac257b5 100644 --- a/src/pandas_profiling/model/expectation_algorithms.py +++ b/src/pandas_profiling/model/expectation_algorithms.py @@ -69,7 +69,10 @@ def path_expectations(name, summary, batch, *args): def datetime_expectations(name, summary, batch, *args): if any(k in summary for k in ["min", "max"]): batch.expect_column_values_to_be_between( - name, min_value=summary.get("min"), max_value=summary.get("max") + name, + min_value=summary.get("min"), + max_value=summary.get("max"), + parse_strings_as_datetimes=True, ) return name, summary, batch diff --git a/src/pandas_profiling/model/messages.py b/src/pandas_profiling/model/messages.py index 3330c049e..3e557cc63 100644 --- a/src/pandas_profiling/model/messages.py +++ b/src/pandas_profiling/model/messages.py @@ -56,6 +56,9 @@ class MessageType(Enum): UNIFORM = auto() """The variable is uniformly distributed""" + EMPTY = auto() + """The DataFrame is empty""" + class Message: """A message object (type, values, column).""" @@ -109,7 +112,7 @@ def check_table_messages(table: dict) -> List[Message]: A list of messages. """ messages = [] - if warning_value(table["n_duplicates"]): + if "n_duplicates" in table and warning_value(table["n_duplicates"]): messages.append( Message( message_type=MessageType.DUPLICATES, @@ -117,6 +120,14 @@ def check_table_messages(table: dict) -> List[Message]: fields={"n_duplicates"}, ) ) + if table["n"] == 0: + messages.append( + Message( + message_type=MessageType.EMPTY, + values=table, + fields={"n"}, + ) + ) return messages diff --git a/src/pandas_profiling/model/sample.py b/src/pandas_profiling/model/sample.py index 1df2acc78..50fac9397 100644 --- a/src/pandas_profiling/model/sample.py +++ b/src/pandas_profiling/model/sample.py @@ -1,3 +1,5 @@ +from typing import List + import attr import pandas as pd @@ -12,7 +14,7 @@ class Sample: caption = attr.ib(default=None) -def get_sample(df: pd.DataFrame) -> list: +def get_sample(df: pd.DataFrame) -> List[Sample]: """Obtains a sample from head and tail of the DataFrame Args: @@ -21,7 +23,10 @@ def get_sample(df: pd.DataFrame) -> list: Returns: a list of Sample objects """ - samples = [] + samples: List[Sample] = [] + if len(df) == 0: + return samples + n_head = config["samples"]["head"].get(int) if n_head > 0: samples.append(Sample("head", df.head(n=n_head), "First rows")) diff --git a/src/pandas_profiling/model/summary.py b/src/pandas_profiling/model/summary.py index a579275c1..ea14eae7e 100644 --- a/src/pandas_profiling/model/summary.py +++ b/src/pandas_profiling/model/summary.py @@ -4,7 +4,7 @@ import multiprocessing.pool import warnings from collections import Counter -from typing import Callable, Mapping, Optional, Tuple +from typing import Callable, Mapping, Tuple import numpy as np import pandas as pd @@ -16,7 +16,6 @@ check_variable_messages, ) from pandas_profiling.model.summarizer import 
BaseSummarizer -from pandas_profiling.model.typeset import Unsupported from pandas_profiling.visualisation.missing import ( missing_bar, missing_dendrogram, @@ -124,7 +123,7 @@ def get_table_stats(df: pd.DataFrame, variable_stats: dict) -> dict: n = len(df) memory_size = df.memory_usage(deep=config["memory_deep"].get(bool)).sum() - record_size = float(memory_size) / n + record_size = float(memory_size) / n if n > 0 else 0 table_stats = { "n": n, @@ -143,21 +142,9 @@ def get_table_stats(df: pd.DataFrame, variable_stats: dict) -> dict: if series_summary["n_missing"] == n: table_stats["n_vars_all_missing"] += 1 - table_stats["p_cells_missing"] = table_stats["n_cells_missing"] / ( - table_stats["n"] * table_stats["n_var"] - ) - - supported_columns = [ - k for k, v in variable_stats.items() if v["type"] != Unsupported - ] - table_stats["n_duplicates"] = ( - sum(df.duplicated(subset=supported_columns)) - if len(supported_columns) > 0 - else 0 - ) - table_stats["p_duplicates"] = ( - (table_stats["n_duplicates"] / len(df)) - if (len(supported_columns) > 0 and len(df) > 0) + table_stats["p_cells_missing"] = ( + table_stats["n_cells_missing"] / (table_stats["n"] * table_stats["n_var"]) + if table_stats["n"] > 0 else 0 ) @@ -169,29 +156,6 @@ def get_table_stats(df: pd.DataFrame, variable_stats: dict) -> dict: return table_stats -def get_duplicates(df: pd.DataFrame, supported_columns) -> Optional[pd.DataFrame]: - """Obtain the most occurring duplicate rows in the DataFrame. - - Args: - df: the Pandas DataFrame. - supported_columns: the columns to consider - - Returns: - A subset of the DataFrame, ordered by occurrence. - """ - n_head = config["duplicates"]["head"].get(int) - - if n_head > 0 and supported_columns: - return ( - df[df.duplicated(subset=supported_columns, keep=False)] - .groupby(supported_columns) - .size() - .reset_index(name="count") - .nlargest(n_head, "count") - ) - return None - - def get_missing_diagrams(df: pd.DataFrame, table_stats: dict) -> dict: """Gets the rendered diagrams for missing values. @@ -203,6 +167,9 @@ def get_missing_diagrams(df: pd.DataFrame, table_stats: dict) -> dict: A dictionary containing the base64 encoded plots for each diagram that is active in the config (matrix, bar, heatmap, dendrogram). """ + if len(df) == 0: + return {} + def warn_missing(missing_name, error): warnings.warn( f"""There was an attempt to generate the {missing_name} missing values diagrams, but this failed. 
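The `get_duplicates` refactor above changes the return value from a bare DataFrame to a `(metrics, duplicates)` pair, with the count column named by the new `duplicates.key` setting (`"# duplicates"` by default). A minimal usage sketch, mirroring the unit test added later in this diff (the toy column names are arbitrary):

```python
# Sketch of the new get_duplicates contract: it now returns a metrics dict
# plus the duplicate-row frame, whose count column is taken from the
# "duplicates.key" config entry ("# duplicates" by default).
import pandas as pd

from pandas_profiling.model.duplicates import get_duplicates

df = pd.DataFrame({"a": [1, 1, 2, 2, 3], "b": ["x", "x", "y", "y", "z"]})

metrics, duplicates = get_duplicates(df, list(df.columns))

# Two distinct rows occur more than once, out of five rows in total.
print(metrics)  # {'n_duplicates': 2, 'p_duplicates': 0.4}
if duplicates is not None:
    print(duplicates)  # columns: a, b, "# duplicates"
```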
diff --git a/src/pandas_profiling/model/summary_algorithms.py b/src/pandas_profiling/model/summary_algorithms.py index b5150228d..9a95f6be3 100644 --- a/src/pandas_profiling/model/summary_algorithms.py +++ b/src/pandas_profiling/model/summary_algorithms.py @@ -96,7 +96,7 @@ def describe_supported( stats = { "n_distinct": distinct_count, "p_distinct": distinct_count / count if count > 0 else 0, - "is_unique": unique_count == count, + "is_unique": unique_count == count and count > 0, "n_unique": unique_count, "p_unique": unique_count / count if count > 0 else 0, } @@ -120,7 +120,7 @@ def describe_generic(series: pd.Series, summary: dict) -> Tuple[pd.Series, dict] summary.update( { "n": length, - "p_missing": summary["n_missing"] / length, + "p_missing": summary["n_missing"] / length if length > 0 else 0, "count": length - summary["n_missing"], "memory_size": series.memory_usage(deep=config["memory_deep"].get(bool)), } @@ -233,6 +233,16 @@ def describe_numeric_1d(series: pd.Series, summary: dict) -> Tuple[pd.Series, di stats["monotonic_decrease_strict"] = ( stats["monotonic_decrease"] and series.is_unique ) + if summary["monotonic_increase_strict"]: + stats["monotonic"] = 2 + elif summary["monotonic_decrease_strict"]: + stats["monotonic"] = -2 + elif summary["monotonic_increase"]: + stats["monotonic"] = 1 + elif summary["monotonic_decrease"]: + stats["monotonic"] = -1 + else: + stats["monotonic"] = 0 stats.update( histogram_compute( @@ -295,10 +305,16 @@ def describe_categorical_1d(series: pd.Series, summary: dict) -> Tuple[pd.Series # Only run if at least 1 non-missing value value_counts = summary["value_counts_without_nan"] + histogram_largest = config["vars"]["cat"]["histogram_largest"].get(int) + histogram_data = value_counts + if histogram_largest > 0: + histogram_data = histogram_data.nlargest(histogram_largest) summary.update( histogram_compute( - value_counts, summary["n_distinct"], name="histogram_frequencies" + histogram_data, + summary["n_distinct"], + name="histogram_frequencies", ) ) diff --git a/src/pandas_profiling/report/formatters.py b/src/pandas_profiling/report/formatters.py index 6a29e8bdd..558f4fa4e 100644 --- a/src/pandas_profiling/report/formatters.py +++ b/src/pandas_profiling/report/formatters.py @@ -78,7 +78,7 @@ def fmt_timespan(num_seconds, detailed=False, max_units=3): import math import numbers import re - from datetime import datetime, timedelta + from datetime import timedelta time_units = ( dict( @@ -206,8 +206,10 @@ def fmt_numeric(value: float, precision=10) -> str: fmtted = f"{{:.{precision}g}}".format(value) for v in ["e+", "e-"]: if v in fmtted: + sign = "-" if v in "e-" else "" fmtted = fmtted.replace(v, " × 10") + "" fmtted = fmtted.replace("0", "") + fmtted = fmtted.replace("", f"{sign}") return fmtted @@ -255,6 +257,21 @@ def fmt(value) -> str: return str(escape(value)) +def fmt_monotonic(value: int) -> str: + if value == 2: + return "Strictly increasing" + elif value == 1: + return "Increasing" + elif value == 0: + return "Not monotonic" + elif value == -1: + return "Decreasing" + elif value == -2: + return "Strictly decreasing" + else: + raise ValueError("Value should be integer ranging from -2 to 2.") + + def help(title, url=None) -> str: """Creat help badge @@ -281,6 +298,7 @@ def get_fmt_mapping() -> Dict[str, Callable]: "fmt_bytesize": fmt_bytesize, "fmt_timespan": fmt_timespan, "fmt_numeric": fmt_numeric, + "fmt_monotonic": fmt_monotonic, "fmt_number": fmt_number, "fmt_array": fmt_array, "fmt": fmt, diff --git 
a/src/pandas_profiling/report/presentation/flavours/html/templates/warnings/warning_duplicates.html b/src/pandas_profiling/report/presentation/flavours/html/templates/warnings/warning_duplicates.html index 8820e1d50..59bb93c56 100644 --- a/src/pandas_profiling/report/presentation/flavours/html/templates/warnings/warning_duplicates.html +++ b/src/pandas_profiling/report/presentation/flavours/html/templates/warnings/warning_duplicates.html @@ -1 +1 @@ -Dataset has {{ message.values['n_duplicates'] }} ({{ message.values['p_duplicates'] | fmt_percent }}) duplicate rows \ No newline at end of file +Dataset has {{ message.values['n_duplicates'] }} ({{ message.values['p_duplicates'] | fmt_percent }}) duplicate rows \ No newline at end of file diff --git a/src/pandas_profiling/report/presentation/flavours/html/templates/warnings/warning_empty.html b/src/pandas_profiling/report/presentation/flavours/html/templates/warnings/warning_empty.html new file mode 100644 index 000000000..a676c9577 --- /dev/null +++ b/src/pandas_profiling/report/presentation/flavours/html/templates/warnings/warning_empty.html @@ -0,0 +1 @@ +Dataset is empty diff --git a/src/pandas_profiling/report/presentation/flavours/widget/warnings.py b/src/pandas_profiling/report/presentation/flavours/widget/warnings.py index 97f830014..43b959f79 100644 --- a/src/pandas_profiling/report/presentation/flavours/widget/warnings.py +++ b/src/pandas_profiling/report/presentation/flavours/widget/warnings.py @@ -25,6 +25,7 @@ def render(self): "skewed": "info", "high_correlation": "", "duplicates": "", + "empty": "", } items = [] diff --git a/src/pandas_profiling/report/presentation/frequency_table_utils.py b/src/pandas_profiling/report/presentation/frequency_table_utils.py index bb53e1dae..0862a19b8 100644 --- a/src/pandas_profiling/report/presentation/frequency_table_utils.py +++ b/src/pandas_profiling/report/presentation/frequency_table_utils.py @@ -1,7 +1,9 @@ -from typing import Dict, Sequence +from typing import Any, Dict, List +import numpy as np -def freq_table(freqtable, n: int, max_number_to_print: int) -> Sequence[Dict]: + +def freq_table(freqtable, n: int, max_number_to_print: int) -> List[Dict]: """Render the rows for a frequency table (value, count). Args: @@ -19,13 +21,13 @@ def freq_table(freqtable, n: int, max_number_to_print: int) -> Sequence[Dict]: max_number_to_print = n if max_number_to_print < len(freqtable): - freq_other = sum(freqtable.iloc[max_number_to_print:]) + freq_other = np.sum(freqtable.iloc[max_number_to_print:]) min_freq = freqtable.values[max_number_to_print] else: freq_other = 0 min_freq = 0 - freq_missing = n - sum(freqtable) + freq_missing = n - np.sum(freqtable) # No values if len(freqtable) == 0: return [] @@ -79,39 +81,37 @@ def freq_table(freqtable, n: int, max_number_to_print: int) -> Sequence[Dict]: return rows -def extreme_obs_table(freqtable, number_to_print, n, ascending=True) -> list: +def extreme_obs_table(freqtable, number_to_print: int, n: int) -> List[Dict[str, Any]]: """Similar to the frequency table, for extreme observations. Args: - freqtable: The frequency table. + freqtable: The (sorted) frequency table. number_to_print: The number of observations to print. n: The total number of observations. - ascending: The ordering of the observations (Default value = True) Returns: The HTML rendering of the extreme observation table. """ + # If it's mixed between base types (str, int) convert to str. Pure "mixed" types are filtered during type # discovery # TODO: should be in cast? 
- if "mixed" in freqtable.index.inferred_type: - freqtable.index = freqtable.index.astype(str) - - sorted_freqtable = freqtable.sort_index(ascending=ascending) - obs_to_print = sorted_freqtable.iloc[:number_to_print] - max_freq = max(obs_to_print.values) - - rows = [] - for label, freq in obs_to_print.items(): - rows.append( - { - "label": label, - "width": freq / max_freq if max_freq != 0 else 0, - "count": freq, - "percentage": float(freq) / n, - "extra_class": "", - "n": n, - } - ) + # if "mixed" in freqtable.index.inferred_type: + # freqtable.index = freqtable.index.astype(str) + + obs_to_print = freqtable.iloc[:number_to_print] + max_freq = obs_to_print.max() + + rows = [ + { + "label": label, + "width": freq / max_freq if max_freq != 0 else 0, + "count": freq, + "percentage": float(freq) / n, + "extra_class": "", + "n": n, + } + for label, freq in obs_to_print.items() + ] return rows diff --git a/src/pandas_profiling/report/structure/overview.py b/src/pandas_profiling/report/structure/overview.py index e8751086a..2ab3bcb60 100644 --- a/src/pandas_profiling/report/structure/overview.py +++ b/src/pandas_profiling/report/structure/overview.py @@ -7,38 +7,46 @@ def get_dataset_overview(summary): - dataset_info = Table( + table_metrics = [ + { + "name": "Number of variables", + "value": summary["table"]["n_var"], + "fmt": "fmt_number", + }, + { + "name": "Number of observations", + "value": summary["table"]["n"], + "fmt": "fmt_number", + }, + { + "name": "Missing cells", + "value": summary["table"]["n_cells_missing"], + "fmt": "fmt_number", + }, + { + "name": "Missing cells (%)", + "value": summary["table"]["p_cells_missing"], + "fmt": "fmt_percent", + }, + ] + if "n_duplicates" in summary["table"]: + table_metrics.extend( + [ + { + "name": "Duplicate rows", + "value": summary["table"]["n_duplicates"], + "fmt": "fmt_number", + }, + { + "name": "Duplicate rows (%)", + "value": summary["table"]["p_duplicates"], + "fmt": "fmt_percent", + }, + ] + ) + + table_metrics.extend( [ - { - "name": "Number of variables", - "value": summary["table"]["n_var"], - "fmt": "fmt_number", - }, - { - "name": "Number of observations", - "value": summary["table"]["n"], - "fmt": "fmt_number", - }, - { - "name": "Missing cells", - "value": summary["table"]["n_cells_missing"], - "fmt": "fmt_number", - }, - { - "name": "Missing cells (%)", - "value": summary["table"]["p_cells_missing"], - "fmt": "fmt_percent", - }, - { - "name": "Duplicate rows", - "value": summary["table"]["n_duplicates"], - "fmt": "fmt_number", - }, - { - "name": "Duplicate rows (%)", - "value": summary["table"]["p_duplicates"], - "fmt": "fmt_percent", - }, { "name": "Total size in memory", "value": summary["table"]["memory_size"], @@ -49,7 +57,11 @@ def get_dataset_overview(summary): "value": summary["table"]["record_size"], "fmt": "fmt_bytesize", }, - ], + ] + ) + + dataset_info = Table( + table_metrics, name="Dataset statistics", ) diff --git a/src/pandas_profiling/report/structure/variables/render_common.py b/src/pandas_profiling/report/structure/variables/render_common.py index 426f258b1..e55d29536 100644 --- a/src/pandas_profiling/report/structure/variables/render_common.py +++ b/src/pandas_profiling/report/structure/variables/render_common.py @@ -9,6 +9,8 @@ def render_common(summary): n_extreme_obs = config["n_extreme_obs"].get(int) n_freq_table_max = config["n_freq_table_max"].get(int) + sorted_freqtable = summary["value_counts_without_nan"].sort_index(ascending=True) + template_variables = { # TODO: with nan "freq_table_rows": 
freq_table( @@ -17,16 +19,14 @@ def render_common(summary): max_number_to_print=n_freq_table_max, ), "firstn_expanded": extreme_obs_table( - freqtable=summary["value_counts_without_nan"], + freqtable=sorted_freqtable, number_to_print=n_extreme_obs, n=summary["n"], - ascending=True, ), "lastn_expanded": extreme_obs_table( - freqtable=summary["value_counts_without_nan"], + freqtable=sorted_freqtable[::-1], number_to_print=n_extreme_obs, n=summary["n"], - ascending=False, ), } diff --git a/src/pandas_profiling/report/structure/variables/render_real.py b/src/pandas_profiling/report/structure/variables/render_real.py index e7ce82412..6624548d5 100644 --- a/src/pandas_profiling/report/structure/variables/render_real.py +++ b/src/pandas_profiling/report/structure/variables/render_real.py @@ -152,17 +152,6 @@ def render_real(summary): name="Quantile statistics", ) - if summary["monotonic_increase_strict"]: - monotocity = "Strictly increasing" - elif summary["monotonic_decrease_strict"]: - monotocity = "Strictly decreasing" - elif summary["monotonic_increase"]: - monotocity = "Increasing" - elif summary["monotonic_decrease"]: - monotocity = "Decreasing" - else: - monotocity = "Not monotonic" - descriptive_statistics = Table( [ { @@ -190,7 +179,11 @@ def render_real(summary): }, {"name": "Sum", "value": summary["sum"], "fmt": "fmt_numeric"}, {"name": "Variance", "value": summary["variance"], "fmt": "fmt_numeric"}, - {"name": "Monotocity", "value": monotocity, "fmt": "fmt"}, + { + "name": "Monotonicity", + "value": summary["monotonic"], + "fmt": "fmt_monotonic", + }, ], name="Descriptive statistics", ) diff --git a/src/pandas_profiling/utils/cache.py b/src/pandas_profiling/utils/cache.py index 1699b2c22..356d6fea8 100644 --- a/src/pandas_profiling/utils/cache.py +++ b/src/pandas_profiling/utils/cache.py @@ -1,4 +1,5 @@ """Dataset cache utility functions""" +import zipfile from pathlib import Path import requests @@ -20,9 +21,44 @@ def cache_file(file_name: str, url: str) -> Path: data_path = get_data_path() data_path.mkdir(exist_ok=True) + file_path = data_path / file_name + + # If not exists, download and create file + if not file_path.exists(): + response = requests.get(url) + file_path.write_bytes(response.content) + + return file_path + + +def cache_zipped_file(file_name: str, url: str) -> Path: + """Check if file_name already is in the data path, otherwise download it from url. 
+ + Args: + file_name: the file name + url: the URL of the dataset + + Returns: + The relative path to the dataset + """ + + data_path = get_data_path() + data_path.mkdir(exist_ok=True) + + file_path = data_path / file_name + # If not exists, download and create file - if not (data_path / file_name).exists(): - data = requests.get(url) - (data_path / file_name).write_bytes(data.content) + if not file_path.exists(): + response = requests.get(url) + if response.status_code != 200: + raise FileNotFoundError("Could not download resource") + + tmp_path = data_path / "tmp.zip" + tmp_path.write_bytes(response.content) + + with zipfile.ZipFile(tmp_path, "r") as zip_file: + zip_file.extract(file_path.name, data_path) + + tmp_path.unlink() - return data_path / file_name + return file_path diff --git a/tests/benchmarks/bench.py b/tests/benchmarks/bench.py new file mode 100644 index 000000000..a1db2a0b0 --- /dev/null +++ b/tests/benchmarks/bench.py @@ -0,0 +1,59 @@ +from functools import partial + +import pandas as pd + +from pandas_profiling import ProfileReport +from pandas_profiling.utils.cache import cache_file + + +def func(df, **kwargs): + profile = ProfileReport(df, progress_bar=False, **kwargs) + report = profile.to_html() + return report + + +def test_titanic_explorative(benchmark): + file_name = cache_file( + "titanic.parquet", + "https://github.com/pandas-profiling/pandas-profiling-data/raw/master/data/titanic.parquet", + ) + + data = pd.read_parquet(file_name) + + kwargs = dict(explorative=True) + benchmark(partial(func, **kwargs), data) + + +def test_titanic_default(benchmark): + file_name = cache_file( + "titanic.parquet", + "https://github.com/pandas-profiling/pandas-profiling-data/raw/master/data/titanic.parquet", + ) + + data = pd.read_parquet(file_name) + + benchmark(partial(func), data) + + +def test_titanic_minimal(benchmark): + file_name = cache_file( + "titanic.parquet", + "https://github.com/pandas-profiling/pandas-profiling-data/raw/master/data/titanic.parquet", + ) + + data = pd.read_parquet(file_name) + + kwargs = dict(minimal=True) + benchmark(partial(func, **kwargs), data) + + +def test_rdw_minimal(benchmark): + file_name = cache_file( + "rdw_sample_100k.parquet", + "https://github.com/pandas-profiling/pandas-profiling-data/raw/master/data/rdw_sample_100k.parquet", + ) + + data = pd.read_parquet(file_name) + + kwargs = dict(minimal=True) + benchmark(partial(func, **kwargs), data) diff --git a/tests/issues/test_issue377.py b/tests/issues/test_issue377.py index 2ffa39a92..3362e812e 100644 --- a/tests/issues/test_issue377.py +++ b/tests/issues/test_issue377.py @@ -6,25 +6,35 @@ import pandas as pd import pytest +import requests -import pandas_profiling -from pandas_profiling.utils.cache import cache_file +from pandas_profiling import ProfileReport +from pandas_profiling.utils.cache import cache_zipped_file -@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") -def test_issue377(): - file_name = cache_file( - "bank-full.csv", - "https://storage.googleapis.com/erwinh-public-data/bankingdata/bank-full.csv", - ) +@pytest.fixture() +def df(): + try: + file_name = cache_zipped_file( + "bank-full.csv", + "https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank.zip", + ) + except (requests.exceptions.ConnectionError, FileNotFoundError): + return # Download the UCI Bank Marketing Dataset df = pd.read_csv(file_name, sep=";") + return df + + +@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") +def 
test_issue377(df): + if df is None: + pytest.skip("dataset unavailable") + return original_order = tuple(df.columns.values) - profile = pandas_profiling.ProfileReport( - df, sort="None", pool_size=1, progress_bar=False - ) + profile = ProfileReport(df, sort="None", pool_size=1, progress_bar=False) new_order = tuple(profile.get_description()["variables"].keys()) assert original_order == new_order diff --git a/tests/issues/test_issue51.py b/tests/issues/test_issue51.py index 50617ca81..71815f23e 100644 --- a/tests/issues/test_issue51.py +++ b/tests/issues/test_issue51.py @@ -7,9 +7,6 @@ import pandas_profiling -# FIXME: correlations can be computed stand alone to speed up tests -from pandas_profiling.config import config - def test_issue51(get_data_file): # Categorical has empty ('') value diff --git a/tests/performance/time_inf.py b/tests/performance/time_inf.py deleted file mode 100644 index ba2aecaa4..000000000 --- a/tests/performance/time_inf.py +++ /dev/null @@ -1,25 +0,0 @@ -import timeit - -testcode = """ -import numpy as np -import pandas as pd - -np.random.seed(12) -vals = np.random.random(10000) -series = pd.Series(vals) -series[series < 0.3] = np.nan -series[series < 0.2] = np.Inf - - - -def f1(series): - return len(series.loc[(~np.isfinite(series)) & series.notnull()]) - - -def f2(series): - return ((series == np.inf) | (series == -np.inf)).sum() -""" - - -print(timeit.timeit("f1(series)", number=10, setup=testcode)) -print(timeit.timeit("f2(series)", number=10, setup=testcode)) diff --git a/tests/performance/time_kurtosis.py b/tests/performance/time_kurtosis.py deleted file mode 100644 index dfa106272..000000000 --- a/tests/performance/time_kurtosis.py +++ /dev/null @@ -1,36 +0,0 @@ -import timeit - -testcode = """ -import numpy as np -import pandas as pd -import scipy.stats - -np.random.seed(12) -vals = np.random.random(1000) -series = pd.Series(vals) -series[series < 0.2] = pd.NA - -def f1(series): - arr = series.values - return scipy.stats.kurtosis(arr, bias=False, nan_policy='omit') - - -def f2(series): - arr = series.values - arr_without_nan = arr[~np.isnan(arr)] - return scipy.stats.kurtosis(arr_without_nan, bias=False) - - -def f3(series): - return series.kurtosis() - - -def f4(series): - return series[series.notna()].kurtosis() -""" - - -print(timeit.timeit("f1(series)", number=10, setup=testcode)) -print(timeit.timeit("f2(series)", number=10, setup=testcode)) -print(timeit.timeit("f3(series)", number=10, setup=testcode)) -print(timeit.timeit("f4(series)", number=10, setup=testcode)) diff --git a/tests/performance/time_mad.py b/tests/performance/time_mad.py deleted file mode 100644 index 8c6107614..000000000 --- a/tests/performance/time_mad.py +++ /dev/null @@ -1,56 +0,0 @@ -import timeit - -testcode = ''' -import numpy as np -import pandas as pd - -np.random.seed(12) -vals = np.random.random(1000) -series = pd.Series(vals) -series[series < 0.2] = pd.NA - - -def mad(arr): - """ Median Absolute Deviation: a "Robust" version of standard deviation. - Indices variabililty of the sample. - https://en.wikipedia.org/wiki/Median_absolute_deviation - """ - arr = np.ma.array(arr).compressed() # should be faster to not use masked arrays. - med = np.median(arr) - return np.median(np.abs(arr - med)) - - -def mad2(arr): - """ Median Absolute Deviation: a "Robust" version of standard deviation. - Indices variabililty of the sample. 
- https://en.wikipedia.org/wiki/Median_absolute_deviation - """ - med = np.median(arr) - return np.median(np.abs(arr - med)) - - -def f1(series): - arr = series.values - arr_without_nan = arr[~np.isnan(arr)] - return mad(arr_without_nan) - - -def f2(series): - arr = series.values - arr_without_nan = arr[~np.isnan(arr)] - return mad(arr_without_nan) - - -def f3(series): - return series.mad() - - -def f4(series): - return series[series.notna()].mad() -''' - - -print(timeit.timeit("f1(series)", number=10, setup=testcode)) -print(timeit.timeit("f2(series)", number=10, setup=testcode)) -print(timeit.timeit("f3(series)", number=10, setup=testcode)) -print(timeit.timeit("f4(series)", number=10, setup=testcode)) diff --git a/tests/performance/time_mean.py b/tests/performance/time_mean.py deleted file mode 100644 index f6149a4c0..000000000 --- a/tests/performance/time_mean.py +++ /dev/null @@ -1,36 +0,0 @@ -import timeit - -testcode = """ -import numpy as np -import pandas as pd - -np.random.seed(12) -vals = np.random.random(1000) -series = pd.Series(vals) -series[series < 0.2] = pd.NA - - -def f1(series): - arr = series.values - arr_without_nan = arr[~np.isnan(arr)] - return np.mean(arr_without_nan) - - -def f2(series): - arr = series.values - return np.nanmean(arr) - - -def f3(series): - return series.mean() - - -def f4(series): - return series[series.notna()].mean() -""" - - -print(timeit.timeit("f1(series)", number=10, setup=testcode)) -print(timeit.timeit("f2(series)", number=10, setup=testcode)) -print(timeit.timeit("f3(series)", number=10, setup=testcode)) -print(timeit.timeit("f4(series)", number=10, setup=testcode)) diff --git a/tests/performance/timings.py b/tests/performance/timings.py deleted file mode 100644 index acde9360d..000000000 --- a/tests/performance/timings.py +++ /dev/null @@ -1,113 +0,0 @@ -import timeit -from itertools import product -from string import ascii_lowercase - -import numpy as np -import pandas as pd -import seaborn as sns -from matplotlib import pyplot as plt - -from pandas_profiling import ProfileReport - - -def generate_column_names(n): - column_names = [] - iters = 1 - while len(column_names) < n: - column_names += list( - "".join(combo) for combo in product(ascii_lowercase, repeat=iters) - ) - iters += 1 - return column_names - - -def make_sample_data(cols, rows): - column_names = generate_column_names(cols) - - df = pd.DataFrame( - np.random.randint(0, 1000000, size=(rows, cols)), columns=column_names[0:cols] - ) - df = df.astype(str) - - assert df.shape == (rows, cols) - return df.copy() - - -def make_report_minimal(df): - report = ProfileReport( - df, - minimal=True, - pool_size=0, - sort="None", - title="Dataset with Numeric Categories", - ) - html = report.to_html() - assert type(html) == str and '

Dataset info
' in html - - -def make_report(df): - report = ProfileReport( - df, - minimal=False, - pool_size=0, - sort="None", - title="Dataset with Numeric Categories", - ) - html = report.to_html() - assert type(html) == str and '

Dataset info
' in html - - -def wrap_func(function): - def inner(df): - def double_inner(): - return function(df) - - return double_inner - - return inner - - -def time_report(func, cols, rows, runs=5): - df = make_sample_data(cols, rows) - print(df.shape) - test = wrap_func(func)(df.copy()) - return timeit.timeit(test, number=runs) / runs - - -def plot_col_run_time(): - cols = [2, 4, 10, 50] - row = 1000 - default_times = [time_report(make_report, col, row) for col in cols] - minimal_times = [time_report(make_report_minimal, col, row) for col in cols] - - ax1 = sns.scatterplot(cols, default_times) - ax2 = sns.scatterplot(cols, minimal_times) - _ = ax1.set( - xlabel=f"Number of columns (row={row})", - ylabel="time (s)", - title="Run Time Complexity", - ) - plt.show() - - -def plot_row_run_time(): - # 10, 100 - # https://github.com/pandas-profiling/pandas-profiling/issues/270 - rows = [1000, 10000, 100000] - col = 10 - default_times = [time_report(make_report, col, row) for row in rows] - minimal_times = [time_report(make_report_minimal, col, row) for row in rows] - - ax1 = sns.scatterplot(rows, default_times) - ax2 = sns.scatterplot(rows, minimal_times) - _ = ax1.set( - xlabel=f"Number of rows (col={col})", - ylabel="time (s)", - title="Run Time Complexity", - ) - plt.show() - - -if __name__ == "__main__": - plot_col_run_time() - plot_row_run_time() diff --git a/tests/unit/test_custom_sample.py b/tests/unit/test_custom_sample.py index 81a0bd551..4aab90280 100644 --- a/tests/unit/test_custom_sample.py +++ b/tests/unit/test_custom_sample.py @@ -1,5 +1,3 @@ -from pathlib import Path - import pandas as pd from pandas_profiling import ProfileReport diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 25d0a58bc..57f3dc5e0 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -1,5 +1,4 @@ import pandas as pd -import pytest import pandas_profiling @@ -16,11 +15,3 @@ def test_decorator(get_data_file): missing_diagrams={"heatmap": False, "dendrogram": False}, ) assert "Coursera Test Report" in report.to_html(), "Title is not found" - - -def test_empty_decorator(): - df = pd.DataFrame().profile_report(progress_bar=False) - with pytest.raises(ValueError) as e: - df.get_description() - - assert e.value.args[0] == "df can not be empty" diff --git a/tests/unit/test_describe.py b/tests/unit/test_describe.py index 1d6589df3..cbec917f8 100644 --- a/tests/unit/test_describe.py +++ b/tests/unit/test_describe.py @@ -571,12 +571,6 @@ def test_describe_df(column, describe_data, expected_results, summarizer, typese ), f"Histogram missing for column {column}" -def test_describe_empty(summarizer, typeset): - empty_frame = pd.DataFrame() - with pytest.raises(ValueError): - describe("", empty_frame, summarizer, typeset) - - def test_describe_list(summarizer, typeset): with pytest.raises(AttributeError): with pytest.warns(UserWarning): diff --git a/tests/unit/test_duplicates.py b/tests/unit/test_duplicates.py new file mode 100644 index 000000000..b3043edce --- /dev/null +++ b/tests/unit/test_duplicates.py @@ -0,0 +1,30 @@ +"""Test for the duplicates functionality""" +import numpy as np +import pandas as pd +import pytest + +from pandas_profiling.model.duplicates import get_duplicates + + +@pytest.fixture(scope="module") +def test_data(): + np.random.seed(5) + df = pd.DataFrame( + np.random.randint(1, 100, (100, 5)), + columns=["a", "b", "c", "duplicates", "count"], + ) + df = pd.concat([df, df], axis=0) + return df + + +def test_issue725(test_data): + metrics, duplicates = 
get_duplicates(test_data, list(test_data.columns)) + assert metrics["n_duplicates"] == 100 + assert metrics["p_duplicates"] == 0.5 + assert set(duplicates.columns) == set(test_data.columns).union({"# duplicates"}) + + +def test_issue725_existing(test_data): + test_data = test_data.rename(columns={"count": "# duplicates"}) + with pytest.raises(ValueError): + _, _ = get_duplicates(test_data, list(test_data.columns)) diff --git a/tests/unit/test_example.py b/tests/unit/test_example.py index cbb72a6ee..8b1487543 100644 --- a/tests/unit/test_example.py +++ b/tests/unit/test_example.py @@ -50,3 +50,16 @@ def test_example(get_data_file, test_output_dir): and len(profile.get_description().items()) == 10 ), "Unexpected result" assert "12" in profile.to_html() + + +def test_example_empty(): + df = pd.DataFrame({"A": [], "B": []}) + profile = ProfileReport(df) + description = profile.get_description() + + assert len(description["correlations"]) == 0 + assert len(description["missing"]) == 0 + assert len(description["sample"]) == 0 + + html = profile.to_html() + assert "Dataset is empty" in html diff --git a/tests/unit/test_formatters.py b/tests/unit/test_formatters.py index 09711cd47..4f6f46faf 100644 --- a/tests/unit/test_formatters.py +++ b/tests/unit/test_formatters.py @@ -6,6 +6,7 @@ fmt_bytesize, fmt_class, fmt_color, + fmt_monotonic, fmt_numeric, ) @@ -79,7 +80,36 @@ def test_fmt_array(array, threshold, expected): (81.000000, 10, "81"), (81, 10, "81"), (81.999861123123123123, 10, "81.99986112"), + (1e20, 10, "1 × 1020"), + (1e-20, 10, "1 × 10-20"), + (1e8, 3, "1 × 108"), ], ) def test_fmt_numeric(value, precision, expected): assert fmt_numeric(value, precision) == expected + + +@pytest.mark.parametrize( + "value, expected", + [ + (-2, "Strictly decreasing"), + (-1, "Decreasing"), + (0, "Not monotonic"), + (1, "Increasing"), + (2, "Strictly increasing"), + ], +) +def test_fmt_monotonic(value, expected): + assert fmt_monotonic(value) == expected + + +@pytest.mark.parametrize( + "value", + [ + -3, + 3, + ], +) +def test_fmt_monotonic_err(value): + with pytest.raises(ValueError): + fmt_monotonic(value) diff --git a/tests/unit/test_ge_integration_expectations.py b/tests/unit/test_ge_integration_expectations.py index 40f3850ca..443f2f1aa 100644 --- a/tests/unit/test_ge_integration_expectations.py +++ b/tests/unit/test_ge_integration_expectations.py @@ -123,6 +123,7 @@ def test_datetime_expectations(batch): "column", min_value=0, max_value=100, + parse_strings_as_datetimes=True, ) diff --git a/tests/unit/test_interactions.py b/tests/unit/test_interactions.py index ac658bb99..e02dac497 100644 --- a/tests/unit/test_interactions.py +++ b/tests/unit/test_interactions.py @@ -1,5 +1,3 @@ -from pathlib import Path - import numpy as np import pandas as pd diff --git a/tests/unit/test_summary.py b/tests/unit/test_summary.py new file mode 100644 index 000000000..f44de68b0 --- /dev/null +++ b/tests/unit/test_summary.py @@ -0,0 +1,10 @@ +import pandas as pd + +from pandas_profiling.model.summary import get_table_stats + + +def test_get_table_stats_empty_df(): + df = pd.DataFrame({"A": [], "B": []}) + table_stats = get_table_stats(df, {}) + assert table_stats["n"] == 0 + assert table_stats["p_cells_missing"] == 0 diff --git a/tests/unit/test_summary_algos.py b/tests/unit/test_summary_algos.py index 98460bec6..ec5846670 100644 --- a/tests/unit/test_summary_algos.py +++ b/tests/unit/test_summary_algos.py @@ -1,7 +1,12 @@ import numpy as np import pandas as pd +import pytest -from 
pandas_profiling.model.summary_algorithms import describe_counts +from pandas_profiling.model.summary_algorithms import ( + describe_counts, + describe_generic, + describe_supported, +) def test_count_summary_sorted(): @@ -24,3 +29,25 @@ def test_count_summary_category(): ) sn, r = describe_counts(s, {}) assert len(r["value_counts_without_nan"].index) == 2 + + +@pytest.fixture(scope="class") +def empty_data() -> pd.DataFrame: + return pd.DataFrame({"A": []}) + + +def test_summary_supported_empty_df(empty_data): + series, summary = describe_counts(empty_data["A"], {}) + assert summary["n_missing"] == 0 + assert "p_missing" not in summary + + series, summary = describe_generic(series, summary) + assert summary["n_missing"] == 0 + assert summary["p_missing"] == 0 + assert summary["count"] == 0 + + _, summary = describe_supported(series, summary) + assert summary["n_distinct"] == 0 + assert summary["p_distinct"] == 0 + assert summary["n_unique"] == 0 + assert not summary["is_unique"]
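Finally, a short end-to-end sketch of the empty-DataFrame behaviour these tests pin down: profiling an empty frame no longer raises, correlations, missing-value diagrams and samples are skipped, and the report carries the new "Dataset is empty" warning. It uses only `ProfileReport` options already exercised elsewhere in this diff.

```python
# Sketch patterned on tests/unit/test_example.py::test_example_empty above:
# an empty DataFrame now produces a report instead of raising ValueError.
import pandas as pd

from pandas_profiling import ProfileReport

df = pd.DataFrame({"A": [], "B": []})

profile = ProfileReport(df, title="Empty dataset", progress_bar=False)
description = profile.get_description()

# Expensive sections are simply skipped for empty input.
assert len(description["correlations"]) == 0
assert len(description["missing"]) == 0
assert len(description["sample"]) == 0

# The rendered report carries the new EMPTY warning.
assert "Dataset is empty" in profile.to_html()
```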