diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 4f2a7fbf..30ec26b6 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -16,7 +16,7 @@ Provide a concise Python code snippet that demonstrates the issue. To display th ```python import xeofs as xe -model = xe.models.EOF() +model = xe.single.EOF() ... ``` diff --git a/.github/ISSUE_TEMPLATE/how_to.md b/.github/ISSUE_TEMPLATE/how_to.md index f29b2c6f..10f41a3b 100644 --- a/.github/ISSUE_TEMPLATE/how_to.md +++ b/.github/ISSUE_TEMPLATE/how_to.md @@ -16,7 +16,7 @@ Provide a concise Python code snippet that demonstrates your approach. To displa ```python import xeofs as xe -model = xe.models.EOF() +model = xe.single.EOF() ... ``` diff --git a/.gitignore b/.gitignore index 91b8f5cf..2051d342 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ # Personal .vscode/ # Test related to CCA -tests/models/test_cca_solution.py +tests/**/test_cca_solution.py # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/README.md b/README.md index ff963931..9da5e218 100644 --- a/README.md +++ b/README.md @@ -65,9 +65,9 @@ In order to get started with `xeofs`, follow these simple steps: Initiate and fit the EOF/PCA model to the data ```python ->>> eof = xe.models.EOF(n_modes=10) +>>> eof = xe.single.EOF(n_modes=10) >>> eof.fit(t2m, dim="time") # doctest: +ELLIPSIS - + ``` Now, you can access the model's EOF components and PC scores: @@ -82,9 +82,9 @@ Now, you can access the model's EOF components and PC scores: Initiate and fit an `EOFRotator` class to the model to obtain a varimax-rotated EOF analysis ```python ->>> rotator = xe.models.EOFRotator(n_modes=3) +>>> rotator = xe.single.EOFRotator(n_modes=3) >>> rotator.fit(eof) # doctest: +ELLIPSIS - + >>> rot_comps = rotator.components() # Rotated EOFs (spatial patterns) >>> rot_scores = rotator.scores() # Rotated PCs (temporal patterns) @@ -94,9 +94,9 @@ Initiate and fit an `EOFRotator` class to the model to obtain a varimax-rotated **Maximum Covariance Analysis (MCA)** ```python ->>> mca = xe.models.MCA(n_modes=10) +>>> mca = xe.cross.MCA(n_modes=10) >>> mca.fit(t2m_west, t2m_east, dim="time") # doctest: +ELLIPSIS - + >>> comps1, comps2 = mca.components() # Singular vectors (spatial patterns) >>> scores1, scores2 = mca.scores() # Expansion coefficients (temporal patterns) @@ -106,9 +106,9 @@ Initiate and fit an `EOFRotator` class to the model to obtain a varimax-rotated **Varimax-rotated MCA** ```python ->>> rotator = xe.models.MCARotator(n_modes=10) +>>> rotator = xe.cross.MCARotator(n_modes=10) >>> rotator.fit(mca) # doctest: +ELLIPSIS - + >>> rot_comps = rotator.components() # Rotated singular vectors (spatial patterns) >>> rot_scores = rotator.scores() # Rotated expansion coefficients (temporal patterns) diff --git a/xeofs/models/_np_classes/__init__.py b/docs/__init__.py similarity index 100% rename from xeofs/models/_np_classes/__init__.py rename to docs/__init__.py diff --git a/docs/_autosummary/xeofs.models.ComplexEOF.rst b/docs/_autosummary/xeofs.models.ComplexEOF.rst deleted file mode 100644 index 7eb1f432..00000000 --- a/docs/_autosummary/xeofs.models.ComplexEOF.rst +++ /dev/null @@ -1,45 +0,0 @@ -xeofs.models.ComplexEOF -======================= - -.. currentmodule:: xeofs.models - -.. autoclass:: ComplexEOF - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~ComplexEOF.__init__ - ~ComplexEOF.components - ~ComplexEOF.components_amplitude - ~ComplexEOF.components_phase - ~ComplexEOF.compute - ~ComplexEOF.deserialize - ~ComplexEOF.explained_variance - ~ComplexEOF.explained_variance_ratio - ~ComplexEOF.fit - ~ComplexEOF.fit_transform - ~ComplexEOF.get_params - ~ComplexEOF.get_serialization_attrs - ~ComplexEOF.inverse_transform - ~ComplexEOF.load - ~ComplexEOF.save - ~ComplexEOF.scores - ~ComplexEOF.scores_amplitude - ~ComplexEOF.scores_phase - ~ComplexEOF.serialize - ~ComplexEOF.singular_values - ~ComplexEOF.transform - - - - - - \ No newline at end of file diff --git a/docs/_autosummary/xeofs.models.ComplexEOFRotator.rst b/docs/_autosummary/xeofs.models.ComplexEOFRotator.rst deleted file mode 100644 index 41edbcde..00000000 --- a/docs/_autosummary/xeofs.models.ComplexEOFRotator.rst +++ /dev/null @@ -1,45 +0,0 @@ -xeofs.models.ComplexEOFRotator -============================== - -.. currentmodule:: xeofs.models - -.. autoclass:: ComplexEOFRotator - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~ComplexEOFRotator.__init__ - ~ComplexEOFRotator.components - ~ComplexEOFRotator.components_amplitude - ~ComplexEOFRotator.components_phase - ~ComplexEOFRotator.compute - ~ComplexEOFRotator.deserialize - ~ComplexEOFRotator.explained_variance - ~ComplexEOFRotator.explained_variance_ratio - ~ComplexEOFRotator.fit - ~ComplexEOFRotator.fit_transform - ~ComplexEOFRotator.get_params - ~ComplexEOFRotator.get_serialization_attrs - ~ComplexEOFRotator.inverse_transform - ~ComplexEOFRotator.load - ~ComplexEOFRotator.save - ~ComplexEOFRotator.scores - ~ComplexEOFRotator.scores_amplitude - ~ComplexEOFRotator.scores_phase - ~ComplexEOFRotator.serialize - ~ComplexEOFRotator.singular_values - ~ComplexEOFRotator.transform - - - - - - \ No newline at end of file diff --git a/docs/_autosummary/xeofs.models.ComplexMCA.rst b/docs/_autosummary/xeofs.models.ComplexMCA.rst deleted file mode 100644 index 9d8c7f53..00000000 --- a/docs/_autosummary/xeofs.models.ComplexMCA.rst +++ /dev/null @@ -1,48 +0,0 @@ -xeofs.models.ComplexMCA -======================= - -.. currentmodule:: xeofs.models - -.. autoclass:: ComplexMCA - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~ComplexMCA.__init__ - ~ComplexMCA.components - ~ComplexMCA.components_amplitude - ~ComplexMCA.components_phase - ~ComplexMCA.compute - ~ComplexMCA.covariance_fraction - ~ComplexMCA.deserialize - ~ComplexMCA.fit - ~ComplexMCA.get_params - ~ComplexMCA.get_serialization_attrs - ~ComplexMCA.heterogeneous_patterns - ~ComplexMCA.homogeneous_patterns - ~ComplexMCA.inverse_transform - ~ComplexMCA.load - ~ComplexMCA.save - ~ComplexMCA.scores - ~ComplexMCA.scores_amplitude - ~ComplexMCA.scores_phase - ~ComplexMCA.serialize - ~ComplexMCA.singular_values - ~ComplexMCA.squared_covariance - ~ComplexMCA.squared_covariance_fraction - ~ComplexMCA.total_covariance - ~ComplexMCA.transform - - - - - - \ No newline at end of file diff --git a/docs/_autosummary/xeofs.models.EOF.rst b/docs/_autosummary/xeofs.models.EOF.rst deleted file mode 100644 index 90da1cde..00000000 --- a/docs/_autosummary/xeofs.models.EOF.rst +++ /dev/null @@ -1,41 +0,0 @@ -xeofs.models.EOF -================ - -.. currentmodule:: xeofs.models - -.. autoclass:: EOF - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. 
rubric:: Methods - - .. autosummary:: - - ~EOF.__init__ - ~EOF.components - ~EOF.compute - ~EOF.deserialize - ~EOF.explained_variance - ~EOF.explained_variance_ratio - ~EOF.fit - ~EOF.fit_transform - ~EOF.get_params - ~EOF.get_serialization_attrs - ~EOF.inverse_transform - ~EOF.load - ~EOF.save - ~EOF.scores - ~EOF.serialize - ~EOF.singular_values - ~EOF.transform - - - - - - \ No newline at end of file diff --git a/docs/_autosummary/xeofs.models.EOFRotator.rst b/docs/_autosummary/xeofs.models.EOFRotator.rst deleted file mode 100644 index c1f06783..00000000 --- a/docs/_autosummary/xeofs.models.EOFRotator.rst +++ /dev/null @@ -1,41 +0,0 @@ -xeofs.models.EOFRotator -======================= - -.. currentmodule:: xeofs.models - -.. autoclass:: EOFRotator - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~EOFRotator.__init__ - ~EOFRotator.components - ~EOFRotator.compute - ~EOFRotator.deserialize - ~EOFRotator.explained_variance - ~EOFRotator.explained_variance_ratio - ~EOFRotator.fit - ~EOFRotator.fit_transform - ~EOFRotator.get_params - ~EOFRotator.get_serialization_attrs - ~EOFRotator.inverse_transform - ~EOFRotator.load - ~EOFRotator.save - ~EOFRotator.scores - ~EOFRotator.serialize - ~EOFRotator.singular_values - ~EOFRotator.transform - - - - - - \ No newline at end of file diff --git a/docs/_autosummary/xeofs.models.ExtendedEOF.rst b/docs/_autosummary/xeofs.models.ExtendedEOF.rst deleted file mode 100644 index b1740349..00000000 --- a/docs/_autosummary/xeofs.models.ExtendedEOF.rst +++ /dev/null @@ -1,41 +0,0 @@ -xeofs.models.ExtendedEOF -======================== - -.. currentmodule:: xeofs.models - -.. autoclass:: ExtendedEOF - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~ExtendedEOF.__init__ - ~ExtendedEOF.components - ~ExtendedEOF.compute - ~ExtendedEOF.deserialize - ~ExtendedEOF.explained_variance - ~ExtendedEOF.explained_variance_ratio - ~ExtendedEOF.fit - ~ExtendedEOF.fit_transform - ~ExtendedEOF.get_params - ~ExtendedEOF.get_serialization_attrs - ~ExtendedEOF.inverse_transform - ~ExtendedEOF.load - ~ExtendedEOF.save - ~ExtendedEOF.scores - ~ExtendedEOF.serialize - ~ExtendedEOF.singular_values - ~ExtendedEOF.transform - - - - - - \ No newline at end of file diff --git a/docs/_autosummary/xeofs.models.GWPCA.rst b/docs/_autosummary/xeofs.models.GWPCA.rst deleted file mode 100644 index 2bc03eb9..00000000 --- a/docs/_autosummary/xeofs.models.GWPCA.rst +++ /dev/null @@ -1,41 +0,0 @@ -xeofs.models.GWPCA -================== - -.. currentmodule:: xeofs.models - -.. autoclass:: GWPCA - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~GWPCA.__init__ - ~GWPCA.components - ~GWPCA.compute - ~GWPCA.deserialize - ~GWPCA.explained_variance - ~GWPCA.explained_variance_ratio - ~GWPCA.fit - ~GWPCA.fit_transform - ~GWPCA.get_params - ~GWPCA.get_serialization_attrs - ~GWPCA.inverse_transform - ~GWPCA.largest_locally_weighted_components - ~GWPCA.load - ~GWPCA.save - ~GWPCA.scores - ~GWPCA.serialize - ~GWPCA.transform - - - - - - \ No newline at end of file diff --git a/docs/_autosummary/xeofs.models.OPA.rst b/docs/_autosummary/xeofs.models.OPA.rst deleted file mode 100644 index 53000a77..00000000 --- a/docs/_autosummary/xeofs.models.OPA.rst +++ /dev/null @@ -1,40 +0,0 @@ -xeofs.models.OPA -================ - -.. 
currentmodule:: xeofs.models - -.. autoclass:: OPA - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~OPA.__init__ - ~OPA.components - ~OPA.compute - ~OPA.decorrelation_time - ~OPA.deserialize - ~OPA.filter_patterns - ~OPA.fit - ~OPA.fit_transform - ~OPA.get_params - ~OPA.get_serialization_attrs - ~OPA.inverse_transform - ~OPA.load - ~OPA.save - ~OPA.scores - ~OPA.serialize - ~OPA.transform - - - - - - \ No newline at end of file diff --git a/docs/_autosummary/xeofs.validation.EOFBootstrapper.rst b/docs/_autosummary/xeofs.validation.EOFBootstrapper.rst deleted file mode 100644 index 748247b8..00000000 --- a/docs/_autosummary/xeofs.validation.EOFBootstrapper.rst +++ /dev/null @@ -1,41 +0,0 @@ -xeofs.validation.EOFBootstrapper -================================ - -.. currentmodule:: xeofs.validation - -.. autoclass:: EOFBootstrapper - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~EOFBootstrapper.__init__ - ~EOFBootstrapper.components - ~EOFBootstrapper.compute - ~EOFBootstrapper.deserialize - ~EOFBootstrapper.explained_variance - ~EOFBootstrapper.explained_variance_ratio - ~EOFBootstrapper.fit - ~EOFBootstrapper.fit_transform - ~EOFBootstrapper.get_params - ~EOFBootstrapper.get_serialization_attrs - ~EOFBootstrapper.inverse_transform - ~EOFBootstrapper.load - ~EOFBootstrapper.save - ~EOFBootstrapper.scores - ~EOFBootstrapper.serialize - ~EOFBootstrapper.singular_values - ~EOFBootstrapper.transform - - - - - - \ No newline at end of file diff --git a/docs/_templates/custom-class-template.rst b/docs/_templates/custom-class-template.rst index 4197f9f0..a7db2b37 100644 --- a/docs/_templates/custom-class-template.rst +++ b/docs/_templates/custom-class-template.rst @@ -4,7 +4,6 @@ .. autoclass:: {{ objname }} :members: - :show-inheritance: :inherited-members: {% block methods %} diff --git a/docs/_autosummary/xeofs.models.RotatorFactory.rst b/docs/api_reference/_autosummary/xeofs.RotatorFactory.rst similarity index 67% rename from docs/_autosummary/xeofs.models.RotatorFactory.rst rename to docs/api_reference/_autosummary/xeofs.RotatorFactory.rst index b23f04da..0b54f4b2 100644 --- a/docs/_autosummary/xeofs.models.RotatorFactory.rst +++ b/docs/api_reference/_autosummary/xeofs.RotatorFactory.rst @@ -1,11 +1,10 @@ -xeofs.models.RotatorFactory -=========================== +RotatorFactory +============== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs .. autoclass:: RotatorFactory :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/_autosummary/xeofs.cross.CCA.rst b/docs/api_reference/_autosummary/xeofs.cross.CCA.rst new file mode 100644 index 00000000..1149b43d --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.CCA.rst @@ -0,0 +1,46 @@ +CCA +=== + +.. currentmodule:: xeofs.cross + +.. autoclass:: CCA + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~CCA.__init__ + ~CCA.components + ~CCA.compute + ~CCA.correlation_coefficients_X + ~CCA.correlation_coefficients_Y + ~CCA.cross_correlation_coefficients + ~CCA.deserialize + ~CCA.fit + ~CCA.fraction_variance_X_explained_by_X + ~CCA.fraction_variance_Y_explained_by_X + ~CCA.fraction_variance_Y_explained_by_Y + ~CCA.get_params + ~CCA.get_serialization_attrs + ~CCA.heterogeneous_patterns + ~CCA.homogeneous_patterns + ~CCA.inverse_transform + ~CCA.load + ~CCA.predict + ~CCA.save + ~CCA.scores + ~CCA.serialize + ~CCA.squared_covariance_fraction + ~CCA.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.CPCCA.rst b/docs/api_reference/_autosummary/xeofs.cross.CPCCA.rst new file mode 100644 index 00000000..0cf9e049 --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.CPCCA.rst @@ -0,0 +1,46 @@ +CPCCA +===== + +.. currentmodule:: xeofs.cross + +.. autoclass:: CPCCA + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~CPCCA.__init__ + ~CPCCA.components + ~CPCCA.compute + ~CPCCA.correlation_coefficients_X + ~CPCCA.correlation_coefficients_Y + ~CPCCA.cross_correlation_coefficients + ~CPCCA.deserialize + ~CPCCA.fit + ~CPCCA.fraction_variance_X_explained_by_X + ~CPCCA.fraction_variance_Y_explained_by_X + ~CPCCA.fraction_variance_Y_explained_by_Y + ~CPCCA.get_params + ~CPCCA.get_serialization_attrs + ~CPCCA.heterogeneous_patterns + ~CPCCA.homogeneous_patterns + ~CPCCA.inverse_transform + ~CPCCA.load + ~CPCCA.predict + ~CPCCA.save + ~CPCCA.scores + ~CPCCA.serialize + ~CPCCA.squared_covariance_fraction + ~CPCCA.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.CPCCARotator.rst b/docs/api_reference/_autosummary/xeofs.cross.CPCCARotator.rst new file mode 100644 index 00000000..534d5abe --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.CPCCARotator.rst @@ -0,0 +1,46 @@ +CPCCARotator +============ + +.. currentmodule:: xeofs.cross + +.. autoclass:: CPCCARotator + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~CPCCARotator.__init__ + ~CPCCARotator.components + ~CPCCARotator.compute + ~CPCCARotator.correlation_coefficients_X + ~CPCCARotator.correlation_coefficients_Y + ~CPCCARotator.cross_correlation_coefficients + ~CPCCARotator.deserialize + ~CPCCARotator.fit + ~CPCCARotator.fraction_variance_X_explained_by_X + ~CPCCARotator.fraction_variance_Y_explained_by_X + ~CPCCARotator.fraction_variance_Y_explained_by_Y + ~CPCCARotator.get_params + ~CPCCARotator.get_serialization_attrs + ~CPCCARotator.heterogeneous_patterns + ~CPCCARotator.homogeneous_patterns + ~CPCCARotator.inverse_transform + ~CPCCARotator.load + ~CPCCARotator.predict + ~CPCCARotator.save + ~CPCCARotator.scores + ~CPCCARotator.serialize + ~CPCCARotator.squared_covariance_fraction + ~CPCCARotator.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.ComplexCCA.rst b/docs/api_reference/_autosummary/xeofs.cross.ComplexCCA.rst new file mode 100644 index 00000000..0c09990b --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.ComplexCCA.rst @@ -0,0 +1,50 @@ +ComplexCCA +========== + +.. currentmodule:: xeofs.cross + +.. autoclass:: ComplexCCA + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~ComplexCCA.__init__ + ~ComplexCCA.components + ~ComplexCCA.components_amplitude + ~ComplexCCA.components_phase + ~ComplexCCA.compute + ~ComplexCCA.correlation_coefficients_X + ~ComplexCCA.correlation_coefficients_Y + ~ComplexCCA.cross_correlation_coefficients + ~ComplexCCA.deserialize + ~ComplexCCA.fit + ~ComplexCCA.fraction_variance_X_explained_by_X + ~ComplexCCA.fraction_variance_Y_explained_by_X + ~ComplexCCA.fraction_variance_Y_explained_by_Y + ~ComplexCCA.get_params + ~ComplexCCA.get_serialization_attrs + ~ComplexCCA.heterogeneous_patterns + ~ComplexCCA.homogeneous_patterns + ~ComplexCCA.inverse_transform + ~ComplexCCA.load + ~ComplexCCA.predict + ~ComplexCCA.save + ~ComplexCCA.scores + ~ComplexCCA.scores_amplitude + ~ComplexCCA.scores_phase + ~ComplexCCA.serialize + ~ComplexCCA.squared_covariance_fraction + ~ComplexCCA.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.ComplexCPCCA.rst b/docs/api_reference/_autosummary/xeofs.cross.ComplexCPCCA.rst new file mode 100644 index 00000000..617ae608 --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.ComplexCPCCA.rst @@ -0,0 +1,50 @@ +ComplexCPCCA +============ + +.. currentmodule:: xeofs.cross + +.. autoclass:: ComplexCPCCA + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~ComplexCPCCA.__init__ + ~ComplexCPCCA.components + ~ComplexCPCCA.components_amplitude + ~ComplexCPCCA.components_phase + ~ComplexCPCCA.compute + ~ComplexCPCCA.correlation_coefficients_X + ~ComplexCPCCA.correlation_coefficients_Y + ~ComplexCPCCA.cross_correlation_coefficients + ~ComplexCPCCA.deserialize + ~ComplexCPCCA.fit + ~ComplexCPCCA.fraction_variance_X_explained_by_X + ~ComplexCPCCA.fraction_variance_Y_explained_by_X + ~ComplexCPCCA.fraction_variance_Y_explained_by_Y + ~ComplexCPCCA.get_params + ~ComplexCPCCA.get_serialization_attrs + ~ComplexCPCCA.heterogeneous_patterns + ~ComplexCPCCA.homogeneous_patterns + ~ComplexCPCCA.inverse_transform + ~ComplexCPCCA.load + ~ComplexCPCCA.predict + ~ComplexCPCCA.save + ~ComplexCPCCA.scores + ~ComplexCPCCA.scores_amplitude + ~ComplexCPCCA.scores_phase + ~ComplexCPCCA.serialize + ~ComplexCPCCA.squared_covariance_fraction + ~ComplexCPCCA.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.ComplexCPCCARotator.rst b/docs/api_reference/_autosummary/xeofs.cross.ComplexCPCCARotator.rst new file mode 100644 index 00000000..aac762a6 --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.ComplexCPCCARotator.rst @@ -0,0 +1,50 @@ +ComplexCPCCARotator +=================== + +.. currentmodule:: xeofs.cross + +.. autoclass:: ComplexCPCCARotator + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~ComplexCPCCARotator.__init__ + ~ComplexCPCCARotator.components + ~ComplexCPCCARotator.components_amplitude + ~ComplexCPCCARotator.components_phase + ~ComplexCPCCARotator.compute + ~ComplexCPCCARotator.correlation_coefficients_X + ~ComplexCPCCARotator.correlation_coefficients_Y + ~ComplexCPCCARotator.cross_correlation_coefficients + ~ComplexCPCCARotator.deserialize + ~ComplexCPCCARotator.fit + ~ComplexCPCCARotator.fraction_variance_X_explained_by_X + ~ComplexCPCCARotator.fraction_variance_Y_explained_by_X + ~ComplexCPCCARotator.fraction_variance_Y_explained_by_Y + ~ComplexCPCCARotator.get_params + ~ComplexCPCCARotator.get_serialization_attrs + ~ComplexCPCCARotator.heterogeneous_patterns + ~ComplexCPCCARotator.homogeneous_patterns + ~ComplexCPCCARotator.inverse_transform + ~ComplexCPCCARotator.load + ~ComplexCPCCARotator.predict + ~ComplexCPCCARotator.save + ~ComplexCPCCARotator.scores + ~ComplexCPCCARotator.scores_amplitude + ~ComplexCPCCARotator.scores_phase + ~ComplexCPCCARotator.serialize + ~ComplexCPCCARotator.squared_covariance_fraction + ~ComplexCPCCARotator.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.models.ComplexMCA.rst b/docs/api_reference/_autosummary/xeofs.cross.ComplexMCA.rst similarity index 65% rename from docs/api_reference/_autosummary/xeofs.models.ComplexMCA.rst rename to docs/api_reference/_autosummary/xeofs.cross.ComplexMCA.rst index 9d8c7f53..7cf80815 100644 --- a/docs/api_reference/_autosummary/xeofs.models.ComplexMCA.rst +++ b/docs/api_reference/_autosummary/xeofs.cross.ComplexMCA.rst @@ -1,11 +1,10 @@ -xeofs.models.ComplexMCA -======================= +ComplexMCA +========== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.cross .. autoclass:: ComplexMCA :members: - :show-inheritance: :inherited-members: @@ -21,24 +20,28 @@ ~ComplexMCA.components_amplitude ~ComplexMCA.components_phase ~ComplexMCA.compute - ~ComplexMCA.covariance_fraction + ~ComplexMCA.correlation_coefficients_X + ~ComplexMCA.correlation_coefficients_Y + ~ComplexMCA.covariance_fraction_CD95 + ~ComplexMCA.cross_correlation_coefficients ~ComplexMCA.deserialize ~ComplexMCA.fit + ~ComplexMCA.fraction_variance_X_explained_by_X + ~ComplexMCA.fraction_variance_Y_explained_by_X + ~ComplexMCA.fraction_variance_Y_explained_by_Y ~ComplexMCA.get_params ~ComplexMCA.get_serialization_attrs ~ComplexMCA.heterogeneous_patterns ~ComplexMCA.homogeneous_patterns ~ComplexMCA.inverse_transform ~ComplexMCA.load + ~ComplexMCA.predict ~ComplexMCA.save ~ComplexMCA.scores ~ComplexMCA.scores_amplitude ~ComplexMCA.scores_phase ~ComplexMCA.serialize - ~ComplexMCA.singular_values - ~ComplexMCA.squared_covariance ~ComplexMCA.squared_covariance_fraction - ~ComplexMCA.total_covariance ~ComplexMCA.transform diff --git a/docs/_autosummary/xeofs.models.ComplexMCARotator.rst b/docs/api_reference/_autosummary/xeofs.cross.ComplexMCARotator.rst similarity index 65% rename from docs/_autosummary/xeofs.models.ComplexMCARotator.rst rename to docs/api_reference/_autosummary/xeofs.cross.ComplexMCARotator.rst index 4c647104..b05a460a 100644 --- a/docs/_autosummary/xeofs.models.ComplexMCARotator.rst +++ b/docs/api_reference/_autosummary/xeofs.cross.ComplexMCARotator.rst @@ -1,11 +1,10 @@ -xeofs.models.ComplexMCARotator -============================== +ComplexMCARotator +================= -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.cross .. 
autoclass:: ComplexMCARotator :members: - :show-inheritance: :inherited-members: @@ -21,24 +20,28 @@ ~ComplexMCARotator.components_amplitude ~ComplexMCARotator.components_phase ~ComplexMCARotator.compute - ~ComplexMCARotator.covariance_fraction + ~ComplexMCARotator.correlation_coefficients_X + ~ComplexMCARotator.correlation_coefficients_Y + ~ComplexMCARotator.covariance_fraction_CD95 + ~ComplexMCARotator.cross_correlation_coefficients ~ComplexMCARotator.deserialize ~ComplexMCARotator.fit + ~ComplexMCARotator.fraction_variance_X_explained_by_X + ~ComplexMCARotator.fraction_variance_Y_explained_by_X + ~ComplexMCARotator.fraction_variance_Y_explained_by_Y ~ComplexMCARotator.get_params ~ComplexMCARotator.get_serialization_attrs ~ComplexMCARotator.heterogeneous_patterns ~ComplexMCARotator.homogeneous_patterns ~ComplexMCARotator.inverse_transform ~ComplexMCARotator.load + ~ComplexMCARotator.predict ~ComplexMCARotator.save ~ComplexMCARotator.scores ~ComplexMCARotator.scores_amplitude ~ComplexMCARotator.scores_phase ~ComplexMCARotator.serialize - ~ComplexMCARotator.singular_values - ~ComplexMCARotator.squared_covariance ~ComplexMCARotator.squared_covariance_fraction - ~ComplexMCARotator.total_covariance ~ComplexMCARotator.transform diff --git a/docs/api_reference/_autosummary/xeofs.cross.ComplexRDA.rst b/docs/api_reference/_autosummary/xeofs.cross.ComplexRDA.rst new file mode 100644 index 00000000..d8c22172 --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.ComplexRDA.rst @@ -0,0 +1,50 @@ +ComplexRDA +========== + +.. currentmodule:: xeofs.cross + +.. autoclass:: ComplexRDA + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~ComplexRDA.__init__ + ~ComplexRDA.components + ~ComplexRDA.components_amplitude + ~ComplexRDA.components_phase + ~ComplexRDA.compute + ~ComplexRDA.correlation_coefficients_X + ~ComplexRDA.correlation_coefficients_Y + ~ComplexRDA.cross_correlation_coefficients + ~ComplexRDA.deserialize + ~ComplexRDA.fit + ~ComplexRDA.fraction_variance_X_explained_by_X + ~ComplexRDA.fraction_variance_Y_explained_by_X + ~ComplexRDA.fraction_variance_Y_explained_by_Y + ~ComplexRDA.get_params + ~ComplexRDA.get_serialization_attrs + ~ComplexRDA.heterogeneous_patterns + ~ComplexRDA.homogeneous_patterns + ~ComplexRDA.inverse_transform + ~ComplexRDA.load + ~ComplexRDA.predict + ~ComplexRDA.save + ~ComplexRDA.scores + ~ComplexRDA.scores_amplitude + ~ComplexRDA.scores_phase + ~ComplexRDA.serialize + ~ComplexRDA.squared_covariance_fraction + ~ComplexRDA.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.HilbertCCA.rst b/docs/api_reference/_autosummary/xeofs.cross.HilbertCCA.rst new file mode 100644 index 00000000..6f696a7e --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.HilbertCCA.rst @@ -0,0 +1,50 @@ +HilbertCCA +========== + +.. currentmodule:: xeofs.cross + +.. autoclass:: HilbertCCA + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~HilbertCCA.__init__ + ~HilbertCCA.components + ~HilbertCCA.components_amplitude + ~HilbertCCA.components_phase + ~HilbertCCA.compute + ~HilbertCCA.correlation_coefficients_X + ~HilbertCCA.correlation_coefficients_Y + ~HilbertCCA.cross_correlation_coefficients + ~HilbertCCA.deserialize + ~HilbertCCA.fit + ~HilbertCCA.fraction_variance_X_explained_by_X + ~HilbertCCA.fraction_variance_Y_explained_by_X + ~HilbertCCA.fraction_variance_Y_explained_by_Y + ~HilbertCCA.get_params + ~HilbertCCA.get_serialization_attrs + ~HilbertCCA.heterogeneous_patterns + ~HilbertCCA.homogeneous_patterns + ~HilbertCCA.inverse_transform + ~HilbertCCA.load + ~HilbertCCA.predict + ~HilbertCCA.save + ~HilbertCCA.scores + ~HilbertCCA.scores_amplitude + ~HilbertCCA.scores_phase + ~HilbertCCA.serialize + ~HilbertCCA.squared_covariance_fraction + ~HilbertCCA.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.HilbertCPCCA.rst b/docs/api_reference/_autosummary/xeofs.cross.HilbertCPCCA.rst new file mode 100644 index 00000000..1d6410ea --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.HilbertCPCCA.rst @@ -0,0 +1,50 @@ +HilbertCPCCA +============ + +.. currentmodule:: xeofs.cross + +.. autoclass:: HilbertCPCCA + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~HilbertCPCCA.__init__ + ~HilbertCPCCA.components + ~HilbertCPCCA.components_amplitude + ~HilbertCPCCA.components_phase + ~HilbertCPCCA.compute + ~HilbertCPCCA.correlation_coefficients_X + ~HilbertCPCCA.correlation_coefficients_Y + ~HilbertCPCCA.cross_correlation_coefficients + ~HilbertCPCCA.deserialize + ~HilbertCPCCA.fit + ~HilbertCPCCA.fraction_variance_X_explained_by_X + ~HilbertCPCCA.fraction_variance_Y_explained_by_X + ~HilbertCPCCA.fraction_variance_Y_explained_by_Y + ~HilbertCPCCA.get_params + ~HilbertCPCCA.get_serialization_attrs + ~HilbertCPCCA.heterogeneous_patterns + ~HilbertCPCCA.homogeneous_patterns + ~HilbertCPCCA.inverse_transform + ~HilbertCPCCA.load + ~HilbertCPCCA.predict + ~HilbertCPCCA.save + ~HilbertCPCCA.scores + ~HilbertCPCCA.scores_amplitude + ~HilbertCPCCA.scores_phase + ~HilbertCPCCA.serialize + ~HilbertCPCCA.squared_covariance_fraction + ~HilbertCPCCA.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.HilbertCPCCARotator.rst b/docs/api_reference/_autosummary/xeofs.cross.HilbertCPCCARotator.rst new file mode 100644 index 00000000..d097cd3e --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.HilbertCPCCARotator.rst @@ -0,0 +1,50 @@ +HilbertCPCCARotator +=================== + +.. currentmodule:: xeofs.cross + +.. autoclass:: HilbertCPCCARotator + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~HilbertCPCCARotator.__init__ + ~HilbertCPCCARotator.components + ~HilbertCPCCARotator.components_amplitude + ~HilbertCPCCARotator.components_phase + ~HilbertCPCCARotator.compute + ~HilbertCPCCARotator.correlation_coefficients_X + ~HilbertCPCCARotator.correlation_coefficients_Y + ~HilbertCPCCARotator.cross_correlation_coefficients + ~HilbertCPCCARotator.deserialize + ~HilbertCPCCARotator.fit + ~HilbertCPCCARotator.fraction_variance_X_explained_by_X + ~HilbertCPCCARotator.fraction_variance_Y_explained_by_X + ~HilbertCPCCARotator.fraction_variance_Y_explained_by_Y + ~HilbertCPCCARotator.get_params + ~HilbertCPCCARotator.get_serialization_attrs + ~HilbertCPCCARotator.heterogeneous_patterns + ~HilbertCPCCARotator.homogeneous_patterns + ~HilbertCPCCARotator.inverse_transform + ~HilbertCPCCARotator.load + ~HilbertCPCCARotator.predict + ~HilbertCPCCARotator.save + ~HilbertCPCCARotator.scores + ~HilbertCPCCARotator.scores_amplitude + ~HilbertCPCCARotator.scores_phase + ~HilbertCPCCARotator.serialize + ~HilbertCPCCARotator.squared_covariance_fraction + ~HilbertCPCCARotator.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.HilbertMCA.rst b/docs/api_reference/_autosummary/xeofs.cross.HilbertMCA.rst new file mode 100644 index 00000000..8b6bc60e --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.HilbertMCA.rst @@ -0,0 +1,51 @@ +HilbertMCA +========== + +.. currentmodule:: xeofs.cross + +.. autoclass:: HilbertMCA + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~HilbertMCA.__init__ + ~HilbertMCA.components + ~HilbertMCA.components_amplitude + ~HilbertMCA.components_phase + ~HilbertMCA.compute + ~HilbertMCA.correlation_coefficients_X + ~HilbertMCA.correlation_coefficients_Y + ~HilbertMCA.covariance_fraction_CD95 + ~HilbertMCA.cross_correlation_coefficients + ~HilbertMCA.deserialize + ~HilbertMCA.fit + ~HilbertMCA.fraction_variance_X_explained_by_X + ~HilbertMCA.fraction_variance_Y_explained_by_X + ~HilbertMCA.fraction_variance_Y_explained_by_Y + ~HilbertMCA.get_params + ~HilbertMCA.get_serialization_attrs + ~HilbertMCA.heterogeneous_patterns + ~HilbertMCA.homogeneous_patterns + ~HilbertMCA.inverse_transform + ~HilbertMCA.load + ~HilbertMCA.predict + ~HilbertMCA.save + ~HilbertMCA.scores + ~HilbertMCA.scores_amplitude + ~HilbertMCA.scores_phase + ~HilbertMCA.serialize + ~HilbertMCA.squared_covariance_fraction + ~HilbertMCA.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.HilbertMCARotator.rst b/docs/api_reference/_autosummary/xeofs.cross.HilbertMCARotator.rst new file mode 100644 index 00000000..3d3d2bcd --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.HilbertMCARotator.rst @@ -0,0 +1,51 @@ +HilbertMCARotator +================= + +.. currentmodule:: xeofs.cross + +.. autoclass:: HilbertMCARotator + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~HilbertMCARotator.__init__ + ~HilbertMCARotator.components + ~HilbertMCARotator.components_amplitude + ~HilbertMCARotator.components_phase + ~HilbertMCARotator.compute + ~HilbertMCARotator.correlation_coefficients_X + ~HilbertMCARotator.correlation_coefficients_Y + ~HilbertMCARotator.covariance_fraction_CD95 + ~HilbertMCARotator.cross_correlation_coefficients + ~HilbertMCARotator.deserialize + ~HilbertMCARotator.fit + ~HilbertMCARotator.fraction_variance_X_explained_by_X + ~HilbertMCARotator.fraction_variance_Y_explained_by_X + ~HilbertMCARotator.fraction_variance_Y_explained_by_Y + ~HilbertMCARotator.get_params + ~HilbertMCARotator.get_serialization_attrs + ~HilbertMCARotator.heterogeneous_patterns + ~HilbertMCARotator.homogeneous_patterns + ~HilbertMCARotator.inverse_transform + ~HilbertMCARotator.load + ~HilbertMCARotator.predict + ~HilbertMCARotator.save + ~HilbertMCARotator.scores + ~HilbertMCARotator.scores_amplitude + ~HilbertMCARotator.scores_phase + ~HilbertMCARotator.serialize + ~HilbertMCARotator.squared_covariance_fraction + ~HilbertMCARotator.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.cross.HilbertRDA.rst b/docs/api_reference/_autosummary/xeofs.cross.HilbertRDA.rst new file mode 100644 index 00000000..61f10c35 --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.HilbertRDA.rst @@ -0,0 +1,50 @@ +HilbertRDA +========== + +.. currentmodule:: xeofs.cross + +.. autoclass:: HilbertRDA + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~HilbertRDA.__init__ + ~HilbertRDA.components + ~HilbertRDA.components_amplitude + ~HilbertRDA.components_phase + ~HilbertRDA.compute + ~HilbertRDA.correlation_coefficients_X + ~HilbertRDA.correlation_coefficients_Y + ~HilbertRDA.cross_correlation_coefficients + ~HilbertRDA.deserialize + ~HilbertRDA.fit + ~HilbertRDA.fraction_variance_X_explained_by_X + ~HilbertRDA.fraction_variance_Y_explained_by_X + ~HilbertRDA.fraction_variance_Y_explained_by_Y + ~HilbertRDA.get_params + ~HilbertRDA.get_serialization_attrs + ~HilbertRDA.heterogeneous_patterns + ~HilbertRDA.homogeneous_patterns + ~HilbertRDA.inverse_transform + ~HilbertRDA.load + ~HilbertRDA.predict + ~HilbertRDA.save + ~HilbertRDA.scores + ~HilbertRDA.scores_amplitude + ~HilbertRDA.scores_phase + ~HilbertRDA.serialize + ~HilbertRDA.squared_covariance_fraction + ~HilbertRDA.transform + + + + + + \ No newline at end of file diff --git a/docs/_autosummary/xeofs.models.MCA.rst b/docs/api_reference/_autosummary/xeofs.cross.MCA.rst similarity index 60% rename from docs/_autosummary/xeofs.models.MCA.rst rename to docs/api_reference/_autosummary/xeofs.cross.MCA.rst index 16b58d96..751ef8fd 100644 --- a/docs/_autosummary/xeofs.models.MCA.rst +++ b/docs/api_reference/_autosummary/xeofs.cross.MCA.rst @@ -1,11 +1,10 @@ -xeofs.models.MCA -================ +MCA +=== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.cross .. 
autoclass:: MCA :members: - :show-inheritance: :inherited-members: @@ -19,22 +18,26 @@ ~MCA.__init__ ~MCA.components ~MCA.compute - ~MCA.covariance_fraction + ~MCA.correlation_coefficients_X + ~MCA.correlation_coefficients_Y + ~MCA.covariance_fraction_CD95 + ~MCA.cross_correlation_coefficients ~MCA.deserialize ~MCA.fit + ~MCA.fraction_variance_X_explained_by_X + ~MCA.fraction_variance_Y_explained_by_X + ~MCA.fraction_variance_Y_explained_by_Y ~MCA.get_params ~MCA.get_serialization_attrs ~MCA.heterogeneous_patterns ~MCA.homogeneous_patterns ~MCA.inverse_transform ~MCA.load + ~MCA.predict ~MCA.save ~MCA.scores ~MCA.serialize - ~MCA.singular_values - ~MCA.squared_covariance ~MCA.squared_covariance_fraction - ~MCA.total_covariance ~MCA.transform diff --git a/docs/_autosummary/xeofs.models.MCARotator.rst b/docs/api_reference/_autosummary/xeofs.cross.MCARotator.rst similarity index 61% rename from docs/_autosummary/xeofs.models.MCARotator.rst rename to docs/api_reference/_autosummary/xeofs.cross.MCARotator.rst index 785836da..d8cbbe56 100644 --- a/docs/_autosummary/xeofs.models.MCARotator.rst +++ b/docs/api_reference/_autosummary/xeofs.cross.MCARotator.rst @@ -1,11 +1,10 @@ -xeofs.models.MCARotator -======================= +MCARotator +========== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.cross .. autoclass:: MCARotator :members: - :show-inheritance: :inherited-members: @@ -19,22 +18,26 @@ ~MCARotator.__init__ ~MCARotator.components ~MCARotator.compute - ~MCARotator.covariance_fraction + ~MCARotator.correlation_coefficients_X + ~MCARotator.correlation_coefficients_Y + ~MCARotator.covariance_fraction_CD95 + ~MCARotator.cross_correlation_coefficients ~MCARotator.deserialize ~MCARotator.fit + ~MCARotator.fraction_variance_X_explained_by_X + ~MCARotator.fraction_variance_Y_explained_by_X + ~MCARotator.fraction_variance_Y_explained_by_Y ~MCARotator.get_params ~MCARotator.get_serialization_attrs ~MCARotator.heterogeneous_patterns ~MCARotator.homogeneous_patterns ~MCARotator.inverse_transform ~MCARotator.load + ~MCARotator.predict ~MCARotator.save ~MCARotator.scores ~MCARotator.serialize - ~MCARotator.singular_values - ~MCARotator.squared_covariance ~MCARotator.squared_covariance_fraction - ~MCARotator.total_covariance ~MCARotator.transform diff --git a/docs/api_reference/_autosummary/xeofs.cross.RDA.rst b/docs/api_reference/_autosummary/xeofs.cross.RDA.rst new file mode 100644 index 00000000..bab7b535 --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.cross.RDA.rst @@ -0,0 +1,46 @@ +RDA +=== + +.. currentmodule:: xeofs.cross + +.. autoclass:: RDA + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~RDA.__init__ + ~RDA.components + ~RDA.compute + ~RDA.correlation_coefficients_X + ~RDA.correlation_coefficients_Y + ~RDA.cross_correlation_coefficients + ~RDA.deserialize + ~RDA.fit + ~RDA.fraction_variance_X_explained_by_X + ~RDA.fraction_variance_Y_explained_by_X + ~RDA.fraction_variance_Y_explained_by_Y + ~RDA.get_params + ~RDA.get_serialization_attrs + ~RDA.heterogeneous_patterns + ~RDA.homogeneous_patterns + ~RDA.inverse_transform + ~RDA.load + ~RDA.predict + ~RDA.save + ~RDA.scores + ~RDA.serialize + ~RDA.squared_covariance_fraction + ~RDA.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.models.CCA.rst b/docs/api_reference/_autosummary/xeofs.models.CCA.rst deleted file mode 100644 index 3238c999..00000000 --- a/docs/api_reference/_autosummary/xeofs.models.CCA.rst +++ /dev/null @@ -1,39 +0,0 @@ -xeofs.models.CCA -================ - -.. currentmodule:: xeofs.models - -.. autoclass:: CCA - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~CCA.__init__ - ~CCA.components - ~CCA.explained_covariance - ~CCA.explained_covariance_ratio - ~CCA.explained_variance - ~CCA.explained_variance_ratio - ~CCA.fit - ~CCA.get_metadata_routing - ~CCA.get_params - ~CCA.scores - ~CCA.set_fit_request - ~CCA.set_params - ~CCA.set_transform_request - ~CCA.transform - ~CCA.weights - - - - - - \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.models.ComplexMCARotator.rst b/docs/api_reference/_autosummary/xeofs.models.ComplexMCARotator.rst deleted file mode 100644 index 4c647104..00000000 --- a/docs/api_reference/_autosummary/xeofs.models.ComplexMCARotator.rst +++ /dev/null @@ -1,48 +0,0 @@ -xeofs.models.ComplexMCARotator -============================== - -.. currentmodule:: xeofs.models - -.. autoclass:: ComplexMCARotator - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~ComplexMCARotator.__init__ - ~ComplexMCARotator.components - ~ComplexMCARotator.components_amplitude - ~ComplexMCARotator.components_phase - ~ComplexMCARotator.compute - ~ComplexMCARotator.covariance_fraction - ~ComplexMCARotator.deserialize - ~ComplexMCARotator.fit - ~ComplexMCARotator.get_params - ~ComplexMCARotator.get_serialization_attrs - ~ComplexMCARotator.heterogeneous_patterns - ~ComplexMCARotator.homogeneous_patterns - ~ComplexMCARotator.inverse_transform - ~ComplexMCARotator.load - ~ComplexMCARotator.save - ~ComplexMCARotator.scores - ~ComplexMCARotator.scores_amplitude - ~ComplexMCARotator.scores_phase - ~ComplexMCARotator.serialize - ~ComplexMCARotator.singular_values - ~ComplexMCARotator.squared_covariance - ~ComplexMCARotator.squared_covariance_fraction - ~ComplexMCARotator.total_covariance - ~ComplexMCARotator.transform - - - - - - \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.models.MCA.rst b/docs/api_reference/_autosummary/xeofs.models.MCA.rst deleted file mode 100644 index 16b58d96..00000000 --- a/docs/api_reference/_autosummary/xeofs.models.MCA.rst +++ /dev/null @@ -1,44 +0,0 @@ -xeofs.models.MCA -================ - -.. currentmodule:: xeofs.models - -.. autoclass:: MCA - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~MCA.__init__ - ~MCA.components - ~MCA.compute - ~MCA.covariance_fraction - ~MCA.deserialize - ~MCA.fit - ~MCA.get_params - ~MCA.get_serialization_attrs - ~MCA.heterogeneous_patterns - ~MCA.homogeneous_patterns - ~MCA.inverse_transform - ~MCA.load - ~MCA.save - ~MCA.scores - ~MCA.serialize - ~MCA.singular_values - ~MCA.squared_covariance - ~MCA.squared_covariance_fraction - ~MCA.total_covariance - ~MCA.transform - - - - - - \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.models.MCARotator.rst b/docs/api_reference/_autosummary/xeofs.models.MCARotator.rst deleted file mode 100644 index 785836da..00000000 --- a/docs/api_reference/_autosummary/xeofs.models.MCARotator.rst +++ /dev/null @@ -1,44 +0,0 @@ -xeofs.models.MCARotator -======================= - -.. currentmodule:: xeofs.models - -.. autoclass:: MCARotator - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~MCARotator.__init__ - ~MCARotator.components - ~MCARotator.compute - ~MCARotator.covariance_fraction - ~MCARotator.deserialize - ~MCARotator.fit - ~MCARotator.get_params - ~MCARotator.get_serialization_attrs - ~MCARotator.heterogeneous_patterns - ~MCARotator.homogeneous_patterns - ~MCARotator.inverse_transform - ~MCARotator.load - ~MCARotator.save - ~MCARotator.scores - ~MCARotator.serialize - ~MCARotator.singular_values - ~MCARotator.squared_covariance - ~MCARotator.squared_covariance_fraction - ~MCARotator.total_covariance - ~MCARotator.transform - - - - - - \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.models.RotatorFactory.rst b/docs/api_reference/_autosummary/xeofs.models.RotatorFactory.rst deleted file mode 100644 index b23f04da..00000000 --- a/docs/api_reference/_autosummary/xeofs.models.RotatorFactory.rst +++ /dev/null @@ -1,26 +0,0 @@ -xeofs.models.RotatorFactory -=========================== - -.. currentmodule:: xeofs.models - -.. autoclass:: RotatorFactory - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~RotatorFactory.__init__ - ~RotatorFactory.create_rotator - - - - - - \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.models.SparsePCA.rst b/docs/api_reference/_autosummary/xeofs.models.SparsePCA.rst deleted file mode 100644 index 068ccd89..00000000 --- a/docs/api_reference/_autosummary/xeofs.models.SparsePCA.rst +++ /dev/null @@ -1,40 +0,0 @@ -xeofs.models.SparsePCA -====================== - -.. currentmodule:: xeofs.models - -.. autoclass:: SparsePCA - :members: - :show-inheritance: - :inherited-members: - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~SparsePCA.__init__ - ~SparsePCA.components - ~SparsePCA.compute - ~SparsePCA.deserialize - ~SparsePCA.explained_variance - ~SparsePCA.explained_variance_ratio - ~SparsePCA.fit - ~SparsePCA.fit_transform - ~SparsePCA.get_params - ~SparsePCA.get_serialization_attrs - ~SparsePCA.inverse_transform - ~SparsePCA.load - ~SparsePCA.save - ~SparsePCA.scores - ~SparsePCA.serialize - ~SparsePCA.transform - - - - - - \ No newline at end of file diff --git a/docs/_autosummary/xeofs.models.CCA.rst b/docs/api_reference/_autosummary/xeofs.multi.CCA.rst similarity index 85% rename from docs/_autosummary/xeofs.models.CCA.rst rename to docs/api_reference/_autosummary/xeofs.multi.CCA.rst index 3238c999..93abea08 100644 --- a/docs/_autosummary/xeofs.models.CCA.rst +++ b/docs/api_reference/_autosummary/xeofs.multi.CCA.rst @@ -1,11 +1,10 @@ -xeofs.models.CCA -================ +CCA +=== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.multi .. autoclass:: CCA :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/_autosummary/xeofs.models.ComplexEOF.rst b/docs/api_reference/_autosummary/xeofs.single.ComplexEOF.rst similarity index 88% rename from docs/api_reference/_autosummary/xeofs.models.ComplexEOF.rst rename to docs/api_reference/_autosummary/xeofs.single.ComplexEOF.rst index 7eb1f432..5bac3289 100644 --- a/docs/api_reference/_autosummary/xeofs.models.ComplexEOF.rst +++ b/docs/api_reference/_autosummary/xeofs.single.ComplexEOF.rst @@ -1,11 +1,10 @@ -xeofs.models.ComplexEOF -======================= +ComplexEOF +========== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.single .. autoclass:: ComplexEOF :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/_autosummary/xeofs.models.ComplexEOFRotator.rst b/docs/api_reference/_autosummary/xeofs.single.ComplexEOFRotator.rst similarity index 89% rename from docs/api_reference/_autosummary/xeofs.models.ComplexEOFRotator.rst rename to docs/api_reference/_autosummary/xeofs.single.ComplexEOFRotator.rst index 41edbcde..ebedc5cd 100644 --- a/docs/api_reference/_autosummary/xeofs.models.ComplexEOFRotator.rst +++ b/docs/api_reference/_autosummary/xeofs.single.ComplexEOFRotator.rst @@ -1,11 +1,10 @@ -xeofs.models.ComplexEOFRotator -============================== +ComplexEOFRotator +================= -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.single .. autoclass:: ComplexEOFRotator :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/_autosummary/xeofs.models.EOF.rst b/docs/api_reference/_autosummary/xeofs.single.EOF.rst similarity index 85% rename from docs/api_reference/_autosummary/xeofs.models.EOF.rst rename to docs/api_reference/_autosummary/xeofs.single.EOF.rst index 90da1cde..38f3c7dd 100644 --- a/docs/api_reference/_autosummary/xeofs.models.EOF.rst +++ b/docs/api_reference/_autosummary/xeofs.single.EOF.rst @@ -1,11 +1,10 @@ -xeofs.models.EOF -================ +EOF +=== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.single .. 
autoclass:: EOF :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/_autosummary/xeofs.models.EOFRotator.rst b/docs/api_reference/_autosummary/xeofs.single.EOFRotator.rst similarity index 86% rename from docs/api_reference/_autosummary/xeofs.models.EOFRotator.rst rename to docs/api_reference/_autosummary/xeofs.single.EOFRotator.rst index c1f06783..0c54557a 100644 --- a/docs/api_reference/_autosummary/xeofs.models.EOFRotator.rst +++ b/docs/api_reference/_autosummary/xeofs.single.EOFRotator.rst @@ -1,11 +1,10 @@ -xeofs.models.EOFRotator -======================= +EOFRotator +========== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.single .. autoclass:: EOFRotator :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/_autosummary/xeofs.models.ExtendedEOF.rst b/docs/api_reference/_autosummary/xeofs.single.ExtendedEOF.rst similarity index 86% rename from docs/api_reference/_autosummary/xeofs.models.ExtendedEOF.rst rename to docs/api_reference/_autosummary/xeofs.single.ExtendedEOF.rst index b1740349..c9d06932 100644 --- a/docs/api_reference/_autosummary/xeofs.models.ExtendedEOF.rst +++ b/docs/api_reference/_autosummary/xeofs.single.ExtendedEOF.rst @@ -1,11 +1,10 @@ -xeofs.models.ExtendedEOF -======================== +ExtendedEOF +=========== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.single .. autoclass:: ExtendedEOF :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/_autosummary/xeofs.models.GWPCA.rst b/docs/api_reference/_autosummary/xeofs.single.GWPCA.rst similarity index 86% rename from docs/api_reference/_autosummary/xeofs.models.GWPCA.rst rename to docs/api_reference/_autosummary/xeofs.single.GWPCA.rst index 2bc03eb9..486bc2e6 100644 --- a/docs/api_reference/_autosummary/xeofs.models.GWPCA.rst +++ b/docs/api_reference/_autosummary/xeofs.single.GWPCA.rst @@ -1,11 +1,10 @@ -xeofs.models.GWPCA -================== +GWPCA +===== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.single .. autoclass:: GWPCA :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/_autosummary/xeofs.single.HilbertEOF.rst b/docs/api_reference/_autosummary/xeofs.single.HilbertEOF.rst new file mode 100644 index 00000000..286f6500 --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.single.HilbertEOF.rst @@ -0,0 +1,44 @@ +HilbertEOF +========== + +.. currentmodule:: xeofs.single + +.. autoclass:: HilbertEOF + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~HilbertEOF.__init__ + ~HilbertEOF.components + ~HilbertEOF.components_amplitude + ~HilbertEOF.components_phase + ~HilbertEOF.compute + ~HilbertEOF.deserialize + ~HilbertEOF.explained_variance + ~HilbertEOF.explained_variance_ratio + ~HilbertEOF.fit + ~HilbertEOF.fit_transform + ~HilbertEOF.get_params + ~HilbertEOF.get_serialization_attrs + ~HilbertEOF.inverse_transform + ~HilbertEOF.load + ~HilbertEOF.save + ~HilbertEOF.scores + ~HilbertEOF.scores_amplitude + ~HilbertEOF.scores_phase + ~HilbertEOF.serialize + ~HilbertEOF.singular_values + ~HilbertEOF.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.single.HilbertEOFRotator.rst b/docs/api_reference/_autosummary/xeofs.single.HilbertEOFRotator.rst new file mode 100644 index 00000000..fe1e4f67 --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.single.HilbertEOFRotator.rst @@ -0,0 +1,44 @@ +HilbertEOFRotator +================= + +.. 
currentmodule:: xeofs.single + +.. autoclass:: HilbertEOFRotator + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~HilbertEOFRotator.__init__ + ~HilbertEOFRotator.components + ~HilbertEOFRotator.components_amplitude + ~HilbertEOFRotator.components_phase + ~HilbertEOFRotator.compute + ~HilbertEOFRotator.deserialize + ~HilbertEOFRotator.explained_variance + ~HilbertEOFRotator.explained_variance_ratio + ~HilbertEOFRotator.fit + ~HilbertEOFRotator.fit_transform + ~HilbertEOFRotator.get_params + ~HilbertEOFRotator.get_serialization_attrs + ~HilbertEOFRotator.inverse_transform + ~HilbertEOFRotator.load + ~HilbertEOFRotator.save + ~HilbertEOFRotator.scores + ~HilbertEOFRotator.scores_amplitude + ~HilbertEOFRotator.scores_phase + ~HilbertEOFRotator.serialize + ~HilbertEOFRotator.singular_values + ~HilbertEOFRotator.transform + + + + + + \ No newline at end of file diff --git a/docs/api_reference/_autosummary/xeofs.models.OPA.rst b/docs/api_reference/_autosummary/xeofs.single.OPA.rst similarity index 85% rename from docs/api_reference/_autosummary/xeofs.models.OPA.rst rename to docs/api_reference/_autosummary/xeofs.single.OPA.rst index 53000a77..dc2094f4 100644 --- a/docs/api_reference/_autosummary/xeofs.models.OPA.rst +++ b/docs/api_reference/_autosummary/xeofs.single.OPA.rst @@ -1,11 +1,10 @@ -xeofs.models.OPA -================ +OPA +=== -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.single .. autoclass:: OPA :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/_autosummary/xeofs.single.POP.rst b/docs/api_reference/_autosummary/xeofs.single.POP.rst new file mode 100644 index 00000000..8d1e6b8b --- /dev/null +++ b/docs/api_reference/_autosummary/xeofs.single.POP.rst @@ -0,0 +1,44 @@ +POP +=== + +.. currentmodule:: xeofs.single + +.. autoclass:: POP + :members: + :inherited-members: + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~POP.__init__ + ~POP.components + ~POP.components_amplitude + ~POP.components_phase + ~POP.compute + ~POP.damping_times + ~POP.deserialize + ~POP.eigenvalues + ~POP.fit + ~POP.fit_transform + ~POP.get_params + ~POP.get_serialization_attrs + ~POP.inverse_transform + ~POP.load + ~POP.periods + ~POP.save + ~POP.scores + ~POP.scores_amplitude + ~POP.scores_phase + ~POP.serialize + ~POP.transform + + + + + + \ No newline at end of file diff --git a/docs/_autosummary/xeofs.models.SparsePCA.rst b/docs/api_reference/_autosummary/xeofs.single.SparsePCA.rst similarity index 85% rename from docs/_autosummary/xeofs.models.SparsePCA.rst rename to docs/api_reference/_autosummary/xeofs.single.SparsePCA.rst index 068ccd89..11754996 100644 --- a/docs/_autosummary/xeofs.models.SparsePCA.rst +++ b/docs/api_reference/_autosummary/xeofs.single.SparsePCA.rst @@ -1,11 +1,10 @@ -xeofs.models.SparsePCA -====================== +SparsePCA +========= -.. currentmodule:: xeofs.models +.. currentmodule:: xeofs.single .. autoclass:: SparsePCA :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/_autosummary/xeofs.validation.EOFBootstrapper.rst b/docs/api_reference/_autosummary/xeofs.validation.EOFBootstrapper.rst index 748247b8..63bba4be 100644 --- a/docs/api_reference/_autosummary/xeofs.validation.EOFBootstrapper.rst +++ b/docs/api_reference/_autosummary/xeofs.validation.EOFBootstrapper.rst @@ -1,11 +1,10 @@ -xeofs.validation.EOFBootstrapper -================================ +EOFBootstrapper +=============== .. 
currentmodule:: xeofs.validation .. autoclass:: EOFBootstrapper :members: - :show-inheritance: :inherited-members: diff --git a/docs/api_reference/cross_set_analysis.rst b/docs/api_reference/cross_set_analysis.rst new file mode 100644 index 00000000..d454a4c9 --- /dev/null +++ b/docs/api_reference/cross_set_analysis.rst @@ -0,0 +1,38 @@ +=============== +``xeofs.cross`` +=============== +Methods that investigate relationships or patterns between variables **across two** distinct datasets. + +.. autosummary:: + :toctree: _autosummary + :template: custom-class-template.rst + :recursive: + + ~xeofs.cross.CCA + ~xeofs.cross.MCA + ~xeofs.cross.RDA + ~xeofs.cross.CPCCA + ~xeofs.cross.ComplexCCA + ~xeofs.cross.ComplexMCA + ~xeofs.cross.ComplexRDA + ~xeofs.cross.ComplexCPCCA + ~xeofs.cross.HilbertCCA + ~xeofs.cross.HilbertMCA + ~xeofs.cross.HilbertRDA + ~xeofs.cross.HilbertCPCCA + +------------------------------ +Sparse Solutions via Rotation +------------------------------ + +.. autosummary:: + :toctree: _autosummary + :template: custom-class-template.rst + :recursive: + + ~xeofs.cross.MCARotator + ~xeofs.cross.CPCCARotator + ~xeofs.cross.ComplexMCARotator + ~xeofs.cross.ComplexCPCCARotator + ~xeofs.cross.HilbertMCARotator + ~xeofs.cross.HilbertCPCCARotator diff --git a/docs/api_reference/index.rst b/docs/api_reference/index.rst index 011fff02..c74ab81d 100644 --- a/docs/api_reference/index.rst +++ b/docs/api_reference/index.rst @@ -6,22 +6,24 @@ API Reference The package is under development, and its API may change. -The xeofs package focuses on eigenmethods for dimensionality reduction in climate science. These methods are categorized into two groups: +The xeofs package focuses on eigenmethods for dimensionality reduction in climate science. It is organized into methods that examine relationships between variables -1. :doc:`Single-Set Analysis `: Methods that examine relationships or patterns within a single dataset. -2. :doc:`Multi-Set Analysis `: Methods that investigate relationships or patterns between variables across two or more distinct datasets. +1. within a **single dataset** (``xeofs.single``), +2. across **two datasets** (``xeofs.cross``) and +3. across **more than two datasets** (``xeofs.multi``). -------------------- Single-Set Analysis -------------------- -A classic example of single-set analysis is Principal Component Analysis (PCA/EOF analysis), used to extract the dominant patterns of variability within a single dataset. While PCA can be applied to multiple (standardized) datasets simultaneously, it treats all datasets as one large dataset, maximizing overall variability without considering inter-dataset relationships. Consequently, the most important variables may come from only one dataset, ignoring others. +A classic example of :doc:`single-set analysis ` is Principal Component Analysis (PCA/EOF analysis), used to extract the dominant patterns of variability within a single dataset. While PCA can be applied to multiple (standardized) datasets simultaneously, it treats all datasets as one large dataset, maximizing overall variability without considering inter-dataset relationships. Consequently, the most important variables may come from only one dataset, ignoring others. 
--------------------- -Multi-Set Analysis --------------------- +---------------------------- +Cross and Multi-Set Analysis +---------------------------- + +Classic examples of :doc:`cross ` or :doc:`multi`-set analysis methods include Canonical Correlation Analysis (CCA), Maximum Covariance Analysis (MCA) and Redundancy Analysis (RDA). These techniques identify shared patterns of variability between two distinct datasets, focusing on common patterns rather than those unique to each dataset. -Examples of multi-set analysis methods include Canonical Correlation Analysis (CCA) and Maximum Covariance Analysis (MCA). These techniques identify shared patterns of variability between two or more datasets, focusing on common patterns rather than those unique to each dataset. For instance, if you have two datasets (e.g., monthly temperatures from tropical and polar regions over 70 years), CCA or MCA would likely highlight the global warming signal as the dominant pattern common to both datasets, while the seasonal cycle would not be dominant as it is only prominent in the polar region. Additionally, xeofs offers tools for :doc:`model evaluation `, though these are still in early development stages. @@ -30,9 +32,17 @@ Additionally, xeofs offers tools for :doc:`model evaluation `, .. toctree:: :maxdepth: 3 :hidden: + :caption: Methods single_set_analysis + cross_set_analysis multi_set_analysis - model_evaluation utilities +.. toctree:: :maxdepth: 3 :hidden: :caption: Significance Testing + model_evaluation + diff --git a/docs/api_reference/model_evaluation.rst b/docs/api_reference/model_evaluation.rst index d6d07718..7dc7dfd8 100644 --- a/docs/api_reference/model_evaluation.rst +++ b/docs/api_reference/model_evaluation.rst @@ -1,11 +1,11 @@ -================ -Model Evaluation -================ -Tools to assess the quality of your model. +==================== +``xeofs.validation`` +==================== +Tools to assess the significance of your model. .. autosummary:: :toctree: _autosummary :template: custom-class-template.rst :recursive: - xeofs.validation.EOFBootstrapper + ~xeofs.validation.EOFBootstrapper diff --git a/docs/api_reference/multi_set_analysis.rst b/docs/api_reference/multi_set_analysis.rst index 15626a31..ce4b1ea4 100644 --- a/docs/api_reference/multi_set_analysis.rst +++ b/docs/api_reference/multi_set_analysis.rst @@ -1,16 +1,14 @@ -================== -Multi-Set Analysis -================== -Methods that investigate relationships or patterns between variables across two or more distinct datasets. +================ +``xeofs.multi`` +================ +Methods that investigate relationships or patterns between variables across **more than two** distinct datasets. .. 
autosummary:: :toctree: _autosummary :template: custom-class-template.rst :recursive: - xeofs.models.MCA - xeofs.models.HilbertMCA - xeofs.models.CCA + ~xeofs.multi.CCA ------------------------------ Sparse Solutions via Rotation ------------------------------ .. autosummary:: :toctree: _autosummary :template: custom-class-template.rst :recursive: - xeofs.models.MCARotator - xeofs.models.HilbertMCARotator diff --git a/docs/api_reference/single_set_analysis.rst b/docs/api_reference/single_set_analysis.rst index 256b2ac6..744f0f20 100644 --- a/docs/api_reference/single_set_analysis.rst +++ b/docs/api_reference/single_set_analysis.rst @@ -1,20 +1,22 @@ -==================== -Single-Set Analysis -==================== +================ +``xeofs.single`` +================ -Methods that examine relationships among variables within a single dataset, or when multiple datasets are combined and analyzed as one. +Methods that investigate relationships or patterns between variables within a single dataset. .. autosummary:: :toctree: _autosummary :template: custom-class-template.rst :recursive: - xeofs.models.EOF - xeofs.models.HilbertEOF - xeofs.models.ExtendedEOF - xeofs.models.OPA - xeofs.models.GWPCA - xeofs.models.SparsePCA + ~xeofs.single.EOF + ~xeofs.single.ComplexEOF + ~xeofs.single.HilbertEOF + ~xeofs.single.ExtendedEOF + ~xeofs.single.POP + ~xeofs.single.OPA + ~xeofs.single.GWPCA + ~xeofs.single.SparsePCA ------------------------------ @@ -26,5 +28,6 @@ Sparse Solutions via Rotation :template: custom-class-template.rst :recursive: - xeofs.models.EOFRotator - xeofs.models.HilbertEOFRotator \ No newline at end of file + ~xeofs.single.EOFRotator + ~xeofs.single.ComplexEOFRotator + ~xeofs.single.HilbertEOFRotator \ No newline at end of file diff --git a/docs/api_reference/utilities.rst b/docs/api_reference/utilities.rst index cf1e6e0b..9f4b5682 100644 --- a/docs/api_reference/utilities.rst +++ b/docs/api_reference/utilities.rst @@ -1,14 +1,14 @@ ========= Utilities ========= -Support functions. +Tools that may be helpful. .. 
autosummary:: :toctree: _autosummary :template: custom-class-template.rst :recursive: - xeofs.models.RotatorFactory + ~xeofs.RotatorFactory diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_001.png index dd107e59..459361c1 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_001.png and b/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_001.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_002.png b/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_002.png index 987da8bd..57c1da8f 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_002.png and b/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_002.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_003.png b/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_003.png index e8c8f2ab..dd415ed2 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_003.png and b/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_003.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_004.png b/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_004.png index b225380b..26e1d783 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_004.png and b/docs/auto_examples/1single/images/sphx_glr_plot_complex_eof_004.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_001.png index 5535ac43..5738db32 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_001.png and b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_001.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_002.png b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_002.png index 3a57887f..8080e895 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_002.png and b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_002.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_003.png b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_003.png index fa6e1144..a43b108f 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_003.png and b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_003.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_001.png index 9e35afcb..50e9f8d8 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_001.png and b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_001.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_002.png b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_002.png index 18441375..7fb77f55 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_002.png and b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_002.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_003.png b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_003.png index 165aa6df..3ca36fdd 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_003.png and b/docs/auto_examples/1single/images/sphx_glr_plot_eeof_trend_003.png differ diff --git 
a/docs/auto_examples/1single/images/sphx_glr_plot_eof-smode_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_eof-smode_001.png index 49615fde..fe0814bf 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_eof-smode_001.png and b/docs/auto_examples/1single/images/sphx_glr_plot_eof-smode_001.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_eof-tmode_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_eof-tmode_001.png index 48a24ea6..d7dfb2c1 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_eof-tmode_001.png and b/docs/auto_examples/1single/images/sphx_glr_plot_eof-tmode_001.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_gwpca_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_gwpca_001.png index 080b2db8..1512e549 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_gwpca_001.png and b/docs/auto_examples/1single/images/sphx_glr_plot_gwpca_001.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_gwpca_002.png b/docs/auto_examples/1single/images/sphx_glr_plot_gwpca_002.png index a78ea394..101ecca7 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_gwpca_002.png and b/docs/auto_examples/1single/images/sphx_glr_plot_gwpca_002.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_001.png new file mode 100644 index 00000000..c3dc6518 Binary files /dev/null and b/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_001.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_002.png b/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_002.png new file mode 100644 index 00000000..b3fece2e Binary files /dev/null and b/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_002.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_003.png b/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_003.png new file mode 100644 index 00000000..dd415ed2 Binary files /dev/null and b/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_003.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_004.png b/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_004.png new file mode 100644 index 00000000..49f8a91a Binary files /dev/null and b/docs/auto_examples/1single/images/sphx_glr_plot_hilbert_eof_004.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_mreof_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_mreof_001.png index 445d06f5..96c346c3 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_mreof_001.png and b/docs/auto_examples/1single/images/sphx_glr_plot_mreof_001.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_multivariate-eof_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_multivariate-eof_001.png index 3ed35002..7d75a14f 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_multivariate-eof_001.png and b/docs/auto_examples/1single/images/sphx_glr_plot_multivariate-eof_001.png differ diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_rotated_eof_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_rotated_eof_001.png index ce7389fd..8489241a 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_rotated_eof_001.png and b/docs/auto_examples/1single/images/sphx_glr_plot_rotated_eof_001.png differ 
diff --git a/docs/auto_examples/1single/images/sphx_glr_plot_weighted-eof_001.png b/docs/auto_examples/1single/images/sphx_glr_plot_weighted-eof_001.png index e99e6a9e..c8d0078e 100644 Binary files a/docs/auto_examples/1single/images/sphx_glr_plot_weighted-eof_001.png and b/docs/auto_examples/1single/images/sphx_glr_plot_weighted-eof_001.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_complex_eof_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_complex_eof_thumb.png index f817916d..1cf18dc0 100644 Binary files a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_complex_eof_thumb.png and b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_complex_eof_thumb.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eeof_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eeof_thumb.png index db423829..fedce0de 100644 Binary files a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eeof_thumb.png and b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eeof_thumb.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eeof_trend_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eeof_trend_thumb.png index c7af6964..4552a60f 100644 Binary files a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eeof_trend_thumb.png and b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eeof_trend_thumb.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eof-smode_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eof-smode_thumb.png index 081c54cf..0677f04a 100644 Binary files a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eof-smode_thumb.png and b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eof-smode_thumb.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eof-tmode_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eof-tmode_thumb.png index e1a73f04..4557bf9b 100644 Binary files a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eof-tmode_thumb.png and b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_eof-tmode_thumb.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_gwpca_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_gwpca_thumb.png index 32f370d6..b7a9a0fd 100644 Binary files a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_gwpca_thumb.png and b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_gwpca_thumb.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_hilbert_eof_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_hilbert_eof_thumb.png new file mode 100644 index 00000000..582b7c73 Binary files /dev/null and b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_hilbert_eof_thumb.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_mreof_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_mreof_thumb.png index b1a2f9ca..8a1c9666 100644 Binary files a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_mreof_thumb.png and b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_mreof_thumb.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_multivariate-eof_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_multivariate-eof_thumb.png index 012f0e98..8b65e4ad 100644 Binary files a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_multivariate-eof_thumb.png and 
b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_multivariate-eof_thumb.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_rotated_eof_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_rotated_eof_thumb.png index a75cd05d..0416fc09 100644 Binary files a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_rotated_eof_thumb.png and b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_rotated_eof_thumb.png differ diff --git a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_weighted-eof_thumb.png b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_weighted-eof_thumb.png index 0d76d4f7..14a3e92f 100644 Binary files a/docs/auto_examples/1single/images/thumb/sphx_glr_plot_weighted-eof_thumb.png and b/docs/auto_examples/1single/images/thumb/sphx_glr_plot_weighted-eof_thumb.png differ diff --git a/docs/auto_examples/1single/index.rst b/docs/auto_examples/1single/index.rst index 246366b4..536b6aa5 100644 --- a/docs/auto_examples/1single/index.rst +++ b/docs/auto_examples/1single/index.rst @@ -29,6 +29,23 @@ +.. raw:: html + +
+ +.. only:: html + + .. image:: /auto_examples/1single/images/thumb/sphx_glr_plot_complex_eof_thumb.png + :alt: + + :ref:`sphx_glr_auto_examples_1single_plot_complex_eof.py` + +.. raw:: html + +
Complex EOF analysis
+
+ + .. raw:: html
@@ -48,18 +65,18 @@ .. raw:: html -
+
.. only:: html - .. image:: /auto_examples/1single/images/thumb/sphx_glr_plot_complex_eof_thumb.png + .. image:: /auto_examples/1single/images/thumb/sphx_glr_plot_hilbert_eof_thumb.png :alt: - :ref:`sphx_glr_auto_examples_1single_plot_complex_eof.py` + :ref:`sphx_glr_auto_examples_1single_plot_hilbert_eof.py` .. raw:: html -
Complex/Hilbert EOF analysis
+
Hilbert EOF analysis
@@ -191,8 +208,9 @@ :hidden: /auto_examples/1single/plot_eeof - /auto_examples/1single/plot_eof-tmode /auto_examples/1single/plot_complex_eof + /auto_examples/1single/plot_eof-tmode + /auto_examples/1single/plot_hilbert_eof /auto_examples/1single/plot_eof-smode /auto_examples/1single/plot_eeof_trend /auto_examples/1single/plot_multivariate-eof diff --git a/docs/auto_examples/1single/plot_complex_eof.ipynb b/docs/auto_examples/1single/plot_complex_eof.ipynb index aab0c5d6..91109d28 100644 --- a/docs/auto_examples/1single/plot_complex_eof.ipynb +++ b/docs/auto_examples/1single/plot_complex_eof.ipynb @@ -5,21 +5,10 @@ "metadata": {}, "source": [ "\n", - "# Complex/Hilbert EOF analysis\n", + "# Complex EOF analysis\n", "\n", - "We demonstrate how to execute a Complex EOF (or Hilbert EOF) analysis [1]_ [2]_ [3]_.\n", - "This method extends traditional EOF analysis into the complex domain, allowing\n", - "the EOF components to have real and imaginary parts. This capability can reveal\n", - "oscillatory patterns in datasets, which are common in Earth observations.\n", - "For example, beyond typical examples like seasonal cycles, you can think of\n", - "internal waves in the ocean, or the Quasi-Biennial Oscillation in the atmosphere.\n", - "\n", - "Using monthly sea surface temperature data from 1970 to 2021 as an example, we\n", - "highlight the method's key features and address edge effects as a common challenge.\n", - "\n", - ".. [1] Rasmusson, E. M., Arkin, P. A., Chen, W.-Y. & Jalickee, J. B. Biennial variations in surface temperature over the United States as revealed by singular decomposition. Monthly Weather Review 109, 587–598 (1981).\n", - ".. [2] Barnett, T. P. Interaction of the Monsoon and Pacific Trade Wind System at Interannual Time Scales Part I: The Equatorial Zone. Monthly Weather Review 111, 756–773 (1983).\n", - ".. [3] Horel, J. Complex Principal Component Analysis: Theory and Examples. J. Climate Appl. Meteor. 23, 1660–1673 (1984).\n", + "In this tutorial, we'll walk through how to perform a Complex EOF analysis on\n", + "the zonal and meridional wind components.\n", "\n", "Let's start by importing the necessary packages and loading the data:\n" ] @@ -30,61 +19,19 @@ "metadata": {}, "outputs": [], "source": [ - "import xeofs as xe\n", + "import matplotlib.pyplot as plt\n", "import xarray as xr\n", "\n", - "xr.set_options(display_expand_attrs=False)\n", + "import xeofs as xe\n", "\n", - "sst = xr.tutorial.open_dataset(\"ersstv5\").sst\n", - "sst" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We fit the Complex EOF model directly to the raw data, retaining the seasonal\n", - "cycle for study. The model initialization specifies the desired number of\n", - "modes. The ``use_coslat`` parameter is set to ``True`` to adjust for grid\n", - "convergence at the poles. 
While the ``ComplexEOF`` class offers padding options\n", - "to mitigate potential edge effects, we'll begin with no padding.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "kwargs = dict(n_modes=4, use_coslat=True, random_state=7)\n", - "model = xe.models.ComplexEOF(padding=\"none\", **kwargs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, we fit the model to the data and extract the explained variance.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model.fit(sst, dim=\"time\")\n", - "expvar = model.explained_variance()\n", - "expvar_ratio = model.explained_variance_ratio()" + "xr.set_options(display_expand_attrs=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's have a look at the explained variance of the first five modes:\n", + "For this example, we'll use the ERA-Interim tutorial dataset ``eraint_uvz``:\n", "\n" ] }, @@ -94,15 +41,20 @@ "metadata": {}, "outputs": [], "source": [ - "expvar.round(0)" + "uvz = xr.tutorial.open_dataset(\"eraint_uvz\")\n", + "uvz" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Clearly, the first mode completely dominates and already explains a substantial amount of variance.\n", - "If we look at the fraction of explained variance, we see that the first mode explains about 88.8 %.\n", + "This dataset contains the zonal, meridional, and vertical wind components at\n", + "three different atmospheric levels. Note that the data only covers two months,\n", + "so we have just two time steps (samples). While this isn't enough for a robust\n", + "EOF analysis, we'll proceed for demonstration purposes. Now, let's combine the\n", + "zonal (``u``) and meridional (``v``) wind components into a complex-valued\n", + "dataset:\n", "\n" ] }, @@ -112,19 +64,19 @@ "metadata": {}, "outputs": [], "source": [ - "(expvar_ratio * 100).round(1)" + "Z = uvz[\"u\"] + 1j * uvz[\"v\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In comparison to standard EOF analysis (check the corresponding example,\n", - "S-mode), the first complex mode seems to integrate the first two standard\n", - "modes in terms of explained variance.\n", - "This makes sense as the two modes in standard EOF are both showing parts of\n", - "an annual cycle (which are in quadrature) and thus the complex mode combines both of them.\n", - "Let's confirm our hypothesis by looking at the real part the complex-valued scores:\n", + "Next, we'll initialize and fit the ``ComplexEOF`` model to our data. The\n", + "``xeofs`` package makes this easy by allowing us to specify the sample\n", + "dimension (``month``), automatically performing the Complex EOF analysis\n", + "across all three atmospheric levels. 
As a standard practice, we'll also weigh\n", + "each grid cell by the square root of the cosine of the latitude\n", + "(``use_coslat=True``).\n", "\n" ] }, @@ -134,26 +86,17 @@ "metadata": {}, "outputs": [], "source": [ - "scores = model.scores()\n", - "scores.real.plot.line(x=\"time\", col=\"mode\", lw=1, ylim=(-0.1, 0.1))" + "model = xe.single.ComplexEOF(n_modes=1, use_coslat=True, random_state=7)\n", + "model.fit(Z, dim=\"month\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "And indeed the annual cycle is completed incorporated into the first mode,\n", - "while the second mode shows a semi-annual cycle (mode 3 in standard EOF).\n", - "\n", - "However, mode three and four look unusual. While showing some similarity to\n", - "ENSO (e.g. in mode 3 peaks in 1982, 1998 and 2016), they exhibit a \"running away\"\n", - "behaviour towards the boundaries of the time series.\n", - "This a common issue in complex EOF analysis which is based on the Hilbert transform (a convolution)\n", - "that suffers from the absence of information at the time series boundaries. One way to mitigate this\n", - "is to artificially extend the time series also known as *padding*. In ``xeofs``, you can enable\n", - "such a padding by setting the ``padding`` parameter to ``\"exp\"`` which will extent the boundaries by an exponential\n", - "decaying function. The ``decay_factor`` parameter controls the decay rate of the exponential function measured in\n", - "multiples of the time series length. Let's see how the decay parameter impacts the results:\n", + "Instead of just extracting the complex-valued components, we can also get the\n", + "amplitude and phase of these components. Let's start by looking at the\n", + "amplitude of the first mode:\n", "\n" ] }, @@ -163,41 +106,22 @@ "metadata": {}, "outputs": [], "source": [ - "model_ext = xe.models.ComplexEOF(padding=\"exp\", decay_factor=0.01, **kwargs)\n", - "model_ext.fit(sst, dim=\"time\")\n", - "scores_ext = model_ext.scores().sel(mode=slice(1, 4))\n", + "spatial_ampltiudes = model.components_amplitude()\n", + "spatial_phases = model.components_phase()\n", "\n", - "scores_ext.real.plot.line(x=\"time\", col=\"mode\", lw=1, ylim=(-0.1, 0.1))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And indeed, padding the time series effectively reduced the artifacts at the boundaries.\n", - "Lastly, we examine the complex component amplitudes and phases.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "comp_amps = model.components_amplitude()\n", - "comp_amps.plot(col=\"mode\", vmin=0, vmax=0.025)" + "spatial_ampltiudes.sel(mode=1).plot(col=\"level\")\n", + "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The component phases of the first mode clearly show the seasonal cycle as\n", - "the northern and southern hemisphere are phase shifted by 180 degrees (white and black).\n", - "Note the blueish regions in the central East Pacific and Indian Ocean which indicate\n", - "a phase shift of 90 degrees compared to the main annual cycle. This is in agreement\n", - "with mode 3 of the standard EOF analysis.\n", + "It looks like the first mode picks up a pattern resembling the location of the\n", + "subtropical jet stream around ±30º latitude, particularly strong in the upper\n", + "troposphere at 200 hPa and weaker toward the surface. We can also plot the\n", + "phase of the first mode. 
To keep the plot clear, we'll only show the phase\n", + "where the amplitude is above a certain threshold (e.g., 0.004):\n", "\n" ] }, @@ -207,8 +131,9 @@ "metadata": {}, "outputs": [], "source": [ - "comp_phases = model.components_phase()\n", - "comp_phases.plot(col=\"mode\", cmap=\"twilight\")" + "relevant_phases = spatial_phases.where(spatial_ampltiudes > 0.004)\n", + "relevant_phases.sel(mode=1).plot(col=\"level\", cmap=\"twilight\")\n", + "plt.show()" ] } ], @@ -228,7 +153,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/1single/plot_complex_eof.py b/docs/auto_examples/1single/plot_complex_eof.py index d72e8132..1c07cb13 100644 --- a/docs/auto_examples/1single/plot_complex_eof.py +++ b/docs/auto_examples/1single/plot_complex_eof.py @@ -1,107 +1,69 @@ """ -Complex/Hilbert EOF analysis +Complex EOF analysis ============================================ -We demonstrate how to execute a Complex EOF (or Hilbert EOF) analysis [1]_ [2]_ [3]_. -This method extends traditional EOF analysis into the complex domain, allowing -the EOF components to have real and imaginary parts. This capability can reveal -oscillatory patterns in datasets, which are common in Earth observations. -For example, beyond typical examples like seasonal cycles, you can think of -internal waves in the ocean, or the Quasi-Biennial Oscillation in the atmosphere. - -Using monthly sea surface temperature data from 1970 to 2021 as an example, we -highlight the method's key features and address edge effects as a common challenge. - -.. [1] Rasmusson, E. M., Arkin, P. A., Chen, W.-Y. & Jalickee, J. B. Biennial variations in surface temperature over the United States as revealed by singular decomposition. Monthly Weather Review 109, 587–598 (1981). -.. [2] Barnett, T. P. Interaction of the Monsoon and Pacific Trade Wind System at Interannual Time Scales Part I: The Equatorial Zone. Monthly Weather Review 111, 756–773 (1983). -.. [3] Horel, J. Complex Principal Component Analysis: Theory and Examples. J. Climate Appl. Meteor. 23, 1660–1673 (1984). +In this tutorial, we'll walk through how to perform a Complex EOF analysis on +the zonal and meridional wind components. Let's start by importing the necessary packages and loading the data: """ # %% -import xeofs as xe +import matplotlib.pyplot as plt import xarray as xr -xr.set_options(display_expand_attrs=False) - -sst = xr.tutorial.open_dataset("ersstv5").sst -sst - -# %% -# We fit the Complex EOF model directly to the raw data, retaining the seasonal -# cycle for study. The model initialization specifies the desired number of -# modes. The ``use_coslat`` parameter is set to ``True`` to adjust for grid -# convergence at the poles. While the ``ComplexEOF`` class offers padding options -# to mitigate potential edge effects, we'll begin with no padding. +import xeofs as xe -kwargs = dict(n_modes=4, use_coslat=True, random_state=7) -model = xe.models.ComplexEOF(padding="none", **kwargs) +xr.set_options(display_expand_attrs=False) # %% -# Now, we fit the model to the data and extract the explained variance. 
+# For this example, we'll use the ERA-Interim tutorial dataset ``eraint_uvz``: -model.fit(sst, dim="time") -expvar = model.explained_variance() -expvar_ratio = model.explained_variance_ratio() +uvz = xr.tutorial.open_dataset("eraint_uvz") +uvz # %% -# Let's have a look at the explained variance of the first five modes: - -expvar.round(0) +# This dataset contains the zonal, meridional, and vertical wind components at +# three different atmospheric levels. Note that the data only covers two months, +# so we have just two time steps (samples). While this isn't enough for a robust +# EOF analysis, we'll proceed for demonstration purposes. Now, let's combine the +# zonal (``u``) and meridional (``v``) wind components into a complex-valued +# dataset: +Z = uvz["u"] + 1j * uvz["v"] # %% -# Clearly, the first mode completely dominates and already explains a substantial amount of variance. -# If we look at the fraction of explained variance, we see that the first mode explains about 88.8 %. +# Next, we'll initialize and fit the ``ComplexEOF`` model to our data. The +# ``xeofs`` package makes this easy by allowing us to specify the sample +# dimension (``month``), automatically performing the Complex EOF analysis +# across all three atmospheric levels. As a standard practice, we'll also weigh +# each grid cell by the square root of the cosine of the latitude +# (``use_coslat=True``). -(expvar_ratio * 100).round(1) +model = xe.single.ComplexEOF(n_modes=1, use_coslat=True, random_state=7) +model.fit(Z, dim="month") # %% -# In comparison to standard EOF analysis (check the corresponding example, -# S-mode), the first complex mode seems to integrate the first two standard -# modes in terms of explained variance. -# This makes sense as the two modes in standard EOF are both showing parts of -# an annual cycle (which are in quadrature) and thus the complex mode combines both of them. -# Let's confirm our hypothesis by looking at the real part the complex-valued scores: +# Instead of just extracting the complex-valued components, we can also get the +# amplitude and phase of these components. Let's start by looking at the +# amplitude of the first mode: -scores = model.scores() -scores.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1)) +spatial_ampltiudes = model.components_amplitude() +spatial_phases = model.components_phase() -# %% -# And indeed the annual cycle is completed incorporated into the first mode, -# while the second mode shows a semi-annual cycle (mode 3 in standard EOF). -# -# However, mode three and four look unusual. While showing some similarity to -# ENSO (e.g. in mode 3 peaks in 1982, 1998 and 2016), they exhibit a "running away" -# behaviour towards the boundaries of the time series. -# This a common issue in complex EOF analysis which is based on the Hilbert transform (a convolution) -# that suffers from the absence of information at the time series boundaries. One way to mitigate this -# is to artificially extend the time series also known as *padding*. In ``xeofs``, you can enable -# such a padding by setting the ``padding`` parameter to ``"exp"`` which will extent the boundaries by an exponential -# decaying function. The ``decay_factor`` parameter controls the decay rate of the exponential function measured in -# multiples of the time series length. 
Let's see how the decay parameter impacts the results: - -model_ext = xe.models.ComplexEOF(padding="exp", decay_factor=0.01, **kwargs) -model_ext.fit(sst, dim="time") -scores_ext = model_ext.scores().sel(mode=slice(1, 4)) - -scores_ext.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1)) +spatial_ampltiudes.sel(mode=1).plot(col="level") +plt.show() # %% -# And indeed, padding the time series effectively reduced the artifacts at the boundaries. -# Lastly, we examine the complex component amplitudes and phases. +# It looks like the first mode picks up a pattern resembling the location of the +# subtropical jet stream around ±30º latitude, particularly strong in the upper +# troposphere at 200 hPa and weaker toward the surface. We can also plot the +# phase of the first mode. To keep the plot clear, we'll only show the phase +# where the amplitude is above a certain threshold (e.g., 0.004): -comp_amps = model.components_amplitude() -comp_amps.plot(col="mode", vmin=0, vmax=0.025) +relevant_phases = spatial_phases.where(spatial_ampltiudes > 0.004) +relevant_phases.sel(mode=1).plot(col="level", cmap="twilight") +plt.show() # %% -# The component phases of the first mode clearly show the seasonal cycle as -# the northern and southern hemisphere are phase shifted by 180 degrees (white and black). -# Note the blueish regions in the central East Pacific and Indian Ocean which indicate -# a phase shift of 90 degrees compared to the main annual cycle. This is in agreement -# with mode 3 of the standard EOF analysis. - -comp_phases = model.components_phase() -comp_phases.plot(col="mode", cmap="twilight") diff --git a/docs/auto_examples/1single/plot_complex_eof.py.md5 b/docs/auto_examples/1single/plot_complex_eof.py.md5 index 90412219..d2b482e4 100644 --- a/docs/auto_examples/1single/plot_complex_eof.py.md5 +++ b/docs/auto_examples/1single/plot_complex_eof.py.md5 @@ -1 +1 @@ -169f32ef68f5635b4a8af2ec5a6be351 \ No newline at end of file +00a4013c7fa42189d7980ec2a54d0eae \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_complex_eof.rst b/docs/auto_examples/1single/plot_complex_eof.rst index b2be8bde..5f973424 100644 --- a/docs/auto_examples/1single/plot_complex_eof.rst +++ b/docs/auto_examples/1single/plot_complex_eof.rst @@ -18,909 +18,49 @@ .. _sphx_glr_auto_examples_1single_plot_complex_eof.py: -Complex/Hilbert EOF analysis +Complex EOF analysis ============================================ -We demonstrate how to execute a Complex EOF (or Hilbert EOF) analysis [1]_ [2]_ [3]_. -This method extends traditional EOF analysis into the complex domain, allowing -the EOF components to have real and imaginary parts. This capability can reveal -oscillatory patterns in datasets, which are common in Earth observations. -For example, beyond typical examples like seasonal cycles, you can think of -internal waves in the ocean, or the Quasi-Biennial Oscillation in the atmosphere. +In this tutorial, we'll walk through how to perform a Complex EOF analysis on +the zonal and meridional wind components. -Using monthly sea surface temperature data from 1970 to 2021 as an example, we -highlight the method's key features and address edge effects as a common challenge. - -.. [1] Rasmusson, E. M., Arkin, P. A., Chen, W.-Y. & Jalickee, J. B. Biennial variations in surface temperature over the United States as revealed by singular decomposition. Monthly Weather Review 109, 587–598 (1981). -.. [2] Barnett, T. P. 
Interaction of the Monsoon and Pacific Trade Wind System at Interannual Time Scales Part I: The Equatorial Zone. Monthly Weather Review 111, 756–773 (1983). -.. [3] Horel, J. Complex Principal Component Analysis: Theory and Examples. J. Climate Appl. Meteor. 23, 1660–1673 (1984). - -Let's start by importing the necessary packages and loading the data: - -.. GENERATED FROM PYTHON SOURCE LINES 23-31 - -.. code-block:: Python - - import xeofs as xe - import xarray as xr - - xr.set_options(display_expand_attrs=False) - - sst = xr.tutorial.open_dataset("ersstv5").sst - sst - - - - - - -.. raw:: html - -
-
- - - - - - - - - - - - - - -
<xarray.DataArray 'sst' (time: 624, lat: 89, lon: 180)> Size: 40MB
-    [9996480 values with dtype=float32]
-    Coordinates:
-      * lat      (lat) float32 356B 88.0 86.0 84.0 82.0 ... -82.0 -84.0 -86.0 -88.0
-      * lon      (lon) float32 720B 0.0 2.0 4.0 6.0 8.0 ... 352.0 354.0 356.0 358.0
-      * time     (time) datetime64[ns] 5kB 1970-01-01 1970-02-01 ... 2021-12-01
-    Attributes: (9)
-
-
-
- -.. GENERATED FROM PYTHON SOURCE LINES 32-37 - -We fit the Complex EOF model directly to the raw data, retaining the seasonal -cycle for study. The model initialization specifies the desired number of -modes. The ``use_coslat`` parameter is set to ``True`` to adjust for grid -convergence at the poles. While the ``ComplexEOF`` class offers padding options -to mitigate potential edge effects, we'll begin with no padding. - -.. GENERATED FROM PYTHON SOURCE LINES 37-41 - -.. code-block:: Python - - - kwargs = dict(n_modes=4, use_coslat=True, random_state=7) - model = xe.models.ComplexEOF(padding="none", **kwargs) - - - - - - - - -.. GENERATED FROM PYTHON SOURCE LINES 42-43 - -Now, we fit the model to the data and extract the explained variance. - -.. GENERATED FROM PYTHON SOURCE LINES 43-48 - -.. code-block:: Python - - - model.fit(sst, dim="time") - expvar = model.explained_variance() - expvar_ratio = model.explained_variance_ratio() - - - - - - - - -.. GENERATED FROM PYTHON SOURCE LINES 49-50 - -Let's have a look at the explained variance of the first five modes: - -.. GENERATED FROM PYTHON SOURCE LINES 50-54 - -.. code-block:: Python - - - expvar.round(0) - - - - - - - -.. raw:: html - -
-
- - - - - - - - - - - - - - -
<xarray.DataArray 'explained_variance' (mode: 4)> Size: 32B
-    5.069e+04 1.705e+03 1.105e+03 519.0
-    Coordinates:
-      * mode     (mode) int64 32B 1 2 3 4
-    Attributes: (16)
-
-
-
-.. GENERATED FROM PYTHON SOURCE LINES 55-57 +.. GENERATED FROM PYTHON SOURCE LINES 20-21 -Clearly, the first mode completely dominates and already explains a substantial amount of variance. -If we look at the fraction of explained variance, we see that the first mode explains about 88.8 %. +For this example, we'll use the ERA-Interim tutorial dataset ``eraint_uvz``: -.. GENERATED FROM PYTHON SOURCE LINES 57-60 +.. GENERATED FROM PYTHON SOURCE LINES 21-25 -.. code-block:: Python +.. code-block:: default - (expvar_ratio * 100).round(1) + uvz = xr.tutorial.open_dataset("eraint_uvz") + uvz @@ -1293,163 +433,151 @@ If we look at the fraction of explained variance, we see that the first mode exp stroke: currentColor; fill: currentColor; } -
<xarray.DataArray 'explained_variance_ratio' (mode: 4)> Size: 32B
-    88.8 3.0 1.9 0.9
+    
<xarray.Dataset>
+    Dimensions:    (longitude: 480, latitude: 241, level: 3, month: 2)
     Coordinates:
-      * mode     (mode) int64 32B 1 2 3 4
-    Attributes: (16)
+ * longitude (longitude) float32 -180.0 -179.2 -178.5 ... 177.8 178.5 179.2 + * latitude (latitude) float32 90.0 89.25 88.5 87.75 ... -88.5 -89.25 -90.0 + * level (level) int32 200 500 850 + * month (month) int32 1 7 + Data variables: + z (month, level, latitude, longitude) float64 ... + u (month, level, latitude, longitude) float64 ... + v (month, level, latitude, longitude) float64 ... + Attributes: (2)


-.. GENERATED FROM PYTHON SOURCE LINES 61-67 - -In comparison to standard EOF analysis (check the corresponding example, -S-mode), the first complex mode seems to integrate the first two standard -modes in terms of explained variance. -This makes sense as the two modes in standard EOF are both showing parts of -an annual cycle (which are in quadrature) and thus the complex mode combines both of them. -Let's confirm our hypothesis by looking at the real part the complex-valued scores: - -.. GENERATED FROM PYTHON SOURCE LINES 67-72 - -.. code-block:: Python - +.. GENERATED FROM PYTHON SOURCE LINES 26-32 - scores = model.scores() - scores.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1)) +This dataset contains the zonal, meridional, and vertical wind components at +three different atmospheric levels. Note that the data only covers two months, +so we have just two time steps (samples). While this isn't enough for a robust +EOF analysis, we'll proceed for demonstration purposes. Now, let's combine the +zonal (``u``) and meridional (``v``) wind components into a complex-valued +dataset: +.. GENERATED FROM PYTHON SOURCE LINES 32-35 +.. code-block:: default + Z = uvz["u"] + 1j * uvz["v"] -.. image-sg:: /auto_examples/1single/images/sphx_glr_plot_complex_eof_001.png - :alt: mode = 1, mode = 2, mode = 3, mode = 4 - :srcset: /auto_examples/1single/images/sphx_glr_plot_complex_eof_001.png - :class: sphx-glr-single-img - - -.. rst-class:: sphx-glr-script-out - - .. code-block:: none - -.. GENERATED FROM PYTHON SOURCE LINES 73-85 -And indeed the annual cycle is completed incorporated into the first mode, -while the second mode shows a semi-annual cycle (mode 3 in standard EOF). -However, mode three and four look unusual. While showing some similarity to -ENSO (e.g. in mode 3 peaks in 1982, 1998 and 2016), they exhibit a "running away" -behaviour towards the boundaries of the time series. -This a common issue in complex EOF analysis which is based on the Hilbert transform (a convolution) -that suffers from the absence of information at the time series boundaries. One way to mitigate this -is to artificially extend the time series also known as *padding*. In ``xeofs``, you can enable -such a padding by setting the ``padding`` parameter to ``"exp"`` which will extent the boundaries by an exponential -decaying function. The ``decay_factor`` parameter controls the decay rate of the exponential function measured in -multiples of the time series length. Let's see how the decay parameter impacts the results: +.. GENERATED FROM PYTHON SOURCE LINES 36-42 -.. GENERATED FROM PYTHON SOURCE LINES 85-92 +Next, we'll initialize and fit the ``ComplexEOF`` model to our data. The +``xeofs`` package makes this easy by allowing us to specify the sample +dimension (``month``), automatically performing the Complex EOF analysis +across all three atmospheric levels. As a standard practice, we'll also weigh +each grid cell by the square root of the cosine of the latitude +(``use_coslat=True``). -.. code-block:: Python +.. GENERATED FROM PYTHON SOURCE LINES 42-46 +.. code-block:: default - model_ext = xe.models.ComplexEOF(padding="exp", decay_factor=0.01, **kwargs) - model_ext.fit(sst, dim="time") - scores_ext = model_ext.scores().sel(mode=slice(1, 4)) - scores_ext.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1)) + model = xe.single.ComplexEOF(n_modes=1, use_coslat=True, random_state=7) + model.fit(Z, dim="month") -.. 
image-sg:: /auto_examples/1single/images/sphx_glr_plot_complex_eof_002.png - :alt: mode = 1, mode = 2, mode = 3, mode = 4 - :srcset: /auto_examples/1single/images/sphx_glr_plot_complex_eof_002.png - :class: sphx-glr-single-img - .. rst-class:: sphx-glr-script-out .. code-block:: none + /home/nrieger/miniconda3/envs/xeofs/lib/python3.11/site-packages/scipy/sparse/linalg/_eigen/_svds.py:483: UserWarning: The problem size 2 minus the constraints size 0 is too small relative to the block size 1. Using a dense eigensolver instead of LOBPCG iterations.No output of the history of the iterations. + _, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter, - + -.. GENERATED FROM PYTHON SOURCE LINES 93-95 +.. GENERATED FROM PYTHON SOURCE LINES 47-50 -And indeed, padding the time series effectively reduced the artifacts at the boundaries. -Lastly, we examine the complex component amplitudes and phases. +Instead of just extracting the complex-valued components, we can also get the +amplitude and phase of these components. Let's start by looking at the +amplitude of the first mode: -.. GENERATED FROM PYTHON SOURCE LINES 95-99 +.. GENERATED FROM PYTHON SOURCE LINES 50-58 -.. code-block:: Python +.. code-block:: default - comp_amps = model.components_amplitude() - comp_amps.plot(col="mode", vmin=0, vmax=0.025) + spatial_ampltiudes = model.components_amplitude() + spatial_phases = model.components_phase() + spatial_ampltiudes.sel(mode=1).plot(col="level") + plt.show() -.. image-sg:: /auto_examples/1single/images/sphx_glr_plot_complex_eof_003.png - :alt: mode = 1, mode = 2, mode = 3, mode = 4 - :srcset: /auto_examples/1single/images/sphx_glr_plot_complex_eof_003.png - :class: sphx-glr-single-img -.. rst-class:: sphx-glr-script-out +.. image-sg:: /auto_examples/1single/images/sphx_glr_plot_complex_eof_001.png + :alt: level = 200, level = 500, level = 850 + :srcset: /auto_examples/1single/images/sphx_glr_plot_complex_eof_001.png + :class: sphx-glr-single-img - .. code-block:: none - +.. GENERATED FROM PYTHON SOURCE LINES 59-64 -.. GENERATED FROM PYTHON SOURCE LINES 100-105 +It looks like the first mode picks up a pattern resembling the location of the +subtropical jet stream around ±30º latitude, particularly strong in the upper +troposphere at 200 hPa and weaker toward the surface. We can also plot the +phase of the first mode. To keep the plot clear, we'll only show the phase +where the amplitude is above a certain threshold (e.g., 0.004): -The component phases of the first mode clearly show the seasonal cycle as -the northern and southern hemisphere are phase shifted by 180 degrees (white and black). -Note the blueish regions in the central East Pacific and Indian Ocean which indicate -a phase shift of 90 degrees compared to the main annual cycle. This is in agreement -with mode 3 of the standard EOF analysis. +.. GENERATED FROM PYTHON SOURCE LINES 64-69 -.. GENERATED FROM PYTHON SOURCE LINES 105-108 +.. code-block:: default -.. code-block:: Python + relevant_phases = spatial_phases.where(spatial_ampltiudes > 0.004) + relevant_phases.sel(mode=1).plot(col="level", cmap="twilight") + plt.show() - comp_phases = model.components_phase() - comp_phases.plot(col="mode", cmap="twilight") -.. image-sg:: /auto_examples/1single/images/sphx_glr_plot_complex_eof_004.png - :alt: mode = 1, mode = 2, mode = 3, mode = 4 - :srcset: /auto_examples/1single/images/sphx_glr_plot_complex_eof_004.png +.. 
image-sg:: /auto_examples/1single/images/sphx_glr_plot_complex_eof_002.png + :alt: level = 200, level = 500, level = 850 + :srcset: /auto_examples/1single/images/sphx_glr_plot_complex_eof_002.png :class: sphx-glr-single-img -.. rst-class:: sphx-glr-script-out - - .. code-block:: none - - - .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 3.119 seconds) + **Total running time of the script:** (0 minutes 1.162 seconds) .. _sphx_glr_download_auto_examples_1single_plot_complex_eof.py: @@ -1458,14 +586,17 @@ with mode 3 of the standard EOF analysis. .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_complex_eof.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_complex_eof.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_complex_eof.ipynb ` + .. only:: html diff --git a/docs/auto_examples/1single/plot_complex_eof_codeobj.pickle b/docs/auto_examples/1single/plot_complex_eof_codeobj.pickle index 8f11151a..e927b2a4 100644 Binary files a/docs/auto_examples/1single/plot_complex_eof_codeobj.pickle and b/docs/auto_examples/1single/plot_complex_eof_codeobj.pickle differ diff --git a/docs/auto_examples/1single/plot_eeof.ipynb b/docs/auto_examples/1single/plot_eeof.ipynb index 93ba4890..f2c98695 100644 --- a/docs/auto_examples/1single/plot_eeof.ipynb +++ b/docs/auto_examples/1single/plot_eeof.ipynb @@ -24,9 +24,10 @@ "metadata": {}, "outputs": [], "source": [ + "import matplotlib.pyplot as plt\n", "import xarray as xr\n", + "\n", "import xeofs as xe\n", - "import matplotlib.pyplot as plt\n", "\n", "xr.set_options(display_expand_data=False)" ] @@ -98,7 +99,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = xe.models.ExtendedEOF(\n", + "model = xe.single.ExtendedEOF(\n", " n_modes=10, tau=4, embedding=40, n_pca_modes=50, use_coslat=True\n", ")\n", "model.fit(t2m, dim=\"time\")\n", @@ -183,7 +184,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/1single/plot_eeof.py b/docs/auto_examples/1single/plot_eeof.py index ab804f5b..4605b9fd 100644 --- a/docs/auto_examples/1single/plot_eeof.py +++ b/docs/auto_examples/1single/plot_eeof.py @@ -13,9 +13,10 @@ Let's begin by setting up the required packages and fetching the data: """ +import matplotlib.pyplot as plt import xarray as xr + import xeofs as xe -import matplotlib.pyplot as plt xr.set_options(display_expand_data=False) @@ -51,7 +52,7 @@ # With these parameters set, we proceed to instantiate the ``ExtendedEOF`` # model and fit our data. 
-model = xe.models.ExtendedEOF( +model = xe.single.ExtendedEOF( n_modes=10, tau=4, embedding=40, n_pca_modes=50, use_coslat=True ) model.fit(t2m, dim="time") diff --git a/docs/auto_examples/1single/plot_eeof.py.md5 b/docs/auto_examples/1single/plot_eeof.py.md5 index c812c58c..dd2a59f8 100644 --- a/docs/auto_examples/1single/plot_eeof.py.md5 +++ b/docs/auto_examples/1single/plot_eeof.py.md5 @@ -1 +1 @@ -df94cdd1a2a195a28f08b955bafe0677 \ No newline at end of file +dad4ad8d30e4903c94110f68dfed3398 \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_eeof.rst b/docs/auto_examples/1single/plot_eeof.rst index e5e68399..4d381907 100644 --- a/docs/auto_examples/1single/plot_eeof.rst +++ b/docs/auto_examples/1single/plot_eeof.rst @@ -31,14 +31,15 @@ decomposed to yield its eigenvectors (components) and eigenvalues (explained var Let's begin by setting up the required packages and fetching the data: -.. GENERATED FROM PYTHON SOURCE LINES 15-22 +.. GENERATED FROM PYTHON SOURCE LINES 15-23 -.. code-block:: Python +.. code-block:: default + import matplotlib.pyplot as plt import xarray as xr + import xeofs as xe - import matplotlib.pyplot as plt xr.set_options(display_expand_data=False) @@ -51,17 +52,17 @@ Let's begin by setting up the required packages and fetching the data: .. code-block:: none - + -.. GENERATED FROM PYTHON SOURCE LINES 23-24 +.. GENERATED FROM PYTHON SOURCE LINES 24-25 Load the tutorial data. -.. GENERATED FROM PYTHON SOURCE LINES 24-27 +.. GENERATED FROM PYTHON SOURCE LINES 25-28 -.. code-block:: Python +.. code-block:: default t2m = xr.tutorial.load_dataset("air_temperature").air @@ -73,7 +74,7 @@ Load the tutorial data. -.. GENERATED FROM PYTHON SOURCE LINES 28-41 +.. GENERATED FROM PYTHON SOURCE LINES 29-42 Prior to conducting the EEOF analysis, it's essential to determine the structure of the lagged covariance matrix. This entails defining the time @@ -89,9 +90,9 @@ It's obvious that this way of constructing the lagged covariance matrix and subsequently decomposing it can be computationally expensive. For example, given our dataset's dimensions, -.. GENERATED FROM PYTHON SOURCE LINES 41-44 +.. GENERATED FROM PYTHON SOURCE LINES 42-45 -.. code-block:: Python +.. code-block:: default t2m.shape @@ -109,7 +110,7 @@ given our dataset's dimensions, -.. GENERATED FROM PYTHON SOURCE LINES 45-53 +.. GENERATED FROM PYTHON SOURCE LINES 46-54 the extended dataset would have 40 x 25 x 53 = 53000 features which is much larger than the original dataset's 1325 features. @@ -120,12 +121,12 @@ up with 40 x 50 = 200 (latent) features. With these parameters set, we proceed to instantiate the ``ExtendedEOF`` model and fit our data. -.. GENERATED FROM PYTHON SOURCE LINES 53-62 +.. GENERATED FROM PYTHON SOURCE LINES 54-63 -.. code-block:: Python +.. code-block:: default - model = xe.models.ExtendedEOF( + model = xe.single.ExtendedEOF( n_modes=10, tau=4, embedding=40, n_pca_modes=50, use_coslat=True ) model.fit(t2m, dim="time") @@ -504,102 +505,102 @@ model and fit our data. stroke: currentColor; fill: currentColor; } -
<xarray.DataArray 'components' (mode: 10, embedding: 40, lat: 25, lon: 53)> Size: 4MB
-    0.0003854 0.0003646 0.000357 0.0003562 ... -0.001459 -0.00105 -0.0006424
+    
<xarray.DataArray 'components' (mode: 10, embedding: 40, lat: 25, lon: 53)>
+    0.0003855 0.0003648 0.0003573 0.0003565 ... -0.001427 -0.001012 -0.0006065
     Coordinates:
-      * lat        (lat) float32 100B 15.0 17.5 20.0 22.5 ... 67.5 70.0 72.5 75.0
-      * lon        (lon) float32 212B 200.0 202.5 205.0 207.5 ... 325.0 327.5 330.0
-      * embedding  (embedding) int64 320B 0 4 8 12 16 20 ... 136 140 144 148 152 156
-      * mode       (mode) int64 80B 1 2 3 4 5 6 7 8 9 10
-    Attributes: (12/16)
+      * lat        (lat) float32 15.0 17.5 20.0 22.5 25.0 ... 67.5 70.0 72.5 75.0
+      * lon        (lon) float32 200.0 202.5 205.0 207.5 ... 322.5 325.0 327.5 330.0
+      * embedding  (embedding) int64 0 4 8 12 16 20 24 ... 136 140 144 148 152 156
+      * mode       (mode) int64 1 2 3 4 5 6 7 8 9 10
+    Attributes: (12/15)
         model:          Extended EOF Analysis
         software:       xeofs
-        version:        2.3.2
-        date:           2024-03-31 20:34:10
+        version:        1.2.0
+        date:           2024-09-02 02:16:24
         n_modes:        10
         center:         True
         ...             ...
+        sample_name:    sample
         feature_name:   feature
         random_state:   None
-        verbose:        False
         compute:        True
         solver:         auto
-        solver_kwargs:  {}
  • model :
    Extended EOF Analysis
    software :
    xeofs
    version :
    1.2.0
    date :
    2024-09-02 02:16:24
    n_modes :
    10
    center :
    True
    standardize :
    False
    use_coslat :
    True
    check_nans :
    True
    sample_name :
    sample
    feature_name :
    feature
    random_state :
    None
    compute :
    True
    solver :
    auto
    solver_kwargs :
    {}


  • -.. GENERATED FROM PYTHON SOURCE LINES 63-67 +.. GENERATED FROM PYTHON SOURCE LINES 64-68 A notable distinction from standard EOF analysis is the incorporation of an extra ``embedding`` dimension in the components. Nonetheless, the overarching methodology mirrors traditional EOF practices. The results, for instance, can be assessed by examining the explained variance ratio. -.. GENERATED FROM PYTHON SOURCE LINES 67-71 +.. GENERATED FROM PYTHON SOURCE LINES 68-72 -.. code-block:: Python +.. code-block:: default model.explained_variance_ratio().plot() @@ -617,13 +618,13 @@ for instance, can be assessed by examining the explained variance ratio. -.. GENERATED FROM PYTHON SOURCE LINES 72-73 +.. GENERATED FROM PYTHON SOURCE LINES 73-74 Additionally, we can look into the scores; let's spotlight mode 4. -.. GENERATED FROM PYTHON SOURCE LINES 73-77 +.. GENERATED FROM PYTHON SOURCE LINES 74-78 -.. code-block:: Python +.. code-block:: default scores.sel(mode=4).plot() @@ -641,15 +642,15 @@ Additionally, we can look into the scores; let's spotlight mode 4. -.. GENERATED FROM PYTHON SOURCE LINES 78-81 +.. GENERATED FROM PYTHON SOURCE LINES 79-82 In wrapping up, we visualize the corresponding EEOF component of mode 4. For visualization purposes, we'll focus on the component at a specific latitude, in this instance, 60 degrees north. -.. GENERATED FROM PYTHON SOURCE LINES 81-84 +.. GENERATED FROM PYTHON SOURCE LINES 82-85 -.. code-block:: Python +.. code-block:: default components.sel(mode=4, lat=60).plot() @@ -669,7 +670,7 @@ latitude, in this instance, 60 degrees north. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 0.777 seconds) + **Total running time of the script:** (0 minutes 5.099 seconds) .. _sphx_glr_download_auto_examples_1single_plot_eeof.py: @@ -678,14 +679,17 @@ latitude, in this instance, 60 degrees north. .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_eeof.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_eeof.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_eeof.ipynb ` + .. 
only:: html diff --git a/docs/auto_examples/1single/plot_eeof_codeobj.pickle b/docs/auto_examples/1single/plot_eeof_codeobj.pickle index 02887345..e272407f 100644 Binary files a/docs/auto_examples/1single/plot_eeof_codeobj.pickle and b/docs/auto_examples/1single/plot_eeof_codeobj.pickle differ diff --git a/docs/auto_examples/1single/plot_eeof_trend.ipynb b/docs/auto_examples/1single/plot_eeof_trend.ipynb index 1bb3f455..f7850e99 100644 --- a/docs/auto_examples/1single/plot_eeof_trend.ipynb +++ b/docs/auto_examples/1single/plot_eeof_trend.ipynb @@ -1,187 +1,214 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n# Removing nonlinear trends with EEOF analysis\n\nThis tutorial illustrates the application of Extended EOF (EEOF) analysis\nto isolate and remove nonlinear trends within a dataset.\n\nLet's begin by setting up the required packages and fetching the data.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import xarray as xr\nimport xeofs as xe\nimport matplotlib.pyplot as plt\n\nxr.set_options(display_expand_data=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We load the sea surface temperature (SST) data from the xarray tutorial.\nThe dataset consists of monthly averages from 1970 to 2021. To ensure the seasonal\ncycle doesn't overshadow the analysis, we remove the monthly climatologies.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "sst = xr.tutorial.open_dataset(\"ersstv5\").sst\nsst = sst.groupby(\"time.month\") - sst.groupby(\"time.month\").mean(\"time\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We start by performing a standard EOF analysis on the dataset.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "eof = xe.models.EOF(n_modes=10)\neof.fit(sst, dim=\"time\")\nscores = eof.scores()\ncomponents = eof.components()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We immediately see that the first mode represents the global warming trend.\nYet, the signal is somewhat muddled by short-term and year-to-year variations.\nNote the pronounced spikes around 1998 and 2016, hinting at the leakage of\nENSO signatures into this mode.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fig, ax = plt.subplots(1, 2, figsize=(10, 5))\nscores.sel(mode=1).plot(ax=ax[0])\ncomponents.sel(mode=1).plot(ax=ax[1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, let's try to identify this trend more cleanly. To this end, we perform an\nEEOF analysis on the same data with a suitably large embedding dimension.\nWe choose an embedding dimensioncorresponding to 120 months which is large enough\nto capture long-term trends. 
To speed up computation, we apply the EEOF analysis\nto the extended (lag) covariance matrix derived from the first 50 PCs.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "eeof = xe.models.ExtendedEOF(n_modes=5, tau=1, embedding=120, n_pca_modes=50)\neeof.fit(sst, dim=\"time\")\ncomponents_ext = eeof.components()\nscores_ext = eeof.scores()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The first mode now represents the global warming trend much more clearly.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fig, ax = plt.subplots(1, 2, figsize=(10, 5))\nscores_ext.sel(mode=1).plot(ax=ax[0])\ncomponents_ext.sel(mode=1, embedding=0).plot(ax=ax[1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can use this to the first mode to remove this nonlinear trend from our original dataset.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "sst_trends = eeof.inverse_transform(scores_ext.sel(mode=1))\nsst_detrended = sst - sst_trends.drop_vars(\"mode\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Reapplying the standard EOF analysis on our now detrended dataset:\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "eof_model_detrended = xe.models.EOF(n_modes=5)\neof_model_detrended.fit(sst_detrended, dim=\"time\")\nscores_detrended = eof_model_detrended.scores()\ncomponents_detrended = eof_model_detrended.components()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The first mode now represents ENSO without any trend component.\n\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "fig, ax = plt.subplots(1, 2, figsize=(10, 5))\nscores_detrended.sel(mode=1).plot(ax=ax[0])\ncomponents_detrended.sel(mode=1).plot(ax=ax[1])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} \ No newline at end of file + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Removing nonlinear trends with EEOF analysis\n", + "\n", + "This tutorial illustrates the application of Extended EOF (EEOF) analysis\n", + "to isolate and remove nonlinear trends within a dataset.\n", + "\n", + "Let's begin by setting up the required packages and fetching the data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import xarray as xr\n", + "\n", + "import xeofs as xe\n", + "\n", + "xr.set_options(display_expand_data=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We load the sea surface temperature (SST) data from the xarray tutorial.\n", + "The dataset consists of monthly averages from 1970 to 2021. 
To ensure the seasonal\n", + "cycle doesn't overshadow the analysis, we remove the monthly climatologies.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sst = xr.tutorial.open_dataset(\"ersstv5\").sst\n", + "sst = sst.groupby(\"time.month\") - sst.groupby(\"time.month\").mean(\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by performing a standard EOF analysis on the dataset.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eof = xe.single.EOF(n_modes=10)\n", + "eof.fit(sst, dim=\"time\")\n", + "scores = eof.scores()\n", + "components = eof.components()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We immediately see that the first mode represents the global warming trend.\n", + "Yet, the signal is somewhat muddled by short-term and year-to-year variations.\n", + "Note the pronounced spikes around 1998 and 2016, hinting at the leakage of\n", + "ENSO signatures into this mode.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(1, 2, figsize=(10, 5))\n", + "scores.sel(mode=1).plot(ax=ax[0])\n", + "components.sel(mode=1).plot(ax=ax[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's try to identify this trend more cleanly. To this end, we perform an\n", + "EEOF analysis on the same data with a suitably large embedding dimension.\n", + "We choose an embedding dimensioncorresponding to 120 months which is large enough\n", + "to capture long-term trends. To speed up computation, we apply the EEOF analysis\n", + "to the extended (lag) covariance matrix derived from the first 50 PCs.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eeof = xe.single.ExtendedEOF(n_modes=5, tau=1, embedding=120, n_pca_modes=50)\n", + "eeof.fit(sst, dim=\"time\")\n", + "components_ext = eeof.components()\n", + "scores_ext = eeof.scores()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first mode now represents the global warming trend much more clearly.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(1, 2, figsize=(10, 5))\n", + "scores_ext.sel(mode=1).plot(ax=ax[0])\n", + "components_ext.sel(mode=1, embedding=0).plot(ax=ax[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use this to the first mode to remove this nonlinear trend from our original dataset.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sst_trends = eeof.inverse_transform(scores_ext.sel(mode=1))\n", + "sst_detrended = sst - sst_trends" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reapplying the standard EOF analysis on our now detrended dataset:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eof_model_detrended = xe.single.EOF(n_modes=5)\n", + "eof_model_detrended.fit(sst_detrended, dim=\"time\")\n", + "scores_detrended = eof_model_detrended.scores()\n", + "components_detrended = eof_model_detrended.components()" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "The first mode now represents ENSO without any trend component.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(1, 2, figsize=(10, 5))\n", + "scores_detrended.sel(mode=1).plot(ax=ax[0])\n", + "components_detrended.sel(mode=1).plot(ax=ax[1])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/docs/auto_examples/1single/plot_eeof_trend.py b/docs/auto_examples/1single/plot_eeof_trend.py index d309c138..86fb6526 100644 --- a/docs/auto_examples/1single/plot_eeof_trend.py +++ b/docs/auto_examples/1single/plot_eeof_trend.py @@ -8,9 +8,10 @@ Let's begin by setting up the required packages and fetching the data. """ +import matplotlib.pyplot as plt import xarray as xr + import xeofs as xe -import matplotlib.pyplot as plt xr.set_options(display_expand_data=False) @@ -26,7 +27,7 @@ # %% # We start by performing a standard EOF analysis on the dataset. -eof = xe.models.EOF(n_modes=10) +eof = xe.single.EOF(n_modes=10) eof.fit(sst, dim="time") scores = eof.scores() components = eof.components() @@ -49,7 +50,7 @@ # to capture long-term trends. To speed up computation, we apply the EEOF analysis # to the extended (lag) covariance matrix derived from the first 50 PCs. -eeof = xe.models.ExtendedEOF(n_modes=5, tau=1, embedding=120, n_pca_modes=50) +eeof = xe.single.ExtendedEOF(n_modes=5, tau=1, embedding=120, n_pca_modes=50) eeof.fit(sst, dim="time") components_ext = eeof.components() scores_ext = eeof.scores() @@ -65,13 +66,13 @@ # We can use this to the first mode to remove this nonlinear trend from our original dataset. sst_trends = eeof.inverse_transform(scores_ext.sel(mode=1)) -sst_detrended = sst - sst_trends.drop_vars("mode") +sst_detrended = sst - sst_trends # %% # Reapplying the standard EOF analysis on our now detrended dataset: -eof_model_detrended = xe.models.EOF(n_modes=5) +eof_model_detrended = xe.single.EOF(n_modes=5) eof_model_detrended.fit(sst_detrended, dim="time") scores_detrended = eof_model_detrended.scores() components_detrended = eof_model_detrended.components() diff --git a/docs/auto_examples/1single/plot_eeof_trend.py.md5 b/docs/auto_examples/1single/plot_eeof_trend.py.md5 index 1bea8bf4..ce05ba56 100644 --- a/docs/auto_examples/1single/plot_eeof_trend.py.md5 +++ b/docs/auto_examples/1single/plot_eeof_trend.py.md5 @@ -1 +1 @@ -4ca793199afeed8a98375c67b7f63197 \ No newline at end of file +e84d0364f470ed5e3eae38a735ab52e0 \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_eeof_trend.rst b/docs/auto_examples/1single/plot_eeof_trend.rst index 72ca435b..b273263c 100644 --- a/docs/auto_examples/1single/plot_eeof_trend.rst +++ b/docs/auto_examples/1single/plot_eeof_trend.rst @@ -26,14 +26,15 @@ to isolate and remove nonlinear trends within a dataset. Let's begin by setting up the required packages and fetching the data. -.. GENERATED FROM PYTHON SOURCE LINES 10-17 +.. GENERATED FROM PYTHON SOURCE LINES 10-18 .. 
code-block:: default + import matplotlib.pyplot as plt import xarray as xr + import xeofs as xe - import matplotlib.pyplot as plt xr.set_options(display_expand_data=False) @@ -46,17 +47,17 @@ Let's begin by setting up the required packages and fetching the data. .. code-block:: none - + -.. GENERATED FROM PYTHON SOURCE LINES 18-21 +.. GENERATED FROM PYTHON SOURCE LINES 19-22 We load the sea surface temperature (SST) data from the xarray tutorial. The dataset consists of monthly averages from 1970 to 2021. To ensure the seasonal cycle doesn't overshadow the analysis, we remove the monthly climatologies. -.. GENERATED FROM PYTHON SOURCE LINES 21-26 +.. GENERATED FROM PYTHON SOURCE LINES 22-27 .. code-block:: default @@ -72,16 +73,16 @@ cycle doesn't overshadow the analysis, we remove the monthly climatologies. -.. GENERATED FROM PYTHON SOURCE LINES 27-28 +.. GENERATED FROM PYTHON SOURCE LINES 28-29 We start by performing a standard EOF analysis on the dataset. -.. GENERATED FROM PYTHON SOURCE LINES 28-34 +.. GENERATED FROM PYTHON SOURCE LINES 29-35 .. code-block:: default - eof = xe.models.EOF(n_modes=10) + eof = xe.single.EOF(n_modes=10) eof.fit(sst, dim="time") scores = eof.scores() components = eof.components() @@ -93,14 +94,14 @@ We start by performing a standard EOF analysis on the dataset. -.. GENERATED FROM PYTHON SOURCE LINES 35-39 +.. GENERATED FROM PYTHON SOURCE LINES 36-40 We immediately see that the first mode represents the global warming trend. Yet, the signal is somewhat muddled by short-term and year-to-year variations. Note the pronounced spikes around 1998 and 2016, hinting at the leakage of ENSO signatures into this mode. -.. GENERATED FROM PYTHON SOURCE LINES 39-45 +.. GENERATED FROM PYTHON SOURCE LINES 40-46 .. code-block:: default @@ -124,11 +125,11 @@ ENSO signatures into this mode. .. code-block:: none - + -.. GENERATED FROM PYTHON SOURCE LINES 46-51 +.. GENERATED FROM PYTHON SOURCE LINES 47-52 Now, let's try to identify this trend more cleanly. To this end, we perform an EEOF analysis on the same data with a suitably large embedding dimension. @@ -136,12 +137,12 @@ We choose an embedding dimensioncorresponding to 120 months which is large enoug to capture long-term trends. To speed up computation, we apply the EEOF analysis to the extended (lag) covariance matrix derived from the first 50 PCs. -.. GENERATED FROM PYTHON SOURCE LINES 51-57 +.. GENERATED FROM PYTHON SOURCE LINES 52-58 .. code-block:: default - eeof = xe.models.ExtendedEOF(n_modes=5, tau=1, embedding=120, n_pca_modes=50) + eeof = xe.single.ExtendedEOF(n_modes=5, tau=1, embedding=120, n_pca_modes=50) eeof.fit(sst, dim="time") components_ext = eeof.components() scores_ext = eeof.scores() @@ -153,11 +154,11 @@ to the extended (lag) covariance matrix derived from the first 50 PCs. -.. GENERATED FROM PYTHON SOURCE LINES 58-59 +.. GENERATED FROM PYTHON SOURCE LINES 59-60 The first mode now represents the global warming trend much more clearly. -.. GENERATED FROM PYTHON SOURCE LINES 59-64 +.. GENERATED FROM PYTHON SOURCE LINES 60-65 .. code-block:: default @@ -180,21 +181,21 @@ The first mode now represents the global warming trend much more clearly. .. code-block:: none - + -.. GENERATED FROM PYTHON SOURCE LINES 65-66 +.. GENERATED FROM PYTHON SOURCE LINES 66-67 We can use this to the first mode to remove this nonlinear trend from our original dataset. -.. GENERATED FROM PYTHON SOURCE LINES 66-71 +.. GENERATED FROM PYTHON SOURCE LINES 67-72 .. 
code-block:: default sst_trends = eeof.inverse_transform(scores_ext.sel(mode=1)) - sst_detrended = sst - sst_trends.drop_vars("mode") + sst_detrended = sst - sst_trends @@ -204,16 +205,16 @@ We can use this to the first mode to remove this nonlinear trend from our origin -.. GENERATED FROM PYTHON SOURCE LINES 72-73 +.. GENERATED FROM PYTHON SOURCE LINES 73-74 Reapplying the standard EOF analysis on our now detrended dataset: -.. GENERATED FROM PYTHON SOURCE LINES 73-80 +.. GENERATED FROM PYTHON SOURCE LINES 74-81 .. code-block:: default - eof_model_detrended = xe.models.EOF(n_modes=5) + eof_model_detrended = xe.single.EOF(n_modes=5) eof_model_detrended.fit(sst_detrended, dim="time") scores_detrended = eof_model_detrended.scores() components_detrended = eof_model_detrended.components() @@ -226,11 +227,11 @@ Reapplying the standard EOF analysis on our now detrended dataset: -.. GENERATED FROM PYTHON SOURCE LINES 81-82 +.. GENERATED FROM PYTHON SOURCE LINES 82-83 The first mode now represents ENSO without any trend component. -.. GENERATED FROM PYTHON SOURCE LINES 82-88 +.. GENERATED FROM PYTHON SOURCE LINES 83-89 .. code-block:: default @@ -254,14 +255,14 @@ The first mode now represents ENSO without any trend component. .. code-block:: none - + .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 28.072 seconds) + **Total running time of the script:** (0 minutes 7.526 seconds) .. _sphx_glr_download_auto_examples_1single_plot_eeof_trend.py: diff --git a/docs/auto_examples/1single/plot_eeof_trend_codeobj.pickle b/docs/auto_examples/1single/plot_eeof_trend_codeobj.pickle index dfc530a8..f2098a18 100644 Binary files a/docs/auto_examples/1single/plot_eeof_trend_codeobj.pickle and b/docs/auto_examples/1single/plot_eeof_trend_codeobj.pickle differ diff --git a/docs/auto_examples/1single/plot_eof-smode.ipynb b/docs/auto_examples/1single/plot_eof-smode.ipynb index 97441b1c..2e186a73 100644 --- a/docs/auto_examples/1single/plot_eof-smode.ipynb +++ b/docs/auto_examples/1single/plot_eof-smode.ipynb @@ -27,7 +27,7 @@ "from cartopy.crs import EqualEarth, PlateCarree\n", "from matplotlib.gridspec import GridSpec\n", "\n", - "from xeofs.models import SparsePCA" + "import xeofs as xe" ] }, { @@ -62,7 +62,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = SparsePCA(n_modes=4, alpha=1e-5)\n", + "model = xe.single.SparsePCA(n_modes=4, alpha=1e-5)\n", "model.fit(sst, dim=\"time\")\n", "expvar = model.explained_variance()\n", "expvar_ratio = model.explained_variance_ratio()\n", diff --git a/docs/auto_examples/1single/plot_eof-smode.py b/docs/auto_examples/1single/plot_eof-smode.py index a81d00c0..479367d7 100644 --- a/docs/auto_examples/1single/plot_eof-smode.py +++ b/docs/auto_examples/1single/plot_eof-smode.py @@ -18,7 +18,7 @@ from cartopy.crs import EqualEarth, PlateCarree from matplotlib.gridspec import GridSpec -from xeofs.models import SparsePCA +import xeofs as xe # %% # We use sea surface temperature data from 1990 to 2017, consistent with the original paper. @@ -29,7 +29,7 @@ # %% # We perform sparse PCA using the `alpha` and `beta` parameters, which define the sparsity imposed by the elastic net (refer to Table 1 in the paper). In our analysis, we set `alpha` to 1e-5, as specified by the authors. Although the authors do not specify a value for `beta`, it appears that the results are not highly sensitive to this parameter. Therefore, we use the default `beta` value of 1e-4. 
-model = SparsePCA(n_modes=4, alpha=1e-5) +model = xe.single.SparsePCA(n_modes=4, alpha=1e-5) model.fit(sst, dim="time") expvar = model.explained_variance() expvar_ratio = model.explained_variance_ratio() diff --git a/docs/auto_examples/1single/plot_eof-smode.py.md5 b/docs/auto_examples/1single/plot_eof-smode.py.md5 index 59576489..981c2288 100644 --- a/docs/auto_examples/1single/plot_eof-smode.py.md5 +++ b/docs/auto_examples/1single/plot_eof-smode.py.md5 @@ -1 +1 @@ -bb0b6c390528787d00735dd586afb457 \ No newline at end of file +6d0f12994146614921c2bbc9e655d938 \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_eof-smode.rst b/docs/auto_examples/1single/plot_eof-smode.rst index 1f930ac2..c8012335 100644 --- a/docs/auto_examples/1single/plot_eof-smode.rst +++ b/docs/auto_examples/1single/plot_eof-smode.rst @@ -40,7 +40,7 @@ References from cartopy.crs import EqualEarth, PlateCarree from matplotlib.gridspec import GridSpec - from xeofs.models import SparsePCA + import xeofs as xe @@ -77,7 +77,7 @@ We perform sparse PCA using the `alpha` and `beta` parameters, which define the .. code-block:: default - model = SparsePCA(n_modes=4, alpha=1e-5) + model = xe.single.SparsePCA(n_modes=4, alpha=1e-5) model.fit(sst, dim="time") expvar = model.explained_variance() expvar_ratio = model.explained_variance_ratio() @@ -159,7 +159,7 @@ Examining the first four modes, we clearly identify ENSO as the fourth mode. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 7.992 seconds) + **Total running time of the script:** (0 minutes 7.551 seconds) .. _sphx_glr_download_auto_examples_1single_plot_eof-smode.py: diff --git a/docs/auto_examples/1single/plot_eof-smode_codeobj.pickle b/docs/auto_examples/1single/plot_eof-smode_codeobj.pickle index 78dfade0..f93d16e6 100644 Binary files a/docs/auto_examples/1single/plot_eof-smode_codeobj.pickle and b/docs/auto_examples/1single/plot_eof-smode_codeobj.pickle differ diff --git a/docs/auto_examples/1single/plot_eof-tmode.ipynb b/docs/auto_examples/1single/plot_eof-tmode.ipynb index ac26dc99..e9254f10 100644 --- a/docs/auto_examples/1single/plot_eof-tmode.ipynb +++ b/docs/auto_examples/1single/plot_eof-tmode.ipynb @@ -18,12 +18,12 @@ "metadata": {}, "outputs": [], "source": [ - "import xarray as xr\n", "import matplotlib.pyplot as plt\n", - "from matplotlib.gridspec import GridSpec\n", + "import xarray as xr\n", "from cartopy.crs import EqualEarth, PlateCarree\n", + "from matplotlib.gridspec import GridSpec\n", "\n", - "from xeofs.models import EOF\n", + "import xeofs as xe\n", "\n", "sst = xr.tutorial.open_dataset(\"ersstv5\")[\"sst\"]" ] @@ -42,7 +42,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = EOF(n_modes=5)\n", + "model = xe.single.EOF(n_modes=5)\n", "model.fit(sst, dim=(\"lat\", \"lon\"))\n", "expvar = model.explained_variance_ratio()\n", "components = model.components()\n", @@ -99,7 +99,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/1single/plot_eof-tmode.py b/docs/auto_examples/1single/plot_eof-tmode.py index 6a618e3e..f85b97e9 100644 --- a/docs/auto_examples/1single/plot_eof-tmode.py +++ b/docs/auto_examples/1single/plot_eof-tmode.py @@ -7,19 +7,19 @@ Load packages and data: """ -import xarray as xr import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec +import xarray as xr from cartopy.crs import EqualEarth, PlateCarree +from matplotlib.gridspec 
import GridSpec -from xeofs.models import EOF +import xeofs as xe sst = xr.tutorial.open_dataset("ersstv5")["sst"] # %% # Perform the actual analysis -model = EOF(n_modes=5) +model = xe.single.EOF(n_modes=5) model.fit(sst, dim=("lat", "lon")) expvar = model.explained_variance_ratio() components = model.components() diff --git a/docs/auto_examples/1single/plot_eof-tmode.py.md5 b/docs/auto_examples/1single/plot_eof-tmode.py.md5 index 0e6e4115..2a678441 100644 --- a/docs/auto_examples/1single/plot_eof-tmode.py.md5 +++ b/docs/auto_examples/1single/plot_eof-tmode.py.md5 @@ -1 +1 @@ -cb4eb5e93d5365bebbbecc2581f70358 \ No newline at end of file +af1f1248f081012ef3cb40381d6bbe73 \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_eof-tmode.rst b/docs/auto_examples/1single/plot_eof-tmode.rst index 04053bb4..b0857bf6 100644 --- a/docs/auto_examples/1single/plot_eof-tmode.rst +++ b/docs/auto_examples/1single/plot_eof-tmode.rst @@ -27,15 +27,15 @@ Load packages and data: .. GENERATED FROM PYTHON SOURCE LINES 9-19 -.. code-block:: Python +.. code-block:: default - import xarray as xr import matplotlib.pyplot as plt - from matplotlib.gridspec import GridSpec + import xarray as xr from cartopy.crs import EqualEarth, PlateCarree + from matplotlib.gridspec import GridSpec - from xeofs.models import EOF + import xeofs as xe sst = xr.tutorial.open_dataset("ersstv5")["sst"] @@ -52,10 +52,10 @@ Perform the actual analysis .. GENERATED FROM PYTHON SOURCE LINES 21-28 -.. code-block:: Python +.. code-block:: default - model = EOF(n_modes=5) + model = xe.single.EOF(n_modes=5) model.fit(sst, dim=("lat", "lon")) expvar = model.explained_variance_ratio() components = model.components() @@ -74,7 +74,7 @@ Create figure showing the first two modes .. GENERATED FROM PYTHON SOURCE LINES 30-48 -.. code-block:: Python +.. code-block:: default proj = EqualEarth(central_longitude=180) @@ -109,7 +109,7 @@ Create figure showing the first two modes .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 2.323 seconds) + **Total running time of the script:** (0 minutes 3.854 seconds) .. _sphx_glr_download_auto_examples_1single_plot_eof-tmode.py: @@ -118,14 +118,17 @@ Create figure showing the first two modes .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_eof-tmode.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_eof-tmode.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_eof-tmode.ipynb ` + .. 
only:: html diff --git a/docs/auto_examples/1single/plot_eof-tmode_codeobj.pickle b/docs/auto_examples/1single/plot_eof-tmode_codeobj.pickle index de37415c..34517ab3 100644 Binary files a/docs/auto_examples/1single/plot_eof-tmode_codeobj.pickle and b/docs/auto_examples/1single/plot_eof-tmode_codeobj.pickle differ diff --git a/docs/auto_examples/1single/plot_gwpca.ipynb b/docs/auto_examples/1single/plot_gwpca.ipynb index 2d12398f..6cc31b2c 100644 --- a/docs/auto_examples/1single/plot_gwpca.ipynb +++ b/docs/auto_examples/1single/plot_gwpca.ipynb @@ -43,18 +43,18 @@ "outputs": [], "source": [ "# For the analysis\n", - "import numpy as np\n", - "import xarray as xr\n", - "import xeofs as xe\n", - "\n", "# For visualization\n", "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", + "import numpy as np\n", "\n", "# For accessing R packages\n", "import rpy2.robjects as ro\n", + "import seaborn as sns\n", + "import xarray as xr\n", + "from rpy2.robjects import pandas2ri\n", "from rpy2.robjects.packages import importr\n", - "from rpy2.robjects import pandas2ri" + "\n", + "import xeofs as xe" ] }, { @@ -155,7 +155,7 @@ "metadata": {}, "outputs": [], "source": [ - "gwpca = xe.models.GWPCA(\n", + "gwpca = xe.single.GWPCA(\n", " n_modes=5,\n", " standardize=True,\n", " metric=\"euclidean\",\n", @@ -285,7 +285,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/1single/plot_gwpca.py b/docs/auto_examples/1single/plot_gwpca.py index f1542228..2a9462cf 100644 --- a/docs/auto_examples/1single/plot_gwpca.py +++ b/docs/auto_examples/1single/plot_gwpca.py @@ -32,18 +32,18 @@ """ # For the analysis -import numpy as np -import xarray as xr -import xeofs as xe - # For visualization import matplotlib.pyplot as plt -import seaborn as sns +import numpy as np # For accessing R packages import rpy2.robjects as ro -from rpy2.robjects.packages import importr +import seaborn as sns +import xarray as xr from rpy2.robjects import pandas2ri +from rpy2.robjects.packages import importr + +import xeofs as xe # %% # Next, we'll install the R package `mvoutlier `_ @@ -96,7 +96,7 @@ # kilometers. Lastly, we'll standardize the input to ensure consistent scales # for the chemical elements. -gwpca = xe.models.GWPCA( +gwpca = xe.single.GWPCA( n_modes=5, standardize=True, metric="euclidean", diff --git a/docs/auto_examples/1single/plot_gwpca.py.md5 b/docs/auto_examples/1single/plot_gwpca.py.md5 index 638f2378..74b515fb 100644 --- a/docs/auto_examples/1single/plot_gwpca.py.md5 +++ b/docs/auto_examples/1single/plot_gwpca.py.md5 @@ -1 +1 @@ -77b9f112f4bc958b9490200a1e64d954 \ No newline at end of file +ae1e8af4b123c765c1cfd4d63d9386c2 \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_gwpca.rst b/docs/auto_examples/1single/plot_gwpca.rst index 11fe5aa3..35913c3a 100644 --- a/docs/auto_examples/1single/plot_gwpca.rst +++ b/docs/auto_examples/1single/plot_gwpca.rst @@ -51,22 +51,22 @@ Let's import the necessary packages. .. GENERATED FROM PYTHON SOURCE LINES 33-48 -.. code-block:: Python +.. 
code-block:: default # For the analysis - import numpy as np - import xarray as xr - import xeofs as xe - # For visualization import matplotlib.pyplot as plt - import seaborn as sns + import numpy as np # For accessing R packages import rpy2.robjects as ro - from rpy2.robjects.packages import importr + import seaborn as sns + import xarray as xr from rpy2.robjects import pandas2ri + from rpy2.robjects.packages import importr + + import xeofs as xe @@ -82,7 +82,7 @@ using the `rpy2 `_ package. .. GENERATED FROM PYTHON SOURCE LINES 51-57 -.. code-block:: Python +.. code-block:: default xr.set_options(display_expand_data=False) @@ -98,189 +98,6 @@ using the `rpy2 `_ package. .. code-block:: none - R[write to console]: also installing the dependencies ‘DEoptimR’, ‘sgeostat’, ‘robustbase’ - - - R[write to console]: trying URL 'https://cloud.r-project.org/src/contrib/DEoptimR_1.1-3.tar.gz' - - R[write to console]: Content type 'application/x-gzip' - R[write to console]: length 19222 bytes (18 KB) - - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: - - R[write to console]: downloaded 18 KB - - - R[write to console]: trying URL 'https://cloud.r-project.org/src/contrib/sgeostat_1.0-27.tar.gz' - - R[write to console]: Content type 'application/x-gzip' - R[write to console]: length 35685 bytes (34 KB) - - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to 
console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: - - R[write to console]: downloaded 34 KB - - - R[write to console]: trying URL 'https://cloud.r-project.org/src/contrib/robustbase_0.99-2.tar.gz' - - R[write to console]: Content type 'application/x-gzip' - R[write to console]: length 2292086 bytes (2.2 MB) - - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: = - R[write to console]: - - R[write to console]: downloaded 2.2 MB - - R[write to console]: trying URL 'https://cloud.r-project.org/src/contrib/mvoutlier_2.1.1.tar.gz' R[write to console]: Content type 'application/x-gzip' @@ -345,7 +162,7 @@ using the `rpy2 `_ package. R[write to console]: R[write to console]: The downloaded source packages are in - ‘/tmp/Rtmpr8iXHn/downloaded_packages’ + ‘/tmp/RtmpRG5HUi/downloaded_packages’ R[write to console]: R[write to console]: @@ -355,7 +172,7 @@ using the `rpy2 `_ package. R[write to console]: done - [0] + [0] @@ -367,7 +184,7 @@ in the Baltic Sea region. This will help us visually represent the GWPCA results .. GENERATED FROM PYTHON SOURCE LINES 61-75 -.. code-block:: Python +.. code-block:: default ro.r( @@ -533,7 +350,7 @@ Since ``xeofs`` uses ``xarray``, we convert the data into an ``xarray.DataArray` .. GENERATED FROM PYTHON SOURCE LINES 77-86 -.. code-block:: Python +.. code-block:: default data_df = data_df.rename(columns={"ID": "station"}).set_index("station") @@ -915,22 +732,22 @@ Since ``xeofs`` uses ``xarray``, we convert the data into an ``xarray.DataArray` stroke: currentColor; fill: currentColor; } -
    <xarray.DataArray (element: 10, station: 768)> Size: 61kB
    +    
    <xarray.DataArray (element: 10, station: 768)>
         43.61 58.73 58.14 43.98 60.9 54.0 82.72 ... 0.196 0.202 0.207 0.109 0.141 0.185
         Coordinates:
    -      * station  (station) object 6kB MultiIndex
    -      * x        (station) float64 6kB -6.197e+05 2.147e+05 ... -2.82e+05 -1.273e+05
    -      * y        (station) float64 6kB 6.805e+06 7.746e+06 ... 5.796e+06 6.523e+06
    -      * element  (element) object 80B 'SiO2_T' 'TiO2_T' ... 'K2O_T' 'P2O5_T'


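For orientation, a station MultiIndex like the one in the DataArray above (with ``x`` and ``y`` as levels) can be built from a plain table. The following is a hedged sketch with made-up column names and values, not the loading code used in this example:

.. code-block:: python

    import pandas as pd
    import xarray as xr

    # Toy table: one row per station with projected coordinates and element columns.
    df = pd.DataFrame(
        {
            "x": [-6.197e5, 2.147e5, -2.82e5],
            "y": [6.805e6, 7.746e6, 5.796e6],
            "SiO2_T": [43.61, 58.73, 58.14],
            "TiO2_T": [0.62, 0.55, 0.47],
        }
    )

    elements = ["SiO2_T", "TiO2_T"]
    da = xr.DataArray(
        df[elements].to_numpy().T,
        dims=("element", "station"),
        coords={
            "element": elements,
            "x": ("station", df["x"].to_numpy()),
            "y": ("station", df["y"].to_numpy()),
        },
    )
    # Combine x and y into a "station" MultiIndex, mirroring the repr above.
    da = da.set_index(station=["x", "y"])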
  • @@ -974,10 +791,10 @@ for the chemical elements. .. GENERATED FROM PYTHON SOURCE LINES 98-110 -.. code-block:: Python +.. code-block:: default - gwpca = xe.models.GWPCA( + gwpca = xe.single.GWPCA( n_modes=5, standardize=True, metric="euclidean", @@ -1359,28 +1176,15 @@ for the chemical elements. stroke: currentColor; fill: currentColor; } -
    <xarray.DataArray 'components' (mode: 5, element: 10, station: 768)> Size: 307kB
    +    
    <xarray.DataArray 'components' (mode: 5, element: 10, station: 768)>
         0.1813 -0.3584 0.1243 0.2 -0.3812 ... 0.1229 -0.2865 0.4732 0.4197 -0.4249
         Coordinates:
    -      * mode     (mode) int64 40B 1 2 3 4 5
    -      * element  (element) object 80B 'SiO2_T' 'TiO2_T' ... 'K2O_T' 'P2O5_T'
    -      * station  (station) object 6kB MultiIndex
    -      * x        (station) float64 6kB -6.197e+05 2.147e+05 ... -2.82e+05 -1.273e+05
    -      * y        (station) float64 6kB 6.805e+06 7.746e+06 ... 5.796e+06 6.523e+06
    -    Attributes: (12/16)
    -        model:          GWPCA
    -        software:       xeofs
    -        version:        2.3.2
    -        date:           2024-03-31 21:13:03
    -        n_modes:        5
    -        center:         True
    -        ...             ...
    -        feature_name:   feature
    -        random_state:   None
    -        verbose:        False
    -        compute:        True
    -        solver:         auto
    -        solver_kwargs:  {}
  • model: GWPCA, software: xeofs, version: 1.2.0, date: 2024-09-02 02:16:58, n_modes: 5, center: True, standardize: True, use_coslat: False, check_nans: True, sample_name: sample, feature_name: feature, random_state: None, compute: True, solver: auto, solver_kwargs: {}


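As a reminder of what "geographically weighted" means here: each station gets its own PCA computed from nearby stations only. The snippet below is a rough conceptual sketch (a hard distance cutoff and a plain eigendecomposition), not xeofs' actual implementation; GWPCA methods typically weight neighbours smoothly with a distance kernel rather than truncating them.

.. code-block:: python

    # Conceptual sketch only: a "local" PCA per station using a hard distance cutoff.
    import numpy as np

    def local_pca_components(X, xy, bandwidth):
        """X: (n_stations, n_features) standardized data; xy: (n_stations, 2) coordinates."""
        comps = []
        for i in range(len(X)):
            dist = np.linalg.norm(xy - xy[i], axis=1)
            neighbours = X[dist <= bandwidth]        # stations within the bandwidth
            cov = np.cov(neighbours, rowvar=False)   # local covariance matrix
            _, eigvecs = np.linalg.eigh(cov)
            comps.append(eigvecs[:, ::-1])           # leading local components first
        return np.stack(comps)                       # (n_stations, n_features, n_features)

Each station therefore has its own set of loadings, which is why the ``components`` DataArray above carries a ``station`` dimension in addition to ``mode`` and ``element``.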
  • @@ -1461,7 +1265,7 @@ dominate the local PCAs. .. GENERATED FROM PYTHON SOURCE LINES 116-120 -.. code-block:: Python +.. code-block:: default llwc = gwpca.largest_locally_weighted_components() @@ -1838,20 +1642,20 @@ dominate the local PCAs. stroke: currentColor; fill: currentColor; } -
    <xarray.DataArray 'largest_locally_weighted_components' (mode: 5, station: 768)> Size: 31kB
    +    
    <xarray.DataArray 'largest_locally_weighted_components' (mode: 5, station: 768)>
         'MgO_T' 'Al2O3_T' 'MgO_T' 'TiO2_T' ... 'K2O_T' 'Fe2O3_T' 'Fe2O3_T' 'CaO_T'
         Coordinates:
    -      * mode     (mode) int64 40B 1 2 3 4 5
    -      * station  (station) object 6kB MultiIndex
    -      * x        (station) float64 6kB -6.197e+05 2.147e+05 ... -2.82e+05 -1.273e+05
    -      * y        (station) float64 6kB 6.805e+06 7.746e+06 ... 5.796e+06 6.523e+06


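The string-valued array above can be read as, for each station and mode, the element whose loading has the largest magnitude. A hedged sketch that reproduces this idea directly from the components (this may not be byte-for-byte what ``largest_locally_weighted_components`` returns):

.. code-block:: python

    import numpy as np

    comps = gwpca.components()                     # dims: (mode, element, station)
    largest = np.abs(comps).idxmax(dim="element")  # element name with the largest |loading|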
  • @@ -1887,7 +1691,7 @@ For demonstation, we'll concentrate on the first mode: .. GENERATED FROM PYTHON SOURCE LINES 126-157 -.. code-block:: Python +.. code-block:: default llwc1_df = llwc.sel(mode=1).to_dataframe() @@ -1942,7 +1746,7 @@ roughly 40% to 70%. .. GENERATED FROM PYTHON SOURCE LINES 163-176 -.. code-block:: Python +.. code-block:: default @@ -1972,7 +1776,7 @@ roughly 40% to 70%. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 34.593 seconds) + **Total running time of the script:** (0 minutes 39.918 seconds) .. _sphx_glr_download_auto_examples_1single_plot_gwpca.py: @@ -1981,14 +1785,17 @@ roughly 40% to 70%. .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_gwpca.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_gwpca.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_gwpca.ipynb ` + .. only:: html diff --git a/docs/auto_examples/1single/plot_gwpca_codeobj.pickle b/docs/auto_examples/1single/plot_gwpca_codeobj.pickle index 508520c6..bd8fd2fe 100644 Binary files a/docs/auto_examples/1single/plot_gwpca_codeobj.pickle and b/docs/auto_examples/1single/plot_gwpca_codeobj.pickle differ diff --git a/docs/auto_examples/1single/plot_hilbert_eof.ipynb b/docs/auto_examples/1single/plot_hilbert_eof.ipynb new file mode 100644 index 00000000..e704627c --- /dev/null +++ b/docs/auto_examples/1single/plot_hilbert_eof.ipynb @@ -0,0 +1,251 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Hilbert EOF analysis\n", + "\n", + "We demonstrate how to execute a Hilbert EOF analysis [1]_ [2]_\n", + "[3]_. This method extends traditional EOF analysis into the complex domain,\n", + "allowing the EOF components to have real and imaginary parts. This capability\n", + "can reveal oscillatory patterns in datasets, which are common in Earth\n", + "observations. For example, beyond typical examples like seasonal cycles, you can\n", + "think of internal waves in the ocean, or the Quasi-Biennial Oscillation in the\n", + "atmosphere.\n", + "\n", + "Using monthly sea surface temperature data from 1970 to 2021 as an example, we\n", + "highlight the method's key features and address edge effects as a common\n", + "challenge.\n", + "\n", + ".. [1] Rasmusson, E. M., Arkin, P. A., Chen, W.-Y. & Jalickee, J. B. Biennial\n", + " variations in surface temperature over the United States as revealed by\n", + " singular decomposition. Monthly Weather Review 109, 587–598 (1981).\n", + ".. [2] Barnett, T. P. Interaction of the Monsoon and Pacific Trade Wind System\n", + " at Interannual Time Scales Part I: The Equatorial Zone. Monthly Weather\n", + " Review 111, 756–773 (1983).\n", + ".. [3] Horel, J. Complex Principal Component Analysis: Theory and Examples. J.\n", + " Climate Appl. Meteor. 
23, 1660–1673 (1984).\n", + "\n", + "Let's start by importing the necessary packages and loading the data:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import xarray as xr\n", + "\n", + "import xeofs as xe\n", + "\n", + "xr.set_options(display_expand_attrs=False)\n", + "\n", + "sst = xr.tutorial.open_dataset(\"ersstv5\").sst\n", + "sst" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We fit the ``HilbertEOF`` model directly to the raw data, retaining the seasonal\n", + "cycle for study. The model initialization specifies the desired number of\n", + "modes. The ``use_coslat`` parameter is set to ``True`` to adjust for grid\n", + "convergence at the poles. While the ``HilbertEOF`` class offers padding\n", + "options to mitigate potential edge effects, we'll begin with no padding.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "kwargs = dict(n_modes=4, use_coslat=True, random_state=7)\n", + "model = xe.single.HilbertEOF(padding=\"none\", **kwargs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we fit the model to the data and extract the explained variance.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.fit(sst, dim=\"time\")\n", + "expvar = model.explained_variance()\n", + "expvar_ratio = model.explained_variance_ratio()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's have a look at the explained variance of the first five modes:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "expvar.round(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Clearly, the first mode completely dominates and already explains a\n", + "substantial amount of variance. If we look at the fraction of explained\n", + "variance, we see that the first mode explains about 88.8 %.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(expvar_ratio * 100).round(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In comparison to standard EOF analysis (check the corresponding example,\n", + "S-mode), the first complex mode seems to integrate the first two standard\n", + "modes in terms of explained variance. This makes sense as the two modes in\n", + "standard EOF are both showing parts of an annual cycle (which are in\n", + "quadrature) and thus the complex mode combines both of them. Let's confirm our\n", + "hypothesis by looking at the real part the complex-valued scores:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scores = model.scores()\n", + "scores.real.plot.line(x=\"time\", col=\"mode\", lw=1, ylim=(-0.1, 0.1))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And indeed the annual cycle is completed incorporated into the first mode,\n", + "while the second mode shows a semi-annual cycle (mode 3 in standard EOF).\n", + "However, mode three and four look unusual. While showing some similarity to\n", + "ENSO (e.g. 
in mode 3 peaks in 1982, 1998 and 2016), they exhibit a \"running\n", + "away\" behaviour towards the boundaries of the time series. This a common issue\n", + "in Hilbert EOF analysis which is based on the Hilbert transform (a\n", + "convolution) that suffers from the absence of information at the time series\n", + "boundaries. One way to mitigate this is to artificially extend the time series\n", + "also known as *padding*. In ``xeofs``, you can enable such a padding by\n", + "setting the ``padding`` parameter to ``\"exp\"`` which will extent the\n", + "boundaries by an exponential decaying function. The ``decay_factor`` parameter\n", + "controls the decay rate of the exponential function measured in multiples of\n", + "the time series length. Let's see how the decay parameter impacts the results:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_ext = xe.single.HilbertEOF(padding=\"exp\", decay_factor=0.01, **kwargs)\n", + "model_ext.fit(sst, dim=\"time\")\n", + "scores_ext = model_ext.scores().sel(mode=slice(1, 4))\n", + "\n", + "scores_ext.real.plot.line(x=\"time\", col=\"mode\", lw=1, ylim=(-0.1, 0.1))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And indeed, padding the time series effectively reduced the artifacts at the\n", + "boundaries. Lastly, we examine the complex component amplitudes and phases.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comp_amps = model.components_amplitude()\n", + "comp_amps.plot(col=\"mode\", vmin=0, vmax=0.025)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The component phases of the first mode clearly show the seasonal cycle as the\n", + "northern and southern hemisphere are phase shifted by 180 degrees (white and\n", + "black). Note the blueish regions in the central East Pacific and Indian Ocean\n", + "which indicate a phase shift of 90 degrees compared to the main annual cycle.\n", + "This is in agreement with mode 3 of the standard EOF analysis.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comp_phases = model.components_phase()\n", + "comp_phases.plot(col=\"mode\", cmap=\"twilight\")\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/docs/auto_examples/1single/plot_hilbert_eof.py b/docs/auto_examples/1single/plot_hilbert_eof.py new file mode 100644 index 00000000..a2fd42ba --- /dev/null +++ b/docs/auto_examples/1single/plot_hilbert_eof.py @@ -0,0 +1,124 @@ +""" +Hilbert EOF analysis +============================================ + +We demonstrate how to execute a Hilbert EOF analysis [1]_ [2]_ +[3]_. This method extends traditional EOF analysis into the complex domain, +allowing the EOF components to have real and imaginary parts. This capability +can reveal oscillatory patterns in datasets, which are common in Earth +observations. 
For example, beyond typical examples like seasonal cycles, you can +think of internal waves in the ocean, or the Quasi-Biennial Oscillation in the +atmosphere. + +Using monthly sea surface temperature data from 1970 to 2021 as an example, we +highlight the method's key features and address edge effects as a common +challenge. + +.. [1] Rasmusson, E. M., Arkin, P. A., Chen, W.-Y. & Jalickee, J. B. Biennial + variations in surface temperature over the United States as revealed by + singular decomposition. Monthly Weather Review 109, 587–598 (1981). +.. [2] Barnett, T. P. Interaction of the Monsoon and Pacific Trade Wind System + at Interannual Time Scales Part I: The Equatorial Zone. Monthly Weather + Review 111, 756–773 (1983). +.. [3] Horel, J. Complex Principal Component Analysis: Theory and Examples. J. + Climate Appl. Meteor. 23, 1660–1673 (1984). + +Let's start by importing the necessary packages and loading the data: +""" + +# %% +import matplotlib.pyplot as plt +import xarray as xr + +import xeofs as xe + +xr.set_options(display_expand_attrs=False) + +sst = xr.tutorial.open_dataset("ersstv5").sst +sst + +# %% +# We fit the ``HilbertEOF`` model directly to the raw data, retaining the seasonal +# cycle for study. The model initialization specifies the desired number of +# modes. The ``use_coslat`` parameter is set to ``True`` to adjust for grid +# convergence at the poles. While the ``HilbertEOF`` class offers padding +# options to mitigate potential edge effects, we'll begin with no padding. + +kwargs = dict(n_modes=4, use_coslat=True, random_state=7) +model = xe.single.HilbertEOF(padding="none", **kwargs) + +# %% +# Now, we fit the model to the data and extract the explained variance. + +model.fit(sst, dim="time") +expvar = model.explained_variance() +expvar_ratio = model.explained_variance_ratio() + +# %% +# Let's have a look at the explained variance of the first five modes: + +expvar.round(0) + + +# %% +# Clearly, the first mode completely dominates and already explains a +# substantial amount of variance. If we look at the fraction of explained +# variance, we see that the first mode explains about 88.8 %. + +(expvar_ratio * 100).round(1) + +# %% +# In comparison to standard EOF analysis (check the corresponding example, +# S-mode), the first complex mode seems to integrate the first two standard +# modes in terms of explained variance. This makes sense as the two modes in +# standard EOF are both showing parts of an annual cycle (which are in +# quadrature) and thus the complex mode combines both of them. Let's confirm our +# hypothesis by looking at the real part the complex-valued scores: + +scores = model.scores() +scores.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1)) +plt.show() + + +# %% +# And indeed the annual cycle is completed incorporated into the first mode, +# while the second mode shows a semi-annual cycle (mode 3 in standard EOF). +# However, mode three and four look unusual. While showing some similarity to +# ENSO (e.g. in mode 3 peaks in 1982, 1998 and 2016), they exhibit a "running +# away" behaviour towards the boundaries of the time series. This a common issue +# in Hilbert EOF analysis which is based on the Hilbert transform (a +# convolution) that suffers from the absence of information at the time series +# boundaries. One way to mitigate this is to artificially extend the time series +# also known as *padding*. 
In ``xeofs``, you can enable such a padding by +# setting the ``padding`` parameter to ``"exp"`` which will extent the +# boundaries by an exponential decaying function. The ``decay_factor`` parameter +# controls the decay rate of the exponential function measured in multiples of +# the time series length. Let's see how the decay parameter impacts the results: + +model_ext = xe.single.HilbertEOF(padding="exp", decay_factor=0.01, **kwargs) +model_ext.fit(sst, dim="time") +scores_ext = model_ext.scores().sel(mode=slice(1, 4)) + +scores_ext.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1)) +plt.show() + +# %% +# And indeed, padding the time series effectively reduced the artifacts at the +# boundaries. Lastly, we examine the complex component amplitudes and phases. + +comp_amps = model.components_amplitude() +comp_amps.plot(col="mode", vmin=0, vmax=0.025) +plt.show() + +# %% +# The component phases of the first mode clearly show the seasonal cycle as the +# northern and southern hemisphere are phase shifted by 180 degrees (white and +# black). Note the blueish regions in the central East Pacific and Indian Ocean +# which indicate a phase shift of 90 degrees compared to the main annual cycle. +# This is in agreement with mode 3 of the standard EOF analysis. + +comp_phases = model.components_phase() +comp_phases.plot(col="mode", cmap="twilight") +plt.show() + +# %% diff --git a/docs/auto_examples/1single/plot_hilbert_eof.py.md5 b/docs/auto_examples/1single/plot_hilbert_eof.py.md5 new file mode 100644 index 00000000..c20a2e8f --- /dev/null +++ b/docs/auto_examples/1single/plot_hilbert_eof.py.md5 @@ -0,0 +1 @@ +28f29c6a357ea6a325769ce0c7554634 \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_hilbert_eof.rst b/docs/auto_examples/1single/plot_hilbert_eof.rst new file mode 100644 index 00000000..ddcf5a25 --- /dev/null +++ b/docs/auto_examples/1single/plot_hilbert_eof.rst @@ -0,0 +1,1469 @@ + +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "auto_examples/1single/plot_hilbert_eof.py" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. only:: html + + .. note:: + :class: sphx-glr-download-link-note + + :ref:`Go to the end ` + to download the full example code + +.. rst-class:: sphx-glr-example-title + +.. _sphx_glr_auto_examples_1single_plot_hilbert_eof.py: + + +Hilbert EOF analysis +============================================ + +We demonstrate how to execute a Hilbert EOF analysis [1]_ [2]_ +[3]_. This method extends traditional EOF analysis into the complex domain, +allowing the EOF components to have real and imaginary parts. This capability +can reveal oscillatory patterns in datasets, which are common in Earth +observations. For example, beyond typical examples like seasonal cycles, you can +think of internal waves in the ocean, or the Quasi-Biennial Oscillation in the +atmosphere. + +Using monthly sea surface temperature data from 1970 to 2021 as an example, we +highlight the method's key features and address edge effects as a common +challenge. + +.. [1] Rasmusson, E. M., Arkin, P. A., Chen, W.-Y. & Jalickee, J. B. Biennial + variations in surface temperature over the United States as revealed by + singular decomposition. Monthly Weather Review 109, 587–598 (1981). +.. [2] Barnett, T. P. Interaction of the Monsoon and Pacific Trade Wind System + at Interannual Time Scales Part I: The Equatorial Zone. Monthly Weather + Review 111, 756–773 (1983). +.. [3] Horel, J. 
Complex Principal Component Analysis: Theory and Examples. J. + Climate Appl. Meteor. 23, 1660–1673 (1984). + +Let's start by importing the necessary packages and loading the data: + +.. GENERATED FROM PYTHON SOURCE LINES 30-40 + +.. code-block:: default + + import matplotlib.pyplot as plt + import xarray as xr + + import xeofs as xe + + xr.set_options(display_expand_attrs=False) + + sst = xr.tutorial.open_dataset("ersstv5").sst + sst + + + + + + +.. raw:: html + +
    +
    + + + + + + + + + + + + + + +
    <xarray.DataArray 'sst' (time: 624, lat: 89, lon: 180)>
    +    [9996480 values with dtype=float32]
    +    Coordinates:
    +      * lat      (lat) float32 88.0 86.0 84.0 82.0 80.0 ... -82.0 -84.0 -86.0 -88.0
    +      * lon      (lon) float32 0.0 2.0 4.0 6.0 8.0 ... 350.0 352.0 354.0 356.0 358.0
    +      * time     (time) datetime64[ns] 1970-01-01 1970-02-01 ... 2021-12-01
    +    Attributes: (9)
    +
    +
    +
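+
+Before fitting the model it can be useful to take a quick look at the data.
+The following is a minimal sketch added for illustration (it is not part of
+the generated example); it assumes the ``sst`` DataArray and the
+``matplotlib.pyplot`` import from above:
+
+.. code-block:: python
+
+    # Sketch: plot the time-mean SST field as a quick sanity check of the data.
+    sst.mean("time").plot(cmap="viridis")
+    plt.show()
+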
+
+.. GENERATED FROM PYTHON SOURCE LINES 41-46
+
+We fit the ``HilbertEOF`` model directly to the raw data, retaining the seasonal
+cycle for study. The model initialization specifies the desired number of
+modes. The ``use_coslat`` parameter is set to ``True`` to adjust for grid
+convergence at the poles. While the ``HilbertEOF`` class offers padding
+options to mitigate potential edge effects, we'll begin with no padding.
+
+.. GENERATED FROM PYTHON SOURCE LINES 46-50
+
+.. code-block:: default
+
+
+    kwargs = dict(n_modes=4, use_coslat=True, random_state=7)
+    model = xe.single.HilbertEOF(padding="none", **kwargs)
+
+
+
+
+
+.. GENERATED FROM PYTHON SOURCE LINES 51-52
+
+Now, we fit the model to the data and extract the explained variance.
+
+.. GENERATED FROM PYTHON SOURCE LINES 52-57
+
+.. code-block:: default
+
+
+    model.fit(sst, dim="time")
+    expvar = model.explained_variance()
+    expvar_ratio = model.explained_variance_ratio()
+
+
+
+
+
+.. GENERATED FROM PYTHON SOURCE LINES 58-59
+
+Let's have a look at the explained variance of the first four modes:
+
+.. GENERATED FROM PYTHON SOURCE LINES 59-63
+
+.. code-block:: default
+
+
+    expvar.round(0)
+
+
+
+
+
+.. raw:: html
+
+    
    +
    + + + + + + + + + + + + + + +
    <xarray.DataArray 'explained_variance' (mode: 4)>
    +    5.069e+04 1.705e+03 1.105e+03 519.0
    +    Coordinates:
    +      * mode     (mode) int64 1 2 3 4
    +    Attributes: (15)
    +
    +
    +
    + +.. GENERATED FROM PYTHON SOURCE LINES 64-67 + +Clearly, the first mode completely dominates and already explains a +substantial amount of variance. If we look at the fraction of explained +variance, we see that the first mode explains about 88.8 %. + +.. GENERATED FROM PYTHON SOURCE LINES 67-70 + +.. code-block:: default + + + (expvar_ratio * 100).round(1) + + + + + + +.. raw:: html + +
    +
    + + + + + + + + + + + + + + +
    <xarray.DataArray 'explained_variance_ratio' (mode: 4)>
    +    88.8 3.0 1.9 0.9
    +    Coordinates:
    +      * mode     (mode) int64 1 2 3 4
    +    Attributes: (15)
    +
    +
    +
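+
+To see how quickly the variance drops off after the first mode, one can plot
+the explained variance ratio as a simple scree plot. This is a minimal sketch
+added for illustration (not part of the generated example), assuming the
+``expvar_ratio`` DataArray computed above:
+
+.. code-block:: python
+
+    # Sketch: scree plot of the explained variance ratio (in percent) per mode.
+    (expvar_ratio * 100).plot.line(x="mode", marker="o")
+    plt.ylabel("Explained variance [%]")
+    plt.show()
+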
+
+.. GENERATED FROM PYTHON SOURCE LINES 71-77
+
+In comparison to standard EOF analysis (check the corresponding example,
+S-mode), the first complex mode seems to integrate the first two standard
+modes in terms of explained variance. This makes sense as the two modes in
+standard EOF are both showing parts of an annual cycle (which are in
+quadrature) and thus the complex mode combines both of them. Let's confirm our
+hypothesis by looking at the real part of the complex-valued scores:
+
+.. GENERATED FROM PYTHON SOURCE LINES 77-83
+
+.. code-block:: default
+
+
+    scores = model.scores()
+    scores.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1))
+    plt.show()
+
+
+
+
+.. image-sg:: /auto_examples/1single/images/sphx_glr_plot_hilbert_eof_001.png
+   :alt: mode = 1, mode = 2, mode = 3, mode = 4
+   :srcset: /auto_examples/1single/images/sphx_glr_plot_hilbert_eof_001.png
+   :class: sphx-glr-single-img
+
+
+
+
+.. GENERATED FROM PYTHON SOURCE LINES 84-97
+
+And indeed the annual cycle is completely incorporated into the first mode,
+while the second mode shows a semi-annual cycle (mode 3 in standard EOF).
+However, modes three and four look unusual. While showing some similarity to
+ENSO (e.g. mode 3 peaks in 1982, 1998 and 2016), they exhibit a "running
+away" behaviour towards the boundaries of the time series. This is a common
+issue in Hilbert EOF analysis, which is based on the Hilbert transform (a
+convolution) and therefore suffers from the absence of information at the
+time series boundaries. One way to mitigate this is to artificially extend
+the time series, also known as *padding*. In ``xeofs``, you can enable such
+padding by setting the ``padding`` parameter to ``"exp"``, which will extend
+the boundaries by an exponentially decaying function. The ``decay_factor``
+parameter controls the decay rate of the exponential function, measured in
+multiples of the time series length. Let's see how the decay parameter
+impacts the results:
+
+.. GENERATED FROM PYTHON SOURCE LINES 97-105
+
+.. code-block:: default
+
+
+    model_ext = xe.single.HilbertEOF(padding="exp", decay_factor=0.01, **kwargs)
+    model_ext.fit(sst, dim="time")
+    scores_ext = model_ext.scores().sel(mode=slice(1, 4))
+
+    scores_ext.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1))
+    plt.show()
+
+
+
+.. image-sg:: /auto_examples/1single/images/sphx_glr_plot_hilbert_eof_002.png
+   :alt: mode = 1, mode = 2, mode = 3, mode = 4
+   :srcset: /auto_examples/1single/images/sphx_glr_plot_hilbert_eof_002.png
+   :class: sphx-glr-single-img
+
+
+
+
+.. GENERATED FROM PYTHON SOURCE LINES 106-108
+
+And indeed, padding the time series effectively reduced the artifacts at the
+boundaries. Lastly, we examine the complex component amplitudes and phases.
+
+.. GENERATED FROM PYTHON SOURCE LINES 108-113
+
+.. code-block:: default
+
+
+    comp_amps = model.components_amplitude()
+    comp_amps.plot(col="mode", vmin=0, vmax=0.025)
+    plt.show()
+
+
+
+.. image-sg:: /auto_examples/1single/images/sphx_glr_plot_hilbert_eof_003.png
+   :alt: mode = 1, mode = 2, mode = 3, mode = 4
+   :srcset: /auto_examples/1single/images/sphx_glr_plot_hilbert_eof_003.png
+   :class: sphx-glr-single-img
+
+
+
+
+.. GENERATED FROM PYTHON SOURCE LINES 114-119
+
+The component phases of the first mode clearly show the seasonal cycle as the
+northern and southern hemispheres are phase shifted by 180 degrees (white and
+black).
Note the blueish regions in the central East Pacific and Indian Ocean +which indicate a phase shift of 90 degrees compared to the main annual cycle. +This is in agreement with mode 3 of the standard EOF analysis. + +.. GENERATED FROM PYTHON SOURCE LINES 119-124 + +.. code-block:: default + + + comp_phases = model.components_phase() + comp_phases.plot(col="mode", cmap="twilight") + plt.show() + + + + +.. image-sg:: /auto_examples/1single/images/sphx_glr_plot_hilbert_eof_004.png + :alt: mode = 1, mode = 2, mode = 3, mode = 4 + :srcset: /auto_examples/1single/images/sphx_glr_plot_hilbert_eof_004.png + :class: sphx-glr-single-img + + + + + + +.. rst-class:: sphx-glr-timing + + **Total running time of the script:** (0 minutes 8.098 seconds) + + +.. _sphx_glr_download_auto_examples_1single_plot_hilbert_eof.py: + +.. only:: html + + .. container:: sphx-glr-footer sphx-glr-footer-example + + + + + .. container:: sphx-glr-download sphx-glr-download-python + + :download:`Download Python source code: plot_hilbert_eof.py ` + + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_hilbert_eof.ipynb ` + + +.. only:: html + + .. rst-class:: sphx-glr-signature + + `Gallery generated by Sphinx-Gallery `_ diff --git a/docs/auto_examples/1single/plot_hilbert_eof_codeobj.pickle b/docs/auto_examples/1single/plot_hilbert_eof_codeobj.pickle new file mode 100644 index 00000000..cc488e64 Binary files /dev/null and b/docs/auto_examples/1single/plot_hilbert_eof_codeobj.pickle differ diff --git a/docs/auto_examples/1single/plot_mreof.ipynb b/docs/auto_examples/1single/plot_mreof.ipynb index 9eceb7e9..3b209a07 100644 --- a/docs/auto_examples/1single/plot_mreof.ipynb +++ b/docs/auto_examples/1single/plot_mreof.ipynb @@ -17,12 +17,12 @@ "outputs": [], "source": [ "# Load packages and data:\n", - "import xarray as xr\n", "import matplotlib.pyplot as plt\n", - "from matplotlib.gridspec import GridSpec\n", + "import xarray as xr\n", "from cartopy.crs import PlateCarree\n", + "from matplotlib.gridspec import GridSpec\n", "\n", - "from xeofs.models import EOF, EOFRotator" + "import xeofs as xe" ] }, { @@ -61,9 +61,9 @@ "outputs": [], "source": [ "multivariate_data = [subset1, subset2, subset3, subset4]\n", - "mpca = EOF(n_modes=100, standardize=False, use_coslat=True)\n", + "mpca = xe.single.EOF(n_modes=100, standardize=False, use_coslat=True)\n", "mpca.fit(multivariate_data, dim=\"time\")\n", - "rotator = EOFRotator(n_modes=20)\n", + "rotator = xe.single.EOFRotator(n_modes=20)\n", "rotator.fit(mpca)\n", "rcomponents = rotator.components()\n", "rscores = rotator.scores()" @@ -135,7 +135,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/1single/plot_mreof.py b/docs/auto_examples/1single/plot_mreof.py index 5bbc6a0f..e8c14b50 100644 --- a/docs/auto_examples/1single/plot_mreof.py +++ b/docs/auto_examples/1single/plot_mreof.py @@ -6,12 +6,12 @@ """ # Load packages and data: -import xarray as xr import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec +import xarray as xr from cartopy.crs import PlateCarree +from matplotlib.gridspec import GridSpec -from xeofs.models import EOF, EOFRotator +import xeofs as xe # %% # Create four different dataarrayss @@ -25,9 +25,9 @@ # Perform the actual analysis multivariate_data = [subset1, subset2, subset3, subset4] -mpca = EOF(n_modes=100, standardize=False, use_coslat=True) +mpca = 
xe.single.EOF(n_modes=100, standardize=False, use_coslat=True) mpca.fit(multivariate_data, dim="time") -rotator = EOFRotator(n_modes=20) +rotator = xe.single.EOFRotator(n_modes=20) rotator.fit(mpca) rcomponents = rotator.components() rscores = rotator.scores() diff --git a/docs/auto_examples/1single/plot_mreof.py.md5 b/docs/auto_examples/1single/plot_mreof.py.md5 index 57db41e3..67b0fb08 100644 --- a/docs/auto_examples/1single/plot_mreof.py.md5 +++ b/docs/auto_examples/1single/plot_mreof.py.md5 @@ -1 +1 @@ -26962c4661f0e87127e3ebcadae33bb7 \ No newline at end of file +d7e99506275146b34aa2607581ecbd1e \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_mreof.rst b/docs/auto_examples/1single/plot_mreof.rst index 105ef3cc..f8dcd602 100644 --- a/docs/auto_examples/1single/plot_mreof.rst +++ b/docs/auto_examples/1single/plot_mreof.rst @@ -25,16 +25,16 @@ Multivariate EOF analysis with additional Varimax rotation. .. GENERATED FROM PYTHON SOURCE LINES 7-16 -.. code-block:: Python +.. code-block:: default # Load packages and data: - import xarray as xr import matplotlib.pyplot as plt - from matplotlib.gridspec import GridSpec + import xarray as xr from cartopy.crs import PlateCarree + from matplotlib.gridspec import GridSpec - from xeofs.models import EOF, EOFRotator + import xeofs as xe @@ -49,7 +49,7 @@ Create four different dataarrayss .. GENERATED FROM PYTHON SOURCE LINES 18-24 -.. code-block:: Python +.. code-block:: default sst = xr.tutorial.open_dataset("ersstv5")["sst"] subset1 = sst.isel(lon=slice(0, 45)) @@ -70,13 +70,13 @@ Perform the actual analysis .. GENERATED FROM PYTHON SOURCE LINES 26-35 -.. code-block:: Python +.. code-block:: default multivariate_data = [subset1, subset2, subset3, subset4] - mpca = EOF(n_modes=100, standardize=False, use_coslat=True) + mpca = xe.single.EOF(n_modes=100, standardize=False, use_coslat=True) mpca.fit(multivariate_data, dim="time") - rotator = EOFRotator(n_modes=20) + rotator = xe.single.EOFRotator(n_modes=20) rotator.fit(mpca) rcomponents = rotator.components() rscores = rotator.scores() @@ -94,7 +94,7 @@ Plot mode 1 .. GENERATED FROM PYTHON SOURCE LINES 37-71 -.. code-block:: Python +.. code-block:: default mode = 5 @@ -145,7 +145,7 @@ Plot mode 1 .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 1.727 seconds) + **Total running time of the script:** (0 minutes 3.255 seconds) .. _sphx_glr_download_auto_examples_1single_plot_mreof.py: @@ -154,14 +154,17 @@ Plot mode 1 .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_mreof.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_mreof.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_mreof.ipynb ` + .. 
only:: html diff --git a/docs/auto_examples/1single/plot_mreof_codeobj.pickle b/docs/auto_examples/1single/plot_mreof_codeobj.pickle index bd2093e1..778fef67 100644 Binary files a/docs/auto_examples/1single/plot_mreof_codeobj.pickle and b/docs/auto_examples/1single/plot_mreof_codeobj.pickle differ diff --git a/docs/auto_examples/1single/plot_multivariate-eof.ipynb b/docs/auto_examples/1single/plot_multivariate-eof.ipynb index 6983769d..853147e6 100644 --- a/docs/auto_examples/1single/plot_multivariate-eof.ipynb +++ b/docs/auto_examples/1single/plot_multivariate-eof.ipynb @@ -17,12 +17,12 @@ "outputs": [], "source": [ "# Load packages and data:\n", - "import xarray as xr\n", "import matplotlib.pyplot as plt\n", - "from matplotlib.gridspec import GridSpec\n", + "import xarray as xr\n", "from cartopy.crs import PlateCarree\n", + "from matplotlib.gridspec import GridSpec\n", "\n", - "from xeofs.models import EOF\n", + "import xeofs as xe\n", "\n", "# Create four different dataarrayss\n", "sst = xr.tutorial.open_dataset(\"ersstv5\")[\"sst\"]\n", @@ -47,7 +47,7 @@ "metadata": {}, "outputs": [], "source": [ - "pca = EOF(n_modes=10, standardize=False, use_coslat=True)\n", + "pca = xe.single.EOF(n_modes=10, standardize=False, use_coslat=True)\n", "pca.fit(multivariate_data, dim=\"time\")\n", "components = pca.components()\n", "scores = pca.scores()" @@ -119,7 +119,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/1single/plot_multivariate-eof.py b/docs/auto_examples/1single/plot_multivariate-eof.py index 31c6f623..84d0a36b 100644 --- a/docs/auto_examples/1single/plot_multivariate-eof.py +++ b/docs/auto_examples/1single/plot_multivariate-eof.py @@ -6,12 +6,12 @@ """ # Load packages and data: -import xarray as xr import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec +import xarray as xr from cartopy.crs import PlateCarree +from matplotlib.gridspec import GridSpec -from xeofs.models import EOF +import xeofs as xe # Create four different dataarrayss sst = xr.tutorial.open_dataset("ersstv5")["sst"] @@ -24,7 +24,7 @@ # %% # Perform the actual analysis -pca = EOF(n_modes=10, standardize=False, use_coslat=True) +pca = xe.single.EOF(n_modes=10, standardize=False, use_coslat=True) pca.fit(multivariate_data, dim="time") components = pca.components() scores = pca.scores() diff --git a/docs/auto_examples/1single/plot_multivariate-eof.py.md5 b/docs/auto_examples/1single/plot_multivariate-eof.py.md5 index d14cd21e..10cd5ff8 100644 --- a/docs/auto_examples/1single/plot_multivariate-eof.py.md5 +++ b/docs/auto_examples/1single/plot_multivariate-eof.py.md5 @@ -1 +1 @@ -d76ccd30400590a4e6d49d598bbdb423 \ No newline at end of file +58257a2d0b4d2b974cf43e80e195be40 \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_multivariate-eof.rst b/docs/auto_examples/1single/plot_multivariate-eof.rst index f0ef5c22..0af5e513 100644 --- a/docs/auto_examples/1single/plot_multivariate-eof.rst +++ b/docs/auto_examples/1single/plot_multivariate-eof.rst @@ -25,16 +25,16 @@ Multivariate EOF analysis. .. GENERATED FROM PYTHON SOURCE LINES 7-24 -.. code-block:: Python +.. 
code-block:: default # Load packages and data: - import xarray as xr import matplotlib.pyplot as plt - from matplotlib.gridspec import GridSpec + import xarray as xr from cartopy.crs import PlateCarree + from matplotlib.gridspec import GridSpec - from xeofs.models import EOF + import xeofs as xe # Create four different dataarrayss sst = xr.tutorial.open_dataset("ersstv5")["sst"] @@ -57,10 +57,10 @@ Perform the actual analysis .. GENERATED FROM PYTHON SOURCE LINES 26-32 -.. code-block:: Python +.. code-block:: default - pca = EOF(n_modes=10, standardize=False, use_coslat=True) + pca = xe.single.EOF(n_modes=10, standardize=False, use_coslat=True) pca.fit(multivariate_data, dim="time") components = pca.components() scores = pca.scores() @@ -78,7 +78,7 @@ Plot mode 1 .. GENERATED FROM PYTHON SOURCE LINES 34-68 -.. code-block:: Python +.. code-block:: default mode = 5 @@ -129,7 +129,7 @@ Plot mode 1 .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 1.211 seconds) + **Total running time of the script:** (0 minutes 2.083 seconds) .. _sphx_glr_download_auto_examples_1single_plot_multivariate-eof.py: @@ -138,14 +138,17 @@ Plot mode 1 .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_multivariate-eof.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_multivariate-eof.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_multivariate-eof.ipynb ` + .. only:: html diff --git a/docs/auto_examples/1single/plot_multivariate-eof_codeobj.pickle b/docs/auto_examples/1single/plot_multivariate-eof_codeobj.pickle index 96521a56..e3afdbd9 100644 Binary files a/docs/auto_examples/1single/plot_multivariate-eof_codeobj.pickle and b/docs/auto_examples/1single/plot_multivariate-eof_codeobj.pickle differ diff --git a/docs/auto_examples/1single/plot_rotated_eof.ipynb b/docs/auto_examples/1single/plot_rotated_eof.ipynb index b92799d3..caf07ab1 100644 --- a/docs/auto_examples/1single/plot_rotated_eof.ipynb +++ b/docs/auto_examples/1single/plot_rotated_eof.ipynb @@ -33,14 +33,13 @@ "metadata": {}, "outputs": [], "source": [ - "import xarray as xr\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", + "import xarray as xr\n", + "from cartopy.crs import PlateCarree, Robinson\n", "from matplotlib.gridspec import GridSpec\n", - "from cartopy.crs import Robinson, PlateCarree\n", - "\n", - "from xeofs.models import EOF, EOFRotator\n", "\n", + "import xeofs as xe\n", "\n", "sns.set_context(\"paper\")\n", "\n", @@ -64,17 +63,17 @@ "components = []\n", "scores = []\n", "# (1) Standard EOF without regularization\n", - "model = EOF(n_modes=100, standardize=True, use_coslat=True)\n", + "model = xe.single.EOF(n_modes=100, standardize=True, use_coslat=True)\n", "model.fit(sst, dim=\"time\")\n", "components.append(model.components())\n", "scores.append(model.scores())\n", "# (2) Varimax-rotated EOF analysis\n", - "rot_var = EOFRotator(n_modes=50, power=1)\n", + "rot_var = xe.single.EOFRotator(n_modes=50, power=1)\n", "rot_var.fit(model)\n", "components.append(rot_var.components())\n", "scores.append(rot_var.scores())\n", "# (3) Promax-rotated EOF analysis\n", - "rot_pro = EOFRotator(n_modes=50, power=4)\n", + "rot_pro = xe.single.EOFRotator(n_modes=50, power=4)\n", "rot_pro.fit(model)\n", "components.append(rot_pro.components())\n", 
"scores.append(rot_pro.scores())" @@ -148,7 +147,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/1single/plot_rotated_eof.py b/docs/auto_examples/1single/plot_rotated_eof.py index 0f8b296b..33f6ab72 100644 --- a/docs/auto_examples/1single/plot_rotated_eof.py +++ b/docs/auto_examples/1single/plot_rotated_eof.py @@ -22,14 +22,13 @@ We'll start by loading the necessary packages and data: """ -import xarray as xr import matplotlib.pyplot as plt import seaborn as sns +import xarray as xr +from cartopy.crs import PlateCarree, Robinson from matplotlib.gridspec import GridSpec -from cartopy.crs import Robinson, PlateCarree - -from xeofs.models import EOF, EOFRotator +import xeofs as xe sns.set_context("paper") @@ -42,17 +41,17 @@ components = [] scores = [] # (1) Standard EOF without regularization -model = EOF(n_modes=100, standardize=True, use_coslat=True) +model = xe.single.EOF(n_modes=100, standardize=True, use_coslat=True) model.fit(sst, dim="time") components.append(model.components()) scores.append(model.scores()) # (2) Varimax-rotated EOF analysis -rot_var = EOFRotator(n_modes=50, power=1) +rot_var = xe.single.EOFRotator(n_modes=50, power=1) rot_var.fit(model) components.append(rot_var.components()) scores.append(rot_var.scores()) # (3) Promax-rotated EOF analysis -rot_pro = EOFRotator(n_modes=50, power=4) +rot_pro = xe.single.EOFRotator(n_modes=50, power=4) rot_pro.fit(model) components.append(rot_pro.components()) scores.append(rot_pro.scores()) diff --git a/docs/auto_examples/1single/plot_rotated_eof.py.md5 b/docs/auto_examples/1single/plot_rotated_eof.py.md5 index 46e91846..b79832fd 100644 --- a/docs/auto_examples/1single/plot_rotated_eof.py.md5 +++ b/docs/auto_examples/1single/plot_rotated_eof.py.md5 @@ -1 +1 @@ -88116a4a1f79c81f89706b7345a6ba70 \ No newline at end of file +669ef8691b2403f69184b4309b0f154b \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_rotated_eof.rst b/docs/auto_examples/1single/plot_rotated_eof.rst index 410b573f..89024f93 100644 --- a/docs/auto_examples/1single/plot_rotated_eof.rst +++ b/docs/auto_examples/1single/plot_rotated_eof.rst @@ -40,19 +40,18 @@ without regularization, (2) with Varimax rotation, and (3) with Promax rotation. We'll start by loading the necessary packages and data: -.. GENERATED FROM PYTHON SOURCE LINES 24-39 +.. GENERATED FROM PYTHON SOURCE LINES 24-38 -.. code-block:: Python +.. code-block:: default - import xarray as xr import matplotlib.pyplot as plt import seaborn as sns + import xarray as xr + from cartopy.crs import PlateCarree, Robinson from matplotlib.gridspec import GridSpec - from cartopy.crs import Robinson, PlateCarree - - from xeofs.models import EOF, EOFRotator + import xeofs as xe sns.set_context("paper") @@ -66,29 +65,29 @@ We'll start by loading the necessary packages and data: -.. GENERATED FROM PYTHON SOURCE LINES 40-41 +.. GENERATED FROM PYTHON SOURCE LINES 39-40 Perform the actual analysis -.. GENERATED FROM PYTHON SOURCE LINES 41-61 +.. GENERATED FROM PYTHON SOURCE LINES 40-60 -.. code-block:: Python +.. 
code-block:: default components = [] scores = [] # (1) Standard EOF without regularization - model = EOF(n_modes=100, standardize=True, use_coslat=True) + model = xe.single.EOF(n_modes=100, standardize=True, use_coslat=True) model.fit(sst, dim="time") components.append(model.components()) scores.append(model.scores()) # (2) Varimax-rotated EOF analysis - rot_var = EOFRotator(n_modes=50, power=1) + rot_var = xe.single.EOFRotator(n_modes=50, power=1) rot_var.fit(model) components.append(rot_var.components()) scores.append(rot_var.scores()) # (3) Promax-rotated EOF analysis - rot_pro = EOFRotator(n_modes=50, power=4) + rot_pro = xe.single.EOFRotator(n_modes=50, power=4) rot_pro.fit(model) components.append(rot_pro.components()) scores.append(rot_pro.scores()) @@ -98,17 +97,10 @@ Perform the actual analysis -.. rst-class:: sphx-glr-script-out - - .. code-block:: none - /home/slevang/miniconda3/envs/xeofs-docs/lib/python3.11/site-packages/numpy/lib/nanfunctions.py:1879: RuntimeWarning: Degrees of freedom <= 0 for slice. - var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - - -.. GENERATED FROM PYTHON SOURCE LINES 62-67 +.. GENERATED FROM PYTHON SOURCE LINES 61-66 Create figure showing the first 6 modes for all 3 cases. While the first mode is very similar in all three cases the subsequent modes of the standard @@ -116,9 +108,9 @@ solution exhibit dipole and tripole-like patterns. Under Varimax and Promax rotation, these structures completely disappear suggesting that these patterns were mere artifacts due to the orthogonality. -.. GENERATED FROM PYTHON SOURCE LINES 67-99 +.. GENERATED FROM PYTHON SOURCE LINES 66-98 -.. code-block:: Python +.. code-block:: default proj = Robinson(central_longitude=180) @@ -167,7 +159,7 @@ were mere artifacts due to the orthogonality. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 6.472 seconds) + **Total running time of the script:** (0 minutes 11.973 seconds) .. _sphx_glr_download_auto_examples_1single_plot_rotated_eof.py: @@ -176,14 +168,17 @@ were mere artifacts due to the orthogonality. .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_rotated_eof.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_rotated_eof.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_rotated_eof.ipynb ` + .. 
only:: html diff --git a/docs/auto_examples/1single/plot_rotated_eof_codeobj.pickle b/docs/auto_examples/1single/plot_rotated_eof_codeobj.pickle index 63217752..4df019f7 100644 Binary files a/docs/auto_examples/1single/plot_rotated_eof_codeobj.pickle and b/docs/auto_examples/1single/plot_rotated_eof_codeobj.pickle differ diff --git a/docs/auto_examples/1single/plot_weighted-eof.ipynb b/docs/auto_examples/1single/plot_weighted-eof.ipynb index d9aef5e2..85f6f443 100644 --- a/docs/auto_examples/1single/plot_weighted-eof.ipynb +++ b/docs/auto_examples/1single/plot_weighted-eof.ipynb @@ -22,13 +22,13 @@ "metadata": {}, "outputs": [], "source": [ - "import xarray as xr\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", - "from matplotlib.gridspec import GridSpec\n", + "import xarray as xr\n", "from cartopy.crs import Orthographic, PlateCarree\n", + "from matplotlib.gridspec import GridSpec\n", "\n", - "from xeofs.models import EOF\n", + "import xeofs as xe\n", "\n", "sns.set_context(\"paper\")\n", "\n", @@ -52,22 +52,22 @@ "components = []\n", "scores = []\n", "# (1) Based on covariance matrix\n", - "model_cov = EOF(n_modes=5, standardize=False, use_coslat=False)\n", + "model_cov = xe.single.EOF(n_modes=5, standardize=False, use_coslat=False)\n", "model_cov.fit(t2m, \"time\")\n", "components.append(model_cov.components())\n", "scores.append(model_cov.scores())\n", "# (2) Based on coslat weighted covariance matrix\n", - "model_lat = EOF(n_modes=5, standardize=False, use_coslat=True)\n", + "model_lat = xe.single.EOF(n_modes=5, standardize=False, use_coslat=True)\n", "model_lat.fit(t2m, \"time\")\n", "components.append(model_lat.components())\n", "scores.append(model_lat.scores())\n", "# (3) Based on correlation matrix\n", - "model_cor = EOF(n_modes=5, standardize=True, use_coslat=False)\n", + "model_cor = xe.single.EOF(n_modes=5, standardize=True, use_coslat=False)\n", "model_cor.fit(t2m, \"time\")\n", "components.append(model_cor.components())\n", "scores.append(model_cor.scores())\n", "# (4) Based on coslat weighted correlation matrix\n", - "model_cor_lat = EOF(n_modes=5, standardize=True, use_coslat=True)\n", + "model_cor_lat = xe.single.EOF(n_modes=5, standardize=True, use_coslat=True)\n", "model_cor_lat.fit(t2m, \"time\")\n", "components.append(model_cor_lat.components())\n", "scores.append(model_cor_lat.scores())" @@ -139,7 +139,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/1single/plot_weighted-eof.py b/docs/auto_examples/1single/plot_weighted-eof.py index 26a88473..6d67964b 100644 --- a/docs/auto_examples/1single/plot_weighted-eof.py +++ b/docs/auto_examples/1single/plot_weighted-eof.py @@ -11,13 +11,13 @@ Load packages and data: """ -import xarray as xr import matplotlib.pyplot as plt import seaborn as sns -from matplotlib.gridspec import GridSpec +import xarray as xr from cartopy.crs import Orthographic, PlateCarree +from matplotlib.gridspec import GridSpec -from xeofs.models import EOF +import xeofs as xe sns.set_context("paper") @@ -29,22 +29,22 @@ components = [] scores = [] # (1) Based on covariance matrix -model_cov = EOF(n_modes=5, standardize=False, use_coslat=False) +model_cov = xe.single.EOF(n_modes=5, standardize=False, use_coslat=False) model_cov.fit(t2m, "time") components.append(model_cov.components()) scores.append(model_cov.scores()) # (2) Based on coslat weighted covariance matrix -model_lat = EOF(n_modes=5, 
standardize=False, use_coslat=True) +model_lat = xe.single.EOF(n_modes=5, standardize=False, use_coslat=True) model_lat.fit(t2m, "time") components.append(model_lat.components()) scores.append(model_lat.scores()) # (3) Based on correlation matrix -model_cor = EOF(n_modes=5, standardize=True, use_coslat=False) +model_cor = xe.single.EOF(n_modes=5, standardize=True, use_coslat=False) model_cor.fit(t2m, "time") components.append(model_cor.components()) scores.append(model_cor.scores()) # (4) Based on coslat weighted correlation matrix -model_cor_lat = EOF(n_modes=5, standardize=True, use_coslat=True) +model_cor_lat = xe.single.EOF(n_modes=5, standardize=True, use_coslat=True) model_cor_lat.fit(t2m, "time") components.append(model_cor_lat.components()) scores.append(model_cor_lat.scores()) diff --git a/docs/auto_examples/1single/plot_weighted-eof.py.md5 b/docs/auto_examples/1single/plot_weighted-eof.py.md5 index b015a5cf..329812d3 100644 --- a/docs/auto_examples/1single/plot_weighted-eof.py.md5 +++ b/docs/auto_examples/1single/plot_weighted-eof.py.md5 @@ -1 +1 @@ -8ca089ebc9dfca92e49dbd81d4047695 \ No newline at end of file +7047ce79c7b692b5a3beb8b39381ff16 \ No newline at end of file diff --git a/docs/auto_examples/1single/plot_weighted-eof.rst b/docs/auto_examples/1single/plot_weighted-eof.rst index 90769f45..0cb3b794 100644 --- a/docs/auto_examples/1single/plot_weighted-eof.rst +++ b/docs/auto_examples/1single/plot_weighted-eof.rst @@ -31,16 +31,16 @@ Load packages and data: .. GENERATED FROM PYTHON SOURCE LINES 13-26 -.. code-block:: Python +.. code-block:: default - import xarray as xr import matplotlib.pyplot as plt import seaborn as sns - from matplotlib.gridspec import GridSpec + import xarray as xr from cartopy.crs import Orthographic, PlateCarree + from matplotlib.gridspec import GridSpec - from xeofs.models import EOF + import xeofs as xe sns.set_context("paper") @@ -59,28 +59,28 @@ Perform the actual analysis .. GENERATED FROM PYTHON SOURCE LINES 28-53 -.. code-block:: Python +.. code-block:: default components = [] scores = [] # (1) Based on covariance matrix - model_cov = EOF(n_modes=5, standardize=False, use_coslat=False) + model_cov = xe.single.EOF(n_modes=5, standardize=False, use_coslat=False) model_cov.fit(t2m, "time") components.append(model_cov.components()) scores.append(model_cov.scores()) # (2) Based on coslat weighted covariance matrix - model_lat = EOF(n_modes=5, standardize=False, use_coslat=True) + model_lat = xe.single.EOF(n_modes=5, standardize=False, use_coslat=True) model_lat.fit(t2m, "time") components.append(model_lat.components()) scores.append(model_lat.scores()) # (3) Based on correlation matrix - model_cor = EOF(n_modes=5, standardize=True, use_coslat=False) + model_cor = xe.single.EOF(n_modes=5, standardize=True, use_coslat=False) model_cor.fit(t2m, "time") components.append(model_cor.components()) scores.append(model_cor.scores()) # (4) Based on coslat weighted correlation matrix - model_cor_lat = EOF(n_modes=5, standardize=True, use_coslat=True) + model_cor_lat = xe.single.EOF(n_modes=5, standardize=True, use_coslat=True) model_cor_lat.fit(t2m, "time") components.append(model_cor_lat.components()) scores.append(model_cor_lat.scores()) @@ -99,7 +99,7 @@ Create figure showing the first mode for all 4 cases .. GENERATED FROM PYTHON SOURCE LINES 55-89 -.. code-block:: Python +.. code-block:: default proj = Orthographic(central_latitude=30, central_longitude=-80) @@ -150,7 +150,7 @@ Create figure showing the first mode for all 4 cases .. 
rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 1.928 seconds) + **Total running time of the script:** (0 minutes 2.990 seconds) .. _sphx_glr_download_auto_examples_1single_plot_weighted-eof.py: @@ -159,14 +159,17 @@ Create figure showing the first mode for all 4 cases .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_weighted-eof.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_weighted-eof.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_weighted-eof.ipynb ` + .. only:: html diff --git a/docs/auto_examples/1single/plot_weighted-eof_codeobj.pickle b/docs/auto_examples/1single/plot_weighted-eof_codeobj.pickle index be19075a..63a2501d 100644 Binary files a/docs/auto_examples/1single/plot_weighted-eof_codeobj.pickle and b/docs/auto_examples/1single/plot_weighted-eof_codeobj.pickle differ diff --git a/docs/auto_examples/1single/sg_execution_times.rst b/docs/auto_examples/1single/sg_execution_times.rst index 1693d432..996f063c 100644 --- a/docs/auto_examples/1single/sg_execution_times.rst +++ b/docs/auto_examples/1single/sg_execution_times.rst @@ -6,21 +6,23 @@ Computation times ================= -**00:07.992** total execution time for **auto_examples_1single** files: +**00:07.526** total execution time for **auto_examples_1single** files: +-----------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_auto_examples_1single_plot_eof-smode.py` (``plot_eof-smode.py``) | 00:07.992 | 0.0 MB | +| :ref:`sphx_glr_auto_examples_1single_plot_eeof_trend.py` (``plot_eeof_trend.py``) | 00:07.526 | 0.0 MB | +-----------------------------------------------------------------------------------------------+-----------+--------+ | :ref:`sphx_glr_auto_examples_1single_plot_complex_eof.py` (``plot_complex_eof.py``) | 00:00.000 | 0.0 MB | +-----------------------------------------------------------------------------------------------+-----------+--------+ | :ref:`sphx_glr_auto_examples_1single_plot_eeof.py` (``plot_eeof.py``) | 00:00.000 | 0.0 MB | +-----------------------------------------------------------------------------------------------+-----------+--------+ -| :ref:`sphx_glr_auto_examples_1single_plot_eeof_trend.py` (``plot_eeof_trend.py``) | 00:00.000 | 0.0 MB | +| :ref:`sphx_glr_auto_examples_1single_plot_eof-smode.py` (``plot_eof-smode.py``) | 00:00.000 | 0.0 MB | +-----------------------------------------------------------------------------------------------+-----------+--------+ | :ref:`sphx_glr_auto_examples_1single_plot_eof-tmode.py` (``plot_eof-tmode.py``) | 00:00.000 | 0.0 MB | +-----------------------------------------------------------------------------------------------+-----------+--------+ | :ref:`sphx_glr_auto_examples_1single_plot_gwpca.py` (``plot_gwpca.py``) | 00:00.000 | 0.0 MB | +-----------------------------------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_auto_examples_1single_plot_hilbert_eof.py` (``plot_hilbert_eof.py``) | 00:00.000 | 0.0 MB | ++-----------------------------------------------------------------------------------------------+-----------+--------+ | :ref:`sphx_glr_auto_examples_1single_plot_mreof.py` (``plot_mreof.py``) | 00:00.000 | 0.0 MB | 
+-----------------------------------------------------------------------------------------------+-----------+--------+ | :ref:`sphx_glr_auto_examples_1single_plot_multivariate-eof.py` (``plot_multivariate-eof.py``) | 00:00.000 | 0.0 MB | diff --git a/docs/auto_examples/2cross/images/sphx_glr_plot_mca_001.png b/docs/auto_examples/2cross/images/sphx_glr_plot_mca_001.png new file mode 100644 index 00000000..2c5ee20e Binary files /dev/null and b/docs/auto_examples/2cross/images/sphx_glr_plot_mca_001.png differ diff --git a/docs/auto_examples/2cross/images/sphx_glr_plot_rotated_mca_001.png b/docs/auto_examples/2cross/images/sphx_glr_plot_rotated_mca_001.png new file mode 100644 index 00000000..e7ea9017 Binary files /dev/null and b/docs/auto_examples/2cross/images/sphx_glr_plot_rotated_mca_001.png differ diff --git a/docs/auto_examples/2cross/images/thumb/sphx_glr_plot_mca_thumb.png b/docs/auto_examples/2cross/images/thumb/sphx_glr_plot_mca_thumb.png new file mode 100644 index 00000000..c6a52d88 Binary files /dev/null and b/docs/auto_examples/2cross/images/thumb/sphx_glr_plot_mca_thumb.png differ diff --git a/docs/auto_examples/2cross/images/thumb/sphx_glr_plot_rotated_mca_thumb.png b/docs/auto_examples/2cross/images/thumb/sphx_glr_plot_rotated_mca_thumb.png new file mode 100644 index 00000000..8d6dc8db Binary files /dev/null and b/docs/auto_examples/2cross/images/thumb/sphx_glr_plot_rotated_mca_thumb.png differ diff --git a/docs/auto_examples/2cross/index.rst b/docs/auto_examples/2cross/index.rst new file mode 100644 index 00000000..7400c8e6 --- /dev/null +++ b/docs/auto_examples/2cross/index.rst @@ -0,0 +1,59 @@ + + +.. _sphx_glr_auto_examples_2cross: + +2 | Cross-Set Analysis +======================== + + + +.. raw:: html + +
    + + +.. raw:: html + +
    + +.. only:: html + + .. image:: /auto_examples/2cross/images/thumb/sphx_glr_plot_mca_thumb.png + :alt: + + :ref:`sphx_glr_auto_examples_2cross_plot_mca.py` + +.. raw:: html + +
    Maximum Covariance Analysis
    +
    + + +.. raw:: html + +
    + +.. only:: html + + .. image:: /auto_examples/2cross/images/thumb/sphx_glr_plot_rotated_mca_thumb.png + :alt: + + :ref:`sphx_glr_auto_examples_2cross_plot_rotated_mca.py` + +.. raw:: html + +
    Rotated Maximum Covariance Analysis
    +
    + + +.. raw:: html + +
    + + +.. toctree:: + :hidden: + + /auto_examples/2cross/plot_mca + /auto_examples/2cross/plot_rotated_mca + diff --git a/docs/auto_examples/2cross/plot_mca.ipynb b/docs/auto_examples/2cross/plot_mca.ipynb new file mode 100644 index 00000000..16d73cb7 --- /dev/null +++ b/docs/auto_examples/2cross/plot_mca.ipynb @@ -0,0 +1,219 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Maximum Covariance Analysis\n", + "\n", + "Maximum Covariance Analysis (MCA) between two data sets.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load packages and data:\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import xarray as xr\n", + "from cartopy.crs import Orthographic, PlateCarree\n", + "from cartopy.feature import LAND\n", + "from matplotlib.gridspec import GridSpec\n", + "\n", + "import xeofs as xe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create 2 different DataArrays\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2m = xr.tutorial.load_dataset(\"air_temperature\")[\"air\"]\n", + "da1 = t2m.isel(lon=slice(0, 26))\n", + "da2 = t2m.isel(lon=slice(27, None))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Perform MCA\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True)\n", + "mca.fit(da1, da2, dim=\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get singular vectors, projections (PCs), homogeneous and heterogeneous\n", + "patterns:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "singular_vectors = mca.components()\n", + "scores = mca.scores()\n", + "hom_pats, pvals_hom = mca.homogeneous_patterns()\n", + "het_pats, pvals_het = mca.heterogeneous_patterns()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When two fields are expected, the output of the above methods is a list of\n", + "length 2, with the first and second entry containing the relevant object for\n", + "``X`` and ``Y``. 
For example, the p-values obtained from the two-sided t-test\n", + "for the homogeneous patterns of ``X`` are:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pvals_hom[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a mask to identifiy where p-values are below 0.05\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hom_mask = [values < 0.05 for values in pvals_hom]\n", + "het_mask = [values < 0.05 for values in pvals_het]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot some relevant quantities of mode 2.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lonlats = [\n", + " np.meshgrid(pvals_hom[0].lon.values, pvals_hom[0].lat.values),\n", + " np.meshgrid(pvals_hom[1].lon.values, pvals_hom[1].lat.values),\n", + "]\n", + "proj = [\n", + " Orthographic(central_latitude=30, central_longitude=-120),\n", + " Orthographic(central_latitude=30, central_longitude=-60),\n", + "]\n", + "kwargs1 = {\"cmap\": \"BrBG\", \"vmin\": -0.05, \"vmax\": 0.05, \"transform\": PlateCarree()}\n", + "kwargs2 = {\"cmap\": \"RdBu\", \"vmin\": -1, \"vmax\": 1, \"transform\": PlateCarree()}\n", + "\n", + "mode = 2\n", + "\n", + "fig = plt.figure(figsize=(7, 14))\n", + "gs = GridSpec(5, 2)\n", + "ax1 = [fig.add_subplot(gs[0, i], projection=proj[i]) for i in range(2)]\n", + "ax2 = [fig.add_subplot(gs[1, i], projection=proj[i]) for i in range(2)]\n", + "ax3 = [fig.add_subplot(gs[2, i], projection=proj[i]) for i in range(2)]\n", + "ax4 = [fig.add_subplot(gs[3, i]) for i in range(2)]\n", + "\n", + "for i, a in enumerate(ax1):\n", + " singular_vectors[i].sel(mode=mode).plot(ax=a, **kwargs1)\n", + "\n", + "for i, a in enumerate(ax2):\n", + " hom_pats[i].sel(mode=mode).plot(ax=a, **kwargs2)\n", + " a.scatter(\n", + " lonlats[i][0],\n", + " lonlats[i][1],\n", + " hom_mask[i].sel(mode=mode).values * 0.5,\n", + " color=\"k\",\n", + " alpha=0.5,\n", + " transform=PlateCarree(),\n", + " )\n", + "for i, a in enumerate(ax3):\n", + " het_pats[i].sel(mode=mode).plot(ax=a, **kwargs2)\n", + " a.scatter(\n", + " lonlats[i][0],\n", + " lonlats[i][1],\n", + " het_mask[i].sel(mode=mode).values * 0.5,\n", + " color=\"k\",\n", + " alpha=0.5,\n", + " transform=PlateCarree(),\n", + " )\n", + "\n", + "for i, a in enumerate(ax4):\n", + " scores[i].sel(mode=mode).plot(ax=a)\n", + " a.set_xlabel(\"\")\n", + "\n", + "\n", + "for a in np.ravel([ax1, ax2, ax3]):\n", + " a.coastlines(color=\".5\")\n", + " a.add_feature(LAND)\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig(\"mca.jpg\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/2multi/plot_mca.py b/docs/auto_examples/2cross/plot_mca.py similarity index 97% rename from examples/2multi/plot_mca.py rename to docs/auto_examples/2cross/plot_mca.py index d3b48de7..a93d5546 100644 --- a/examples/2multi/plot_mca.py +++ b/docs/auto_examples/2cross/plot_mca.py @@ -6,14 +6,14 @@ """ # Load packages and data: +import 
matplotlib.pyplot as plt import numpy as np import xarray as xr -import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec from cartopy.crs import Orthographic, PlateCarree from cartopy.feature import LAND +from matplotlib.gridspec import GridSpec -from xeofs.models import MCA +import xeofs as xe # %% # Create 2 different DataArrays @@ -25,7 +25,7 @@ # %% # Perform MCA -mca = MCA(n_modes=20, standardize=False, use_coslat=True) +mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True) mca.fit(da1, da2, dim="time") # %% diff --git a/docs/auto_examples/2cross/plot_mca.py.md5 b/docs/auto_examples/2cross/plot_mca.py.md5 new file mode 100644 index 00000000..d980cde7 --- /dev/null +++ b/docs/auto_examples/2cross/plot_mca.py.md5 @@ -0,0 +1 @@ +e696fb777ef84ec447201ca9c01d1dfe \ No newline at end of file diff --git a/docs/auto_examples/2cross/plot_mca.rst b/docs/auto_examples/2cross/plot_mca.rst new file mode 100644 index 00000000..8f072e19 --- /dev/null +++ b/docs/auto_examples/2cross/plot_mca.rst @@ -0,0 +1,740 @@ + +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "auto_examples/2cross/plot_mca.py" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. only:: html + + .. note:: + :class: sphx-glr-download-link-note + + :ref:`Go to the end ` + to download the full example code + +.. rst-class:: sphx-glr-example-title + +.. _sphx_glr_auto_examples_2cross_plot_mca.py: + + +Maximum Covariance Analysis +=========================== + +Maximum Covariance Analysis (MCA) between two data sets. + +.. GENERATED FROM PYTHON SOURCE LINES 7-18 + +.. code-block:: default + + + # Load packages and data: + import matplotlib.pyplot as plt + import numpy as np + import xarray as xr + from cartopy.crs import Orthographic, PlateCarree + from cartopy.feature import LAND + from matplotlib.gridspec import GridSpec + + import xeofs as xe + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 19-20 + +Create 2 different DataArrays + +.. GENERATED FROM PYTHON SOURCE LINES 20-25 + +.. code-block:: default + + + t2m = xr.tutorial.load_dataset("air_temperature")["air"] + da1 = t2m.isel(lon=slice(0, 26)) + da2 = t2m.isel(lon=slice(27, None)) + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 26-27 + +Perform MCA + +.. GENERATED FROM PYTHON SOURCE LINES 27-31 + +.. code-block:: default + + + mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True) + mca.fit(da1, da2, dim="time") + + + + + +.. rst-class:: sphx-glr-script-out + + .. code-block:: none + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 32-34 + +Get singular vectors, projections (PCs), homogeneous and heterogeneous +patterns: + +.. GENERATED FROM PYTHON SOURCE LINES 34-40 + +.. code-block:: default + + + singular_vectors = mca.components() + scores = mca.scores() + hom_pats, pvals_hom = mca.homogeneous_patterns() + het_pats, pvals_het = mca.heterogeneous_patterns() + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 41-45 + +When two fields are expected, the output of the above methods is a list of +length 2, with the first and second entry containing the relevant object for +``X`` and ``Y``. For example, the p-values obtained from the two-sided t-test +for the homogeneous patterns of ``X`` are: + +.. GENERATED FROM PYTHON SOURCE LINES 45-48 + +.. code-block:: default + + + pvals_hom[0] + + + + + + +.. raw:: html + +
    +
    + + + + + + + + + + + + + + +
    <xarray.DataArray 'pvalues_of_left_homogeneous_patterns' (mode: 20, lat: 25,
    +                                                              lon: 26)>
    +    array([[[0.00000000e+000, 3.32602740e-290, 2.86712051e-286, ...,
    +             1.32392878e-119, 9.37036267e-186, 0.00000000e+000],
    +            [0.00000000e+000, 0.00000000e+000, 2.87964095e-272, ...,
    +             8.56508898e-140, 4.38532320e-131, 6.54040862e-156],
    +            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
    +             6.15522890e-039, 9.85389342e-095, 4.88663208e-307],
    +            ...,
    +            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
    +             0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
    +            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
    +             0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
    +            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
    +             0.00000000e+000, 0.00000000e+000, 0.00000000e+000]],
    +
    +           [[1.47816130e-058, 9.59629629e-069, 2.49426225e-077, ...,
    +             4.04710709e-067, 1.68530273e-001, 1.57942472e-054],
    +            [5.21001151e-051, 1.96182008e-055, 4.53391465e-063, ...,
    +             8.23812362e-066, 5.60289947e-142, 3.17052995e-126],
    +            [4.26755773e-027, 2.16865183e-023, 5.55666292e-044, ...,
    +             2.57077253e-302, 7.17664416e-212, 1.99383130e-056],
    +    ...
    +            [3.76773409e-015, 3.63595418e-016, 8.16407667e-016, ...,
    +             7.94237942e-001, 5.61772548e-001, 4.05805498e-001],
    +            [1.34077306e-010, 2.27353460e-010, 1.17695930e-009, ...,
    +             2.36922957e-001, 1.51587935e-001, 1.10342500e-001],
    +            [1.25888311e-009, 3.73287601e-009, 1.96403692e-008, ...,
    +             1.74507572e-001, 1.55251763e-001, 1.57874323e-001]],
    +
    +           [[3.23043237e-001, 1.34846412e-002, 1.49786437e-002, ...,
    +             8.59230128e-001, 9.39956204e-005, 7.73160479e-010],
    +            [3.30997229e-001, 1.89803851e-001, 1.63645931e-001, ...,
    +             1.15188423e-003, 1.70856527e-004, 3.80119477e-006],
    +            [4.80410883e-001, 3.58267371e-001, 1.05572199e-001, ...,
    +             1.33882929e-007, 8.22926072e-005, 3.56634908e-004],
    +            ...,
    +            [9.48231125e-019, 3.95379639e-018, 4.27550070e-017, ...,
    +             6.62164515e-001, 6.17150180e-001, 5.85772391e-001],
    +            [1.19130038e-006, 4.18324319e-006, 1.51748129e-005, ...,
    +             8.09046665e-001, 9.15065924e-001, 9.75206621e-001],
    +            [4.03876714e-002, 7.52746031e-002, 1.39540472e-001, ...,
    +             8.48692484e-002, 1.65380473e-001, 2.84566826e-001]]])
    +    Coordinates:
    +      * lat      (lat) float32 15.0 17.5 20.0 22.5 25.0 ... 65.0 67.5 70.0 72.5 75.0
    +      * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 255.0 257.5 260.0 262.5
    +      * mode     (mode) int64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
    +    Attributes: (12/18)
    +        model:                    Maximum Covariance Analysis
    +        software:                 xeofs
    +        version:                  1.2.0
    +        date:                     2024-09-02 02:52:38
    +        n_modes:                  20
    +        center:                   ['True', 'True']
    +        ...                       ...
    +        alpha:                    [1.0, 1.0]
    +        sample_name:              sample
    +        feature_name:             ['feature1', 'feature2']
    +        random_state:             None
    +        compute:                  True
    +        solver:                   auto
    +
    +
    +
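+
+The example continues by building boolean masks from these p-values for
+stippling. As an alternative, one can blank out non-significant grid points
+directly with xarray's ``where``. The following is only an illustrative
+sketch (not part of the original example) and assumes the ``hom_pats`` and
+``pvals_hom`` lists returned above:
+
+.. code-block:: python
+
+    # Sketch: set grid points that are not significant at the 5 % level to NaN.
+    hom_pats_sig = [
+        pattern.where(pvalue < 0.05) for pattern, pvalue in zip(hom_pats, pvals_hom)
+    ]
+
+    # Plot the masked homogeneous pattern of mode 2 for the first field (X).
+    hom_pats_sig[0].sel(mode=2).plot(cmap="RdBu_r")
+    plt.show()
+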
    + +.. GENERATED FROM PYTHON SOURCE LINES 49-50 + +Create a mask to identifiy where p-values are below 0.05 + +.. GENERATED FROM PYTHON SOURCE LINES 50-55 + +.. code-block:: default + + + hom_mask = [values < 0.05 for values in pvals_hom] + het_mask = [values < 0.05 for values in pvals_het] + + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 56-57 + +Plot some relevant quantities of mode 2. + +.. GENERATED FROM PYTHON SOURCE LINES 57-113 + +.. code-block:: default + + + lonlats = [ + np.meshgrid(pvals_hom[0].lon.values, pvals_hom[0].lat.values), + np.meshgrid(pvals_hom[1].lon.values, pvals_hom[1].lat.values), + ] + proj = [ + Orthographic(central_latitude=30, central_longitude=-120), + Orthographic(central_latitude=30, central_longitude=-60), + ] + kwargs1 = {"cmap": "BrBG", "vmin": -0.05, "vmax": 0.05, "transform": PlateCarree()} + kwargs2 = {"cmap": "RdBu", "vmin": -1, "vmax": 1, "transform": PlateCarree()} + + mode = 2 + + fig = plt.figure(figsize=(7, 14)) + gs = GridSpec(5, 2) + ax1 = [fig.add_subplot(gs[0, i], projection=proj[i]) for i in range(2)] + ax2 = [fig.add_subplot(gs[1, i], projection=proj[i]) for i in range(2)] + ax3 = [fig.add_subplot(gs[2, i], projection=proj[i]) for i in range(2)] + ax4 = [fig.add_subplot(gs[3, i]) for i in range(2)] + + for i, a in enumerate(ax1): + singular_vectors[i].sel(mode=mode).plot(ax=a, **kwargs1) + + for i, a in enumerate(ax2): + hom_pats[i].sel(mode=mode).plot(ax=a, **kwargs2) + a.scatter( + lonlats[i][0], + lonlats[i][1], + hom_mask[i].sel(mode=mode).values * 0.5, + color="k", + alpha=0.5, + transform=PlateCarree(), + ) + for i, a in enumerate(ax3): + het_pats[i].sel(mode=mode).plot(ax=a, **kwargs2) + a.scatter( + lonlats[i][0], + lonlats[i][1], + het_mask[i].sel(mode=mode).values * 0.5, + color="k", + alpha=0.5, + transform=PlateCarree(), + ) + + for i, a in enumerate(ax4): + scores[i].sel(mode=mode).plot(ax=a) + a.set_xlabel("") + + + for a in np.ravel([ax1, ax2, ax3]): + a.coastlines(color=".5") + a.add_feature(LAND) + + plt.tight_layout() + plt.savefig("mca.jpg") + + + +.. image-sg:: /auto_examples/2cross/images/sphx_glr_plot_mca_001.png + :alt: mode = 2, mode = 2, mode = 2, mode = 2, mode = 2, mode = 2, mode = 2, mode = 2 + :srcset: /auto_examples/2cross/images/sphx_glr_plot_mca_001.png + :class: sphx-glr-single-img + + + + + + +.. rst-class:: sphx-glr-timing + + **Total running time of the script:** (0 minutes 10.270 seconds) + + +.. _sphx_glr_download_auto_examples_2cross_plot_mca.py: + +.. only:: html + + .. container:: sphx-glr-footer sphx-glr-footer-example + + + + + .. container:: sphx-glr-download sphx-glr-download-python + + :download:`Download Python source code: plot_mca.py ` + + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_mca.ipynb ` + + +.. only:: html + + .. 
rst-class:: sphx-glr-signature + + `Gallery generated by Sphinx-Gallery `_ diff --git a/docs/auto_examples/2cross/plot_mca_codeobj.pickle b/docs/auto_examples/2cross/plot_mca_codeobj.pickle new file mode 100644 index 00000000..8013a6f4 Binary files /dev/null and b/docs/auto_examples/2cross/plot_mca_codeobj.pickle differ diff --git a/docs/auto_examples/2cross/plot_rotated_mca.ipynb b/docs/auto_examples/2cross/plot_rotated_mca.ipynb new file mode 100644 index 00000000..e69caebc --- /dev/null +++ b/docs/auto_examples/2cross/plot_rotated_mca.ipynb @@ -0,0 +1,237 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Rotated Maximum Covariance Analysis\n", + "\n", + "Rotated Maximum Covariance Analysis (MCA) between two data sets.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load packages and data:\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import xarray as xr\n", + "from cartopy.crs import Orthographic, PlateCarree\n", + "from cartopy.feature import LAND\n", + "from matplotlib.gridspec import GridSpec\n", + "\n", + "import xeofs as xe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create 2 different DataArrays\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2m = xr.tutorial.load_dataset(\"air_temperature\")[\"air\"]\n", + "da1 = t2m.isel(lon=slice(0, 26))\n", + "da2 = t2m.isel(lon=slice(27, None))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Perform MCA\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True)\n", + "mca.fit(da1, da2, dim=\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Apply Varimax-rotation to MCA solution\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rot = xe.cross.MCARotator(n_modes=10)\n", + "rot.fit(mca)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get rotated singular vectors, projections (PCs), homogeneous and heterogeneous\n", + "patterns:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "singular_vectors = rot.components()\n", + "scores = rot.scores()\n", + "hom_pats, pvals_hom = rot.homogeneous_patterns()\n", + "het_pats, pvals_het = rot.heterogeneous_patterns()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When two fields are expected, the output of the above methods is a list of\n", + "length 2, with the first and second entry containing the relevant object for\n", + "``X`` and ``Y``. 
For example, the p-values obtained from the two-sided t-test\n", + "for the homogeneous patterns of ``X`` are:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pvals_hom[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a mask to identifiy where p-values are below 0.05\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hom_mask = [values < 0.05 for values in pvals_hom]\n", + "het_mask = [values < 0.05 for values in pvals_het]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot some relevant quantities of mode 2.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lonlats = [\n", + " np.meshgrid(pvals_hom[0].lon.values, pvals_hom[0].lat.values),\n", + " np.meshgrid(pvals_hom[1].lon.values, pvals_hom[1].lat.values),\n", + "]\n", + "proj = [\n", + " Orthographic(central_latitude=30, central_longitude=-120),\n", + " Orthographic(central_latitude=30, central_longitude=-60),\n", + "]\n", + "kwargs1 = {\"cmap\": \"BrBG\", \"vmin\": -0.05, \"vmax\": 0.05, \"transform\": PlateCarree()}\n", + "kwargs2 = {\"cmap\": \"RdBu\", \"vmin\": -1, \"vmax\": 1, \"transform\": PlateCarree()}\n", + "\n", + "mode = 2\n", + "\n", + "fig = plt.figure(figsize=(7, 14))\n", + "gs = GridSpec(5, 2)\n", + "ax1 = [fig.add_subplot(gs[0, i], projection=proj[i]) for i in range(2)]\n", + "ax2 = [fig.add_subplot(gs[1, i], projection=proj[i]) for i in range(2)]\n", + "ax3 = [fig.add_subplot(gs[2, i], projection=proj[i]) for i in range(2)]\n", + "ax4 = [fig.add_subplot(gs[3, i]) for i in range(2)]\n", + "\n", + "for i, a in enumerate(ax1):\n", + " singular_vectors[i].sel(mode=mode).plot(ax=a, **kwargs1)\n", + "\n", + "for i, a in enumerate(ax2):\n", + " hom_pats[i].sel(mode=mode).plot(ax=a, **kwargs2)\n", + " a.scatter(\n", + " lonlats[i][0],\n", + " lonlats[i][1],\n", + " hom_mask[i].sel(mode=mode).values * 0.5,\n", + " color=\"k\",\n", + " alpha=0.5,\n", + " transform=PlateCarree(),\n", + " )\n", + "for i, a in enumerate(ax3):\n", + " het_pats[i].sel(mode=mode).plot(ax=a, **kwargs2)\n", + " a.scatter(\n", + " lonlats[i][0],\n", + " lonlats[i][1],\n", + " het_mask[i].sel(mode=mode).values * 0.5,\n", + " color=\"k\",\n", + " alpha=0.5,\n", + " transform=PlateCarree(),\n", + " )\n", + "\n", + "for i, a in enumerate(ax4):\n", + " scores[i].sel(mode=mode).plot(ax=a)\n", + " a.set_xlabel(\"\")\n", + "\n", + "\n", + "for a in np.ravel([ax1, ax2, ax3]):\n", + " a.coastlines(color=\".5\")\n", + " a.add_feature(LAND)\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig(\"rotated_mca.jpg\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/2multi/plot_rotated_mca.py b/docs/auto_examples/2cross/plot_rotated_mca.py similarity index 95% rename from examples/2multi/plot_rotated_mca.py rename to docs/auto_examples/2cross/plot_rotated_mca.py index a7ec89fe..2ae0c501 100644 --- a/examples/2multi/plot_rotated_mca.py +++ b/docs/auto_examples/2cross/plot_rotated_mca.py 
@@ -6,14 +6,14 @@ """ # Load packages and data: +import matplotlib.pyplot as plt import numpy as np import xarray as xr -import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec from cartopy.crs import Orthographic, PlateCarree from cartopy.feature import LAND +from matplotlib.gridspec import GridSpec -from xeofs.models import MCA, MCARotator +import xeofs as xe # %% # Create 2 different DataArrays @@ -25,13 +25,13 @@ # %% # Perform MCA -mca = MCA(n_modes=20, standardize=False, use_coslat=True) +mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True) mca.fit(da1, da2, dim="time") # %% # Apply Varimax-rotation to MCA solution -rot = MCARotator(n_modes=10) +rot = xe.cross.MCARotator(n_modes=10) rot.fit(mca) # %% diff --git a/docs/auto_examples/2cross/plot_rotated_mca.py.md5 b/docs/auto_examples/2cross/plot_rotated_mca.py.md5 new file mode 100644 index 00000000..deea9105 --- /dev/null +++ b/docs/auto_examples/2cross/plot_rotated_mca.py.md5 @@ -0,0 +1 @@ +00a21b73b61a542faad2eabd27cfcdf8 \ No newline at end of file diff --git a/docs/auto_examples/2cross/plot_rotated_mca.rst b/docs/auto_examples/2cross/plot_rotated_mca.rst new file mode 100644 index 00000000..a58d6b97 --- /dev/null +++ b/docs/auto_examples/2cross/plot_rotated_mca.rst @@ -0,0 +1,760 @@ + +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "auto_examples/2cross/plot_rotated_mca.py" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. only:: html + + .. note:: + :class: sphx-glr-download-link-note + + :ref:`Go to the end ` + to download the full example code + +.. rst-class:: sphx-glr-example-title + +.. _sphx_glr_auto_examples_2cross_plot_rotated_mca.py: + + +Rotated Maximum Covariance Analysis +=================================== + +Rotated Maximum Covariance Analysis (MCA) between two data sets. + +.. GENERATED FROM PYTHON SOURCE LINES 7-18 + +.. code-block:: default + + + # Load packages and data: + import matplotlib.pyplot as plt + import numpy as np + import xarray as xr + from cartopy.crs import Orthographic, PlateCarree + from cartopy.feature import LAND + from matplotlib.gridspec import GridSpec + + import xeofs as xe + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 19-20 + +Create 2 different DataArrays + +.. GENERATED FROM PYTHON SOURCE LINES 20-25 + +.. code-block:: default + + + t2m = xr.tutorial.load_dataset("air_temperature")["air"] + da1 = t2m.isel(lon=slice(0, 26)) + da2 = t2m.isel(lon=slice(27, None)) + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 26-27 + +Perform MCA + +.. GENERATED FROM PYTHON SOURCE LINES 27-31 + +.. code-block:: default + + + mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True) + mca.fit(da1, da2, dim="time") + + + + + +.. rst-class:: sphx-glr-script-out + + .. code-block:: none + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 32-33 + +Apply Varimax-rotation to MCA solution + +.. GENERATED FROM PYTHON SOURCE LINES 33-37 + +.. code-block:: default + + + rot = xe.cross.MCARotator(n_modes=10) + rot.fit(mca) + + + + + +.. rst-class:: sphx-glr-script-out + + .. code-block:: none + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 38-40 + +Get rotated singular vectors, projections (PCs), homogeneous and heterogeneous +patterns: + +.. GENERATED FROM PYTHON SOURCE LINES 40-46 + +.. 
code-block:: default + + + singular_vectors = rot.components() + scores = rot.scores() + hom_pats, pvals_hom = rot.homogeneous_patterns() + het_pats, pvals_het = rot.heterogeneous_patterns() + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 47-51 + +When two fields are expected, the output of the above methods is a list of +length 2, with the first and second entry containing the relevant object for +``X`` and ``Y``. For example, the p-values obtained from the two-sided t-test +for the homogeneous patterns of ``X`` are: + +.. GENERATED FROM PYTHON SOURCE LINES 51-54 + +.. code-block:: default + + + pvals_hom[0] + + + + + + +.. raw:: html + +
    +
    +    <xarray.DataArray 'pvalues_of_left_homogeneous_patterns' (mode: 10, lat: 25,
    +                                                              lon: 26)>
    +    array([[[1.55655258e-089, 1.55392044e-068, 7.80119302e-062, ...,
    +             2.23581515e-050, 8.33308320e-120, 6.00836750e-251],
    +            [2.11872977e-114, 9.49780753e-082, 3.11167453e-056, ...,
    +             6.65183702e-085, 1.64000665e-088, 1.04300755e-106],
    +            [7.79953584e-175, 1.21595072e-154, 4.65953803e-091, ...,
    +             1.38541944e-037, 4.80775593e-072, 1.33473247e-185],
    +            ...,
    +            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
    +             0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
    +            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
    +             0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
    +            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
    +             0.00000000e+000, 0.00000000e+000, 0.00000000e+000]],
    +
    +           [[0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
    +             4.94162415e-127, 8.44446124e-042, 1.03146716e-010],
    +            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000, ...,
    +             1.73955755e-006, 4.63745686e-001, 7.64186484e-001],
    +            [2.33739357e-315, 5.08908579e-295, 0.00000000e+000, ...,
    +             3.29248281e-031, 3.24235210e-009, 2.47163748e-013],
    +    ...
    +            [1.08624311e-031, 4.60559037e-034, 3.62929634e-035, ...,
    +             7.70733680e-008, 1.69130069e-007, 8.67958275e-007],
    +            [1.51071779e-017, 3.36574156e-018, 3.12910697e-018, ...,
    +             1.32355292e-002, 2.10073640e-002, 3.66735430e-002],
    +            [1.51780815e-012, 1.69354791e-013, 4.50343023e-014, ...,
    +             7.46501624e-001, 7.70929744e-001, 8.17168088e-001]],
    +
    +           [[1.24025980e-001, 3.39107580e-002, 1.79221922e-003, ...,
    +             6.87092234e-001, 7.72235373e-001, 5.05983136e-001],
    +            [2.50157622e-002, 1.60428304e-002, 9.88481378e-004, ...,
    +             8.65538505e-001, 2.93513835e-001, 5.20600556e-002],
    +            [9.83552316e-003, 8.72086401e-002, 1.50031876e-001, ...,
    +             2.52774242e-001, 9.58005017e-003, 8.41498523e-005],
    +            ...,
    +            [3.04190682e-009, 5.42503823e-010, 1.40613929e-010, ...,
    +             2.01063167e-004, 2.26427972e-003, 2.14215655e-002],
    +            [2.50500607e-002, 2.25942528e-002, 2.38626492e-002, ...,
    +             3.52630729e-004, 2.33815544e-003, 1.47774024e-002],
    +            [9.70026611e-001, 9.87391731e-001, 9.61937720e-001, ...,
    +             5.70511628e-003, 1.52611723e-002, 3.62942940e-002]]])
    +    Coordinates:
    +      * lat      (lat) float32 15.0 17.5 20.0 22.5 25.0 ... 65.0 67.5 70.0 72.5 75.0
    +      * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 255.0 257.5 260.0 262.5
    +      * mode     (mode) int64 1 2 3 4 5 6 7 8 9 10
    +    Attributes:
    +        model:     Rotated MCA
    +        software:  xeofs
    +        version:   1.2.0
    +        date:      2024-09-02 02:52:51
    +        n_modes:   10
    +        power:     1
    +        max_iter:  1000
    +        rtol:      1e-08
    +        compute:   True
    +
    +
    +
    + +.. GENERATED FROM PYTHON SOURCE LINES 55-56 + +Create a mask to identifiy where p-values are below 0.05 + +.. GENERATED FROM PYTHON SOURCE LINES 56-61 + +.. code-block:: default + + + hom_mask = [values < 0.05 for values in pvals_hom] + het_mask = [values < 0.05 for values in pvals_het] + + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 62-63 + +Plot some relevant quantities of mode 2. + +.. GENERATED FROM PYTHON SOURCE LINES 63-119 + +.. code-block:: default + + + lonlats = [ + np.meshgrid(pvals_hom[0].lon.values, pvals_hom[0].lat.values), + np.meshgrid(pvals_hom[1].lon.values, pvals_hom[1].lat.values), + ] + proj = [ + Orthographic(central_latitude=30, central_longitude=-120), + Orthographic(central_latitude=30, central_longitude=-60), + ] + kwargs1 = {"cmap": "BrBG", "vmin": -0.05, "vmax": 0.05, "transform": PlateCarree()} + kwargs2 = {"cmap": "RdBu", "vmin": -1, "vmax": 1, "transform": PlateCarree()} + + mode = 2 + + fig = plt.figure(figsize=(7, 14)) + gs = GridSpec(5, 2) + ax1 = [fig.add_subplot(gs[0, i], projection=proj[i]) for i in range(2)] + ax2 = [fig.add_subplot(gs[1, i], projection=proj[i]) for i in range(2)] + ax3 = [fig.add_subplot(gs[2, i], projection=proj[i]) for i in range(2)] + ax4 = [fig.add_subplot(gs[3, i]) for i in range(2)] + + for i, a in enumerate(ax1): + singular_vectors[i].sel(mode=mode).plot(ax=a, **kwargs1) + + for i, a in enumerate(ax2): + hom_pats[i].sel(mode=mode).plot(ax=a, **kwargs2) + a.scatter( + lonlats[i][0], + lonlats[i][1], + hom_mask[i].sel(mode=mode).values * 0.5, + color="k", + alpha=0.5, + transform=PlateCarree(), + ) + for i, a in enumerate(ax3): + het_pats[i].sel(mode=mode).plot(ax=a, **kwargs2) + a.scatter( + lonlats[i][0], + lonlats[i][1], + het_mask[i].sel(mode=mode).values * 0.5, + color="k", + alpha=0.5, + transform=PlateCarree(), + ) + + for i, a in enumerate(ax4): + scores[i].sel(mode=mode).plot(ax=a) + a.set_xlabel("") + + + for a in np.ravel([ax1, ax2, ax3]): + a.coastlines(color=".5") + a.add_feature(LAND) + + plt.tight_layout() + plt.savefig("rotated_mca.jpg") + + + +.. image-sg:: /auto_examples/2cross/images/sphx_glr_plot_rotated_mca_001.png + :alt: mode = 2, mode = 2, mode = 2, mode = 2, mode = 2, mode = 2, mode = 2, mode = 2 + :srcset: /auto_examples/2cross/images/sphx_glr_plot_rotated_mca_001.png + :class: sphx-glr-single-img + + + + + + +.. rst-class:: sphx-glr-timing + + **Total running time of the script:** (0 minutes 6.533 seconds) + + +.. _sphx_glr_download_auto_examples_2cross_plot_rotated_mca.py: + +.. only:: html + + .. container:: sphx-glr-footer sphx-glr-footer-example + + + + + .. container:: sphx-glr-download sphx-glr-download-python + + :download:`Download Python source code: plot_rotated_mca.py ` + + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_rotated_mca.ipynb ` + + +.. only:: html + + .. rst-class:: sphx-glr-signature + + `Gallery generated by Sphinx-Gallery `_ diff --git a/docs/auto_examples/2cross/plot_rotated_mca_codeobj.pickle b/docs/auto_examples/2cross/plot_rotated_mca_codeobj.pickle new file mode 100644 index 00000000..0442f632 Binary files /dev/null and b/docs/auto_examples/2cross/plot_rotated_mca_codeobj.pickle differ diff --git a/docs/auto_examples/2cross/sg_execution_times.rst b/docs/auto_examples/2cross/sg_execution_times.rst new file mode 100644 index 00000000..f92421a8 --- /dev/null +++ b/docs/auto_examples/2cross/sg_execution_times.rst @@ -0,0 +1,15 @@ + +:orphan: + +.. 
_sphx_glr_auto_examples_2cross_sg_execution_times: + + +Computation times +================= +**00:16.803** total execution time for **auto_examples_2cross** files: + ++------------------------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_auto_examples_2cross_plot_mca.py` (``plot_mca.py``) | 00:10.270 | 0.0 MB | ++------------------------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_auto_examples_2cross_plot_rotated_mca.py` (``plot_rotated_mca.py``) | 00:06.533 | 0.0 MB | ++------------------------------------------------------------------------------------+-----------+--------+ diff --git a/docs/auto_examples/2multi/images/sphx_glr_plot_cca_001.png b/docs/auto_examples/2multi/images/sphx_glr_plot_cca_001.png index 5a4c2a6e..10c26abb 100644 Binary files a/docs/auto_examples/2multi/images/sphx_glr_plot_cca_001.png and b/docs/auto_examples/2multi/images/sphx_glr_plot_cca_001.png differ diff --git a/docs/auto_examples/2multi/images/sphx_glr_plot_cca_002.png b/docs/auto_examples/2multi/images/sphx_glr_plot_cca_002.png index a02a021f..89687459 100644 Binary files a/docs/auto_examples/2multi/images/sphx_glr_plot_cca_002.png and b/docs/auto_examples/2multi/images/sphx_glr_plot_cca_002.png differ diff --git a/docs/auto_examples/2multi/images/sphx_glr_plot_mca_001.png b/docs/auto_examples/2multi/images/sphx_glr_plot_mca_001.png index 1755ad7c..3883e3f3 100644 Binary files a/docs/auto_examples/2multi/images/sphx_glr_plot_mca_001.png and b/docs/auto_examples/2multi/images/sphx_glr_plot_mca_001.png differ diff --git a/docs/auto_examples/2multi/images/sphx_glr_plot_rotated_mca_001.png b/docs/auto_examples/2multi/images/sphx_glr_plot_rotated_mca_001.png index de211f10..ef2ea0ac 100644 Binary files a/docs/auto_examples/2multi/images/sphx_glr_plot_rotated_mca_001.png and b/docs/auto_examples/2multi/images/sphx_glr_plot_rotated_mca_001.png differ diff --git a/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_cca_thumb.png b/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_cca_thumb.png index 9c16f7d5..2e99a4d0 100644 Binary files a/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_cca_thumb.png and b/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_cca_thumb.png differ diff --git a/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_mca_thumb.png b/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_mca_thumb.png index b4771316..7949e785 100644 Binary files a/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_mca_thumb.png and b/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_mca_thumb.png differ diff --git a/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_rotated_mca_thumb.png b/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_rotated_mca_thumb.png index 292b6200..a3ed60a4 100644 Binary files a/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_rotated_mca_thumb.png and b/docs/auto_examples/2multi/images/thumb/sphx_glr_plot_rotated_mca_thumb.png differ diff --git a/docs/auto_examples/2multi/plot_cca.ipynb b/docs/auto_examples/2multi/plot_cca.ipynb index be266233..f8e3b7f4 100644 --- a/docs/auto_examples/2multi/plot_cca.ipynb +++ b/docs/auto_examples/2multi/plot_cca.ipynb @@ -22,12 +22,12 @@ "metadata": {}, "outputs": [], "source": [ - "import xarray as xr\n", - "import xeofs as xe\n", - "\n", + "import cartopy.crs as ccrs\n", "import matplotlib.pyplot as plt\n", + "import xarray as xr\n", "from matplotlib.gridspec import GridSpec\n", - "import cartopy.crs as ccrs" 
+ "\n", + "import xeofs as xe" ] }, { @@ -102,7 +102,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = xe.models.CCA(\n", + "model = xe.multi.CCA(\n", " n_modes=2,\n", " use_coslat=True,\n", " pca=True,\n", @@ -189,7 +189,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/2multi/plot_cca.py b/docs/auto_examples/2multi/plot_cca.py index e57af825..70f6a68e 100644 --- a/docs/auto_examples/2multi/plot_cca.py +++ b/docs/auto_examples/2multi/plot_cca.py @@ -11,12 +11,12 @@ First, we'll import the necessary modules. """ -import xarray as xr -import xeofs as xe - +import cartopy.crs as ccrs import matplotlib.pyplot as plt +import xarray as xr from matplotlib.gridspec import GridSpec -import cartopy.crs as ccrs + +import xeofs as xe # %% # Next, we load the data and compute the SST anomalies. This removes the @@ -56,7 +56,7 @@ # Note that if our initial PCA modes don't hit the 90% variance target, ``xeofs`` # will give a warning. -model = xe.models.CCA( +model = xe.multi.CCA( n_modes=2, use_coslat=True, pca=True, diff --git a/docs/auto_examples/2multi/plot_cca.py.md5 b/docs/auto_examples/2multi/plot_cca.py.md5 index d0fdf998..40b748fb 100644 --- a/docs/auto_examples/2multi/plot_cca.py.md5 +++ b/docs/auto_examples/2multi/plot_cca.py.md5 @@ -1 +1 @@ -89f56b4ed3606f9c5a94189c1c497250 \ No newline at end of file +ccd6cf17e9ee3ea88ba470a3180fe224 \ No newline at end of file diff --git a/docs/auto_examples/2multi/plot_cca.rst b/docs/auto_examples/2multi/plot_cca.rst index 51cbe8dc..466135c9 100644 --- a/docs/auto_examples/2multi/plot_cca.rst +++ b/docs/auto_examples/2multi/plot_cca.rst @@ -31,15 +31,15 @@ First, we'll import the necessary modules. .. GENERATED FROM PYTHON SOURCE LINES 13-21 -.. code-block:: Python +.. code-block:: default - import xarray as xr - import xeofs as xe - + import cartopy.crs as ccrs import matplotlib.pyplot as plt + import xarray as xr from matplotlib.gridspec import GridSpec - import cartopy.crs as ccrs + + import xeofs as xe @@ -55,7 +55,7 @@ monthly climatologies, so the seasonal cycle doesn't impact our CCA. .. GENERATED FROM PYTHON SOURCE LINES 24-29 -.. code-block:: Python +.. code-block:: default sst = xr.tutorial.load_dataset("ersstv5").sst @@ -75,7 +75,7 @@ Now, we define the three regions of interest and store them in a list. .. GENERATED FROM PYTHON SOURCE LINES 31-38 -.. code-block:: Python +.. code-block:: default indian = sst.sel(lon=slice(35, 115), lat=slice(30, -30)) @@ -115,10 +115,10 @@ will give a warning. .. GENERATED FROM PYTHON SOURCE LINES 58-70 -.. code-block:: Python +.. code-block:: default - model = xe.models.CCA( + model = xe.multi.CCA( n_modes=2, use_coslat=True, pca=True, @@ -142,7 +142,7 @@ Let's look at the canonical loadings (components) of the first mode. .. GENERATED FROM PYTHON SOURCE LINES 72-95 -.. code-block:: Python +.. code-block:: default mode = 1 @@ -185,7 +185,7 @@ And lastly, we'll check out the canonical variates (scores) of the first mode. .. GENERATED FROM PYTHON SOURCE LINES 97-103 -.. code-block:: Python +.. code-block:: default fig, ax = plt.subplots(figsize=(12, 4)) @@ -207,14 +207,14 @@ And lastly, we'll check out the canonical variates (scores) of the first mode. .. code-block:: none - + .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 1.632 seconds) + **Total running time of the script:** (0 minutes 2.798 seconds) .. 
_sphx_glr_download_auto_examples_2multi_plot_cca.py: @@ -223,14 +223,17 @@ And lastly, we'll check out the canonical variates (scores) of the first mode. .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_cca.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_cca.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_cca.ipynb ` + .. only:: html diff --git a/docs/auto_examples/2multi/plot_cca_codeobj.pickle b/docs/auto_examples/2multi/plot_cca_codeobj.pickle index e9a23cf3..05060618 100644 Binary files a/docs/auto_examples/2multi/plot_cca_codeobj.pickle and b/docs/auto_examples/2multi/plot_cca_codeobj.pickle differ diff --git a/docs/auto_examples/2multi/plot_mca.ipynb b/docs/auto_examples/2multi/plot_mca.ipynb index 0cdfe016..16d73cb7 100644 --- a/docs/auto_examples/2multi/plot_mca.ipynb +++ b/docs/auto_examples/2multi/plot_mca.ipynb @@ -17,14 +17,14 @@ "outputs": [], "source": [ "# Load packages and data:\n", + "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import xarray as xr\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib.gridspec import GridSpec\n", "from cartopy.crs import Orthographic, PlateCarree\n", "from cartopy.feature import LAND\n", + "from matplotlib.gridspec import GridSpec\n", "\n", - "from xeofs.models import MCA" + "import xeofs as xe" ] }, { @@ -60,7 +60,7 @@ "metadata": {}, "outputs": [], "source": [ - "mca = MCA(n_modes=20, standardize=False, use_coslat=True)\n", + "mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True)\n", "mca.fit(da1, da2, dim=\"time\")" ] }, @@ -211,7 +211,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/2multi/plot_mca.py b/docs/auto_examples/2multi/plot_mca.py index d3b48de7..a93d5546 100644 --- a/docs/auto_examples/2multi/plot_mca.py +++ b/docs/auto_examples/2multi/plot_mca.py @@ -6,14 +6,14 @@ """ # Load packages and data: +import matplotlib.pyplot as plt import numpy as np import xarray as xr -import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec from cartopy.crs import Orthographic, PlateCarree from cartopy.feature import LAND +from matplotlib.gridspec import GridSpec -from xeofs.models import MCA +import xeofs as xe # %% # Create 2 different DataArrays @@ -25,7 +25,7 @@ # %% # Perform MCA -mca = MCA(n_modes=20, standardize=False, use_coslat=True) +mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True) mca.fit(da1, da2, dim="time") # %% diff --git a/docs/auto_examples/2multi/plot_mca.py.md5 b/docs/auto_examples/2multi/plot_mca.py.md5 index 99600e23..d980cde7 100644 --- a/docs/auto_examples/2multi/plot_mca.py.md5 +++ b/docs/auto_examples/2multi/plot_mca.py.md5 @@ -1 +1 @@ -76e68784ad5c6cb6f7eb0924c3019da9 \ No newline at end of file +e696fb777ef84ec447201ca9c01d1dfe \ No newline at end of file diff --git a/docs/auto_examples/2multi/plot_mca.rst b/docs/auto_examples/2multi/plot_mca.rst index 06fa7336..ec378a1f 100644 --- a/docs/auto_examples/2multi/plot_mca.rst +++ b/docs/auto_examples/2multi/plot_mca.rst @@ -25,18 +25,18 @@ Maximum Covariance Analysis (MCA) between two data sets. .. GENERATED FROM PYTHON SOURCE LINES 7-18 -.. code-block:: Python +.. 
code-block:: default # Load packages and data: + import matplotlib.pyplot as plt import numpy as np import xarray as xr - import matplotlib.pyplot as plt - from matplotlib.gridspec import GridSpec from cartopy.crs import Orthographic, PlateCarree from cartopy.feature import LAND + from matplotlib.gridspec import GridSpec - from xeofs.models import MCA + import xeofs as xe @@ -51,7 +51,7 @@ Create 2 different DataArrays .. GENERATED FROM PYTHON SOURCE LINES 20-25 -.. code-block:: Python +.. code-block:: default t2m = xr.tutorial.load_dataset("air_temperature")["air"] @@ -71,10 +71,10 @@ Perform MCA .. GENERATED FROM PYTHON SOURCE LINES 27-31 -.. code-block:: Python +.. code-block:: default - mca = MCA(n_modes=20, standardize=False, use_coslat=True) + mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True) mca.fit(da1, da2, dim="time") @@ -86,7 +86,7 @@ Perform MCA .. code-block:: none - + @@ -97,7 +97,7 @@ patterns: .. GENERATED FROM PYTHON SOURCE LINES 34-40 -.. code-block:: Python +.. code-block:: default singular_vectors = mca.components() @@ -121,7 +121,7 @@ for the homogeneous patterns of ``X`` are: .. GENERATED FROM PYTHON SOURCE LINES 45-48 -.. code-block:: Python +.. code-block:: default pvals_hom[0] @@ -498,31 +498,31 @@ for the homogeneous patterns of ``X`` are: fill: currentColor; }
    <xarray.DataArray 'pvalues_of_left_homogeneous_patterns' (mode: 20, lat: 25,
    -                                                              lon: 26)> Size: 104kB
    -    7.302e-298 4.926e-268 8.327e-265 3.64e-251 ... 0.04256 0.08683 0.1689 0.2897
    +                                                              lon: 26)>
    +    0.0 3.288e-290 2.871e-286 1.598e-271 ... 0.04189 0.08527 0.1659 0.2852
         Coordinates:
    -      * lat      (lat) float32 100B 15.0 17.5 20.0 22.5 25.0 ... 67.5 70.0 72.5 75.0
    -      * lon      (lon) float32 104B 200.0 202.5 205.0 207.5 ... 257.5 260.0 262.5
    -      * mode     (mode) int64 160B 1 2 3 4 5 6 7 8 9 ... 12 13 14 15 16 17 18 19 20
    -    Attributes: (12/16)
    -        model:          MCA
    -        software:       xeofs
    -        version:        2.3.2
    -        date:           2024-03-31 21:13:25
    -        n_modes:        20
    -        center:         True
    -        ...             ...
    -        compute:        True
    -        sample_name:    sample
    -        feature_name:   feature
    -        solver:         auto
    -        random_state:   None
    -        solver_kwargs:  {}
    +    Attributes:
    +        model:                    Maximum Covariance Analysis
    +        software:                 xeofs
    +        version:                  1.2.0
    +        date:                     2024-09-02 02:30:55
    +        n_modes:                  20
    +        center:                   ['True', 'True']
    +        standardize:              ['False', 'False']
    +        use_coslat:               ['True', 'True']
    +        check_nans:               ['True', 'True']
    +        use_pca:                  ['True', 'True']
    +        n_pca_modes:              [0.999, 0.999]
    +        pca_init_rank_reduction:  [0.3, 0.3]
    +        alpha:                    [1.0, 1.0]
    +        sample_name:              sample
    +        feature_name:             ['feature1', 'feature2']
    +        random_state:             None
    +        compute:                  True
    +        solver:                   auto
  • @@ -579,7 +579,7 @@ Create a mask to identifiy where p-values are below 0.05 .. GENERATED FROM PYTHON SOURCE LINES 50-55 -.. code-block:: Python +.. code-block:: default hom_mask = [values < 0.05 for values in pvals_hom] @@ -599,7 +599,7 @@ Plot some relevant quantities of mode 2. .. GENERATED FROM PYTHON SOURCE LINES 57-113 -.. code-block:: Python +.. code-block:: default lonlats = [ @@ -666,20 +666,13 @@ Plot some relevant quantities of mode 2. :class: sphx-glr-single-img -.. rst-class:: sphx-glr-script-out - - .. code-block:: none - - /home/slevang/miniconda3/envs/xeofs-docs/lib/python3.11/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_land.zip - warnings.warn(f'Downloading: {url}', DownloadWarning) - .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 3.519 seconds) + **Total running time of the script:** (0 minutes 8.251 seconds) .. _sphx_glr_download_auto_examples_2multi_plot_mca.py: @@ -688,14 +681,17 @@ Plot some relevant quantities of mode 2. .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_mca.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_mca.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_mca.ipynb ` + .. only:: html diff --git a/docs/auto_examples/2multi/plot_mca_codeobj.pickle b/docs/auto_examples/2multi/plot_mca_codeobj.pickle index 8279095a..ec889c42 100644 Binary files a/docs/auto_examples/2multi/plot_mca_codeobj.pickle and b/docs/auto_examples/2multi/plot_mca_codeobj.pickle differ diff --git a/docs/auto_examples/2multi/plot_rotated_mca.ipynb b/docs/auto_examples/2multi/plot_rotated_mca.ipynb index 8bc915f4..e69caebc 100644 --- a/docs/auto_examples/2multi/plot_rotated_mca.ipynb +++ b/docs/auto_examples/2multi/plot_rotated_mca.ipynb @@ -17,14 +17,14 @@ "outputs": [], "source": [ "# Load packages and data:\n", + "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import xarray as xr\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib.gridspec import GridSpec\n", "from cartopy.crs import Orthographic, PlateCarree\n", "from cartopy.feature import LAND\n", + "from matplotlib.gridspec import GridSpec\n", "\n", - "from xeofs.models import MCA, MCARotator" + "import xeofs as xe" ] }, { @@ -60,7 +60,7 @@ "metadata": {}, "outputs": [], "source": [ - "mca = MCA(n_modes=20, standardize=False, use_coslat=True)\n", + "mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True)\n", "mca.fit(da1, da2, dim=\"time\")" ] }, @@ -78,7 +78,7 @@ "metadata": {}, "outputs": [], "source": [ - "rot = MCARotator(n_modes=10)\n", + "rot = xe.cross.MCARotator(n_modes=10)\n", "rot.fit(mca)" ] }, @@ -229,7 +229,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/2multi/plot_rotated_mca.py b/docs/auto_examples/2multi/plot_rotated_mca.py index a7ec89fe..2ae0c501 100644 --- a/docs/auto_examples/2multi/plot_rotated_mca.py +++ b/docs/auto_examples/2multi/plot_rotated_mca.py @@ -6,14 +6,14 @@ """ # Load packages and data: +import matplotlib.pyplot as plt import numpy as np import xarray as xr -import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec from cartopy.crs import Orthographic, 
PlateCarree from cartopy.feature import LAND +from matplotlib.gridspec import GridSpec -from xeofs.models import MCA, MCARotator +import xeofs as xe # %% # Create 2 different DataArrays @@ -25,13 +25,13 @@ # %% # Perform MCA -mca = MCA(n_modes=20, standardize=False, use_coslat=True) +mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True) mca.fit(da1, da2, dim="time") # %% # Apply Varimax-rotation to MCA solution -rot = MCARotator(n_modes=10) +rot = xe.cross.MCARotator(n_modes=10) rot.fit(mca) # %% diff --git a/docs/auto_examples/2multi/plot_rotated_mca.py.md5 b/docs/auto_examples/2multi/plot_rotated_mca.py.md5 index c25ed037..deea9105 100644 --- a/docs/auto_examples/2multi/plot_rotated_mca.py.md5 +++ b/docs/auto_examples/2multi/plot_rotated_mca.py.md5 @@ -1 +1 @@ -2a8fc6d56ead0b05f3f5c909c0e07bbb \ No newline at end of file +00a21b73b61a542faad2eabd27cfcdf8 \ No newline at end of file diff --git a/docs/auto_examples/2multi/plot_rotated_mca.rst b/docs/auto_examples/2multi/plot_rotated_mca.rst index 902df371..8036b097 100644 --- a/docs/auto_examples/2multi/plot_rotated_mca.rst +++ b/docs/auto_examples/2multi/plot_rotated_mca.rst @@ -25,18 +25,18 @@ Rotated Maximum Covariance Analysis (MCA) between two data sets. .. GENERATED FROM PYTHON SOURCE LINES 7-18 -.. code-block:: Python +.. code-block:: default # Load packages and data: + import matplotlib.pyplot as plt import numpy as np import xarray as xr - import matplotlib.pyplot as plt - from matplotlib.gridspec import GridSpec from cartopy.crs import Orthographic, PlateCarree from cartopy.feature import LAND + from matplotlib.gridspec import GridSpec - from xeofs.models import MCA, MCARotator + import xeofs as xe @@ -51,7 +51,7 @@ Create 2 different DataArrays .. GENERATED FROM PYTHON SOURCE LINES 20-25 -.. code-block:: Python +.. code-block:: default t2m = xr.tutorial.load_dataset("air_temperature")["air"] @@ -71,10 +71,10 @@ Perform MCA .. GENERATED FROM PYTHON SOURCE LINES 27-31 -.. code-block:: Python +.. code-block:: default - mca = MCA(n_modes=20, standardize=False, use_coslat=True) + mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True) mca.fit(da1, da2, dim="time") @@ -86,7 +86,7 @@ Perform MCA .. code-block:: none - + @@ -96,10 +96,10 @@ Apply Varimax-rotation to MCA solution .. GENERATED FROM PYTHON SOURCE LINES 33-37 -.. code-block:: Python +.. code-block:: default - rot = MCARotator(n_modes=10) + rot = xe.cross.MCARotator(n_modes=10) rot.fit(mca) @@ -111,7 +111,7 @@ Apply Varimax-rotation to MCA solution .. code-block:: none - + @@ -122,7 +122,7 @@ patterns: .. GENERATED FROM PYTHON SOURCE LINES 40-46 -.. code-block:: Python +.. code-block:: default singular_vectors = rot.components() @@ -146,7 +146,7 @@ for the homogeneous patterns of ``X`` are: .. GENERATED FROM PYTHON SOURCE LINES 51-54 -.. code-block:: Python +.. code-block:: default pvals_hom[0] @@ -523,28 +523,27 @@ for the homogeneous patterns of ``X`` are: fill: currentColor; }
    <xarray.DataArray 'pvalues_of_left_homogeneous_patterns' (mode: 10, lat: 25,
    -                                                              lon: 26)> Size: 52kB
    -    5.921e-81 4.092e-64 6.235e-58 5.975e-49 ... 0.001974 0.005479 0.01494 0.03604
    +                                                              lon: 26)>
    +    1.551e-89 1.547e-68 7.776e-62 2.972e-52 ... 0.002105 0.005704 0.01526 0.03629
         Coordinates:
    -      * lat      (lat) float32 100B 15.0 17.5 20.0 22.5 25.0 ... 67.5 70.0 72.5 75.0
    -      * lon      (lon) float32 104B 200.0 202.5 205.0 207.5 ... 257.5 260.0 262.5
    -      * mode     (mode) int64 80B 1 2 3 4 5 6 7 8 9 10
    +      * lat      (lat) float32 15.0 17.5 20.0 22.5 25.0 ... 65.0 67.5 70.0 72.5 75.0
    +      * lon      (lon) float32 200.0 202.5 205.0 207.5 ... 255.0 257.5 260.0 262.5
    +      * mode     (mode) int64 1 2 3 4 5 6 7 8 9 10
         Attributes:
    -        model:             Rotated MCA
    -        n_modes:           10
    -        power:             1
    -        max_iter:          1000
    -        rtol:              1e-08
    -        squared_loadings:  False
    -        compute:           True
    -        software:          xeofs
    -        version:           2.3.2
    -        date:              2024-03-31 21:13:29
    +        model:     Rotated MCA
    +        software:  xeofs
    +        version:   1.2.0
    +        date:      2024-09-02 02:31:13
    +        n_modes:   10
    +        power:     1
    +        max_iter:  1000
    +        rtol:      1e-08
    +        compute:   True
  • @@ -600,7 +599,7 @@ Create a mask to identifiy where p-values are below 0.05 .. GENERATED FROM PYTHON SOURCE LINES 56-61 -.. code-block:: Python +.. code-block:: default hom_mask = [values < 0.05 for values in pvals_hom] @@ -620,7 +619,7 @@ Plot some relevant quantities of mode 2. .. GENERATED FROM PYTHON SOURCE LINES 63-119 -.. code-block:: Python +.. code-block:: default lonlats = [ @@ -693,7 +692,7 @@ Plot some relevant quantities of mode 2. .. rst-class:: sphx-glr-timing - **Total running time of the script:** (0 minutes 2.207 seconds) + **Total running time of the script:** (0 minutes 13.021 seconds) .. _sphx_glr_download_auto_examples_2multi_plot_rotated_mca.py: @@ -702,14 +701,17 @@ Plot some relevant quantities of mode 2. .. container:: sphx-glr-footer sphx-glr-footer-example - .. container:: sphx-glr-download sphx-glr-download-jupyter - :download:`Download Jupyter notebook: plot_rotated_mca.ipynb ` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_rotated_mca.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_rotated_mca.ipynb ` + .. only:: html diff --git a/docs/auto_examples/2multi/plot_rotated_mca_codeobj.pickle b/docs/auto_examples/2multi/plot_rotated_mca_codeobj.pickle index ea3553d3..1d2dc827 100644 Binary files a/docs/auto_examples/2multi/plot_rotated_mca_codeobj.pickle and b/docs/auto_examples/2multi/plot_rotated_mca_codeobj.pickle differ diff --git a/docs/auto_examples/2multi/sg_execution_times.rst b/docs/auto_examples/2multi/sg_execution_times.rst index 815af82c..99753050 100644 --- a/docs/auto_examples/2multi/sg_execution_times.rst +++ b/docs/auto_examples/2multi/sg_execution_times.rst @@ -6,38 +6,12 @@ Computation times ================= -**00:07.357** total execution time for 3 files **from auto_examples/2multi**: - -.. container:: - - .. raw:: html - - - - - - - - .. 
list-table:: - :header-rows: 1 - :class: table table-striped sg-datatable - - * - Example - - Time - - Mem (MB) - * - :ref:`sphx_glr_auto_examples_2multi_plot_mca.py` (``plot_mca.py``) - - 00:03.519 - - 0.0 - * - :ref:`sphx_glr_auto_examples_2multi_plot_rotated_mca.py` (``plot_rotated_mca.py``) - - 00:02.207 - - 0.0 - * - :ref:`sphx_glr_auto_examples_2multi_plot_cca.py` (``plot_cca.py``) - - 00:01.632 - - 0.0 +**00:02.798** total execution time for **auto_examples_2multi** files: + ++------------------------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_auto_examples_2multi_plot_cca.py` (``plot_cca.py``) | 00:02.798 | 0.0 MB | ++------------------------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_auto_examples_2multi_plot_mca.py` (``plot_mca.py``) | 00:00.000 | 0.0 MB | ++------------------------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_auto_examples_2multi_plot_rotated_mca.py` (``plot_rotated_mca.py``) | 00:00.000 | 0.0 MB | ++------------------------------------------------------------------------------------+-----------+--------+ diff --git a/docs/auto_examples/3multi/images/sphx_glr_plot_cca_001.png b/docs/auto_examples/3multi/images/sphx_glr_plot_cca_001.png new file mode 100644 index 00000000..10c26abb Binary files /dev/null and b/docs/auto_examples/3multi/images/sphx_glr_plot_cca_001.png differ diff --git a/docs/auto_examples/3multi/images/sphx_glr_plot_cca_002.png b/docs/auto_examples/3multi/images/sphx_glr_plot_cca_002.png new file mode 100644 index 00000000..89687459 Binary files /dev/null and b/docs/auto_examples/3multi/images/sphx_glr_plot_cca_002.png differ diff --git a/docs/auto_examples/3multi/images/thumb/sphx_glr_plot_cca_thumb.png b/docs/auto_examples/3multi/images/thumb/sphx_glr_plot_cca_thumb.png new file mode 100644 index 00000000..2e99a4d0 Binary files /dev/null and b/docs/auto_examples/3multi/images/thumb/sphx_glr_plot_cca_thumb.png differ diff --git a/docs/auto_examples/3multi/index.rst b/docs/auto_examples/3multi/index.rst new file mode 100644 index 00000000..30f52aef --- /dev/null +++ b/docs/auto_examples/3multi/index.rst @@ -0,0 +1,41 @@ + + +.. _sphx_glr_auto_examples_3multi: + +3 | Multi-Set Analysis +======================== + + + +.. raw:: html + +
    + + +.. raw:: html + +
    + +.. only:: html + + .. image:: /auto_examples/3multi/images/thumb/sphx_glr_plot_cca_thumb.png + :alt: + + :ref:`sphx_glr_auto_examples_3multi_plot_cca.py` + +.. raw:: html + +
    Canonical Correlation Analysis
    +
    + + +.. raw:: html + +
    + + +.. toctree:: + :hidden: + + /auto_examples/3multi/plot_cca + diff --git a/docs/auto_examples/3multi/plot_cca.ipynb b/docs/auto_examples/3multi/plot_cca.ipynb new file mode 100644 index 00000000..f8e3b7f4 --- /dev/null +++ b/docs/auto_examples/3multi/plot_cca.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Canonical Correlation Analysis\n", + "\n", + "In this example, we're going to perform a Canonical Correlation Analysis (CCA)\n", + "on three datasets using the ERSSTv5 monthly sea surface temperature (SST) data\n", + "from 1970 to 2022. We divide this data into three areas: the Indian Ocean,\n", + "the Pacific Ocean, and the Atlantic Ocean. Our goal is to perform CCA on these\n", + "regions.\n", + "\n", + "First, we'll import the necessary modules.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cartopy.crs as ccrs\n", + "import matplotlib.pyplot as plt\n", + "import xarray as xr\n", + "from matplotlib.gridspec import GridSpec\n", + "\n", + "import xeofs as xe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we load the data and compute the SST anomalies. This removes the\n", + "monthly climatologies, so the seasonal cycle doesn't impact our CCA.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sst = xr.tutorial.load_dataset(\"ersstv5\").sst\n", + "sst = sst.groupby(\"time.month\") - sst.groupby(\"time.month\").mean(\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we define the three regions of interest and store them in a list.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "indian = sst.sel(lon=slice(35, 115), lat=slice(30, -30))\n", + "pacific = sst.sel(lon=slice(130, 290), lat=slice(30, -30))\n", + "atlantic = sst.sel(lon=slice(320, 360), lat=slice(70, 10))\n", + "\n", + "data_list = [indian, pacific, atlantic]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now perform CCA. Since we are dealing with a high-dimensional feature space, we first\n", + "perform PCA to reduce the dimensionality (this is kind of a regularized CCA) by setting\n", + "``pca=True``. By setting the ``variance_fraction`` keyword argument, we specify that we\n", + "want to keep the number of PCA modes that explain 90% of the variance in each of the\n", + "three data sets.\n", + "\n", + "An important parameter is ``init_pca_modes``. It specifies the number\n", + "of PCA modes that are initially compute before truncating them to account for 90 %. If this\n", + "number is small enough, randomized PCAs will be performed instead of the full SVD decomposition\n", + "which is much faster. 
We can also specify ``init_pca_modes`` as a float (0 < x <= 1),\n", + "in which case the number of PCA modes is given by the fraction of the data matrix's rank\n", + "The default is set to 0.75 which will ensure that randomized PCAs are performed.\n", + "\n", + "Given the nature of SST data, we might lower it to something like 0.3, since\n", + "we expect that most of the variance in the data will be explained by a small\n", + "number of PC modes.\n", + "\n", + "Note that if our initial PCA modes don't hit the 90% variance target, ``xeofs``\n", + "will give a warning.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = xe.multi.CCA(\n", + " n_modes=2,\n", + " use_coslat=True,\n", + " pca=True,\n", + " variance_fraction=0.9,\n", + " init_pca_modes=0.30,\n", + ")\n", + "model.fit(data_list, dim=\"time\")\n", + "components = model.components()\n", + "scores = model.scores()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's look at the canonical loadings (components) of the first mode.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mode = 1\n", + "\n", + "central_longitudes = [\n", + " indian.lon.median().item(),\n", + " pacific.lon.median().item(),\n", + " pacific.lon.median().item(),\n", + "]\n", + "projections = [ccrs.PlateCarree(central_longitude=lon) for lon in central_longitudes]\n", + "\n", + "fig = plt.figure(figsize=(12, 2.5))\n", + "gs = GridSpec(1, 4, figure=fig, width_ratios=[2, 4, 1, 0.2])\n", + "axes = [fig.add_subplot(gs[0, i], projection=projections[i]) for i in range(3)]\n", + "cax = fig.add_subplot(1, 4, 4)\n", + "kwargs = dict(transform=ccrs.PlateCarree(), vmin=-1, vmax=1, cmap=\"RdBu_r\", cbar_ax=cax)\n", + "components[0].sel(mode=mode).plot(ax=axes[0], **kwargs)\n", + "components[1].sel(mode=mode).plot(ax=axes[1], **kwargs)\n", + "im = components[2].sel(mode=mode).plot(ax=axes[2], **kwargs)\n", + "fig.colorbar(im, cax=cax, orientation=\"vertical\")\n", + "for ax in axes:\n", + " ax.coastlines()\n", + " ax.set_title(\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And lastly, we'll check out the canonical variates (scores) of the first mode.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plt.subplots(figsize=(12, 4))\n", + "scores[0].sel(mode=mode).plot(ax=ax, label=\"Indian Ocean\")\n", + "scores[1].sel(mode=mode).plot(ax=ax, label=\"Central Pacific\")\n", + "scores[2].sel(mode=mode).plot(ax=ax, label=\"North Atlantic\")\n", + "ax.legend()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/2multi/plot_cca.py b/docs/auto_examples/3multi/plot_cca.py similarity index 99% rename from examples/2multi/plot_cca.py rename to docs/auto_examples/3multi/plot_cca.py index e57af825..70f6a68e 100644 --- a/examples/2multi/plot_cca.py +++ b/docs/auto_examples/3multi/plot_cca.py @@ -11,12 +11,12 @@ First, we'll import the necessary modules. 
""" -import xarray as xr -import xeofs as xe - +import cartopy.crs as ccrs import matplotlib.pyplot as plt +import xarray as xr from matplotlib.gridspec import GridSpec -import cartopy.crs as ccrs + +import xeofs as xe # %% # Next, we load the data and compute the SST anomalies. This removes the @@ -56,7 +56,7 @@ # Note that if our initial PCA modes don't hit the 90% variance target, ``xeofs`` # will give a warning. -model = xe.models.CCA( +model = xe.multi.CCA( n_modes=2, use_coslat=True, pca=True, diff --git a/docs/auto_examples/3multi/plot_cca.py.md5 b/docs/auto_examples/3multi/plot_cca.py.md5 new file mode 100644 index 00000000..40b748fb --- /dev/null +++ b/docs/auto_examples/3multi/plot_cca.py.md5 @@ -0,0 +1 @@ +ccd6cf17e9ee3ea88ba470a3180fe224 \ No newline at end of file diff --git a/docs/auto_examples/3multi/plot_cca.rst b/docs/auto_examples/3multi/plot_cca.rst new file mode 100644 index 00000000..7c7b6fce --- /dev/null +++ b/docs/auto_examples/3multi/plot_cca.rst @@ -0,0 +1,242 @@ + +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "auto_examples/3multi/plot_cca.py" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. only:: html + + .. note:: + :class: sphx-glr-download-link-note + + :ref:`Go to the end ` + to download the full example code + +.. rst-class:: sphx-glr-example-title + +.. _sphx_glr_auto_examples_3multi_plot_cca.py: + + +Canonical Correlation Analysis +============================== + +In this example, we're going to perform a Canonical Correlation Analysis (CCA) +on three datasets using the ERSSTv5 monthly sea surface temperature (SST) data +from 1970 to 2022. We divide this data into three areas: the Indian Ocean, +the Pacific Ocean, and the Atlantic Ocean. Our goal is to perform CCA on these +regions. + +First, we'll import the necessary modules. + +.. GENERATED FROM PYTHON SOURCE LINES 13-21 + +.. code-block:: default + + + import cartopy.crs as ccrs + import matplotlib.pyplot as plt + import xarray as xr + from matplotlib.gridspec import GridSpec + + import xeofs as xe + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 22-24 + +Next, we load the data and compute the SST anomalies. This removes the +monthly climatologies, so the seasonal cycle doesn't impact our CCA. + +.. GENERATED FROM PYTHON SOURCE LINES 24-29 + +.. code-block:: default + + + sst = xr.tutorial.load_dataset("ersstv5").sst + sst = sst.groupby("time.month") - sst.groupby("time.month").mean("time") + + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 30-31 + +Now, we define the three regions of interest and store them in a list. + +.. GENERATED FROM PYTHON SOURCE LINES 31-38 + +.. code-block:: default + + + indian = sst.sel(lon=slice(35, 115), lat=slice(30, -30)) + pacific = sst.sel(lon=slice(130, 290), lat=slice(30, -30)) + atlantic = sst.sel(lon=slice(320, 360), lat=slice(70, 10)) + + data_list = [indian, pacific, atlantic] + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 39-58 + +We now perform CCA. Since we are dealing with a high-dimensional feature space, we first +perform PCA to reduce the dimensionality (this is kind of a regularized CCA) by setting +``pca=True``. By setting the ``variance_fraction`` keyword argument, we specify that we +want to keep the number of PCA modes that explain 90% of the variance in each of the +three data sets. + +An important parameter is ``init_pca_modes``. It specifies the number +of PCA modes that are initially compute before truncating them to account for 90 %. 
If this +number is small enough, randomized PCAs will be performed instead of the full SVD decomposition +which is much faster. We can also specify ``init_pca_modes`` as a float (0 < x <= 1), +in which case the number of PCA modes is given by the fraction of the data matrix's rank +The default is set to 0.75 which will ensure that randomized PCAs are performed. + +Given the nature of SST data, we might lower it to something like 0.3, since +we expect that most of the variance in the data will be explained by a small +number of PC modes. + +Note that if our initial PCA modes don't hit the 90% variance target, ``xeofs`` +will give a warning. + +.. GENERATED FROM PYTHON SOURCE LINES 58-70 + +.. code-block:: default + + + model = xe.multi.CCA( + n_modes=2, + use_coslat=True, + pca=True, + variance_fraction=0.9, + init_pca_modes=0.30, + ) + model.fit(data_list, dim="time") + components = model.components() + scores = model.scores() + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 71-72 + +Let's look at the canonical loadings (components) of the first mode. + +.. GENERATED FROM PYTHON SOURCE LINES 72-95 + +.. code-block:: default + + + mode = 1 + + central_longitudes = [ + indian.lon.median().item(), + pacific.lon.median().item(), + pacific.lon.median().item(), + ] + projections = [ccrs.PlateCarree(central_longitude=lon) for lon in central_longitudes] + + fig = plt.figure(figsize=(12, 2.5)) + gs = GridSpec(1, 4, figure=fig, width_ratios=[2, 4, 1, 0.2]) + axes = [fig.add_subplot(gs[0, i], projection=projections[i]) for i in range(3)] + cax = fig.add_subplot(1, 4, 4) + kwargs = dict(transform=ccrs.PlateCarree(), vmin=-1, vmax=1, cmap="RdBu_r", cbar_ax=cax) + components[0].sel(mode=mode).plot(ax=axes[0], **kwargs) + components[1].sel(mode=mode).plot(ax=axes[1], **kwargs) + im = components[2].sel(mode=mode).plot(ax=axes[2], **kwargs) + fig.colorbar(im, cax=cax, orientation="vertical") + for ax in axes: + ax.coastlines() + ax.set_title("") + + + + +.. image-sg:: /auto_examples/3multi/images/sphx_glr_plot_cca_001.png + :alt: plot cca + :srcset: /auto_examples/3multi/images/sphx_glr_plot_cca_001.png + :class: sphx-glr-single-img + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 96-97 + +And lastly, we'll check out the canonical variates (scores) of the first mode. + +.. GENERATED FROM PYTHON SOURCE LINES 97-103 + +.. code-block:: default + + + fig, ax = plt.subplots(figsize=(12, 4)) + scores[0].sel(mode=mode).plot(ax=ax, label="Indian Ocean") + scores[1].sel(mode=mode).plot(ax=ax, label="Central Pacific") + scores[2].sel(mode=mode).plot(ax=ax, label="North Atlantic") + ax.legend() + + + +.. image-sg:: /auto_examples/3multi/images/sphx_glr_plot_cca_002.png + :alt: mode = 1 + :srcset: /auto_examples/3multi/images/sphx_glr_plot_cca_002.png + :class: sphx-glr-single-img + + +.. rst-class:: sphx-glr-script-out + + .. code-block:: none + + + + + + + +.. rst-class:: sphx-glr-timing + + **Total running time of the script:** (0 minutes 6.563 seconds) + + +.. _sphx_glr_download_auto_examples_3multi_plot_cca.py: + +.. only:: html + + .. container:: sphx-glr-footer sphx-glr-footer-example + + + + + .. container:: sphx-glr-download sphx-glr-download-python + + :download:`Download Python source code: plot_cca.py ` + + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_cca.ipynb ` + + +.. only:: html + + .. 
rst-class:: sphx-glr-signature + + `Gallery generated by Sphinx-Gallery `_ diff --git a/docs/auto_examples/3multi/plot_cca_codeobj.pickle b/docs/auto_examples/3multi/plot_cca_codeobj.pickle new file mode 100644 index 00000000..006138e1 Binary files /dev/null and b/docs/auto_examples/3multi/plot_cca_codeobj.pickle differ diff --git a/docs/auto_examples/3multi/sg_execution_times.rst b/docs/auto_examples/3multi/sg_execution_times.rst new file mode 100644 index 00000000..6befdc8e --- /dev/null +++ b/docs/auto_examples/3multi/sg_execution_times.rst @@ -0,0 +1,13 @@ + +:orphan: + +.. _sphx_glr_auto_examples_3multi_sg_execution_times: + + +Computation times +================= +**00:06.563** total execution time for **auto_examples_3multi** files: + ++--------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_auto_examples_3multi_plot_cca.py` (``plot_cca.py``) | 00:06.563 | 0.0 MB | ++--------------------------------------------------------------------+-----------+--------+ diff --git a/docs/auto_examples/3validation/images/sphx_glr_plot_bootstrap_001.png b/docs/auto_examples/3validation/images/sphx_glr_plot_bootstrap_001.png index 6baca248..4c38317d 100644 Binary files a/docs/auto_examples/3validation/images/sphx_glr_plot_bootstrap_001.png and b/docs/auto_examples/3validation/images/sphx_glr_plot_bootstrap_001.png differ diff --git a/docs/auto_examples/3validation/images/thumb/sphx_glr_plot_bootstrap_thumb.png b/docs/auto_examples/3validation/images/thumb/sphx_glr_plot_bootstrap_thumb.png index 6081e99f..2ca76422 100644 Binary files a/docs/auto_examples/3validation/images/thumb/sphx_glr_plot_bootstrap_thumb.png and b/docs/auto_examples/3validation/images/thumb/sphx_glr_plot_bootstrap_thumb.png differ diff --git a/docs/auto_examples/3validation/plot_bootstrap.ipynb b/docs/auto_examples/3validation/plot_bootstrap.ipynb index a9b7b574..8312c300 100644 --- a/docs/auto_examples/3validation/plot_bootstrap.ipynb +++ b/docs/auto_examples/3validation/plot_bootstrap.ipynb @@ -18,14 +18,13 @@ "outputs": [], "source": [ "# Load packages and data:\n", + "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import xarray as xr\n", - "import matplotlib.pyplot as plt\n", - "from matplotlib.gridspec import GridSpec\n", "from cartopy.crs import Orthographic, PlateCarree\n", + "from matplotlib.gridspec import GridSpec\n", "\n", - "from xeofs.models import EOF\n", - "from xeofs.validation import EOFBootstrapper" + "import xeofs as xe" ] }, { @@ -51,7 +50,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = EOF(n_modes=5, standardize=False)\n", + "model = xe.single.EOF(n_modes=5, standardize=False)\n", "model.fit(t2m, dim=\"time\")\n", "expvar = model.explained_variance_ratio()\n", "components = model.components()\n", @@ -77,7 +76,7 @@ "source": [ "n_boot = 50\n", "\n", - "bs = EOFBootstrapper(n_bootstraps=n_boot)\n", + "bs = xe.validation.EOFBootstrapper(n_bootstraps=n_boot)\n", "bs.fit(model)\n", "bs_expvar = bs.explained_variance()\n", "ci_expvar = bs_expvar.quantile([0.025, 0.975], \"n\") # 95% confidence intervals\n", @@ -174,7 +173,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/docs/auto_examples/3validation/plot_bootstrap.py b/docs/auto_examples/3validation/plot_bootstrap.py index 02cec5dc..1cce033c 100644 --- a/docs/auto_examples/3validation/plot_bootstrap.py +++ b/docs/auto_examples/3validation/plot_bootstrap.py @@ -7,14 +7,13 @@ 
""" # Load packages and data: +import matplotlib.pyplot as plt import numpy as np import xarray as xr -import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec from cartopy.crs import Orthographic, PlateCarree +from matplotlib.gridspec import GridSpec -from xeofs.models import EOF -from xeofs.validation import EOFBootstrapper +import xeofs as xe # %% @@ -23,7 +22,7 @@ # %% # Perform EOF analysis -model = EOF(n_modes=5, standardize=False) +model = xe.single.EOF(n_modes=5, standardize=False) model.fit(t2m, dim="time") expvar = model.explained_variance_ratio() components = model.components() @@ -38,7 +37,7 @@ n_boot = 50 -bs = EOFBootstrapper(n_bootstraps=n_boot) +bs = xe.validation.EOFBootstrapper(n_bootstraps=n_boot) bs.fit(model) bs_expvar = bs.explained_variance() ci_expvar = bs_expvar.quantile([0.025, 0.975], "n") # 95% confidence intervals diff --git a/docs/auto_examples/3validation/plot_bootstrap.py.md5 b/docs/auto_examples/3validation/plot_bootstrap.py.md5 index 8f63d45c..c683ebe5 100644 --- a/docs/auto_examples/3validation/plot_bootstrap.py.md5 +++ b/docs/auto_examples/3validation/plot_bootstrap.py.md5 @@ -1 +1 @@ -e9826d4566fc7f2d94555e72a94c50c8 \ No newline at end of file +dcdf2653c5cb47f1d8828948c6fdda17 \ No newline at end of file diff --git a/docs/auto_examples/3validation/plot_bootstrap.rst b/docs/auto_examples/3validation/plot_bootstrap.rst index e77e8f63..03348627 100644 --- a/docs/auto_examples/3validation/plot_bootstrap.rst +++ b/docs/auto_examples/3validation/plot_bootstrap.rst @@ -24,20 +24,19 @@ Significance testing of EOF analysis via bootstrap Test the significance of individual modes and obtain confidence intervals for both EOFs and PCs. -.. GENERATED FROM PYTHON SOURCE LINES 8-19 +.. GENERATED FROM PYTHON SOURCE LINES 8-18 -.. code-block:: Python +.. code-block:: default # Load packages and data: + import matplotlib.pyplot as plt import numpy as np import xarray as xr - import matplotlib.pyplot as plt - from matplotlib.gridspec import GridSpec from cartopy.crs import Orthographic, PlateCarree + from matplotlib.gridspec import GridSpec - from xeofs.models import EOF - from xeofs.validation import EOFBootstrapper + import xeofs as xe @@ -46,9 +45,9 @@ for both EOFs and PCs. -.. GENERATED FROM PYTHON SOURCE LINES 20-23 +.. GENERATED FROM PYTHON SOURCE LINES 19-22 -.. code-block:: Python +.. code-block:: default t2m = xr.tutorial.load_dataset("air_temperature")["air"] @@ -60,16 +59,16 @@ for both EOFs and PCs. -.. GENERATED FROM PYTHON SOURCE LINES 24-25 +.. GENERATED FROM PYTHON SOURCE LINES 23-24 Perform EOF analysis -.. GENERATED FROM PYTHON SOURCE LINES 25-33 +.. GENERATED FROM PYTHON SOURCE LINES 24-32 -.. code-block:: Python +.. code-block:: default - model = EOF(n_modes=5, standardize=False) + model = xe.single.EOF(n_modes=5, standardize=False) model.fit(t2m, dim="time") expvar = model.explained_variance_ratio() components = model.components() @@ -83,21 +82,21 @@ Perform EOF analysis -.. GENERATED FROM PYTHON SOURCE LINES 34-38 +.. GENERATED FROM PYTHON SOURCE LINES 33-37 Perform bootstrapping of the model to identy the number of significant modes. We perform 50 bootstraps. Note - if computationallly feasible - you typically want to choose higher numbers of bootstraps e.g. 1000. -.. GENERATED FROM PYTHON SOURCE LINES 38-55 +.. GENERATED FROM PYTHON SOURCE LINES 37-54 -.. code-block:: Python +.. 
code-block:: default n_boot = 50 - bs = EOFBootstrapper(n_bootstraps=n_boot) + bs = xe.validation.EOFBootstrapper(n_bootstraps=n_boot) bs.fit(model) bs_expvar = bs.explained_variance() ci_expvar = bs_expvar.quantile([0.025, 0.975], "n") # 95% confidence intervals @@ -119,21 +118,21 @@ numbers of bootstraps e.g. 1000. .. code-block:: none - 0%| | 0/50 [00:00` + .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_bootstrap.py ` + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_bootstrap.ipynb ` + .. only:: html diff --git a/docs/auto_examples/3validation/plot_bootstrap_codeobj.pickle b/docs/auto_examples/3validation/plot_bootstrap_codeobj.pickle index 1285dc7e..106ffc34 100644 Binary files a/docs/auto_examples/3validation/plot_bootstrap_codeobj.pickle and b/docs/auto_examples/3validation/plot_bootstrap_codeobj.pickle differ diff --git a/docs/auto_examples/3validation/sg_execution_times.rst b/docs/auto_examples/3validation/sg_execution_times.rst index fafa61f1..428a3ef4 100644 --- a/docs/auto_examples/3validation/sg_execution_times.rst +++ b/docs/auto_examples/3validation/sg_execution_times.rst @@ -6,32 +6,8 @@ Computation times ================= -**00:16.004** total execution time for 1 file **from auto_examples/3validation**: +**00:46.918** total execution time for **auto_examples_3validation** files: -.. container:: - - .. raw:: html - - - - - - - - .. list-table:: - :header-rows: 1 - :class: table table-striped sg-datatable - - * - Example - - Time - - Mem (MB) - * - :ref:`sphx_glr_auto_examples_3validation_plot_bootstrap.py` (``plot_bootstrap.py``) - - 00:16.004 - - 0.0 ++-------------------------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_auto_examples_3validation_plot_bootstrap.py` (``plot_bootstrap.py``) | 00:46.918 | 0.0 MB | ++-------------------------------------------------------------------------------------+-----------+--------+ diff --git a/docs/auto_examples/4validation/images/sphx_glr_plot_bootstrap_001.png b/docs/auto_examples/4validation/images/sphx_glr_plot_bootstrap_001.png new file mode 100644 index 00000000..598a4f1a Binary files /dev/null and b/docs/auto_examples/4validation/images/sphx_glr_plot_bootstrap_001.png differ diff --git a/docs/auto_examples/4validation/images/thumb/sphx_glr_plot_bootstrap_thumb.png b/docs/auto_examples/4validation/images/thumb/sphx_glr_plot_bootstrap_thumb.png new file mode 100644 index 00000000..35eae974 Binary files /dev/null and b/docs/auto_examples/4validation/images/thumb/sphx_glr_plot_bootstrap_thumb.png differ diff --git a/docs/auto_examples/4validation/index.rst b/docs/auto_examples/4validation/index.rst new file mode 100644 index 00000000..39e779b7 --- /dev/null +++ b/docs/auto_examples/4validation/index.rst @@ -0,0 +1,40 @@ + + +.. _sphx_glr_auto_examples_4validation: + +4 | Validation +=============== + + +.. raw:: html + +
    + + +.. raw:: html + +
    + +.. only:: html + + .. image:: /auto_examples/4validation/images/thumb/sphx_glr_plot_bootstrap_thumb.png + :alt: + + :ref:`sphx_glr_auto_examples_4validation_plot_bootstrap.py` + +.. raw:: html + +
    Significance testing of EOF analysis via bootstrap
    +
    + + +.. raw:: html + +
    + + +.. toctree:: + :hidden: + + /auto_examples/4validation/plot_bootstrap + diff --git a/docs/auto_examples/4validation/plot_bootstrap.ipynb b/docs/auto_examples/4validation/plot_bootstrap.ipynb new file mode 100644 index 00000000..8312c300 --- /dev/null +++ b/docs/auto_examples/4validation/plot_bootstrap.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Significance testing of EOF analysis via bootstrap\n", + "\n", + "Test the significance of individual modes and obtain confidence intervals\n", + "for both EOFs and PCs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load packages and data:\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import xarray as xr\n", + "from cartopy.crs import Orthographic, PlateCarree\n", + "from matplotlib.gridspec import GridSpec\n", + "\n", + "import xeofs as xe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "t2m = xr.tutorial.load_dataset(\"air_temperature\")[\"air\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Perform EOF analysis\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = xe.single.EOF(n_modes=5, standardize=False)\n", + "model.fit(t2m, dim=\"time\")\n", + "expvar = model.explained_variance_ratio()\n", + "components = model.components()\n", + "scores = model.scores()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Perform bootstrapping of the model to identy the number of significant modes.\n", + "We perform 50 bootstraps.\n", + "Note - if computationallly feasible - you typically want to choose higher\n", + "numbers of bootstraps e.g. 1000.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_boot = 50\n", + "\n", + "bs = xe.validation.EOFBootstrapper(n_bootstraps=n_boot)\n", + "bs.fit(model)\n", + "bs_expvar = bs.explained_variance()\n", + "ci_expvar = bs_expvar.quantile([0.025, 0.975], \"n\") # 95% confidence intervals\n", + "\n", + "q025 = ci_expvar.sel(quantile=0.025)\n", + "q975 = ci_expvar.sel(quantile=0.975)\n", + "\n", + "is_significant = q025 - q975.shift({\"mode\": -1}) > 0\n", + "n_significant_modes = (\n", + " is_significant.where(is_significant is True).cumsum(skipna=False).max().fillna(0)\n", + ")\n", + "print(\"{:} modes are significant at alpha=0.05\".format(n_significant_modes.values))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The bootstrapping procedure identifies 3 significant modes. 
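# A minimal, self-contained sketch (with invented numbers, not values taken from
# the run above) of the significance criterion used here: mode i counts as
# significant when its lower confidence bound exceeds the upper bound of the
# next mode, and we count the leading run of significant modes.
import numpy as np

q025_demo = np.array([30.0, 12.0, 8.0, 5.0, 4.5])  # hypothetical lower CI bounds (%)
q975_demo = np.array([35.0, 14.0, 9.0, 5.5, 5.0])  # hypothetical upper CI bounds (%)

is_sig = q025_demo[:-1] > q975_demo[1:]  # compare each mode with its successor
n_sig = int(is_sig.sum()) if is_sig.all() else int(np.argmin(is_sig))
print(f"{n_sig} modes are significant at alpha=0.05")  # -> 3 with these numbers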
We can also\n", + "compute the 95 % confidence intervals of the EOFs/PCs and mask out\n", + "insignificant elements of the obtained EOFs.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ci_components = bs.components().quantile([0.025, 0.975], \"n\")\n", + "ci_scores = bs.scores().quantile([0.025, 0.975], \"n\")\n", + "\n", + "is_sig_comps = np.sign(ci_components).prod(\"quantile\") > 0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Summarize the results in a figure.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lons, lats = np.meshgrid(is_sig_comps.lon.values, is_sig_comps.lat.values)\n", + "proj = Orthographic(central_latitude=30, central_longitude=-80)\n", + "kwargs = {\"cmap\": \"RdBu\", \"vmin\": -0.05, \"vmax\": 0.05, \"transform\": PlateCarree()}\n", + "\n", + "fig = plt.figure(figsize=(10, 16))\n", + "gs = GridSpec(5, 2)\n", + "ax1 = [fig.add_subplot(gs[i, 0], projection=proj) for i in range(5)]\n", + "ax2 = [fig.add_subplot(gs[i, 1]) for i in range(5)]\n", + "\n", + "for i, (a1, a2) in enumerate(zip(ax1, ax2)):\n", + " a1.coastlines(color=\".5\")\n", + " components.isel(mode=i).plot(ax=a1, **kwargs)\n", + " a1.scatter(\n", + " lons,\n", + " lats,\n", + " is_sig_comps.isel(mode=i).values * 0.5,\n", + " color=\"k\",\n", + " alpha=0.5,\n", + " transform=PlateCarree(),\n", + " )\n", + " ci_scores.isel(mode=i, quantile=0).plot(ax=a2, color=\".3\", lw=\".5\", label=\"2.5%\")\n", + " ci_scores.isel(mode=i, quantile=1).plot(ax=a2, color=\".3\", lw=\".5\", label=\"97.5%\")\n", + " scores.isel(mode=i).plot(ax=a2, lw=\".5\", alpha=0.5, label=\"PC\")\n", + " a2.legend(loc=2)\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig(\"bootstrap.jpg\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/3validation/plot_bootstrap.py b/docs/auto_examples/4validation/plot_bootstrap.py similarity index 94% rename from examples/3validation/plot_bootstrap.py rename to docs/auto_examples/4validation/plot_bootstrap.py index 02cec5dc..1cce033c 100644 --- a/examples/3validation/plot_bootstrap.py +++ b/docs/auto_examples/4validation/plot_bootstrap.py @@ -7,14 +7,13 @@ """ # Load packages and data: +import matplotlib.pyplot as plt import numpy as np import xarray as xr -import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec from cartopy.crs import Orthographic, PlateCarree +from matplotlib.gridspec import GridSpec -from xeofs.models import EOF -from xeofs.validation import EOFBootstrapper +import xeofs as xe # %% @@ -23,7 +22,7 @@ # %% # Perform EOF analysis -model = EOF(n_modes=5, standardize=False) +model = xe.single.EOF(n_modes=5, standardize=False) model.fit(t2m, dim="time") expvar = model.explained_variance_ratio() components = model.components() @@ -38,7 +37,7 @@ n_boot = 50 -bs = EOFBootstrapper(n_bootstraps=n_boot) +bs = xe.validation.EOFBootstrapper(n_bootstraps=n_boot) bs.fit(model) bs_expvar = bs.explained_variance() ci_expvar = bs_expvar.quantile([0.025, 0.975], "n") # 95% confidence intervals diff --git 
a/docs/auto_examples/4validation/plot_bootstrap.py.md5 b/docs/auto_examples/4validation/plot_bootstrap.py.md5 new file mode 100644 index 00000000..c683ebe5 --- /dev/null +++ b/docs/auto_examples/4validation/plot_bootstrap.py.md5 @@ -0,0 +1 @@ +dcdf2653c5cb47f1d8828948c6fdda17 \ No newline at end of file diff --git a/docs/auto_examples/4validation/plot_bootstrap.rst b/docs/auto_examples/4validation/plot_bootstrap.rst new file mode 100644 index 00000000..6c7208f1 --- /dev/null +++ b/docs/auto_examples/4validation/plot_bootstrap.rst @@ -0,0 +1,228 @@ + +.. DO NOT EDIT. +.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. +.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: +.. "auto_examples/4validation/plot_bootstrap.py" +.. LINE NUMBERS ARE GIVEN BELOW. + +.. only:: html + + .. note:: + :class: sphx-glr-download-link-note + + :ref:`Go to the end ` + to download the full example code + +.. rst-class:: sphx-glr-example-title + +.. _sphx_glr_auto_examples_4validation_plot_bootstrap.py: + + +Significance testing of EOF analysis via bootstrap +=================================================== + +Test the significance of individual modes and obtain confidence intervals +for both EOFs and PCs. + +.. GENERATED FROM PYTHON SOURCE LINES 8-18 + +.. code-block:: default + + + # Load packages and data: + import matplotlib.pyplot as plt + import numpy as np + import xarray as xr + from cartopy.crs import Orthographic, PlateCarree + from matplotlib.gridspec import GridSpec + + import xeofs as xe + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 19-22 + +.. code-block:: default + + + t2m = xr.tutorial.load_dataset("air_temperature")["air"] + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 23-24 + +Perform EOF analysis + +.. GENERATED FROM PYTHON SOURCE LINES 24-32 + +.. code-block:: default + + + model = xe.single.EOF(n_modes=5, standardize=False) + model.fit(t2m, dim="time") + expvar = model.explained_variance_ratio() + components = model.components() + scores = model.scores() + + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 33-37 + +Perform bootstrapping of the model to identy the number of significant modes. +We perform 50 bootstraps. +Note - if computationallly feasible - you typically want to choose higher +numbers of bootstraps e.g. 1000. + +.. GENERATED FROM PYTHON SOURCE LINES 37-54 + +.. code-block:: default + + + n_boot = 50 + + bs = xe.validation.EOFBootstrapper(n_bootstraps=n_boot) + bs.fit(model) + bs_expvar = bs.explained_variance() + ci_expvar = bs_expvar.quantile([0.025, 0.975], "n") # 95% confidence intervals + + q025 = ci_expvar.sel(quantile=0.025) + q975 = ci_expvar.sel(quantile=0.975) + + is_significant = q025 - q975.shift({"mode": -1}) > 0 + n_significant_modes = ( + is_significant.where(is_significant is True).cumsum(skipna=False).max().fillna(0) + ) + print("{:} modes are significant at alpha=0.05".format(n_significant_modes.values)) + + + + + +.. rst-class:: sphx-glr-script-out + + .. code-block:: none + + 0%| | 0/50 [00:00 0 + + + + + + + + + +.. GENERATED FROM PYTHON SOURCE LINES 66-67 + +Summarize the results in a figure. + +.. GENERATED FROM PYTHON SOURCE LINES 67-96 + +.. 
code-block:: default + + + + lons, lats = np.meshgrid(is_sig_comps.lon.values, is_sig_comps.lat.values) + proj = Orthographic(central_latitude=30, central_longitude=-80) + kwargs = {"cmap": "RdBu", "vmin": -0.05, "vmax": 0.05, "transform": PlateCarree()} + + fig = plt.figure(figsize=(10, 16)) + gs = GridSpec(5, 2) + ax1 = [fig.add_subplot(gs[i, 0], projection=proj) for i in range(5)] + ax2 = [fig.add_subplot(gs[i, 1]) for i in range(5)] + + for i, (a1, a2) in enumerate(zip(ax1, ax2)): + a1.coastlines(color=".5") + components.isel(mode=i).plot(ax=a1, **kwargs) + a1.scatter( + lons, + lats, + is_sig_comps.isel(mode=i).values * 0.5, + color="k", + alpha=0.5, + transform=PlateCarree(), + ) + ci_scores.isel(mode=i, quantile=0).plot(ax=a2, color=".3", lw=".5", label="2.5%") + ci_scores.isel(mode=i, quantile=1).plot(ax=a2, color=".3", lw=".5", label="97.5%") + scores.isel(mode=i).plot(ax=a2, lw=".5", alpha=0.5, label="PC") + a2.legend(loc=2) + + plt.tight_layout() + plt.savefig("bootstrap.jpg") + + + +.. image-sg:: /auto_examples/4validation/images/sphx_glr_plot_bootstrap_001.png + :alt: mode = 1, mode = 2, mode = 3, mode = 4, mode = 5, mode = 1, mode = 2, mode = 3, mode = 4, mode = 5 + :srcset: /auto_examples/4validation/images/sphx_glr_plot_bootstrap_001.png + :class: sphx-glr-single-img + + + + + + +.. rst-class:: sphx-glr-timing + + **Total running time of the script:** (0 minutes 32.065 seconds) + + +.. _sphx_glr_download_auto_examples_4validation_plot_bootstrap.py: + +.. only:: html + + .. container:: sphx-glr-footer sphx-glr-footer-example + + + + + .. container:: sphx-glr-download sphx-glr-download-python + + :download:`Download Python source code: plot_bootstrap.py ` + + .. container:: sphx-glr-download sphx-glr-download-jupyter + + :download:`Download Jupyter notebook: plot_bootstrap.ipynb ` + + +.. only:: html + + .. rst-class:: sphx-glr-signature + + `Gallery generated by Sphinx-Gallery `_ diff --git a/docs/auto_examples/4validation/plot_bootstrap_codeobj.pickle b/docs/auto_examples/4validation/plot_bootstrap_codeobj.pickle new file mode 100644 index 00000000..a89e7116 Binary files /dev/null and b/docs/auto_examples/4validation/plot_bootstrap_codeobj.pickle differ diff --git a/docs/auto_examples/4validation/sg_execution_times.rst b/docs/auto_examples/4validation/sg_execution_times.rst new file mode 100644 index 00000000..ef197d4f --- /dev/null +++ b/docs/auto_examples/4validation/sg_execution_times.rst @@ -0,0 +1,13 @@ + +:orphan: + +.. 
_sphx_glr_auto_examples_4validation_sg_execution_times: + + +Computation times +================= +**00:32.065** total execution time for **auto_examples_4validation** files: + ++-------------------------------------------------------------------------------------+-----------+--------+ +| :ref:`sphx_glr_auto_examples_4validation_plot_bootstrap.py` (``plot_bootstrap.py``) | 00:32.065 | 0.0 MB | ++-------------------------------------------------------------------------------------+-----------+--------+ diff --git a/docs/auto_examples/auto_examples_jupyter.zip b/docs/auto_examples/auto_examples_jupyter.zip index 71301183..fbfeb644 100644 Binary files a/docs/auto_examples/auto_examples_jupyter.zip and b/docs/auto_examples/auto_examples_jupyter.zip differ diff --git a/docs/auto_examples/auto_examples_python.zip b/docs/auto_examples/auto_examples_python.zip index 7d1491ba..601f75f1 100644 Binary files a/docs/auto_examples/auto_examples_python.zip and b/docs/auto_examples/auto_examples_python.zip differ diff --git a/docs/auto_examples/index.rst b/docs/auto_examples/index.rst index 42a470f7..1b3b606f 100644 --- a/docs/auto_examples/index.rst +++ b/docs/auto_examples/index.rst @@ -43,6 +43,23 @@ Here you can find some examples of how to use the library. +.. raw:: html + +
    + +.. only:: html + + .. image:: /auto_examples/1single/images/thumb/sphx_glr_plot_complex_eof_thumb.png + :alt: + + :ref:`sphx_glr_auto_examples_1single_plot_complex_eof.py` + +.. raw:: html + +
    Complex EOF analysis
    +
    + + .. raw:: html
    @@ -62,18 +79,18 @@ Here you can find some examples of how to use the library. .. raw:: html -
    +
    .. only:: html - .. image:: /auto_examples/1single/images/thumb/sphx_glr_plot_complex_eof_thumb.png + .. image:: /auto_examples/1single/images/thumb/sphx_glr_plot_hilbert_eof_thumb.png :alt: - :ref:`sphx_glr_auto_examples_1single_plot_complex_eof.py` + :ref:`sphx_glr_auto_examples_1single_plot_hilbert_eof.py` .. raw:: html -
    Complex/Hilbert EOF analysis
    +
    Hilbert EOF analysis
    @@ -200,7 +217,7 @@ Here you can find some examples of how to use the library.
    -2 | Multi-Set Analysis +2 | Cross-Set Analysis ======================== @@ -212,52 +229,66 @@ Here you can find some examples of how to use the library. .. raw:: html -
    +
    .. only:: html - .. image:: /auto_examples/2multi/images/thumb/sphx_glr_plot_cca_thumb.png + .. image:: /auto_examples/2cross/images/thumb/sphx_glr_plot_mca_thumb.png :alt: - :ref:`sphx_glr_auto_examples_2multi_plot_cca.py` + :ref:`sphx_glr_auto_examples_2cross_plot_mca.py` .. raw:: html -
    Canonical Correlation Analysis
    +
    Maximum Covariance Analysis
    .. raw:: html -
    +
    .. only:: html - .. image:: /auto_examples/2multi/images/thumb/sphx_glr_plot_mca_thumb.png + .. image:: /auto_examples/2cross/images/thumb/sphx_glr_plot_rotated_mca_thumb.png :alt: - :ref:`sphx_glr_auto_examples_2multi_plot_mca.py` + :ref:`sphx_glr_auto_examples_2cross_plot_rotated_mca.py` .. raw:: html -
    Maximum Covariance Analysis
    +
    Rotated Maximum Covariance Analysis
    .. raw:: html -
    +
    + +3 | Multi-Set Analysis +======================== + + + +.. raw:: html + +
    + + +.. raw:: html + +
    .. only:: html - .. image:: /auto_examples/2multi/images/thumb/sphx_glr_plot_rotated_mca_thumb.png + .. image:: /auto_examples/3multi/images/thumb/sphx_glr_plot_cca_thumb.png :alt: - :ref:`sphx_glr_auto_examples_2multi_plot_rotated_mca.py` + :ref:`sphx_glr_auto_examples_3multi_plot_cca.py` .. raw:: html -
    Rotated Maximum Covariance Analysis
    +
    Canonical Correlation Analysis
    @@ -265,7 +296,7 @@ Here you can find some examples of how to use the library.
    -3 | Validation +4 | Validation =============== @@ -280,10 +311,10 @@ Here you can find some examples of how to use the library. .. only:: html - .. image:: /auto_examples/3validation/images/thumb/sphx_glr_plot_bootstrap_thumb.png + .. image:: /auto_examples/4validation/images/thumb/sphx_glr_plot_bootstrap_thumb.png :alt: - :ref:`sphx_glr_auto_examples_3validation_plot_bootstrap.py` + :ref:`sphx_glr_auto_examples_4validation_plot_bootstrap.py` .. raw:: html @@ -302,8 +333,9 @@ Here you can find some examples of how to use the library. /auto_examples/1single/index.rst - /auto_examples/2multi/index.rst - /auto_examples/3validation/index.rst + /auto_examples/2cross/index.rst + /auto_examples/3multi/index.rst + /auto_examples/4validation/index.rst .. only:: html diff --git a/docs/conf.py b/docs/conf.py index 69447cbe..24f9ae73 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -50,7 +50,7 @@ # ones. extensions = [ "sphinx.ext.napoleon", - "sphinx.ext.autodoc", + # "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx_gallery.gen_gallery", "sphinx_design", @@ -60,6 +60,11 @@ ] autosummary_generate = True # Turn on sphinx.ext.autosummary + +add_module_names = False +toc_object_entries_show_parents = "hide" +show_title_parents = False + # Sphinx-gallery stuff sphinx_gallery_conf = { "examples_dirs": "../examples", # path to your example scripts @@ -225,3 +230,40 @@ # A list of files that should not be packed into the epub file. epub_exclude_files = ["search.html"] + + +# Remove parents from titles in all .rst files +def shorten_titles(app): + # Recursively crawl through source directory and shorten titles in .rst files + def crawl_source_shorten_titles(path): + # List files in directory + for file_name in os.listdir(path): + # Build path to file + file_path = os.path.join(path, file_name) + + # Recursively crawl to next directory level + if os.path.isdir(file_path): + crawl_source_shorten_titles(file_path) + + # Modify .rst source file title + else: + _, extension = os.path.splitext(file_path) + if extension == ".rst": + # Read file, modify title, write back to file + with open(file_path, "r") as file: + lines = file.readlines() + lines[0] = lines[0].split(".")[-1] + lines[1] = ("=" * (len(lines[0]) - 1)) + "\n" + print(f"Shortened title in {file_path}") + with open(file_path, "w") as file: + file.writelines(lines) + + path = os.path.join(os.path.abspath(".."), "docs/api_reference/_autosummary") + crawl_source_shorten_titles(path) + + +# Connect to sphinx events (see https://www.sphinx-doc.org/en/master/extdev/event_callbacks.html#events) +# to shorten titles in all .rst files after the sphinx autosummary extension has run +# (otherwise the titles would be overwritten by the autosummary extension) +def setup(app): + app.connect("builder-inited", shorten_titles) diff --git a/docs/model_pca.ipynb b/docs/model_pca.ipynb index 1950b0d8..10a7854c 100644 --- a/docs/model_pca.ipynb +++ b/docs/model_pca.ipynb @@ -32,11 +32,11 @@ "from matplotlib.gridspec import GridSpec\n", "import cartopy.feature as cfeature\n", "from cartopy.crs import LambertAzimuthalEqualArea, PlateCarree\n", - "from xeofs.models import EOF\n", + "import xeofs as xe\n", "\n", "t2m = xr.tutorial.load_dataset(\"air_temperature\")[\"air\"]\n", "\n", - "model = EOF(n_modes=20, standardize=True, use_coslat=True)\n", + "model = xe.single.EOF(n_modes=20, standardize=True, use_coslat=True)\n", "model.fit(t2m, dim=\"time\")\n", "\n", "expvar = model.explained_variance_ratio()\n", diff --git a/docs/model_pca.md b/docs/model_pca.md index 
aa3d44cf..cb8e6175 100644 --- a/docs/model_pca.md +++ b/docs/model_pca.md @@ -20,7 +20,7 @@ import cartopy.feature as cfeature import ipywidgets as widgets from IPython.display import display from cartopy.crs import LambertAzimuthalEqualArea, PlateCarree -from xeofs.models import EOF +from xeofs.single import EOF t2m = xr.tutorial.load_dataset('air_temperature')['air'] diff --git a/docs/perf/xeofs_timings.py b/docs/perf/xeofs_timings.py index 9898a233..c06f11c8 100644 --- a/docs/perf/xeofs_timings.py +++ b/docs/perf/xeofs_timings.py @@ -1,11 +1,13 @@ # %% -import numpy as np +import timeit + +import dask import eofs -import xeofs as xe +import numpy as np import xarray as xr -import dask from tqdm import tqdm -import timeit + +import xeofs as xe # %% @@ -14,7 +16,7 @@ def fit_eofs(X, n_modes=2): def fit_xeofs(X, n_modes=2): - model = xe.models.EOF(n_modes=n_modes, random_state=5) + model = xe.single.EOF(n_modes=n_modes, random_state=5) model.fit(X, dim="time") diff --git a/docs/sphinx_custom_filters.py b/docs/sphinx_custom_filters.py new file mode 100644 index 00000000..76c96012 --- /dev/null +++ b/docs/sphinx_custom_filters.py @@ -0,0 +1,7 @@ +def basename(fullname): + """Extract the basename from a full class path.""" + return fullname.split(".")[-1] + + +def setup(app): + app.builder.templates.environment.filters["basename"] = basename diff --git a/docs/user_guide/core_functionalities/dask_support.rst b/docs/user_guide/core_functionalities/dask_support.rst index 6d7e3b73..f3151cb7 100644 --- a/docs/user_guide/core_functionalities/dask_support.rst +++ b/docs/user_guide/core_functionalities/dask_support.rst @@ -37,7 +37,7 @@ then be evaluated later using ``.compute()``. import numpy as np import xarray as xr - from xeofs.models import EOF, EOFRotator + from xeofs.single import EOF, EOFRotator data = xr.DataArray( da.random.random((5000, 720, 360), chunks=(100, 100, 100)), diff --git a/docs/user_guide/core_functionalities/model_serialization.rst b/docs/user_guide/core_functionalities/model_serialization.rst index 3dd425aa..2ab012de 100644 --- a/docs/user_guide/core_functionalities/model_serialization.rst +++ b/docs/user_guide/core_functionalities/model_serialization.rst @@ -7,7 +7,7 @@ fitted models to a portable format. .. code-block:: python - from xeofs.models import EOF + from xeofs.single import EOF model = EOF() model.fit(data, dim="time") diff --git a/docs/user_guide/model_implementation.rst b/docs/user_guide/model_implementation.rst index d9c0b7f6..b38eda0c 100644 --- a/docs/user_guide/model_implementation.rst +++ b/docs/user_guide/model_implementation.rst @@ -4,7 +4,8 @@ Implement Your Own Model The xeofs package has been designed with modularity in mind, allowing you to seamlessly incorporate new methods. For instance, if you'd like to introduce a new dimensionality reduction technique named ``MyModel``, -you can achieve this by inheriting the ``_BaseModel`` class and implementing its ``_fit_algorithm()`` method. +you can achieve this by inheriting of either the ``BaseModelSingleSet`` or ``BaseModelCrossSet`` class and +implementing its ``_fit_algorithm()`` method. Here's a detailed walkthrough on how to incorporate a new model: @@ -12,17 +13,17 @@ Here's a detailed walkthrough on how to incorporate a new model: 1. Inherit the BaseModel -------------------------------------------- -Your new model should inherit from the `_BaseModel` class. This abstract base class enables +Your new model should inherit from the `BaseModel` class. 
This abstract base class enables the transformation of any ND xarray object into a 2D ``xarray.DataArray`` with dimensions (sample, feature) and back. Additionally, it grants access to handy parameters like ``n_modes``, ``standardize``, and ``use_coslat``. .. code-block:: python - from xeofs.models._base_model import _BaseModel - from xeofs.models.decomposer import Decomposer + from xeofs.single.base_model_single_set import BaseModelSingleSet + from xeofs.linalg.decomposer import Decomposer - class MyModel(_BaseModel): + class MyModel(BaseModelSingleSet): def __init__(self, **kwargs): super().__init__(**kwargs) diff --git a/docs/user_guide/quickstart.ipynb b/docs/user_guide/quickstart.ipynb index 440bfd1d..347c63a3 100644 --- a/docs/user_guide/quickstart.ipynb +++ b/docs/user_guide/quickstart.ipynb @@ -70,7 +70,7 @@ "metadata": {}, "outputs": [], "source": [ - "model = xe.models.EOF(use_coslat=True)" + "model = xe.single.EOF(use_coslat=True)" ] }, { diff --git a/examples/1single/eof-tmode.jpg b/examples/1single/eof-tmode.jpg index 3e005db8..0a412a30 100644 Binary files a/examples/1single/eof-tmode.jpg and b/examples/1single/eof-tmode.jpg differ diff --git a/examples/1single/mreof-analysis.jpg b/examples/1single/mreof-analysis.jpg index a75b5fbb..712e17dd 100644 Binary files a/examples/1single/mreof-analysis.jpg and b/examples/1single/mreof-analysis.jpg differ diff --git a/examples/1single/multivariate-eof-analysis.jpg b/examples/1single/multivariate-eof-analysis.jpg index 18a30b92..0dabaf80 100644 Binary files a/examples/1single/multivariate-eof-analysis.jpg and b/examples/1single/multivariate-eof-analysis.jpg differ diff --git a/examples/1single/plot_complex_eof.py b/examples/1single/plot_complex_eof.py index b2dc8b86..1c07cb13 100644 --- a/examples/1single/plot_complex_eof.py +++ b/examples/1single/plot_complex_eof.py @@ -1,108 +1,69 @@ """ -Hilbert EOF analysis +Complex EOF analysis ============================================ -We demonstrate how to execute a Hilbert EOF analysis [1]_ [2]_ [3]_. -This method extends traditional EOF analysis into the complex domain, allowing -the EOF components to have real and imaginary parts. This capability can reveal -oscillatory patterns in datasets, which are common in Earth observations. -For example, beyond typical examples like seasonal cycles, you can think of -internal waves in the ocean, or the Quasi-Biennial Oscillation in the atmosphere. - -Using monthly sea surface temperature data from 1970 to 2021 as an example, we -highlight the method's key features and address edge effects as a common challenge. - -.. [1] Rasmusson, E. M., Arkin, P. A., Chen, W.-Y. & Jalickee, J. B. Biennial variations in surface temperature over the United States as revealed by singular decomposition. Monthly Weather Review 109, 587–598 (1981). -.. [2] Barnett, T. P. Interaction of the Monsoon and Pacific Trade Wind System at Interannual Time Scales Part I: The Equatorial Zone. Monthly Weather Review 111, 756–773 (1983). -.. [3] Horel, J. Complex Principal Component Analysis: Theory and Examples. J. Climate Appl. Meteor. 23, 1660–1673 (1984). +In this tutorial, we'll walk through how to perform a Complex EOF analysis on +the zonal and meridional wind components. 
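# A quick illustrative aside (toy values, not part of this example): packing the
# two wind components into a single complex number Z = u + i*v means that the
# modulus of Z is the wind speed and its argument the wind direction, which is
# what allows a complex-valued EOF analysis to capture both fields at once.
import numpy as np

u_demo, v_demo = 3.0, 4.0
Z_demo = u_demo + 1j * v_demo
print(abs(Z_demo))        # 5.0 -> wind speed
print(np.angle(Z_demo))   # ~0.93 rad -> direction, counter-clockwise from east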
Let's start by importing the necessary packages and loading the data: """ # %% +import matplotlib.pyplot as plt import xarray as xr import xeofs as xe xr.set_options(display_expand_attrs=False) -sst = xr.tutorial.open_dataset("ersstv5").sst -sst - -# %% -# We fit the Hilbert EOF model directly to the raw data, retaining the seasonal -# cycle for study. The model initialization specifies the desired number of -# modes. The ``use_coslat`` parameter is set to ``True`` to adjust for grid -# convergence at the poles. While the ``HilbertEOF`` class offers padding options -# to mitigate potential edge effects, we'll begin with no padding. - -kwargs = dict(n_modes=4, use_coslat=True, random_state=7) -model = xe.models.HilbertEOF(padding="none", **kwargs) - # %% -# Now, we fit the model to the data and extract the explained variance. +# For this example, we'll use the ERA-Interim tutorial dataset ``eraint_uvz``: -model.fit(sst, dim="time") -expvar = model.explained_variance() -expvar_ratio = model.explained_variance_ratio() +uvz = xr.tutorial.open_dataset("eraint_uvz") +uvz # %% -# Let's have a look at the explained variance of the first five modes: - -expvar.round(0) +# This dataset contains the zonal, meridional, and vertical wind components at +# three different atmospheric levels. Note that the data only covers two months, +# so we have just two time steps (samples). While this isn't enough for a robust +# EOF analysis, we'll proceed for demonstration purposes. Now, let's combine the +# zonal (``u``) and meridional (``v``) wind components into a complex-valued +# dataset: +Z = uvz["u"] + 1j * uvz["v"] # %% -# Clearly, the first mode completely dominates and already explains a substantial amount of variance. -# If we look at the fraction of explained variance, we see that the first mode explains about 88.8 %. +# Next, we'll initialize and fit the ``ComplexEOF`` model to our data. The +# ``xeofs`` package makes this easy by allowing us to specify the sample +# dimension (``month``), automatically performing the Complex EOF analysis +# across all three atmospheric levels. As a standard practice, we'll also weigh +# each grid cell by the square root of the cosine of the latitude +# (``use_coslat=True``). -(expvar_ratio * 100).round(1) +model = xe.single.ComplexEOF(n_modes=1, use_coslat=True, random_state=7) +model.fit(Z, dim="month") # %% -# In comparison to standard EOF analysis (check the corresponding example, -# S-mode), the first complex mode seems to integrate the first two standard -# modes in terms of explained variance. -# This makes sense as the two modes in standard EOF are both showing parts of -# an annual cycle (which are in quadrature) and thus the complex mode combines both of them. -# Let's confirm our hypothesis by looking at the real part the complex-valued scores: +# Instead of just extracting the complex-valued components, we can also get the +# amplitude and phase of these components. Let's start by looking at the +# amplitude of the first mode: -scores = model.scores() -scores.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1)) +spatial_ampltiudes = model.components_amplitude() +spatial_phases = model.components_phase() -# %% -# And indeed the annual cycle is completed incorporated into the first mode, -# while the second mode shows a semi-annual cycle (mode 3 in standard EOF). -# -# However, mode three and four look unusual. While showing some similarity to -# ENSO (e.g. 
in mode 3 peaks in 1982, 1998 and 2016), they exhibit a "running away" -# behaviour towards the boundaries of the time series. -# This a common issue in Hilbert EOF analysis which is based on the Hilbert transform (a convolution) -# that suffers from the absence of information at the time series boundaries. One way to mitigate this -# is to artificially extend the time series also known as *padding*. In ``xeofs``, you can enable -# such a padding by setting the ``padding`` parameter to ``"exp"`` which will extent the boundaries by an exponential -# decaying function. The ``decay_factor`` parameter controls the decay rate of the exponential function measured in -# multiples of the time series length. Let's see how the decay parameter impacts the results: - -model_ext = xe.models.HilbertEOF(padding="exp", decay_factor=0.01, **kwargs) -model_ext.fit(sst, dim="time") -scores_ext = model_ext.scores().sel(mode=slice(1, 4)) - -scores_ext.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1)) +spatial_ampltiudes.sel(mode=1).plot(col="level") +plt.show() # %% -# And indeed, padding the time series effectively reduced the artifacts at the boundaries. -# Lastly, we examine the complex component amplitudes and phases. +# It looks like the first mode picks up a pattern resembling the location of the +# subtropical jet stream around ±30º latitude, particularly strong in the upper +# troposphere at 200 hPa and weaker toward the surface. We can also plot the +# phase of the first mode. To keep the plot clear, we'll only show the phase +# where the amplitude is above a certain threshold (e.g., 0.004): -comp_amps = model.components_amplitude() -comp_amps.plot(col="mode", vmin=0, vmax=0.025) +relevant_phases = spatial_phases.where(spatial_ampltiudes > 0.004) +relevant_phases.sel(mode=1).plot(col="level", cmap="twilight") +plt.show() # %% -# The component phases of the first mode clearly show the seasonal cycle as -# the northern and southern hemisphere are phase shifted by 180 degrees (white and black). -# Note the blueish regions in the central East Pacific and Indian Ocean which indicate -# a phase shift of 90 degrees compared to the main annual cycle. This is in agreement -# with mode 3 of the standard EOF analysis. - -comp_phases = model.components_phase() -comp_phases.plot(col="mode", cmap="twilight") diff --git a/examples/1single/plot_eeof.py b/examples/1single/plot_eeof.py index ab804f5b..4605b9fd 100644 --- a/examples/1single/plot_eeof.py +++ b/examples/1single/plot_eeof.py @@ -13,9 +13,10 @@ Let's begin by setting up the required packages and fetching the data: """ +import matplotlib.pyplot as plt import xarray as xr + import xeofs as xe -import matplotlib.pyplot as plt xr.set_options(display_expand_data=False) @@ -51,7 +52,7 @@ # With these parameters set, we proceed to instantiate the ``ExtendedEOF`` # model and fit our data. -model = xe.models.ExtendedEOF( +model = xe.single.ExtendedEOF( n_modes=10, tau=4, embedding=40, n_pca_modes=50, use_coslat=True ) model.fit(t2m, dim="time") diff --git a/examples/1single/plot_eeof_trend.py b/examples/1single/plot_eeof_trend.py index d309c138..86fb6526 100644 --- a/examples/1single/plot_eeof_trend.py +++ b/examples/1single/plot_eeof_trend.py @@ -8,9 +8,10 @@ Let's begin by setting up the required packages and fetching the data. 
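# Illustrative-only sketch (toy numbers, not the xeofs implementation) of the
# time-delay embedding behind Extended EOF analysis: every sample is augmented
# with ``embedding`` lagged copies of the series, spaced ``tau`` steps apart.
import numpy as np

x_demo = np.arange(20.0)      # toy 1-D time series
tau_demo, emb_demo = 2, 3     # much smaller than the values used in the examples
lags = [
    x_demo[i * tau_demo : len(x_demo) - (emb_demo - 1 - i) * tau_demo]
    for i in range(emb_demo)
]
X_embedded = np.stack(lags, axis=1)  # shape (n_samples - (embedding-1)*tau, embedding)
print(X_embedded[:3])                # [[0. 2. 4.], [1. 3. 5.], [2. 4. 6.]]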
""" +import matplotlib.pyplot as plt import xarray as xr + import xeofs as xe -import matplotlib.pyplot as plt xr.set_options(display_expand_data=False) @@ -26,7 +27,7 @@ # %% # We start by performing a standard EOF analysis on the dataset. -eof = xe.models.EOF(n_modes=10) +eof = xe.single.EOF(n_modes=10) eof.fit(sst, dim="time") scores = eof.scores() components = eof.components() @@ -49,7 +50,7 @@ # to capture long-term trends. To speed up computation, we apply the EEOF analysis # to the extended (lag) covariance matrix derived from the first 50 PCs. -eeof = xe.models.ExtendedEOF(n_modes=5, tau=1, embedding=120, n_pca_modes=50) +eeof = xe.single.ExtendedEOF(n_modes=5, tau=1, embedding=120, n_pca_modes=50) eeof.fit(sst, dim="time") components_ext = eeof.components() scores_ext = eeof.scores() @@ -65,13 +66,13 @@ # We can use this to the first mode to remove this nonlinear trend from our original dataset. sst_trends = eeof.inverse_transform(scores_ext.sel(mode=1)) -sst_detrended = sst - sst_trends.drop_vars("mode") +sst_detrended = sst - sst_trends # %% # Reapplying the standard EOF analysis on our now detrended dataset: -eof_model_detrended = xe.models.EOF(n_modes=5) +eof_model_detrended = xe.single.EOF(n_modes=5) eof_model_detrended.fit(sst_detrended, dim="time") scores_detrended = eof_model_detrended.scores() components_detrended = eof_model_detrended.components() diff --git a/examples/1single/plot_eof-smode.py b/examples/1single/plot_eof-smode.py index a81d00c0..479367d7 100644 --- a/examples/1single/plot_eof-smode.py +++ b/examples/1single/plot_eof-smode.py @@ -18,7 +18,7 @@ from cartopy.crs import EqualEarth, PlateCarree from matplotlib.gridspec import GridSpec -from xeofs.models import SparsePCA +import xeofs as xe # %% # We use sea surface temperature data from 1990 to 2017, consistent with the original paper. @@ -29,7 +29,7 @@ # %% # We perform sparse PCA using the `alpha` and `beta` parameters, which define the sparsity imposed by the elastic net (refer to Table 1 in the paper). In our analysis, we set `alpha` to 1e-5, as specified by the authors. Although the authors do not specify a value for `beta`, it appears that the results are not highly sensitive to this parameter. Therefore, we use the default `beta` value of 1e-4. 
-model = SparsePCA(n_modes=4, alpha=1e-5) +model = xe.single.SparsePCA(n_modes=4, alpha=1e-5) model.fit(sst, dim="time") expvar = model.explained_variance() expvar_ratio = model.explained_variance_ratio() diff --git a/examples/1single/plot_eof-tmode.py b/examples/1single/plot_eof-tmode.py index 6a618e3e..f85b97e9 100644 --- a/examples/1single/plot_eof-tmode.py +++ b/examples/1single/plot_eof-tmode.py @@ -7,19 +7,19 @@ Load packages and data: """ -import xarray as xr import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec +import xarray as xr from cartopy.crs import EqualEarth, PlateCarree +from matplotlib.gridspec import GridSpec -from xeofs.models import EOF +import xeofs as xe sst = xr.tutorial.open_dataset("ersstv5")["sst"] # %% # Perform the actual analysis -model = EOF(n_modes=5) +model = xe.single.EOF(n_modes=5) model.fit(sst, dim=("lat", "lon")) expvar = model.explained_variance_ratio() components = model.components() diff --git a/examples/1single/plot_gwpca.py b/examples/1single/plot_gwpca.py index f1542228..2a9462cf 100644 --- a/examples/1single/plot_gwpca.py +++ b/examples/1single/plot_gwpca.py @@ -32,18 +32,18 @@ """ # For the analysis -import numpy as np -import xarray as xr -import xeofs as xe - # For visualization import matplotlib.pyplot as plt -import seaborn as sns +import numpy as np # For accessing R packages import rpy2.robjects as ro -from rpy2.robjects.packages import importr +import seaborn as sns +import xarray as xr from rpy2.robjects import pandas2ri +from rpy2.robjects.packages import importr + +import xeofs as xe # %% # Next, we'll install the R package `mvoutlier `_ @@ -96,7 +96,7 @@ # kilometers. Lastly, we'll standardize the input to ensure consistent scales # for the chemical elements. -gwpca = xe.models.GWPCA( +gwpca = xe.single.GWPCA( n_modes=5, standardize=True, metric="euclidean", diff --git a/examples/1single/plot_hilbert_eof.py b/examples/1single/plot_hilbert_eof.py new file mode 100644 index 00000000..a2fd42ba --- /dev/null +++ b/examples/1single/plot_hilbert_eof.py @@ -0,0 +1,124 @@ +""" +Hilbert EOF analysis +============================================ + +We demonstrate how to execute a Hilbert EOF analysis [1]_ [2]_ +[3]_. This method extends traditional EOF analysis into the complex domain, +allowing the EOF components to have real and imaginary parts. This capability +can reveal oscillatory patterns in datasets, which are common in Earth +observations. For example, beyond typical examples like seasonal cycles, you can +think of internal waves in the ocean, or the Quasi-Biennial Oscillation in the +atmosphere. + +Using monthly sea surface temperature data from 1970 to 2021 as an example, we +highlight the method's key features and address edge effects as a common +challenge. + +.. [1] Rasmusson, E. M., Arkin, P. A., Chen, W.-Y. & Jalickee, J. B. Biennial + variations in surface temperature over the United States as revealed by + singular decomposition. Monthly Weather Review 109, 587–598 (1981). +.. [2] Barnett, T. P. Interaction of the Monsoon and Pacific Trade Wind System + at Interannual Time Scales Part I: The Equatorial Zone. Monthly Weather + Review 111, 756–773 (1983). +.. [3] Horel, J. Complex Principal Component Analysis: Theory and Examples. J. + Climate Appl. Meteor. 23, 1660–1673 (1984). 
+ +Let's start by importing the necessary packages and loading the data: +""" + +# %% +import matplotlib.pyplot as plt +import xarray as xr + +import xeofs as xe + +xr.set_options(display_expand_attrs=False) + +sst = xr.tutorial.open_dataset("ersstv5").sst +sst + +# %% +# We fit the ``HilbertEOF`` model directly to the raw data, retaining the seasonal +# cycle for study. The model initialization specifies the desired number of +# modes. The ``use_coslat`` parameter is set to ``True`` to adjust for grid +# convergence at the poles. While the ``HilbertEOF`` class offers padding +# options to mitigate potential edge effects, we'll begin with no padding. + +kwargs = dict(n_modes=4, use_coslat=True, random_state=7) +model = xe.single.HilbertEOF(padding="none", **kwargs) + +# %% +# Now, we fit the model to the data and extract the explained variance. + +model.fit(sst, dim="time") +expvar = model.explained_variance() +expvar_ratio = model.explained_variance_ratio() + +# %% +# Let's have a look at the explained variance of the first five modes: + +expvar.round(0) + + +# %% +# Clearly, the first mode completely dominates and already explains a +# substantial amount of variance. If we look at the fraction of explained +# variance, we see that the first mode explains about 88.8 %. + +(expvar_ratio * 100).round(1) + +# %% +# In comparison to standard EOF analysis (check the corresponding example, +# S-mode), the first complex mode seems to integrate the first two standard +# modes in terms of explained variance. This makes sense as the two modes in +# standard EOF are both showing parts of an annual cycle (which are in +# quadrature) and thus the complex mode combines both of them. Let's confirm our +# hypothesis by looking at the real part the complex-valued scores: + +scores = model.scores() +scores.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1)) +plt.show() + + +# %% +# And indeed the annual cycle is completed incorporated into the first mode, +# while the second mode shows a semi-annual cycle (mode 3 in standard EOF). +# However, mode three and four look unusual. While showing some similarity to +# ENSO (e.g. in mode 3 peaks in 1982, 1998 and 2016), they exhibit a "running +# away" behaviour towards the boundaries of the time series. This a common issue +# in Hilbert EOF analysis which is based on the Hilbert transform (a +# convolution) that suffers from the absence of information at the time series +# boundaries. One way to mitigate this is to artificially extend the time series +# also known as *padding*. In ``xeofs``, you can enable such a padding by +# setting the ``padding`` parameter to ``"exp"`` which will extent the +# boundaries by an exponential decaying function. The ``decay_factor`` parameter +# controls the decay rate of the exponential function measured in multiples of +# the time series length. Let's see how the decay parameter impacts the results: + +model_ext = xe.single.HilbertEOF(padding="exp", decay_factor=0.01, **kwargs) +model_ext.fit(sst, dim="time") +scores_ext = model_ext.scores().sel(mode=slice(1, 4)) + +scores_ext.real.plot.line(x="time", col="mode", lw=1, ylim=(-0.1, 0.1)) +plt.show() + +# %% +# And indeed, padding the time series effectively reduced the artifacts at the +# boundaries. Lastly, we examine the complex component amplitudes and phases. 
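# Before that, a purely illustrative sketch (assumed behaviour, not the
# library's actual code) of the exponential padding idea mentioned above: the
# series is extended by a tail that decays from the boundary value towards zero
# over a window set by ``decay_factor`` times the series length (an analogous
# extension would be prepended at the start).
import numpy as np

x_demo = np.sin(np.linspace(0, 6 * np.pi, 200))        # toy series
decay_factor_demo = 0.01
n_ext = max(int(decay_factor_demo * x_demo.size), 1)   # -> 2 extra points here
taper = np.exp(-np.arange(1, n_ext + 1) / n_ext)
x_padded = np.concatenate([x_demo, x_demo[-1] * taper])
print(x_demo.size, "->", x_padded.size)                # 200 -> 202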
+ +comp_amps = model.components_amplitude() +comp_amps.plot(col="mode", vmin=0, vmax=0.025) +plt.show() + +# %% +# The component phases of the first mode clearly show the seasonal cycle as the +# northern and southern hemisphere are phase shifted by 180 degrees (white and +# black). Note the blueish regions in the central East Pacific and Indian Ocean +# which indicate a phase shift of 90 degrees compared to the main annual cycle. +# This is in agreement with mode 3 of the standard EOF analysis. + +comp_phases = model.components_phase() +comp_phases.plot(col="mode", cmap="twilight") +plt.show() + +# %% diff --git a/examples/1single/plot_mreof.py b/examples/1single/plot_mreof.py index 5bbc6a0f..e8c14b50 100644 --- a/examples/1single/plot_mreof.py +++ b/examples/1single/plot_mreof.py @@ -6,12 +6,12 @@ """ # Load packages and data: -import xarray as xr import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec +import xarray as xr from cartopy.crs import PlateCarree +from matplotlib.gridspec import GridSpec -from xeofs.models import EOF, EOFRotator +import xeofs as xe # %% # Create four different dataarrayss @@ -25,9 +25,9 @@ # Perform the actual analysis multivariate_data = [subset1, subset2, subset3, subset4] -mpca = EOF(n_modes=100, standardize=False, use_coslat=True) +mpca = xe.single.EOF(n_modes=100, standardize=False, use_coslat=True) mpca.fit(multivariate_data, dim="time") -rotator = EOFRotator(n_modes=20) +rotator = xe.single.EOFRotator(n_modes=20) rotator.fit(mpca) rcomponents = rotator.components() rscores = rotator.scores() diff --git a/examples/1single/plot_multivariate-eof.py b/examples/1single/plot_multivariate-eof.py index 31c6f623..84d0a36b 100644 --- a/examples/1single/plot_multivariate-eof.py +++ b/examples/1single/plot_multivariate-eof.py @@ -6,12 +6,12 @@ """ # Load packages and data: -import xarray as xr import matplotlib.pyplot as plt -from matplotlib.gridspec import GridSpec +import xarray as xr from cartopy.crs import PlateCarree +from matplotlib.gridspec import GridSpec -from xeofs.models import EOF +import xeofs as xe # Create four different dataarrayss sst = xr.tutorial.open_dataset("ersstv5")["sst"] @@ -24,7 +24,7 @@ # %% # Perform the actual analysis -pca = EOF(n_modes=10, standardize=False, use_coslat=True) +pca = xe.single.EOF(n_modes=10, standardize=False, use_coslat=True) pca.fit(multivariate_data, dim="time") components = pca.components() scores = pca.scores() diff --git a/examples/1single/plot_rotated_eof.py b/examples/1single/plot_rotated_eof.py index 0f8b296b..33f6ab72 100644 --- a/examples/1single/plot_rotated_eof.py +++ b/examples/1single/plot_rotated_eof.py @@ -22,14 +22,13 @@ We'll start by loading the necessary packages and data: """ -import xarray as xr import matplotlib.pyplot as plt import seaborn as sns +import xarray as xr +from cartopy.crs import PlateCarree, Robinson from matplotlib.gridspec import GridSpec -from cartopy.crs import Robinson, PlateCarree - -from xeofs.models import EOF, EOFRotator +import xeofs as xe sns.set_context("paper") @@ -42,17 +41,17 @@ components = [] scores = [] # (1) Standard EOF without regularization -model = EOF(n_modes=100, standardize=True, use_coslat=True) +model = xe.single.EOF(n_modes=100, standardize=True, use_coslat=True) model.fit(sst, dim="time") components.append(model.components()) scores.append(model.scores()) # (2) Varimax-rotated EOF analysis -rot_var = EOFRotator(n_modes=50, power=1) +rot_var = xe.single.EOFRotator(n_modes=50, power=1) rot_var.fit(model) 
components.append(rot_var.components()) scores.append(rot_var.scores()) # (3) Promax-rotated EOF analysis -rot_pro = EOFRotator(n_modes=50, power=4) +rot_pro = xe.single.EOFRotator(n_modes=50, power=4) rot_pro.fit(model) components.append(rot_pro.components()) scores.append(rot_pro.scores()) diff --git a/examples/1single/plot_weighted-eof.py b/examples/1single/plot_weighted-eof.py index 26a88473..6d67964b 100644 --- a/examples/1single/plot_weighted-eof.py +++ b/examples/1single/plot_weighted-eof.py @@ -11,13 +11,13 @@ Load packages and data: """ -import xarray as xr import matplotlib.pyplot as plt import seaborn as sns -from matplotlib.gridspec import GridSpec +import xarray as xr from cartopy.crs import Orthographic, PlateCarree +from matplotlib.gridspec import GridSpec -from xeofs.models import EOF +import xeofs as xe sns.set_context("paper") @@ -29,22 +29,22 @@ components = [] scores = [] # (1) Based on covariance matrix -model_cov = EOF(n_modes=5, standardize=False, use_coslat=False) +model_cov = xe.single.EOF(n_modes=5, standardize=False, use_coslat=False) model_cov.fit(t2m, "time") components.append(model_cov.components()) scores.append(model_cov.scores()) # (2) Based on coslat weighted covariance matrix -model_lat = EOF(n_modes=5, standardize=False, use_coslat=True) +model_lat = xe.single.EOF(n_modes=5, standardize=False, use_coslat=True) model_lat.fit(t2m, "time") components.append(model_lat.components()) scores.append(model_lat.scores()) # (3) Based on correlation matrix -model_cor = EOF(n_modes=5, standardize=True, use_coslat=False) +model_cor = xe.single.EOF(n_modes=5, standardize=True, use_coslat=False) model_cor.fit(t2m, "time") components.append(model_cor.components()) scores.append(model_cor.scores()) # (4) Based on coslat weighted correlation matrix -model_cor_lat = EOF(n_modes=5, standardize=True, use_coslat=True) +model_cor_lat = xe.single.EOF(n_modes=5, standardize=True, use_coslat=True) model_cor_lat.fit(t2m, "time") components.append(model_cor_lat.components()) scores.append(model_cor_lat.scores()) diff --git a/examples/1single/rotated_eof.jpg b/examples/1single/rotated_eof.jpg index 28b6966b..606374db 100644 Binary files a/examples/1single/rotated_eof.jpg and b/examples/1single/rotated_eof.jpg differ diff --git a/examples/1single/sparse_pca.jpg b/examples/1single/sparse_pca.jpg index 33df23cc..fcf8dada 100644 Binary files a/examples/1single/sparse_pca.jpg and b/examples/1single/sparse_pca.jpg differ diff --git a/examples/1single/weighted_eof.jpg b/examples/1single/weighted_eof.jpg index 07008c8f..8d642634 100644 Binary files a/examples/1single/weighted_eof.jpg and b/examples/1single/weighted_eof.jpg differ diff --git a/examples/2multi/README.rst b/examples/2cross/README.rst similarity index 52% rename from examples/2multi/README.rst rename to examples/2cross/README.rst index bb9bcfc7..9fd4a086 100644 --- a/examples/2multi/README.rst +++ b/examples/2cross/README.rst @@ -1,2 +1,2 @@ -2 | Multi-Set Analysis +2 | Cross-Set Analysis ======================== diff --git a/examples/2cross/mca.jpg b/examples/2cross/mca.jpg new file mode 100644 index 00000000..6a3fcd53 Binary files /dev/null and b/examples/2cross/mca.jpg differ diff --git a/examples/2cross/plot_mca.py b/examples/2cross/plot_mca.py new file mode 100644 index 00000000..a93d5546 --- /dev/null +++ b/examples/2cross/plot_mca.py @@ -0,0 +1,112 @@ +""" +Maximum Covariance Analysis +=========================== + +Maximum Covariance Analysis (MCA) between two data sets. 
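# A rough NumPy sketch of the idea behind MCA (hypothetical data and code, not
# how xeofs implements it): the singular value decomposition of the
# cross-covariance matrix between two centered fields yields paired patterns,
# and projecting each field onto its patterns gives the expansion coefficients.
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 30))     # (time, features) anomalies of field 1
Y = rng.standard_normal((100, 40))     # (time, features) anomalies of field 2
X, Y = X - X.mean(axis=0), Y - Y.mean(axis=0)
C = X.T @ Y / (X.shape[0] - 1)         # cross-covariance matrix
U, s, Vt = np.linalg.svd(C, full_matrices=False)
scores_x, scores_y = X @ U, Y @ Vt.T   # expansion coefficients for each field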
+""" + +# Load packages and data: +import matplotlib.pyplot as plt +import numpy as np +import xarray as xr +from cartopy.crs import Orthographic, PlateCarree +from cartopy.feature import LAND +from matplotlib.gridspec import GridSpec + +import xeofs as xe + +# %% +# Create 2 different DataArrays + +t2m = xr.tutorial.load_dataset("air_temperature")["air"] +da1 = t2m.isel(lon=slice(0, 26)) +da2 = t2m.isel(lon=slice(27, None)) + +# %% +# Perform MCA + +mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True) +mca.fit(da1, da2, dim="time") + +# %% +# Get singular vectors, projections (PCs), homogeneous and heterogeneous +# patterns: + +singular_vectors = mca.components() +scores = mca.scores() +hom_pats, pvals_hom = mca.homogeneous_patterns() +het_pats, pvals_het = mca.heterogeneous_patterns() + +# %% +# When two fields are expected, the output of the above methods is a list of +# length 2, with the first and second entry containing the relevant object for +# ``X`` and ``Y``. For example, the p-values obtained from the two-sided t-test +# for the homogeneous patterns of ``X`` are: + +pvals_hom[0] + +# %% +# Create a mask to identifiy where p-values are below 0.05 + +hom_mask = [values < 0.05 for values in pvals_hom] +het_mask = [values < 0.05 for values in pvals_het] + + +# %% +# Plot some relevant quantities of mode 2. + +lonlats = [ + np.meshgrid(pvals_hom[0].lon.values, pvals_hom[0].lat.values), + np.meshgrid(pvals_hom[1].lon.values, pvals_hom[1].lat.values), +] +proj = [ + Orthographic(central_latitude=30, central_longitude=-120), + Orthographic(central_latitude=30, central_longitude=-60), +] +kwargs1 = {"cmap": "BrBG", "vmin": -0.05, "vmax": 0.05, "transform": PlateCarree()} +kwargs2 = {"cmap": "RdBu", "vmin": -1, "vmax": 1, "transform": PlateCarree()} + +mode = 2 + +fig = plt.figure(figsize=(7, 14)) +gs = GridSpec(5, 2) +ax1 = [fig.add_subplot(gs[0, i], projection=proj[i]) for i in range(2)] +ax2 = [fig.add_subplot(gs[1, i], projection=proj[i]) for i in range(2)] +ax3 = [fig.add_subplot(gs[2, i], projection=proj[i]) for i in range(2)] +ax4 = [fig.add_subplot(gs[3, i]) for i in range(2)] + +for i, a in enumerate(ax1): + singular_vectors[i].sel(mode=mode).plot(ax=a, **kwargs1) + +for i, a in enumerate(ax2): + hom_pats[i].sel(mode=mode).plot(ax=a, **kwargs2) + a.scatter( + lonlats[i][0], + lonlats[i][1], + hom_mask[i].sel(mode=mode).values * 0.5, + color="k", + alpha=0.5, + transform=PlateCarree(), + ) +for i, a in enumerate(ax3): + het_pats[i].sel(mode=mode).plot(ax=a, **kwargs2) + a.scatter( + lonlats[i][0], + lonlats[i][1], + het_mask[i].sel(mode=mode).values * 0.5, + color="k", + alpha=0.5, + transform=PlateCarree(), + ) + +for i, a in enumerate(ax4): + scores[i].sel(mode=mode).plot(ax=a) + a.set_xlabel("") + + +for a in np.ravel([ax1, ax2, ax3]): + a.coastlines(color=".5") + a.add_feature(LAND) + +plt.tight_layout() +plt.savefig("mca.jpg") diff --git a/examples/2cross/plot_rotated_mca.py b/examples/2cross/plot_rotated_mca.py new file mode 100644 index 00000000..2ae0c501 --- /dev/null +++ b/examples/2cross/plot_rotated_mca.py @@ -0,0 +1,118 @@ +""" +Rotated Maximum Covariance Analysis +=================================== + +Rotated Maximum Covariance Analysis (MCA) between two data sets. 
+""" + +# Load packages and data: +import matplotlib.pyplot as plt +import numpy as np +import xarray as xr +from cartopy.crs import Orthographic, PlateCarree +from cartopy.feature import LAND +from matplotlib.gridspec import GridSpec + +import xeofs as xe + +# %% +# Create 2 different DataArrays + +t2m = xr.tutorial.load_dataset("air_temperature")["air"] +da1 = t2m.isel(lon=slice(0, 26)) +da2 = t2m.isel(lon=slice(27, None)) + +# %% +# Perform MCA + +mca = xe.cross.MCA(n_modes=20, standardize=False, use_coslat=True) +mca.fit(da1, da2, dim="time") + +# %% +# Apply Varimax-rotation to MCA solution + +rot = xe.cross.MCARotator(n_modes=10) +rot.fit(mca) + +# %% +# Get rotated singular vectors, projections (PCs), homogeneous and heterogeneous +# patterns: + +singular_vectors = rot.components() +scores = rot.scores() +hom_pats, pvals_hom = rot.homogeneous_patterns() +het_pats, pvals_het = rot.heterogeneous_patterns() + +# %% +# When two fields are expected, the output of the above methods is a list of +# length 2, with the first and second entry containing the relevant object for +# ``X`` and ``Y``. For example, the p-values obtained from the two-sided t-test +# for the homogeneous patterns of ``X`` are: + +pvals_hom[0] + +# %% +# Create a mask to identifiy where p-values are below 0.05 + +hom_mask = [values < 0.05 for values in pvals_hom] +het_mask = [values < 0.05 for values in pvals_het] + + +# %% +# Plot some relevant quantities of mode 2. + +lonlats = [ + np.meshgrid(pvals_hom[0].lon.values, pvals_hom[0].lat.values), + np.meshgrid(pvals_hom[1].lon.values, pvals_hom[1].lat.values), +] +proj = [ + Orthographic(central_latitude=30, central_longitude=-120), + Orthographic(central_latitude=30, central_longitude=-60), +] +kwargs1 = {"cmap": "BrBG", "vmin": -0.05, "vmax": 0.05, "transform": PlateCarree()} +kwargs2 = {"cmap": "RdBu", "vmin": -1, "vmax": 1, "transform": PlateCarree()} + +mode = 2 + +fig = plt.figure(figsize=(7, 14)) +gs = GridSpec(5, 2) +ax1 = [fig.add_subplot(gs[0, i], projection=proj[i]) for i in range(2)] +ax2 = [fig.add_subplot(gs[1, i], projection=proj[i]) for i in range(2)] +ax3 = [fig.add_subplot(gs[2, i], projection=proj[i]) for i in range(2)] +ax4 = [fig.add_subplot(gs[3, i]) for i in range(2)] + +for i, a in enumerate(ax1): + singular_vectors[i].sel(mode=mode).plot(ax=a, **kwargs1) + +for i, a in enumerate(ax2): + hom_pats[i].sel(mode=mode).plot(ax=a, **kwargs2) + a.scatter( + lonlats[i][0], + lonlats[i][1], + hom_mask[i].sel(mode=mode).values * 0.5, + color="k", + alpha=0.5, + transform=PlateCarree(), + ) +for i, a in enumerate(ax3): + het_pats[i].sel(mode=mode).plot(ax=a, **kwargs2) + a.scatter( + lonlats[i][0], + lonlats[i][1], + het_mask[i].sel(mode=mode).values * 0.5, + color="k", + alpha=0.5, + transform=PlateCarree(), + ) + +for i, a in enumerate(ax4): + scores[i].sel(mode=mode).plot(ax=a) + a.set_xlabel("") + + +for a in np.ravel([ax1, ax2, ax3]): + a.coastlines(color=".5") + a.add_feature(LAND) + +plt.tight_layout() +plt.savefig("rotated_mca.jpg") diff --git a/examples/2cross/rotated_mca.jpg b/examples/2cross/rotated_mca.jpg new file mode 100644 index 00000000..1e9748a2 Binary files /dev/null and b/examples/2cross/rotated_mca.jpg differ diff --git a/examples/2multi/mca.jpg b/examples/2multi/mca.jpg deleted file mode 100644 index 664242ae..00000000 Binary files a/examples/2multi/mca.jpg and /dev/null differ diff --git a/examples/2multi/rotated_mca.jpg b/examples/2multi/rotated_mca.jpg deleted file mode 100644 index 47e11346..00000000 Binary files 
a/examples/2multi/rotated_mca.jpg and /dev/null differ diff --git a/examples/3multi/README.rst b/examples/3multi/README.rst new file mode 100644 index 00000000..ed4ea2f7 --- /dev/null +++ b/examples/3multi/README.rst @@ -0,0 +1,2 @@ +3 | Multi-Set Analysis +======================== diff --git a/examples/3multi/plot_cca.py b/examples/3multi/plot_cca.py new file mode 100644 index 00000000..70f6a68e --- /dev/null +++ b/examples/3multi/plot_cca.py @@ -0,0 +1,102 @@ +""" +Canonical Correlation Analysis +============================== + +In this example, we're going to perform a Canonical Correlation Analysis (CCA) +on three datasets using the ERSSTv5 monthly sea surface temperature (SST) data +from 1970 to 2022. We divide this data into three areas: the Indian Ocean, +the Pacific Ocean, and the Atlantic Ocean. Our goal is to perform CCA on these +regions. + +First, we'll import the necessary modules. +""" + +import cartopy.crs as ccrs +import matplotlib.pyplot as plt +import xarray as xr +from matplotlib.gridspec import GridSpec + +import xeofs as xe + +# %% +# Next, we load the data and compute the SST anomalies. This removes the +# monthly climatologies, so the seasonal cycle doesn't impact our CCA. + +sst = xr.tutorial.load_dataset("ersstv5").sst +sst = sst.groupby("time.month") - sst.groupby("time.month").mean("time") + + +# %% +# Now, we define the three regions of interest and store them in a list. + +indian = sst.sel(lon=slice(35, 115), lat=slice(30, -30)) +pacific = sst.sel(lon=slice(130, 290), lat=slice(30, -30)) +atlantic = sst.sel(lon=slice(320, 360), lat=slice(70, 10)) + +data_list = [indian, pacific, atlantic] + +# %% +# We now perform CCA. Since we are dealing with a high-dimensional feature space, we first +# perform PCA to reduce the dimensionality (this acts as a form of regularized CCA) by setting +# ``pca=True``. By setting the ``variance_fraction`` keyword argument, we specify that we +# want to keep the number of PCA modes that explain 90% of the variance in each of the +# three data sets. +# +# An important parameter is ``init_pca_modes``. It specifies the number +# of PCA modes that are initially computed before truncating them to the 90% target. If this +# number is small enough, randomized PCAs will be performed instead of the full SVD decomposition, +# which is much faster. We can also specify ``init_pca_modes`` as a float (0 < x <= 1), +# in which case the number of PCA modes is given by that fraction of the data matrix's rank. +# The default is set to 0.75, which ensures that randomized PCAs are performed. +# +# Given the nature of SST data, we might lower it to something like 0.3, since +# we expect that most of the variance in the data will be explained by a small +# number of PC modes. +# +# Note that if our initial PCA modes don't hit the 90% variance target, ``xeofs`` +# will give a warning. + +model = xe.multi.CCA( + n_modes=2, + use_coslat=True, + pca=True, + variance_fraction=0.9, + init_pca_modes=0.30, +) +model.fit(data_list, dim="time") +components = model.components() +scores = model.scores() + +# %% +# Let's look at the canonical loadings (components) of the first mode. 
+ +mode = 1 + +central_longitudes = [ + indian.lon.median().item(), + pacific.lon.median().item(), + pacific.lon.median().item(), +] +projections = [ccrs.PlateCarree(central_longitude=lon) for lon in central_longitudes] + +fig = plt.figure(figsize=(12, 2.5)) +gs = GridSpec(1, 4, figure=fig, width_ratios=[2, 4, 1, 0.2]) +axes = [fig.add_subplot(gs[0, i], projection=projections[i]) for i in range(3)] +cax = fig.add_subplot(1, 4, 4) +kwargs = dict(transform=ccrs.PlateCarree(), vmin=-1, vmax=1, cmap="RdBu_r", cbar_ax=cax) +components[0].sel(mode=mode).plot(ax=axes[0], **kwargs) +components[1].sel(mode=mode).plot(ax=axes[1], **kwargs) +im = components[2].sel(mode=mode).plot(ax=axes[2], **kwargs) +fig.colorbar(im, cax=cax, orientation="vertical") +for ax in axes: + ax.coastlines() + ax.set_title("") + +# %% +# And lastly, we'll check out the canonical variates (scores) of the first mode. + +fig, ax = plt.subplots(figsize=(12, 4)) +scores[0].sel(mode=mode).plot(ax=ax, label="Indian Ocean") +scores[1].sel(mode=mode).plot(ax=ax, label="Central Pacific") +scores[2].sel(mode=mode).plot(ax=ax, label="North Atlantic") +ax.legend() diff --git a/examples/3validation/bootstrap.jpg b/examples/3validation/bootstrap.jpg deleted file mode 100644 index 6767652c..00000000 Binary files a/examples/3validation/bootstrap.jpg and /dev/null differ diff --git a/examples/3validation/README.rst b/examples/4validation/README.rst similarity index 50% rename from examples/3validation/README.rst rename to examples/4validation/README.rst index ae6a2eef..245eeb5b 100644 --- a/examples/3validation/README.rst +++ b/examples/4validation/README.rst @@ -1,2 +1,2 @@ -3 | Validation +4 | Validation =============== \ No newline at end of file diff --git a/examples/4validation/bootstrap.jpg b/examples/4validation/bootstrap.jpg new file mode 100644 index 00000000..9d36cd14 Binary files /dev/null and b/examples/4validation/bootstrap.jpg differ diff --git a/examples/4validation/plot_bootstrap.py b/examples/4validation/plot_bootstrap.py new file mode 100644 index 00000000..1cce033c --- /dev/null +++ b/examples/4validation/plot_bootstrap.py @@ -0,0 +1,95 @@ +""" +Significance testing of EOF analysis via bootstrap +=================================================== + +Test the significance of individual modes and obtain confidence intervals +for both EOFs and PCs. +""" + +# Load packages and data: +import matplotlib.pyplot as plt +import numpy as np +import xarray as xr +from cartopy.crs import Orthographic, PlateCarree +from matplotlib.gridspec import GridSpec + +import xeofs as xe + +# %% + +t2m = xr.tutorial.load_dataset("air_temperature")["air"] + +# %% +# Perform EOF analysis + +model = xe.single.EOF(n_modes=5, standardize=False) +model.fit(t2m, dim="time") +expvar = model.explained_variance_ratio() +components = model.components() +scores = model.scores() + + +# %% +# Perform bootstrapping of the model to identify the number of significant modes. +# We perform 50 bootstraps. +# Note that, if computationally feasible, you typically want to choose a higher +# number of bootstraps, e.g. 1000. 
+ +n_boot = 50 + +bs = xe.validation.EOFBootstrapper(n_bootstraps=n_boot) +bs.fit(model) +bs_expvar = bs.explained_variance() +ci_expvar = bs_expvar.quantile([0.025, 0.975], "n") # 95% confidence intervals + +q025 = ci_expvar.sel(quantile=0.025) +q975 = ci_expvar.sel(quantile=0.975) + +is_significant = q025 - q975.shift({"mode": -1}) > 0 +n_significant_modes = ( + is_significant.where(is_significant is True).cumsum(skipna=False).max().fillna(0) +) +print("{:} modes are significant at alpha=0.05".format(n_significant_modes.values)) + +# %% +# The bootstrapping procedure identifies 3 significant modes. We can also +# compute the 95 % confidence intervals of the EOFs/PCs and mask out +# insignificant elements of the obtained EOFs. + +ci_components = bs.components().quantile([0.025, 0.975], "n") +ci_scores = bs.scores().quantile([0.025, 0.975], "n") + +is_sig_comps = np.sign(ci_components).prod("quantile") > 0 + + +# %% +# Summarize the results in a figure. + + +lons, lats = np.meshgrid(is_sig_comps.lon.values, is_sig_comps.lat.values) +proj = Orthographic(central_latitude=30, central_longitude=-80) +kwargs = {"cmap": "RdBu", "vmin": -0.05, "vmax": 0.05, "transform": PlateCarree()} + +fig = plt.figure(figsize=(10, 16)) +gs = GridSpec(5, 2) +ax1 = [fig.add_subplot(gs[i, 0], projection=proj) for i in range(5)] +ax2 = [fig.add_subplot(gs[i, 1]) for i in range(5)] + +for i, (a1, a2) in enumerate(zip(ax1, ax2)): + a1.coastlines(color=".5") + components.isel(mode=i).plot(ax=a1, **kwargs) + a1.scatter( + lons, + lats, + is_sig_comps.isel(mode=i).values * 0.5, + color="k", + alpha=0.5, + transform=PlateCarree(), + ) + ci_scores.isel(mode=i, quantile=0).plot(ax=a2, color=".3", lw=".5", label="2.5%") + ci_scores.isel(mode=i, quantile=1).plot(ax=a2, color=".3", lw=".5", label="97.5%") + scores.isel(mode=i).plot(ax=a2, lw=".5", alpha=0.5, label="PC") + a2.legend(loc=2) + +plt.tight_layout() +plt.savefig("bootstrap.jpg") diff --git a/pyproject.toml b/pyproject.toml index 982b5fc8..1a14943f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,11 +39,11 @@ dev = [ docs = [ "rpy2>=3.5", "sphinx-gallery>=0.14", - "sphinx-design>=0.5", + "sphinx-design>=0.6", "sphinx-copybutton>=0.5", "nbsphinx>=0.9", - "pydata-sphinx-theme>=0.14", - "sphinx>=7.2", + "pydata-sphinx-theme>=0.15", + "sphinx>=8", "nbconvert>=7.9", "myst-parser>=3.0", "matplotlib>=3.4", diff --git a/tests/linalg/__init__.py b/tests/linalg/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/models/test_decomposer.py b/tests/linalg/test_decomposer.py similarity index 99% rename from tests/models/test_decomposer.py rename to tests/linalg/test_decomposer.py index 29009d0b..cc3d074b 100644 --- a/tests/models/test_decomposer.py +++ b/tests/linalg/test_decomposer.py @@ -2,7 +2,7 @@ import pytest from dask.array import Array as DaskArray # type: ignore -from xeofs.models.decomposer import Decomposer +from xeofs.linalg.decomposer import Decomposer from ..utilities import data_is_dask diff --git a/tests/models/cross/__init__.py b/tests/models/cross/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/models/cross/test_cca.py b/tests/models/cross/test_cca.py new file mode 100644 index 00000000..b603e9c3 --- /dev/null +++ b/tests/models/cross/test_cca.py @@ -0,0 +1,292 @@ +import dask.array as da +import numpy as np +import pytest +import xarray as xr + +from xeofs.cross import CCA + + +def generate_random_data(shape, lazy=False, seed=142): + rng = np.random.default_rng(seed) + if lazy: + return xr.DataArray( 
+ da.random.random(shape, chunks=(5, 5)), + dims=["sample", "feature"], + coords={"sample": np.arange(shape[0]), "feature": np.arange(shape[1])}, + ) + else: + return xr.DataArray( + rng.random(shape), + dims=["sample", "feature"], + coords={"sample": np.arange(shape[0]), "feature": np.arange(shape[1])}, + ) + + +def generate_well_conditioned_data(lazy=False): + rng = np.random.default_rng(142) + t = np.linspace(0, 50, 200) + std = 0.1 + x1 = np.sin(t)[:, None] + rng.normal(0, std, size=(200, 2)) + x2 = np.sin(t)[:, None] + rng.normal(0, std, size=(200, 3)) + x1[:, 1] = x1[:, 1] ** 2 + x2[:, 1] = x2[:, 1] ** 3 + x2[:, 2] = abs(x2[:, 2]) ** (0.5) + coords_time = np.arange(len(t)) + coords_fx = [1, 2] + coords_fy = [1, 2, 3] + X = xr.DataArray( + x1, + dims=["sample", "feature"], + coords={"sample": coords_time, "feature": coords_fx}, + ) + Y = xr.DataArray( + x2, + dims=["sample", "feature"], + coords={"sample": coords_time, "feature": coords_fy}, + ) + if lazy: + X = X.chunk({"sample": 5, "feature": -1}) + Y = Y.chunk({"sample": 5, "feature": -1}) + return X, Y + else: + return X, Y + + +@pytest.fixture +def cca(): + return CCA(n_modes=1) + + +def test_initialization(): + model = CCA() + assert model is not None + + +def test_fit(cca): + X, Y = generate_well_conditioned_data() + cca.fit(X, Y, dim="sample") + assert hasattr(cca, "preprocessor1") + assert hasattr(cca, "preprocessor2") + assert hasattr(cca, "data") + + +def test_fit_empty_data(cca): + with pytest.raises(ValueError): + cca.fit(xr.DataArray(), xr.DataArray(), "time") + + +def test_fit_invalid_dims(cca): + X, Y = generate_well_conditioned_data() + with pytest.raises(ValueError): + cca.fit(X, Y, dim=("invalid_dim1", "invalid_dim2")) + + +def test_transform(cca): + X, Y = generate_well_conditioned_data() + cca.fit(X, Y, dim="sample") + result = cca.transform(X, Y) + assert isinstance(result, list) + assert isinstance(result[0], xr.DataArray) + + +def test_transform_unseen_data(cca): + X, Y = generate_well_conditioned_data() + x = X.isel(sample=slice(151, 200)) + y = Y.isel(sample=slice(151, 200)) + X = X.isel(sample=slice(None, 150)) + Y = Y.isel(sample=slice(None, 150)) + + cca.fit(X, Y, "sample") + result = cca.transform(x, y) + assert isinstance(result, list) + assert isinstance(result[0], xr.DataArray) + # Check that unseen data can be transformed + assert result[0].notnull().all() + assert result[1].notnull().all() + + +def test_inverse_transform(cca): + X, Y = generate_well_conditioned_data() + cca.fit(X, Y, "sample") + # Assuming mode as 1 for simplicity + scores1 = cca.data["scores1"].sel(mode=1) + scores2 = cca.data["scores2"].sel(mode=1) + Xrec1, Xrec2 = cca.inverse_transform(scores1, scores2) + assert isinstance(Xrec1, xr.DataArray) + assert isinstance(Xrec2, xr.DataArray) + + +@pytest.mark.parametrize("use_pca", [False, True]) +def test_squared_covariance_fraction(use_pca): + X, Y = generate_well_conditioned_data() + cca = CCA(n_modes=2, use_pca=use_pca, n_pca_modes="all") + cca.fit(X, Y, "sample") + scf = cca.squared_covariance_fraction() + assert isinstance(scf, xr.DataArray) + assert all(scf <= 1), "Squared covariance fraction is greater than 1" + + +@pytest.mark.parametrize("use_pca", [False, True]) +def test_total_squared_covariance(use_pca): + X, Y = generate_well_conditioned_data() + + # Compute total squared covariance + X_ = X.rename({"feature": "x"}) + Y_ = Y.rename({"feature": "y"}) + cov_mat = xr.cov(X_, Y_, dim="sample") + tsc = (cov_mat**2).sum() + + cca = CCA(n_modes=2, use_pca=use_pca, 
n_pca_modes="all") + cca.fit(X, Y, "sample") + tsc_model = cca.data["total_squared_covariance"] + xr.testing.assert_allclose(tsc, tsc_model) + + +def test_fit_different_coordinates(): + """Like a lagged CCA scenario""" + X, Y = generate_well_conditioned_data() + X = X.isel(sample=slice(0, 99)) + Y = Y.isel(sample=slice(100, 199)) + cca = CCA(n_modes=2, use_pca=False) + cca.fit(X, Y, "sample") + r = cca.cross_correlation_coefficients() + # Correlation coefficents are not zero + assert np.all(r > np.finfo(r.dtype).eps) + + +@pytest.mark.parametrize( + "dim", + [(("time",)), (("lat", "lon")), (("lon", "lat"))], +) +def test_components(mock_data_array, dim): + cca = CCA(n_modes=2, use_pca=False) + cca.fit(mock_data_array, mock_data_array, dim) + components1, components2 = cca.components() + feature_dims = tuple(set(mock_data_array.dims) - set(dim)) + assert isinstance(components1, xr.DataArray) + assert isinstance(components2, xr.DataArray) + assert set(components1.dims) == set( + ("mode",) + feature_dims + ), "Components1 does not have the right feature dimensions" + assert set(components2.dims) == set( + ("mode",) + feature_dims + ), "Components2 does not have the right feature dimensions" + + +@pytest.mark.parametrize("shapeX", [(30, 10)]) +@pytest.mark.parametrize("shapeY", [(30, 10), (30, 5), (30, 15)]) +@pytest.mark.parametrize("use_pca", [False, True]) +def test_components_coordinates(shapeX, shapeY, use_pca): + # Test that the components have the right coordinates + X = generate_random_data(shapeX) + Y = generate_random_data(shapeY) + + cca = CCA(n_modes=2, use_pca=use_pca, n_pca_modes="all") + cca.fit(X, Y, "sample") + components1, components2 = cca.components() + xr.testing.assert_equal(components1.coords["feature"], X.coords["feature"]) + xr.testing.assert_equal(components2.coords["feature"], Y.coords["feature"]) + + +@pytest.mark.parametrize("correction", [(None), ("fdr_bh")]) +def test_homogeneous_patterns(correction): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cca = CCA(n_modes=10, use_pca=False) + cca.fit(X, Y, "sample") + + _ = cca.homogeneous_patterns(correction=correction) + + +@pytest.mark.parametrize( + "correction", + [(None), ("fdr_bh")], +) +def test_heterogeneous_patterns(correction): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cca = CCA(n_modes=10, use_pca=False) + cca.fit(X, Y, "sample") + + _ = cca.heterogeneous_patterns(correction=correction) + + +def test_predict(): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cca = CCA(n_modes=10, use_pca=False) + cca.fit(X, Y, "sample") + + Xnew = generate_random_data((200, 10), seed=123) + + Ry_pred = cca.predict(Xnew) + _ = cca.inverse_transform(Y=Ry_pred) + + +@pytest.mark.parametrize("engine", ["netcdf4", "zarr"]) +def test_save_load(tmp_path, engine): + """Test save/load methods in MCA class, ensuring that we can + roundtrip the model and get the same results when transforming + data.""" + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + original = CCA() + original.fit(X, Y, "sample") + + # Save the CCA model + original.save(tmp_path / "cca", engine=engine) + + # Check that the CCA model has been saved + assert (tmp_path / "cca").exists() + + # Recreate the model from saved file + loaded = CCA.load(tmp_path / "cca", engine=engine) + + # Check that the params and DataContainer objects match + assert 
original.get_params() == loaded.get_params() + assert all([key in loaded.data for key in original.data]) + for key in original.data: + if original.data._allow_compute[key]: + assert loaded.data[key].equals(original.data[key]) + else: + # but ensure that input data is not saved by default + assert loaded.data[key].size <= 1 + assert loaded.data[key].attrs["placeholder"] is True + + # Test that the recreated model can be used to transform new data + assert np.allclose( + original.transform(X, Y), + loaded.transform(X, Y), + ) + + # The loaded model should also be able to inverse_transform new data + XYr_o = original.inverse_transform(*original.scores()) + XYr_l = loaded.inverse_transform(*loaded.scores()) + assert np.allclose(XYr_o[0], XYr_l[0]) + assert np.allclose(XYr_o[1], XYr_l[1]) + + +def test_serialize_deserialize_dataarray(mock_data_array): + """Test roundtrip serialization when the model is fit on a DataArray.""" + model = CCA() + model.fit(mock_data_array, mock_data_array, "time") + dt = model.serialize() + rebuilt_model = CCA.deserialize(dt) + assert np.allclose( + model.transform(mock_data_array), rebuilt_model.transform(mock_data_array) + ) + + +def test_serialize_deserialize_dataset(mock_dataset): + """Test roundtrip serialization when the model is fit on a Dataset.""" + model = CCA() + model.fit(mock_dataset, mock_dataset, "time") + dt = model.serialize() + rebuilt_model = CCA.deserialize(dt) + assert np.allclose( + model.transform(mock_dataset), rebuilt_model.transform(mock_dataset) + ) diff --git a/tests/models/cross/test_cpcca.py b/tests/models/cross/test_cpcca.py new file mode 100644 index 00000000..258ff142 --- /dev/null +++ b/tests/models/cross/test_cpcca.py @@ -0,0 +1,341 @@ +import dask.array as da +import numpy as np +import pytest +import xarray as xr + +from xeofs.cross import CPCCA + + +def generate_random_data(shape, lazy=False, seed=142): + rng = np.random.default_rng(seed) + if lazy: + return xr.DataArray( + da.random.random(shape, chunks=(5, 5)), + dims=["sample", "feature"], + coords={"sample": np.arange(shape[0]), "feature": np.arange(shape[1])}, + ) + else: + return xr.DataArray( + rng.random(shape), + dims=["sample", "feature"], + coords={"sample": np.arange(shape[0]), "feature": np.arange(shape[1])}, + ) + + +def generate_well_conditioned_data(lazy=False): + rng = np.random.default_rng(142) + t = np.linspace(0, 50, 200) + std = 0.1 + x1 = np.sin(t)[:, None] + rng.normal(0, std, size=(200, 2)) + x2 = np.sin(t)[:, None] + rng.normal(0, std, size=(200, 3)) + x1[:, 1] = x1[:, 1] ** 2 + x2[:, 1] = x2[:, 1] ** 3 + x2[:, 2] = abs(x2[:, 2]) ** (0.5) + coords_time = np.arange(len(t)) + coords_fx = [1, 2] + coords_fy = [1, 2, 3] + X = xr.DataArray( + x1, + dims=["sample", "feature"], + coords={"sample": coords_time, "feature": coords_fx}, + ) + Y = xr.DataArray( + x2, + dims=["sample", "feature"], + coords={"sample": coords_time, "feature": coords_fy}, + ) + if lazy: + X = X.chunk({"sample": 5, "feature": -1}) + Y = Y.chunk({"sample": 5, "feature": -1}) + return X, Y + else: + return X, Y + + +@pytest.fixture +def cpcca(): + return CPCCA(n_modes=1) + + +def test_initialization(): + model = CPCCA() + assert model is not None + + +def test_fit(cpcca): + X, Y = generate_well_conditioned_data() + cpcca.fit(X, Y, dim="sample") + assert hasattr(cpcca, "preprocessor1") + assert hasattr(cpcca, "preprocessor2") + assert hasattr(cpcca, "data") + + +def test_fit_empty_data(cpcca): + with pytest.raises(ValueError): + cpcca.fit(xr.DataArray(), xr.DataArray(), "time") + + +def 
test_fit_invalid_dims(cpcca): + X, Y = generate_well_conditioned_data() + with pytest.raises(ValueError): + cpcca.fit(X, Y, dim=("invalid_dim1", "invalid_dim2")) + + +def test_transform(cpcca): + X, Y = generate_well_conditioned_data() + cpcca.fit(X, Y, dim="sample") + result = cpcca.transform(X, Y) + assert isinstance(result, list) + assert isinstance(result[0], xr.DataArray) + + +def test_transform_unseen_data(cpcca): + X, Y = generate_well_conditioned_data() + x = X.isel(sample=slice(151, 200)) + y = Y.isel(sample=slice(151, 200)) + X = X.isel(sample=slice(None, 150)) + Y = Y.isel(sample=slice(None, 150)) + + cpcca.fit(X, Y, "sample") + result = cpcca.transform(x, y) + assert isinstance(result, list) + assert isinstance(result[0], xr.DataArray) + # Check that unseen data can be transformed + assert result[0].notnull().all() + assert result[1].notnull().all() + + +def test_inverse_transform(cpcca): + X, Y = generate_well_conditioned_data() + cpcca.fit(X, Y, "sample") + # Assuming mode as 1 for simplicity + scores1 = cpcca.data["scores1"].sel(mode=1) + scores2 = cpcca.data["scores2"].sel(mode=1) + Xrec1, Xrec2 = cpcca.inverse_transform(scores1, scores2) + assert isinstance(Xrec1, xr.DataArray) + assert isinstance(Xrec2, xr.DataArray) + + +@pytest.mark.parametrize( + "alpha,use_pca", + [ + (1.0, False), + (0.5, False), + (0.0, False), + (1.0, True), + (0.5, True), + (0.0, True), + ], +) +def test_squared_covariance_fraction(alpha, use_pca): + X, Y = generate_well_conditioned_data() + cpcca = CPCCA(n_modes=2, alpha=alpha, use_pca=use_pca, n_pca_modes="all") + cpcca.fit(X, Y, "sample") + scf = cpcca.squared_covariance_fraction() + assert isinstance(scf, xr.DataArray) + assert all(scf <= 1), "Squared covariance fraction is greater than 1" + + +@pytest.mark.parametrize( + "alpha,use_pca", + [ + (1.0, False), + (0.5, False), + (0.0, False), + (1.0, True), + (0.5, True), + (0.0, True), + ], +) +def test_total_squared_covariance(alpha, use_pca): + X, Y = generate_well_conditioned_data() + + # Compute total squared covariance + X_ = X.rename({"feature": "x"}) + Y_ = Y.rename({"feature": "y"}) + cov_mat = xr.cov(X_, Y_, dim="sample") + tsc = (cov_mat**2).sum() + + cpcca = CPCCA(n_modes=2, alpha=alpha, use_pca=use_pca, n_pca_modes="all") + cpcca.fit(X, Y, "sample") + tsc_model = cpcca.data["total_squared_covariance"] + xr.testing.assert_allclose(tsc, tsc_model) + + +def test_alpha_integer(): + X, Y = generate_well_conditioned_data() + + cpcca = CPCCA(n_modes=2, alpha=1, use_pca=False) + cpcca.fit(X, Y, "sample") + + +def test_fit_different_coordinates(): + """Like a lagged CCA scenario""" + X, Y = generate_well_conditioned_data() + X = X.isel(sample=slice(0, 99)) + Y = Y.isel(sample=slice(100, 199)) + cpcca = CPCCA(n_modes=2, alpha=1, use_pca=False) + cpcca.fit(X, Y, "sample") + r = cpcca.cross_correlation_coefficients() + # Correlation coefficents are not zero + assert np.all(r > np.finfo(r.dtype).eps) + + +@pytest.mark.parametrize( + "dim", + [ + (("time",)), + (("lat", "lon")), + (("lon", "lat")), + ], +) +def test_components(mock_data_array, dim): + cpcca = CPCCA(n_modes=2, alpha=1, use_pca=False) + cpcca.fit(mock_data_array, mock_data_array, dim) + components1, components2 = cpcca.components() + feature_dims = tuple(set(mock_data_array.dims) - set(dim)) + assert isinstance(components1, xr.DataArray) + assert isinstance(components2, xr.DataArray) + assert set(components1.dims) == set( + ("mode",) + feature_dims + ), "Components1 does not have the right feature dimensions" + assert 
set(components2.dims) == set( + ("mode",) + feature_dims + ), "Components2 does not have the right feature dimensions" + + +@pytest.mark.parametrize( + "shapeX,shapeY,alpha,use_pca", + [ + ((20, 10), (20, 10), 1.0, False), + ((20, 40), (20, 30), 1.0, False), + ((20, 10), (20, 40), 1.0, False), + ((20, 10), (20, 10), 0.5, False), + ((20, 40), (20, 30), 0.5, False), + ((20, 10), (20, 40), 0.5, False), + ((20, 10), (20, 10), 1.0, True), + ((20, 40), (20, 30), 1.0, True), + ((20, 10), (20, 40), 1.0, True), + ((20, 10), (20, 10), 0.5, True), + ((20, 40), (20, 30), 0.5, True), + ((20, 10), (20, 40), 0.5, True), + ], +) +def test_components_coordinates(shapeX, shapeY, alpha, use_pca): + # Test that the components have the right coordinates + X = generate_random_data(shapeX) + Y = generate_random_data(shapeY) + + cpcca = CPCCA(n_modes=2, alpha=alpha, use_pca=use_pca, n_pca_modes="all") + cpcca.fit(X, Y, "sample") + components1, components2 = cpcca.components() + xr.testing.assert_equal(components1.coords["feature"], X.coords["feature"]) + xr.testing.assert_equal(components2.coords["feature"], Y.coords["feature"]) + + +@pytest.mark.parametrize( + "correction", + [(None), ("fdr_bh")], +) +def test_homogeneous_patterns(correction): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cpcca = CPCCA(n_modes=10, alpha=1, use_pca=False) + cpcca.fit(X, Y, "sample") + + _ = cpcca.homogeneous_patterns(correction=correction) + + +@pytest.mark.parametrize( + "correction", + [(None), ("fdr_bh")], +) +def test_heterogeneous_patterns(correction): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cpcca = CPCCA(n_modes=10, alpha=1, use_pca=False) + cpcca.fit(X, Y, "sample") + + _ = cpcca.heterogeneous_patterns(correction=correction) + + +def test_predict(): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cpcca = CPCCA(n_modes=10, alpha=0.2, use_pca=False) + cpcca.fit(X, Y, "sample") + + Xnew = generate_random_data((200, 10), seed=123) + + Ry_pred = cpcca.predict(Xnew) + _ = cpcca.inverse_transform(Y=Ry_pred) + + +@pytest.mark.parametrize("engine", ["netcdf4", "zarr"]) +@pytest.mark.parametrize("alpha", [0.0, 0.5, 1.0]) +def test_save_load(tmp_path, engine, alpha): + """Test save/load methods in MCA class, ensuring that we can + roundtrip the model and get the same results when transforming + data.""" + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + original = CPCCA(alpha=alpha) + original.fit(X, Y, "sample") + + # Save the CPCCA model + original.save(tmp_path / "cpcca", engine=engine) + + # Check that the CPCCA model has been saved + assert (tmp_path / "cpcca").exists() + + # Recreate the model from saved file + loaded = CPCCA.load(tmp_path / "cpcca", engine=engine) + + # Check that the params and DataContainer objects match + assert original.get_params() == loaded.get_params() + assert all([key in loaded.data for key in original.data]) + for key in original.data: + if original.data._allow_compute[key]: + assert loaded.data[key].equals(original.data[key]) + else: + # but ensure that input data is not saved by default + assert loaded.data[key].size <= 1 + assert loaded.data[key].attrs["placeholder"] is True + + # Test that the recreated model can be used to transform new data + assert np.allclose( + original.transform(X, Y), + loaded.transform(X, Y), + ) + + # The loaded model should also be able to inverse_transform 
new data + XYr_o = original.inverse_transform(*original.scores()) + XYr_l = loaded.inverse_transform(*loaded.scores()) + assert np.allclose(XYr_o[0], XYr_l[0]) + assert np.allclose(XYr_o[1], XYr_l[1]) + + +def test_serialize_deserialize_dataarray(mock_data_array): + """Test roundtrip serialization when the model is fit on a DataArray.""" + model = CPCCA() + model.fit(mock_data_array, mock_data_array, "time") + dt = model.serialize() + rebuilt_model = CPCCA.deserialize(dt) + assert np.allclose( + model.transform(mock_data_array), rebuilt_model.transform(mock_data_array) + ) + + +def test_serialize_deserialize_dataset(mock_dataset): + """Test roundtrip serialization when the model is fit on a Dataset.""" + model = CPCCA() + model.fit(mock_dataset, mock_dataset, "time") + dt = model.serialize() + rebuilt_model = CPCCA.deserialize(dt) + assert np.allclose( + model.transform(mock_dataset), rebuilt_model.transform(mock_dataset) + ) diff --git a/tests/models/cross/test_cpcca_complex_rotator.py b/tests/models/cross/test_cpcca_complex_rotator.py new file mode 100644 index 00000000..c41c4d87 --- /dev/null +++ b/tests/models/cross/test_cpcca_complex_rotator.py @@ -0,0 +1,67 @@ +import dask.array as da +import numpy as np +import pytest +import xarray as xr + +from xeofs.cross import HilbertCPCCA, HilbertCPCCARotator + + +def generate_random_data(shape, lazy=False, seed=142): + rng = np.random.default_rng(seed) + if lazy: + return xr.DataArray( + da.random.random(shape, chunks=(5, 5)), # type: ignore + dims=["sample", "feature"], + coords={"sample": np.arange(shape[0]), "feature": np.arange(shape[1])}, + ) + else: + return xr.DataArray( + rng.random(shape), + dims=["sample", "feature"], + coords={"sample": np.arange(shape[0]), "feature": np.arange(shape[1])}, + ) + + +def test_transform_raises(): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cpcca = HilbertCPCCA(n_modes=10, alpha=1, use_pca=False) + cpcca.fit(X, Y, "sample") + + rotator = HilbertCPCCARotator(n_modes=4) + rotator.fit(cpcca) + + with pytest.raises(NotImplementedError): + rotator.transform() + + +@pytest.mark.parametrize( + "alpha,use_pca", + [ + (1.0, False), + (0.5, False), + (0.0, False), + (1.0, True), + (0.5, True), + (0.0, True), + ], +) +def test_squared_covariance_fraction_conserved(alpha, use_pca): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cpcca = HilbertCPCCA(n_modes=10, alpha=alpha, use_pca=use_pca, n_pca_modes="all") + cpcca.fit(X, Y, "sample") + + n_rot_modes = 5 + rotator = HilbertCPCCARotator(n_modes=n_rot_modes, power=1) + rotator.fit(cpcca) + + scf = rotator.squared_covariance_fraction() + scf_rot = rotator.squared_covariance_fraction() + + scf_sum = scf.sel(mode=slice(1, n_rot_modes)).sum() + scf_rot_sum = scf_rot.sel(mode=slice(1, n_rot_modes)).sum() + + xr.testing.assert_allclose(scf_sum, scf_rot_sum) diff --git a/tests/models/cross/test_cpcca_rotator.py b/tests/models/cross/test_cpcca_rotator.py new file mode 100644 index 00000000..c8770699 --- /dev/null +++ b/tests/models/cross/test_cpcca_rotator.py @@ -0,0 +1,112 @@ +import dask.array.random as da +import numpy as np +import pytest +import xarray as xr + +from xeofs.cross import CPCCA, CPCCARotator + + +def generate_random_data(shape, lazy=False, seed=142): + rng = np.random.default_rng(seed) + if lazy: + return xr.DataArray( + da.random(shape, chunks=(5, 5)), + dims=["sample", "feature"], + coords={"sample": np.arange(shape[0]), "feature": 
np.arange(shape[1])}, + ) + else: + return xr.DataArray( + rng.random(shape), + dims=["sample", "feature"], + coords={"sample": np.arange(shape[0]), "feature": np.arange(shape[1])}, + ) + + +@pytest.mark.parametrize( + "correction", + [(None), ("fdr_bh")], +) +def test_homogeneous_patterns(correction): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cpcca = CPCCA(n_modes=10, alpha=1, use_pca=False) + cpcca.fit(X, Y, "sample") + + rotator = CPCCARotator(n_modes=4) + rotator.fit(cpcca) + + _ = rotator.homogeneous_patterns(correction=correction) + + +@pytest.mark.parametrize( + "correction", + [(None), ("fdr_bh")], +) +def test_heterogeneous_patterns(correction): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cpcca = CPCCA(n_modes=10, alpha=1, use_pca=False) + cpcca.fit(X, Y, "sample") + + rotator = CPCCARotator(n_modes=4) + rotator.fit(cpcca) + + _ = rotator.heterogeneous_patterns(correction=correction) + + +@pytest.mark.parametrize( + "alpha,use_pca", + [ + (1.0, False), + (0.5, False), + (0.0, False), + (1.0, True), + (0.5, True), + (0.0, True), + ], +) +def test_squared_covariance_fraction(alpha, use_pca): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cpcca = CPCCA(n_modes=10, alpha=alpha, use_pca=use_pca, n_pca_modes="all") + cpcca.fit(X, Y, "sample") + rotator = CPCCARotator(n_modes=10) + rotator.fit(cpcca) + + scf = rotator.squared_covariance_fraction() + assert isinstance(scf, xr.DataArray) + assert all(scf <= 1), "Squared covariance fraction is greater than 1" + + +@pytest.mark.parametrize( + "alpha,use_pca", + [ + (1.0, False), + (0.5, False), + (0.0, False), + (1.0, True), + (0.5, True), + (0.0, True), + ], +) +def test_squared_covariance_fraction_conserved(alpha, use_pca): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + cpcca = CPCCA(n_modes=10, alpha=alpha, use_pca=use_pca, n_pca_modes="all") + cpcca.fit(X, Y, "sample") + + n_rot_modes = 5 + rotator = CPCCARotator(n_modes=n_rot_modes, power=1) + rotator.fit(cpcca) + + scf = rotator.squared_covariance_fraction() + scf_rot = rotator.squared_covariance_fraction() + + scf_sum = scf.sel(mode=slice(1, n_rot_modes)).sum() + scf_rot_sum = scf_rot.sel(mode=slice(1, n_rot_modes)).sum() + + xr.testing.assert_allclose(scf_sum, scf_rot_sum) diff --git a/tests/models/test_complex_mca.py b/tests/models/cross/test_hilbert_mca.py similarity index 82% rename from tests/models/test_complex_mca.py rename to tests/models/cross/test_hilbert_mca.py index 32451164..5bd07338 100644 --- a/tests/models/test_complex_mca.py +++ b/tests/models/cross/test_hilbert_mca.py @@ -1,7 +1,7 @@ import pytest import xarray as xr -from xeofs.models import HilbertMCA +from xeofs.cross import HilbertMCA @pytest.fixture @@ -29,21 +29,6 @@ def test_fit(mca_model, mock_data_array, dim): assert hasattr(mca_model, "data") -@pytest.mark.parametrize( - "dim", - [ - (("time",)), - (("lat", "lon")), - (("lon", "lat")), - ], -) -def test_squared_covariance(mca_model, mock_data_array, dim): - mca_model.fit(mock_data_array, mock_data_array, dim) - squared_covariance = mca_model.squared_covariance() - assert isinstance(squared_covariance, xr.DataArray) - assert (squared_covariance > 0).all() - - @pytest.mark.parametrize( "dim", [ @@ -57,23 +42,9 @@ def test_squared_covariance_fraction(mca_model, mock_data_array, dim): squared_covariance_fraction = 
mca_model.squared_covariance_fraction() assert isinstance(squared_covariance_fraction, xr.DataArray) assert (squared_covariance_fraction > 0).all() - assert squared_covariance_fraction.sum("mode") <= 1 - - -@pytest.mark.parametrize( - "dim", - [ - (("time",)), - (("lat", "lon")), - (("lon", "lat")), - ], -) -def test_singular_values(mca_model, mock_data_array, dim): - mca_model.fit(mock_data_array, mock_data_array, dim) - n_modes = mca_model.get_params()["n_modes"] - svals = mca_model.singular_values() - assert isinstance(svals, xr.DataArray) - assert svals.size == n_modes + assert all( + squared_covariance_fraction <= 1 + ), "Squared covariance fraction is greater than 1" @pytest.mark.parametrize( @@ -86,9 +57,9 @@ def test_singular_values(mca_model, mock_data_array, dim): ) def test_covariance_fraction(mca_model, mock_data_array, dim): mca_model.fit(mock_data_array, mock_data_array, dim) - cf = mca_model.covariance_fraction() + cf = mca_model.covariance_fraction_CD95() assert isinstance(cf, xr.DataArray) - assert cf.sum("mode") <= 1.00001, "Covariance fraction is greater than 1" + assert all(cf <= 1), "Squared covariance fraction is greater than 1" @pytest.mark.parametrize( @@ -231,18 +202,6 @@ def test_transform_not_implemented(mca_model, mock_data_array, dim): mca_model.transform(mock_data_array, mock_data_array) -def test_homogeneous_patterns_not_implemented(): - mca = HilbertMCA() - with pytest.raises(NotImplementedError): - mca.homogeneous_patterns() - - -def test_heterogeneous_patterns_not_implemented(): - mca = HilbertMCA() - with pytest.raises(NotImplementedError): - mca.heterogeneous_patterns() - - @pytest.mark.parametrize( "dim", [ diff --git a/tests/models/test_complex_mca_rotator.py b/tests/models/cross/test_hilbert_mca_rotator.py similarity index 80% rename from tests/models/test_complex_mca_rotator.py rename to tests/models/cross/test_hilbert_mca_rotator.py index 6da518da..7ef22bf6 100644 --- a/tests/models/test_complex_mca_rotator.py +++ b/tests/models/cross/test_hilbert_mca_rotator.py @@ -3,7 +3,7 @@ import xarray as xr # Import the classes from your modules -from xeofs.models import HilbertMCA, HilbertMCARotator +from xeofs.cross import HilbertMCA, HilbertMCARotator @pytest.fixture @@ -26,7 +26,6 @@ def test_init(): assert mca_rotator._params["power"] == 1 assert mca_rotator._params["max_iter"] == 1000 assert mca_rotator._params["rtol"] == 1e-8 - assert mca_rotator._params["squared_loadings"] is False @pytest.mark.parametrize( @@ -58,7 +57,7 @@ def test_transform(mca_model, mock_data_array): mca_rotator.fit(mca_model) with pytest.raises(NotImplementedError): - mca_rotator.transform(data1=mock_data_array, data2=mock_data_array) + mca_rotator.transform(X=mock_data_array, Y=mock_data_array) @pytest.mark.parametrize( @@ -78,24 +77,10 @@ def test_inverse_transform(mca_model): reconstructed_data = mca_rotator.inverse_transform(scores1, scores2) - assert isinstance(reconstructed_data, tuple) + assert isinstance(reconstructed_data, list) assert len(reconstructed_data) == 2 -@pytest.mark.parametrize( - "dim", - [ - (("time",)), - (("lat", "lon")), - (("lon", "lat")), - ], -) -def test_squared_covariance(mca_model, mock_data_array, dim): - mca_model.fit(mock_data_array, mock_data_array, dim) - squared_covariance = mca_model.squared_covariance() - assert isinstance(squared_covariance, xr.DataArray) - - @pytest.mark.parametrize( "dim", [ @@ -110,23 +95,6 @@ def test_squared_covariance_fraction(mca_model, mock_data_array, dim): assert isinstance(squared_covariance_fraction, 
xr.DataArray) -@pytest.mark.parametrize( - "dim", - [ - (("time",)), - (("lat", "lon")), - (("lon", "lat")), - ], -) -def test_singular_values(mca_model): - mca_rotator = HilbertMCARotator(n_modes=4) - mca_rotator.fit(mca_model) - n_modes = mca_rotator.get_params()["n_modes"] - svals = mca_rotator.singular_values() - assert isinstance(svals, xr.DataArray) - assert svals.size == n_modes - - @pytest.mark.parametrize( "dim", [ @@ -138,9 +106,9 @@ def test_singular_values(mca_model): def test_covariance_fraction(mca_model): mca_rotator = HilbertMCARotator(n_modes=4) mca_rotator.fit(mca_model) - cf = mca_rotator.covariance_fraction() + cf = mca_rotator.covariance_fraction_CD95() assert isinstance(cf, xr.DataArray) - assert cf.sum("mode") <= 1.00001, "Covariance fraction is greater than 1" + assert all(cf <= 1), "Squared covariance fraction is greater than 1" @pytest.mark.parametrize( @@ -192,8 +160,7 @@ def test_scores(mca_model, mock_data_array, dim): def test_homogeneous_patterns(mca_model, mock_data_array, dim): mca_rotator = HilbertMCARotator(n_modes=2) mca_rotator.fit(mca_model) - with pytest.raises(NotImplementedError): - _ = mca_rotator.homogeneous_patterns() + mca_rotator.homogeneous_patterns() @pytest.mark.parametrize( @@ -207,8 +174,7 @@ def test_homogeneous_patterns(mca_model, mock_data_array, dim): def test_heterogeneous_patterns(mca_model, mock_data_array, dim): mca_rotator = HilbertMCARotator(n_modes=2) mca_rotator.fit(mca_model) - with pytest.raises(NotImplementedError): - _ = mca_rotator.heterogeneous_patterns() + mca_rotator.heterogeneous_patterns() @pytest.mark.parametrize( diff --git a/tests/models/test_mca.py b/tests/models/cross/test_mca.py similarity index 90% rename from tests/models/test_mca.py rename to tests/models/cross/test_mca.py index 09aba4d7..3d063685 100644 --- a/tests/models/test_mca.py +++ b/tests/models/cross/test_mca.py @@ -1,9 +1,10 @@ -import pytest import numpy as np +import pytest import xarray as xr -from xeofs.models.mca import MCA -from ..utilities import data_is_dask +from xeofs.cross import MCA + +from ...utilities import data_is_dask @pytest.fixture @@ -99,7 +100,7 @@ def test_fit_with_dataarray_list(mca_model, mock_data_array_list, dim): ) def test_transform(mca_model, mock_data_array, dim): mca_model.fit(mock_data_array, mock_data_array, dim) - result = mca_model.transform(data1=mock_data_array, data2=mock_data_array) + result = mca_model.transform(X=mock_data_array, Y=mock_data_array) assert isinstance(result, list) assert isinstance(result[0], xr.DataArray) @@ -110,7 +111,7 @@ def test_transform_unseen_data(mca_model, mock_data_array, dim): data_unseen = mock_data_array.isel(time=slice(21, None)) mca_model.fit(data, data, dim) - result = mca_model.transform(data1=data_unseen, data2=data_unseen) + result = mca_model.transform(X=data_unseen, Y=data_unseen) assert isinstance(result, list) assert isinstance(result[0], xr.DataArray) # Check that unseen data can be transformed @@ -136,20 +137,6 @@ def test_inverse_transform(mca_model, mock_data_array, dim): assert isinstance(Xrec2, xr.DataArray) -@pytest.mark.parametrize( - "dim", - [ - (("time",)), - (("lat", "lon")), - (("lon", "lat")), - ], -) -def test_squared_covariance(mca_model, mock_data_array, dim): - mca_model.fit(mock_data_array, mock_data_array, dim) - squared_covariance = mca_model.squared_covariance() - assert isinstance(squared_covariance, xr.DataArray) - - @pytest.mark.parametrize( "dim", [ @@ -162,23 +149,7 @@ def test_squared_covariance_fraction(mca_model, mock_data_array, dim): 
mca_model.fit(mock_data_array, mock_data_array, dim) scf = mca_model.squared_covariance_fraction() assert isinstance(scf, xr.DataArray) - assert scf.sum("mode") <= 1.00001, "Squared covariance fraction is greater than 1" - - -@pytest.mark.parametrize( - "dim", - [ - (("time",)), - (("lat", "lon")), - (("lon", "lat")), - ], -) -def test_singular_values(mca_model, mock_data_array, dim): - mca_model.fit(mock_data_array, mock_data_array, dim) - n_modes = mca_model.get_params()["n_modes"] - svals = mca_model.singular_values() - assert isinstance(svals, xr.DataArray) - assert svals.size == n_modes + assert all(scf <= 1), "Squared covariance fraction is greater than 1" @pytest.mark.parametrize( @@ -191,9 +162,9 @@ def test_singular_values(mca_model, mock_data_array, dim): ) def test_covariance_fraction(mca_model, mock_data_array, dim): mca_model.fit(mock_data_array, mock_data_array, dim) - cf = mca_model.covariance_fraction() + cf = mca_model.covariance_fraction_CD95() assert isinstance(cf, xr.DataArray) - assert cf.sum("mode") <= 1.00001, "Covariance fraction is greater than 1" + assert all(cf <= 1), "Squared covariance fraction is greater than 1" @pytest.mark.parametrize( @@ -383,16 +354,16 @@ def test_heterogeneous_patterns(mca_model, mock_data_array, dim): ], ) def test_compute(mock_dask_data_array, dim, compute): - mca_model = MCA(n_modes=10, compute=compute) + mca_model = MCA(n_modes=10, compute=compute, n_pca_modes=10) mca_model.fit(mock_dask_data_array, mock_dask_data_array, (dim)) if compute: - assert not data_is_dask(mca_model.data["squared_covariance"]) + assert not data_is_dask(mca_model.data["singular_values"]) assert not data_is_dask(mca_model.data["components1"]) assert not data_is_dask(mca_model.data["components2"]) else: - assert data_is_dask(mca_model.data["squared_covariance"]) + assert data_is_dask(mca_model.data["singular_values"]) assert data_is_dask(mca_model.data["components1"]) assert data_is_dask(mca_model.data["components2"]) diff --git a/tests/models/test_mca_rotator.py b/tests/models/cross/test_mca_rotator.py similarity index 87% rename from tests/models/test_mca_rotator.py rename to tests/models/cross/test_mca_rotator.py index 61198549..f467d331 100644 --- a/tests/models/test_mca_rotator.py +++ b/tests/models/cross/test_mca_rotator.py @@ -1,22 +1,23 @@ -import pytest import numpy as np +import pytest import xarray as xr # Import the classes from your modules -from xeofs.models import MCA, MCARotator -from ..utilities import data_is_dask +from xeofs.cross import MCA, MCARotator + +from ...utilities import data_is_dask @pytest.fixture def mca_model(mock_data_array, dim): - mca = MCA(n_modes=5) + mca = MCA(n_modes=5, use_pca=False) mca.fit(mock_data_array, mock_data_array, dim) return mca @pytest.fixture def mca_model_delayed(mock_dask_data_array, dim): - mca = MCA(n_modes=5, compute=False, check_nans=False) + mca = MCA(n_modes=5, compute=False, check_nans=False, use_pca=False) mca.fit(mock_dask_data_array, mock_dask_data_array, dim) return mca @@ -27,7 +28,7 @@ def test_init(): assert mca_rotator._params["power"] == 1 assert mca_rotator._params["max_iter"] == 1000 assert mca_rotator._params["rtol"] == 1e-8 - assert mca_rotator._params["squared_loadings"] is False + # assert mca_rotator._params["squared_loadings"] is False @pytest.mark.parametrize( @@ -58,7 +59,7 @@ def test_transform(mca_model, mock_data_array): mca_rotator = MCARotator(n_modes=4) mca_rotator.fit(mca_model) - projections = mca_rotator.transform(data1=mock_data_array, data2=mock_data_array) + 
projections = mca_rotator.transform(X=mock_data_array, Y=mock_data_array) assert len(projections) == 2 @@ -80,25 +81,10 @@ def test_inverse_transform(mca_model): reconstructed_data = mca_rotator.inverse_transform(scores1, scores2) - assert isinstance(reconstructed_data, tuple) + assert isinstance(reconstructed_data, list) assert len(reconstructed_data) == 2 -@pytest.mark.parametrize( - "dim", - [ - (("time",)), - (("lat", "lon")), - (("lon", "lat")), - ], -) -def test_squared_covariance(mca_model, mock_data_array, dim): - mca_rotator = MCARotator(n_modes=4) - mca_rotator.fit(mca_model) - covariance_fraction = mca_rotator.squared_covariance() - assert isinstance(covariance_fraction, xr.DataArray) - - @pytest.mark.parametrize( "dim", [ @@ -114,23 +100,6 @@ def test_squared_covariance_fraction(mca_model, mock_data_array, dim): assert isinstance(squared_covariance_fraction, xr.DataArray) -@pytest.mark.parametrize( - "dim", - [ - (("time",)), - (("lat", "lon")), - (("lon", "lat")), - ], -) -def test_singular_values(mca_model): - mca_rotator = MCARotator(n_modes=4) - mca_rotator.fit(mca_model) - n_modes = mca_rotator.get_params()["n_modes"] - svals = mca_rotator.singular_values() - assert isinstance(svals, xr.DataArray) - assert svals.size == n_modes - - @pytest.mark.parametrize( "dim", [ @@ -142,9 +111,9 @@ def test_singular_values(mca_model): def test_covariance_fraction(mca_model): mca_rotator = MCARotator(n_modes=4) mca_rotator.fit(mca_model) - cf = mca_rotator.covariance_fraction() + cf = mca_rotator.covariance_fraction_CD95() assert isinstance(cf, xr.DataArray) - assert cf.sum("mode") <= 1.00001, "Covariance fraction is greater than 1" + assert all(cf <= 1), "Squared covariance fraction is greater than 1" @pytest.mark.parametrize( diff --git a/tests/models/cross/test_rda.py b/tests/models/cross/test_rda.py new file mode 100644 index 00000000..d582d30b --- /dev/null +++ b/tests/models/cross/test_rda.py @@ -0,0 +1,292 @@ +import dask.array as da +import numpy as np +import pytest +import xarray as xr + +from xeofs.cross import RDA + + +def generate_random_data(shape, lazy=False, seed=142): + rng = np.random.default_rng(seed) + if lazy: + return xr.DataArray( + da.random.random(shape, chunks=(5, 5)), + dims=["sample", "feature"], + coords={"sample": np.arange(shape[0]), "feature": np.arange(shape[1])}, + ) + else: + return xr.DataArray( + rng.random(shape), + dims=["sample", "feature"], + coords={"sample": np.arange(shape[0]), "feature": np.arange(shape[1])}, + ) + + +def generate_well_conditioned_data(lazy=False): + rng = np.random.default_rng(142) + t = np.linspace(0, 50, 200) + std = 0.1 + x1 = np.sin(t)[:, None] + rng.normal(0, std, size=(200, 2)) + x2 = np.sin(t)[:, None] + rng.normal(0, std, size=(200, 3)) + x1[:, 1] = x1[:, 1] ** 2 + x2[:, 1] = x2[:, 1] ** 3 + x2[:, 2] = abs(x2[:, 2]) ** (0.5) + coords_time = np.arange(len(t)) + coords_fx = [1, 2] + coords_fy = [1, 2, 3] + X = xr.DataArray( + x1, + dims=["sample", "feature"], + coords={"sample": coords_time, "feature": coords_fx}, + ) + Y = xr.DataArray( + x2, + dims=["sample", "feature"], + coords={"sample": coords_time, "feature": coords_fy}, + ) + if lazy: + X = X.chunk({"sample": 5, "feature": -1}) + Y = Y.chunk({"sample": 5, "feature": -1}) + return X, Y + else: + return X, Y + + +@pytest.fixture +def rda(): + return RDA(n_modes=1) + + +def test_initialization(): + model = RDA() + assert model is not None + + +def test_fit(rda): + X, Y = generate_well_conditioned_data() + rda.fit(X, Y, dim="sample") + assert hasattr(rda, 
"preprocessor1") + assert hasattr(rda, "preprocessor2") + assert hasattr(rda, "data") + + +def test_fit_empty_data(rda): + with pytest.raises(ValueError): + rda.fit(xr.DataArray(), xr.DataArray(), "time") + + +def test_fit_invalid_dims(rda): + X, Y = generate_well_conditioned_data() + with pytest.raises(ValueError): + rda.fit(X, Y, dim=("invalid_dim1", "invalid_dim2")) + + +def test_transform(rda): + X, Y = generate_well_conditioned_data() + rda.fit(X, Y, dim="sample") + result = rda.transform(X, Y) + assert isinstance(result, list) + assert isinstance(result[0], xr.DataArray) + + +def test_transform_unseen_data(rda): + X, Y = generate_well_conditioned_data() + x = X.isel(sample=slice(151, 200)) + y = Y.isel(sample=slice(151, 200)) + X = X.isel(sample=slice(None, 150)) + Y = Y.isel(sample=slice(None, 150)) + + rda.fit(X, Y, "sample") + result = rda.transform(x, y) + assert isinstance(result, list) + assert isinstance(result[0], xr.DataArray) + # Check that unseen data can be transformed + assert result[0].notnull().all() + assert result[1].notnull().all() + + +def test_inverse_transform(rda): + X, Y = generate_well_conditioned_data() + rda.fit(X, Y, "sample") + # Assuming mode as 1 for simplicity + scores1 = rda.data["scores1"].sel(mode=1) + scores2 = rda.data["scores2"].sel(mode=1) + Xrec1, Xrec2 = rda.inverse_transform(scores1, scores2) + assert isinstance(Xrec1, xr.DataArray) + assert isinstance(Xrec2, xr.DataArray) + + +@pytest.mark.parametrize("use_pca", [False, True]) +def test_squared_covariance_fraction(use_pca): + X, Y = generate_well_conditioned_data() + rda = RDA(n_modes=2, use_pca=use_pca, n_pca_modes="all") + rda.fit(X, Y, "sample") + scf = rda.squared_covariance_fraction() + assert isinstance(scf, xr.DataArray) + assert all(scf <= 1), "Squared covariance fraction is greater than 1" + + +@pytest.mark.parametrize("use_pca", [False, True]) +def test_total_squared_covariance(use_pca): + X, Y = generate_well_conditioned_data() + + # Compute total squared covariance + X_ = X.rename({"feature": "x"}) + Y_ = Y.rename({"feature": "y"}) + cov_mat = xr.cov(X_, Y_, dim="sample") + tsc = (cov_mat**2).sum() + + rda = RDA(n_modes=2, use_pca=use_pca, n_pca_modes="all") + rda.fit(X, Y, "sample") + tsc_model = rda.data["total_squared_covariance"] + xr.testing.assert_allclose(tsc, tsc_model) + + +def test_fit_different_coordinates(): + """Like a lagged RDA scenario""" + X, Y = generate_well_conditioned_data() + X = X.isel(sample=slice(0, 99)) + Y = Y.isel(sample=slice(100, 199)) + rda = RDA(n_modes=2, use_pca=False) + rda.fit(X, Y, "sample") + r = rda.cross_correlation_coefficients() + # Correlation coefficents are not zero + assert np.all(r > np.finfo(r.dtype).eps) + + +@pytest.mark.parametrize( + "dim", + [(("time",)), (("lat", "lon")), (("lon", "lat"))], +) +def test_components(mock_data_array, dim): + rda = RDA(n_modes=2, use_pca=False) + rda.fit(mock_data_array, mock_data_array, dim) + components1, components2 = rda.components() + feature_dims = tuple(set(mock_data_array.dims) - set(dim)) + assert isinstance(components1, xr.DataArray) + assert isinstance(components2, xr.DataArray) + assert set(components1.dims) == set( + ("mode",) + feature_dims + ), "Components1 does not have the right feature dimensions" + assert set(components2.dims) == set( + ("mode",) + feature_dims + ), "Components2 does not have the right feature dimensions" + + +@pytest.mark.parametrize("shapeX", [(30, 10)]) +@pytest.mark.parametrize("shapeY", [(30, 10), (30, 5), (30, 15)]) +@pytest.mark.parametrize("use_pca", 
[False, True]) +def test_components_coordinates(shapeX, shapeY, use_pca): + # Test that the components have the right coordinates + X = generate_random_data(shapeX) + Y = generate_random_data(shapeY) + + rda = RDA(n_modes=2, use_pca=use_pca, n_pca_modes="all") + rda.fit(X, Y, "sample") + components1, components2 = rda.components() + xr.testing.assert_equal(components1.coords["feature"], X.coords["feature"]) + xr.testing.assert_equal(components2.coords["feature"], Y.coords["feature"]) + + +@pytest.mark.parametrize("correction", [(None), ("fdr_bh")]) +def test_homogeneous_patterns(correction): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + rda = RDA(n_modes=10, use_pca=False) + rda.fit(X, Y, "sample") + + _ = rda.homogeneous_patterns(correction=correction) + + +@pytest.mark.parametrize( + "correction", + [(None), ("fdr_bh")], +) +def test_heterogeneous_patterns(correction): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + rda = RDA(n_modes=10, use_pca=False) + rda.fit(X, Y, "sample") + + _ = rda.heterogeneous_patterns(correction=correction) + + +def test_predict(): + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + rda = RDA(n_modes=10, use_pca=False) + rda.fit(X, Y, "sample") + + Xnew = generate_random_data((200, 10), seed=123) + + Ry_pred = rda.predict(Xnew) + _ = rda.inverse_transform(Y=Ry_pred) + + +@pytest.mark.parametrize("engine", ["netcdf4", "zarr"]) +def test_save_load(tmp_path, engine): + """Test save/load methods in MCA class, ensuring that we can + roundtrip the model and get the same results when transforming + data.""" + X = generate_random_data((200, 10), seed=123) + Y = generate_random_data((200, 20), seed=321) + + original = RDA() + original.fit(X, Y, "sample") + + # Save the RDA model + original.save(tmp_path / "rda", engine=engine) + + # Check that the RDA model has been saved + assert (tmp_path / "rda").exists() + + # Recreate the model from saved file + loaded = RDA.load(tmp_path / "rda", engine=engine) + + # Check that the params and DataContainer objects match + assert original.get_params() == loaded.get_params() + assert all([key in loaded.data for key in original.data]) + for key in original.data: + if original.data._allow_compute[key]: + assert loaded.data[key].equals(original.data[key]) + else: + # but ensure that input data is not saved by default + assert loaded.data[key].size <= 1 + assert loaded.data[key].attrs["placeholder"] is True + + # Test that the recreated model can be used to transform new data + assert np.allclose( + original.transform(X, Y), + loaded.transform(X, Y), + ) + + # The loaded model should also be able to inverse_transform new data + XYr_o = original.inverse_transform(*original.scores()) + XYr_l = loaded.inverse_transform(*loaded.scores()) + assert np.allclose(XYr_o[0], XYr_l[0]) + assert np.allclose(XYr_o[1], XYr_l[1]) + + +def test_serialize_deserialize_dataarray(mock_data_array): + """Test roundtrip serialization when the model is fit on a DataArray.""" + model = RDA() + model.fit(mock_data_array, mock_data_array, "time") + dt = model.serialize() + rebuilt_model = RDA.deserialize(dt) + assert np.allclose( + model.transform(mock_data_array), rebuilt_model.transform(mock_data_array) + ) + + +def test_serialize_deserialize_dataset(mock_dataset): + """Test roundtrip serialization when the model is fit on a Dataset.""" + model = RDA() + model.fit(mock_dataset, mock_dataset, "time") + dt = 
model.serialize() + rebuilt_model = RDA.deserialize(dt) + assert np.allclose( + model.transform(mock_dataset), rebuilt_model.transform(mock_dataset) + ) diff --git a/tests/models/multi/__init__.py b/tests/models/multi/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/models/test_cca.py b/tests/models/multi/test_cca.py similarity index 97% rename from tests/models/test_cca.py rename to tests/models/multi/test_cca.py index ea77c2af..038862d0 100644 --- a/tests/models/test_cca.py +++ b/tests/models/multi/test_cca.py @@ -1,6 +1,6 @@ import pytest -from xeofs.models.cca import CCA +from xeofs.multi import CCA @pytest.mark.parametrize( diff --git a/tests/models/single/__init__.py b/tests/models/single/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/models/test_eeof.py b/tests/models/single/test_eeof.py similarity index 99% rename from tests/models/test_eeof.py rename to tests/models/single/test_eeof.py index 1c150a2c..680a01cf 100644 --- a/tests/models/test_eeof.py +++ b/tests/models/single/test_eeof.py @@ -1,8 +1,8 @@ import numpy as np -import xarray as xr import pytest +import xarray as xr -from xeofs.models.eeof import ExtendedEOF +from xeofs.single import ExtendedEOF def test_init(): diff --git a/tests/models/test_eof.py b/tests/models/single/test_eof.py similarity index 99% rename from tests/models/test_eof.py rename to tests/models/single/test_eof.py index d1403e80..c7373152 100644 --- a/tests/models/test_eof.py +++ b/tests/models/single/test_eof.py @@ -1,8 +1,8 @@ import numpy as np -import xarray as xr import pytest +import xarray as xr -from xeofs.models.eof import EOF +from xeofs.single import EOF def test_init(): diff --git a/tests/models/test_eof_rotator.py b/tests/models/single/test_eof_rotator.py similarity index 98% rename from tests/models/test_eof_rotator.py rename to tests/models/single/test_eof_rotator.py index 91eaaa27..b8e84245 100644 --- a/tests/models/test_eof_rotator.py +++ b/tests/models/single/test_eof_rotator.py @@ -1,10 +1,11 @@ -import pytest import numpy as np +import pytest import xarray as xr -from xeofs.models import EOF, EOFRotator from xeofs.data_container import DataContainer -from ..utilities import data_is_dask +from xeofs.single import EOF, EOFRotator + +from ...utilities import data_is_dask @pytest.fixture diff --git a/tests/models/test_gwpca.py b/tests/models/single/test_gwpca.py similarity index 93% rename from tests/models/test_gwpca.py rename to tests/models/single/test_gwpca.py index 8b3fc353..c7431a67 100644 --- a/tests/models/test_gwpca.py +++ b/tests/models/single/test_gwpca.py @@ -1,6 +1,6 @@ import pytest -import xeofs as xe +import xeofs as xe # ============================================================================= # GENERALLY VALID TEST CASES @@ -31,7 +31,7 @@ [("bisquare"), ("gaussian"), ("exponential")], ) def test_fit(mock_data_array, kernel): - gwpca = xe.models.GWPCA( + gwpca = xe.single.GWPCA( n_modes=2, metric="haversine", kernel=kernel, bandwidth=5000 ) gwpca.fit(mock_data_array, dim=("lat", "lon")) @@ -49,4 +49,4 @@ def test_fit(mock_data_array, kernel): ) def test_fit_invalid(mock_data_array, metric, kernel, bandwidth): with pytest.raises(ValueError): - xe.models.GWPCA(n_modes=2, metric=metric, kernel=kernel, bandwidth=bandwidth) + xe.single.GWPCA(n_modes=2, metric=metric, kernel=kernel, bandwidth=bandwidth) diff --git a/tests/models/test_complex_eof.py b/tests/models/single/test_hilbert_eof.py similarity index 98% rename from tests/models/test_complex_eof.py rename to 
tests/models/single/test_hilbert_eof.py index 1235df85..ae7b8256 100644 --- a/tests/models/test_complex_eof.py +++ b/tests/models/single/test_hilbert_eof.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from xeofs.models import HilbertEOF +from xeofs.single import HilbertEOF warnings.filterwarnings("ignore", message="numpy.dtype size changed") warnings.filterwarnings("ignore", message="numpy.ufunc size changed") diff --git a/tests/models/test_complex_eof_rotator.py b/tests/models/single/test_hilbert_eof_rotator.py similarity index 98% rename from tests/models/test_complex_eof_rotator.py rename to tests/models/single/test_hilbert_eof_rotator.py index e0d46b6a..9fc3bd3c 100644 --- a/tests/models/test_complex_eof_rotator.py +++ b/tests/models/single/test_hilbert_eof_rotator.py @@ -2,7 +2,7 @@ import xarray as xr from xeofs.data_container import DataContainer -from xeofs.models import HilbertEOF, HilbertEOFRotator +from xeofs.single import HilbertEOF, HilbertEOFRotator @pytest.fixture diff --git a/tests/models/test_opa.py b/tests/models/single/test_opa.py similarity index 99% rename from tests/models/test_opa.py rename to tests/models/single/test_opa.py index 57991bbb..d1781c93 100644 --- a/tests/models/test_opa.py +++ b/tests/models/single/test_opa.py @@ -1,8 +1,8 @@ import numpy as np -import xarray as xr import pytest +import xarray as xr -from xeofs.models import OPA +from xeofs.single import OPA @pytest.fixture diff --git a/tests/models/single/test_pop.py b/tests/models/single/test_pop.py new file mode 100644 index 00000000..b42e9748 --- /dev/null +++ b/tests/models/single/test_pop.py @@ -0,0 +1,219 @@ +import numpy as np +import pytest +import xarray as xr + +from xeofs.single import POP + + +def test_init(): + """Tests the initialization of the POP class""" + pop = POP(n_modes=5, standardize=True, use_coslat=True) + + # Assert preprocessor has been initialized + assert hasattr(pop, "_params") + assert hasattr(pop, "preprocessor") + assert hasattr(pop, "whitener") + + +def test_fit(mock_data_array): + pop = POP() + pop.fit(mock_data_array, "time") + + +def test_eigenvalues(mock_data_array): + pop = POP() + pop.fit(mock_data_array, "time") + + eigenvalues = pop.eigenvalues() + assert isinstance(eigenvalues, xr.DataArray) + + +def test_damping_times(mock_data_array): + pop = POP() + pop.fit(mock_data_array, "time") + + times = pop.damping_times() + assert isinstance(times, xr.DataArray) + + +def test_periods(mock_data_array): + pop = POP() + pop.fit(mock_data_array, "time") + + periods = pop.periods() + assert isinstance(periods, xr.DataArray) + + +def test_components(mock_data_array): + """Tests the components method of the POP class""" + sample_dim = ("time",) + pop = POP() + pop.fit(mock_data_array, sample_dim) + + # Test components method + components = pop.components() + feature_dims = tuple(set(mock_data_array.dims) - set(sample_dim)) + assert isinstance(components, xr.DataArray), "Components is not a DataArray" + assert set(components.dims) == set( + ("mode",) + feature_dims + ), "Components does not have the right feature dimensions" + + +def test_scores(mock_data_array): + """Tests the scores method of the POP class""" + sample_dim = ("time",) + pop = POP() + pop.fit(mock_data_array, sample_dim) + + # Test scores method + scores = pop.scores() + assert isinstance(scores, xr.DataArray), "Scores is not a DataArray" + assert set(scores.dims) == set( + (sample_dim + ("mode",)) + ), "Scores does not have the right dimensions" + + +def test_transform(mock_data_array): + """Test 
projecting new unseen data onto the POPs""" + dim = ("time",) + data = mock_data_array.isel({dim[0]: slice(1, None)}) + new_data = mock_data_array.isel({dim[0]: slice(0, 1)}) + + # Create a xarray DataArray with random data + model = POP(n_modes=2, solver="full") + model.fit(data, dim) + scores = model.scores() + + # Project data onto the components + projections = model.transform(data) + + # Check that the projection has the right dimensions + assert projections.dims == scores.dims, "Projection has wrong dimensions" # type: ignore + + # Check that the projection has the right data type + assert isinstance(projections, xr.DataArray), "Projection is not a DataArray" + + # Check that the projection has the right name + assert projections.name == "scores", "Projection has wrong name: {}".format( + projections.name + ) + + # Check that the projection's data is the same as the scores + np.testing.assert_allclose( + scores.sel(mode=slice(1, 3)), projections.sel(mode=slice(1, 3)), rtol=1e-3 + ) + + # Project unseen data onto the components + new_projections = model.transform(new_data) + + # Check that the projection has the right dimensions + assert new_projections.dims == scores.dims, "Projection has wrong dimensions" # type: ignore + + # Check that the projection has the right data type + assert isinstance(new_projections, xr.DataArray), "Projection is not a DataArray" + + # Check that the projection has the right name + assert new_projections.name == "scores", "Projection has wrong name: {}".format( + new_projections.name + ) + + # Ensure that the new projections are not NaNs + assert np.all(new_projections.notnull().values), "New projections contain NaNs" + + +def test_inverse_transform(mock_data_array): + """Test inverse_transform method in POP class.""" + + dim = ("time",) + # instantiate the POP class with necessary parameters + pop = POP(n_modes=20, standardize=True) + + # fit the POP model + pop.fit(mock_data_array, dim=dim) + scores = pop.scores() + + # Test with single mode + scores_selection = scores.sel(mode=1) + X_rec_1 = pop.inverse_transform(scores_selection) + assert isinstance(X_rec_1, xr.DataArray) + + # Test with single mode as list + scores_selection = scores.sel(mode=[1]) + X_rec_1_list = pop.inverse_transform(scores_selection) + assert isinstance(X_rec_1_list, xr.DataArray) + + # Single mode and list should be equal + xr.testing.assert_allclose(X_rec_1, X_rec_1_list) + + # Test with all modes + X_rec = pop.inverse_transform(scores) + assert isinstance(X_rec, xr.DataArray) + + # Check that the reconstructed data has the same dimensions as the original data + assert set(X_rec.dims) == set(mock_data_array.dims) + + +@pytest.mark.parametrize("engine", ["zarr"]) +def test_save_load(mock_data_array, tmp_path, engine): + """Test save/load methods in POP class, ensuring that we can + roundtrip the model and get the same results when transforming + data.""" + # NOTE: netcdf4 does not support complex data types, so we use only zarr here + dim = "time" + original = POP() + original.fit(mock_data_array, dim) + + # Save the POP model + original.save(tmp_path / "pop", engine=engine) + + # Check that the POP model has been saved + assert (tmp_path / "pop").exists() + + # Recreate the model from saved file + loaded = POP.load(tmp_path / "pop", engine=engine) + + # Check that the params and DataContainer objects match + assert original.get_params() == loaded.get_params() + assert all([key in loaded.data for key in original.data]) + for key in original.data: + if 
original.data._allow_compute[key]: + assert loaded.data[key].equals(original.data[key]) + else: + # but ensure that input data is not saved by default + assert loaded.data[key].size <= 1 + assert loaded.data[key].attrs["placeholder"] is True + + # Test that the recreated model can be used to transform new data + assert np.allclose( + original.transform(mock_data_array), loaded.transform(mock_data_array) + ) + + # The loaded model should also be able to inverse_transform new data + assert np.allclose( + original.inverse_transform(original.scores()), + loaded.inverse_transform(loaded.scores()), + ) + + +def test_serialize_deserialize_dataarray(mock_data_array): + """Test roundtrip serialization when the model is fit on a DataArray.""" + dim = "time" + model = POP() + model.fit(mock_data_array, dim) + dt = model.serialize() + rebuilt_model = POP.deserialize(dt) + assert np.allclose( + model.transform(mock_data_array), rebuilt_model.transform(mock_data_array) + ) + + +def test_serialize_deserialize_dataset(mock_dataset): + """Test roundtrip serialization when the model is fit on a Dataset.""" + dim = "time" + model = POP() + model.fit(mock_dataset, dim) + dt = model.serialize() + rebuilt_model = POP.deserialize(dt) + assert np.allclose( + model.transform(mock_dataset), rebuilt_model.transform(mock_dataset) + ) diff --git a/tests/models/test_sparse_pca.py b/tests/models/single/test_sparse_pca.py similarity index 99% rename from tests/models/test_sparse_pca.py rename to tests/models/single/test_sparse_pca.py index ffc594a8..f4a1bc56 100644 --- a/tests/models/test_sparse_pca.py +++ b/tests/models/single/test_sparse_pca.py @@ -2,7 +2,7 @@ import pytest import xarray as xr -from xeofs.models import SparsePCA +from xeofs.single import SparsePCA def test_init(): diff --git a/tests/models/test_orthogonality.py b/tests/models/test_orthogonality.py deleted file mode 100644 index cf7a9114..00000000 --- a/tests/models/test_orthogonality.py +++ /dev/null @@ -1,947 +0,0 @@ -import numpy as np -import pytest - -from xeofs.models import ( - EOF, - MCA, - EOFRotator, - HilbertEOF, - HilbertEOFRotator, - HilbertMCA, - HilbertMCARotator, - MCARotator, -) - - -# Orthogonality -# ============================================================================= -# EOF -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_eof_components(dim, use_coslat, mock_data_array): - """Components are orthogonal""" - model = EOF(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(mock_data_array, dim=dim) - V = model.data["components"].values - assert np.allclose( - V.T @ V, np.eye(V.shape[1]), atol=1e-5 - ), "Components are not orthogonal" - - -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_eof_scores(dim, use_coslat, mock_data_array): - """Scores are orthogonal""" - model = EOF(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(mock_data_array, dim=dim) - U = model.data["scores"].values / model.data["norms"].values - assert np.allclose( - U.T @ U, np.eye(U.shape[1]), atol=1e-5 - ), "Scores are not orthogonal" - - -# Hilbert EOF -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_ceof_components(dim, use_coslat, mock_data_array): - """Components are unitary""" - model = HilbertEOF(n_modes=5, standardize=True, 
use_coslat=use_coslat) - model.fit(mock_data_array, dim=dim) - V = model.data["components"].values - assert np.allclose( - V.conj().T @ V, np.eye(V.shape[1]), atol=1e-5 - ), "Components are not unitary" - - -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_ceof_scores(dim, use_coslat, mock_data_array): - """Scores are unitary""" - model = HilbertEOF(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(mock_data_array, dim=dim) - U = model.data["scores"].values / model.data["norms"].values - assert np.allclose( - U.conj().T @ U, np.eye(U.shape[1]), atol=1e-5 - ), "Scores are not unitary" - - -# Rotated EOF -@pytest.mark.parametrize( - "dim, use_coslat, power", - [ - (("time",), True, 1), - (("lat", "lon"), False, 1), - (("lon", "lat"), False, 1), - (("time",), True, 2), - (("lat", "lon"), False, 2), - (("lon", "lat"), False, 2), - ], -) -def test_reof_components(dim, use_coslat, power, mock_data_array): - """Components are NOT orthogonal""" - model = EOF(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(mock_data_array, dim=dim) - rot = EOFRotator(n_modes=5, power=power) - rot.fit(model) - V = rot.data["components"].values - K = V.conj().T @ V - assert np.allclose( - np.diag(K), np.ones(V.shape[1]), atol=1e-5 - ), "Components are not normalized" - # Assert that off-diagonals are not zero - assert not np.allclose(K, np.eye(K.shape[0])), "Rotated components are orthogonal" - - -@pytest.mark.parametrize( - "dim, use_coslat, power", - [ - (("time",), True, 1), - (("lat", "lon"), False, 1), - (("lon", "lat"), False, 1), - (("time",), True, 2), - (("lat", "lon"), False, 2), - (("lon", "lat"), False, 2), - ], -) -def test_reof_scores(dim, use_coslat, power, mock_data_array): - """Components are orthogonal only for Varimax rotation""" - model = EOF(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(mock_data_array, dim=dim) - rot = EOFRotator(n_modes=5, power=power) - rot.fit(model) - U = rot.data["scores"].values / rot.data["norms"].values - K = U.conj().T @ U - if power == 1: - # Varimax rotation does guarantee orthogonality - assert np.allclose( - K, np.eye(K.shape[1]), atol=1e-5 - ), "Components are not orthogonal" - else: - assert not np.allclose(K, np.eye(K.shape[1])), "Components are orthogonal" - - -# Hilbert rotated EOF -@pytest.mark.parametrize( - "dim, use_coslat, power", - [ - (("time",), True, 1), - (("lat", "lon"), False, 1), - (("lon", "lat"), False, 1), - (("time",), True, 2), - (("lat", "lon"), False, 2), - (("lon", "lat"), False, 2), - ], -) -def test_creof_components(dim, use_coslat, power, mock_data_array): - """Components are NOT unitary""" - model = HilbertEOF(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(mock_data_array, dim=dim) - rot = HilbertEOFRotator(n_modes=5, power=power) - rot.fit(model) - V = rot.data["components"].values - K = V.conj().T @ V - assert np.allclose( - np.diag(K), np.ones(V.shape[1]), atol=1e-5 - ), "Components are not normalized" - # Assert that off-diagonals are not zero - assert not np.allclose(K, np.eye(K.shape[0])), "Rotated components are unitary" - - -@pytest.mark.parametrize( - "dim, use_coslat, power", - [ - (("time",), True, 1), - (("lat", "lon"), False, 1), - (("lon", "lat"), False, 1), - (("time",), True, 2), - (("lat", "lon"), False, 2), - (("lon", "lat"), False, 2), - ], -) -def test_creof_scores(dim, use_coslat, power, mock_data_array): - """Components are unitary only for Varimax 
rotation""" - model = HilbertEOF(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(mock_data_array, dim=dim) - rot = HilbertEOFRotator(n_modes=5, power=power) - rot.fit(model) - U = rot.data["scores"].values / rot.data["norms"].values - K = U.conj().T @ U - if power == 1: - # Varimax rotation does guarantee unitarity - assert np.allclose( - K, np.eye(K.shape[1]), atol=1e-5 - ), "Components are not unitary" - else: - assert not np.allclose(K, np.eye(K.shape[1])), "Components are unitary" - - -# MCA -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_mca_components(dim, use_coslat, mock_data_array): - """Components are orthogonal""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = MCA(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - V1 = model.data["components1"].values - V2 = model.data["components2"].values - K1 = V1.T @ V1 - K2 = V2.T @ V2 - assert np.allclose( - K1, np.eye(K1.shape[0]), rtol=1e-8 - ), "Left components are not orthogonal" - assert np.allclose( - K2, np.eye(K2.shape[0]), rtol=1e-8 - ), "Right components are not orthogonal" - - -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_mca_scores(dim, use_coslat, mock_data_array): - """Scores are orthogonal""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = MCA(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - U1 = model.data["scores1"].values - U2 = model.data["scores2"].values - K = U1.T @ U2 - target = np.eye(K.shape[0]) * (model.data["input_data1"].sample.size - 1) - assert np.allclose(K, target, atol=1e-5), "Scores are not orthogonal" - - -# Hilbert MCA -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_cmca_components(dim, use_coslat, mock_data_array): - """Components are unitary""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = HilbertMCA(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - V1 = model.data["components1"].values - V2 = model.data["components2"].values - K1 = V1.conj().T @ V1 - K2 = V2.conj().T @ V2 - assert np.allclose( - K1, np.eye(K1.shape[0]), atol=1e-5 - ), "Left components are not unitary" - assert np.allclose( - K2, np.eye(K2.shape[0]), atol=1e-5 - ), "Right components are not unitary" - - -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_cmca_scores(dim, use_coslat, mock_data_array): - """Scores are unitary""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = HilbertMCA(n_modes=10, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - U1 = model.data["scores1"].values - U2 = model.data["scores2"].values - K = U1.conj().T @ U2 - target = np.eye(K.shape[0]) * (model.data["input_data1"].sample.size - 1) - assert np.allclose(K, target, atol=1e-5), "Scores are not unitary" - - -# Rotated MCA -@pytest.mark.parametrize( - "dim, use_coslat, power, squared_loadings", - [ - (("time",), True, 1, False), - (("lat", "lon"), False, 1, False), - (("lon", "lat"), False, 1, False), - (("time",), True, 2, False), - (("lat", "lon"), False, 2, False), - (("lon", "lat"), False, 2, False), - (("time",), True, 1, 
True), - (("lat", "lon"), False, 1, True), - (("lon", "lat"), False, 1, True), - (("time",), True, 2, True), - (("lat", "lon"), False, 2, True), - (("lon", "lat"), False, 2, True), - ], -) -def test_rmca_components(dim, use_coslat, power, squared_loadings, mock_data_array): - """Components are NOT orthogonal""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = MCA(n_modes=19, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - rot = MCARotator(n_modes=5, power=power, squared_loadings=squared_loadings) - rot.fit(model) - V1 = rot.data["components1"].values - V2 = rot.data["components2"].values - K1 = V1.conj().T @ V1 - K2 = V2.conj().T @ V2 - assert np.allclose( - np.diag(K1), np.ones(K1.shape[0]), rtol=1e-5 - ), "Components are not normalized" - assert np.allclose( - np.diag(K2), np.ones(K2.shape[0]), rtol=1e-5 - ), "Components are not normalized" - # Assert that off-diagonals are not zero - assert not np.allclose(K1, np.eye(K1.shape[0])), "Rotated components are orthogonal" - assert not np.allclose(K2, np.eye(K2.shape[0])), "Rotated components are orthogonal" - - -@pytest.mark.parametrize( - "dim, use_coslat, power, squared_loadings", - [ - (("time",), True, 1, False), - (("lat", "lon"), False, 1, False), - (("lon", "lat"), False, 1, False), - (("time",), True, 2, False), - (("lat", "lon"), False, 2, False), - (("lon", "lat"), False, 2, False), - (("time",), True, 1, True), - (("lat", "lon"), False, 1, True), - (("lon", "lat"), False, 1, True), - (("time",), True, 2, True), - (("lat", "lon"), False, 2, True), - (("lon", "lat"), False, 2, True), - ], -) -def test_rmca_scores(dim, use_coslat, power, squared_loadings, mock_data_array): - """Components are orthogonal only for Varimax rotation""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = MCA(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - rot = MCARotator(n_modes=5, power=power, squared_loadings=squared_loadings) - rot.fit(model) - U1 = rot.data["scores1"].values - U2 = rot.data["scores2"].values - K = U1.conj().T @ U2 - target = np.eye(K.shape[0]) * (model.data["input_data1"].sample.size - 1) - if power == 1: - # Varimax rotation does guarantee orthogonality - assert np.allclose(K, target, atol=1e-5), "Components are not orthogonal" - else: - assert not np.allclose(K, target), "Components are orthogonal" - - -# Hilbert Rotated MCA -@pytest.mark.parametrize( - "dim, use_coslat, power, squared_loadings", - [ - (("time",), True, 1, False), - (("lat", "lon"), False, 1, False), - (("lon", "lat"), False, 1, False), - (("time",), True, 2, False), - (("lat", "lon"), False, 2, False), - (("lon", "lat"), False, 2, False), - (("time",), True, 1, True), - (("lat", "lon"), False, 1, True), - (("lon", "lat"), False, 1, True), - (("time",), True, 2, True), - (("lat", "lon"), False, 2, True), - (("lon", "lat"), False, 2, True), - ], -) -def test_crmca_components(dim, use_coslat, power, squared_loadings, mock_data_array): - """Components are NOT orthogonal""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = HilbertMCA(n_modes=19, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - rot = HilbertMCARotator(n_modes=5, power=power, squared_loadings=squared_loadings) - rot.fit(model) - V1 = rot.data["components1"].values - V2 = rot.data["components2"].values - K1 = V1.conj().T @ V1 - K2 = V2.conj().T @ V2 - assert np.allclose( - np.diag(K1), np.ones(K1.shape[0]), rtol=1e-5 - ), "Components are not 
normalized" - assert np.allclose( - np.diag(K2), np.ones(K2.shape[0]), rtol=1e-5 - ), "Components are not normalized" - # Assert that off-diagonals are not zero - assert not np.allclose(K1, np.eye(K1.shape[0])), "Rotated components are orthogonal" - assert not np.allclose(K2, np.eye(K2.shape[0])), "Rotated components are orthogonal" - - -@pytest.mark.parametrize( - "dim, use_coslat, power, squared_loadings", - [ - (("time",), True, 1, False), - (("lat", "lon"), False, 1, False), - (("lon", "lat"), False, 1, False), - (("time",), True, 2, False), - (("lat", "lon"), False, 2, False), - (("lon", "lat"), False, 2, False), - (("time",), True, 1, True), - (("lat", "lon"), False, 1, True), - (("lon", "lat"), False, 1, True), - (("time",), True, 2, True), - (("lat", "lon"), False, 2, True), - (("lon", "lat"), False, 2, True), - ], -) -def test_crmca_scores(dim, use_coslat, power, squared_loadings, mock_data_array): - """Components are orthogonal only for Varimax rotation""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = HilbertMCA(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - rot = HilbertMCARotator(n_modes=5, power=power, squared_loadings=squared_loadings) - rot.fit(model) - U1 = rot.data["scores1"].values - U2 = rot.data["scores2"].values - K = U1.conj().T @ U2 - target = np.eye(K.shape[0]) * (model.data["input_data1"].sample.size - 1) - if power == 1: - # Varimax rotation does guarantee orthogonality - assert np.allclose(K, target, atol=1e-5), "Components are not orthogonal" - else: - assert not np.allclose(K, target), "Components are orthogonal" - - -# Transform -# ============================================================================= -# EOF -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -@pytest.mark.parametrize("normalized", [True, False]) -def test_eof_transform(dim, use_coslat, mock_data_array, normalized): - """Transforming the original data results in the model scores""" - model = EOF( - n_modes=5, - standardize=True, - use_coslat=use_coslat, - random_state=5, - ) - model.fit(mock_data_array, dim=dim) - scores = model.scores(normalized=normalized) - pseudo_scores = model.transform(mock_data_array, normalized=normalized) - assert np.allclose( - scores, pseudo_scores, atol=1e-4 - ), "Transformed data does not match the scores" - - -# Hilbert EOF -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -@pytest.mark.parametrize("normalized", [True, False]) -def test_ceof_transform(dim, use_coslat, mock_data_array, normalized): - """Not implemented yet""" - model = HilbertEOF(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(mock_data_array, dim=dim) - model.scores(normalized=normalized) - with pytest.raises(NotImplementedError): - model.transform(mock_data_array, normalized=normalized) - - -# Rotated EOF -@pytest.mark.parametrize( - "dim, use_coslat, power", - [ - (("time",), True, 1), - (("lat", "lon"), False, 1), - (("lon", "lat"), False, 1), - (("time",), True, 2), - (("lat", "lon"), False, 2), - (("lon", "lat"), False, 2), - ], -) -@pytest.mark.parametrize("normalized", [True, False]) -def test_reof_transform(dim, use_coslat, power, mock_data_array, normalized): - """Transforming the original data results in the model scores""" - model = EOF(n_modes=5, standardize=True, use_coslat=use_coslat, random_state=5) - model.fit(mock_data_array, 
dim=dim) - rot = EOFRotator(n_modes=5, power=power) - rot.fit(model) - scores = rot.scores(normalized=normalized) - pseudo_scores = rot.transform(mock_data_array, normalized=normalized) - np.testing.assert_allclose( - scores, - pseudo_scores, - rtol=5e-3, - err_msg="Transformed data does not match the scores", - ) - - -# Hilbert Rotated EOF -@pytest.mark.parametrize( - "dim, use_coslat, power", - [ - (("time",), True, 1), - (("lat", "lon"), False, 1), - (("lon", "lat"), False, 1), - (("time",), True, 2), - (("lat", "lon"), False, 2), - (("lon", "lat"), False, 2), - ], -) -@pytest.mark.parametrize("normalized", [True, False]) -def test_creof_transform(dim, use_coslat, power, mock_data_array, normalized): - """not implemented yet""" - model = HilbertEOF(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(mock_data_array, dim=dim) - rot = HilbertEOFRotator(n_modes=5, power=power) - rot.fit(model) - rot.scores(normalized=normalized) - with pytest.raises(NotImplementedError): - rot.transform(mock_data_array, normalized=normalized) - - -# MCA -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_mca_transform(dim, use_coslat, mock_data_array): - """Transforming the original data results in the model scores""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = MCA(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - scores1, scores2 = model.scores() - pseudo_scores1, pseudo_scores2 = model.transform(data1=data1, data2=data2) - assert np.allclose( - scores1, pseudo_scores1, atol=1e-4 - ), "Transformed data does not match the scores" - assert np.allclose( - scores2, pseudo_scores2, atol=1e-4 - ), "Transformed data does not match the scores" - - -# Hilbert MCA -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_cmca_transform(dim, use_coslat, mock_data_array): - """Transforming the original data results in the model scores""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = HilbertMCA(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - scores1, scores2 = model.scores() - with pytest.raises(NotImplementedError): - pseudo_scores1, pseudo_scores2 = model.transform(data1=data1, data2=data2) - - -# Rotated MCA -@pytest.mark.parametrize( - "dim, use_coslat, power, squared_loadings", - [ - (("time",), True, 1, False), - (("lat", "lon"), False, 1, False), - (("lon", "lat"), False, 1, False), - (("time",), True, 2, False), - (("lat", "lon"), False, 2, False), - (("lon", "lat"), False, 2, False), - (("time",), True, 1, True), - (("lat", "lon"), False, 1, True), - (("lon", "lat"), False, 1, True), - (("time",), True, 2, True), - (("lat", "lon"), False, 2, True), - (("lon", "lat"), False, 2, True), - ], -) -def test_rmca_transform(dim, use_coslat, power, squared_loadings, mock_data_array): - """Transforming the original data results in the model scores""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = MCA(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - rot = MCARotator(n_modes=5, power=power, squared_loadings=squared_loadings) - rot.fit(model) - scores1, scores2 = rot.scores() - pseudo_scores1, pseudo_scores2 = rot.transform(data1=data1, data2=data2) - assert np.allclose( - scores1, pseudo_scores1, atol=1e-5 - ), "Transformed data does not 
match the scores" - assert np.allclose( - scores2, pseudo_scores2, atol=1e-5 - ), "Transformed data does not match the scores" - - -# Hilbert Rotated MCA -@pytest.mark.parametrize( - "dim, use_coslat, power, squared_loadings", - [ - (("time",), True, 1, False), - (("lat", "lon"), False, 1, False), - (("lon", "lat"), False, 1, False), - (("time",), True, 2, False), - (("lat", "lon"), False, 2, False), - (("lon", "lat"), False, 2, False), - (("time",), True, 1, True), - (("lat", "lon"), False, 1, True), - (("lon", "lat"), False, 1, True), - (("time",), True, 2, True), - (("lat", "lon"), False, 2, True), - (("lon", "lat"), False, 2, True), - ], -) -def test_crmca_transform(dim, use_coslat, power, squared_loadings, mock_data_array): - """Transforming the original data results in the model scores""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = HilbertMCA(n_modes=5, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - rot = HilbertMCARotator(n_modes=5, power=power, squared_loadings=squared_loadings) - rot.fit(model) - scores1, scores2 = rot.scores() - with pytest.raises(NotImplementedError): - pseudo_scores1, pseudo_scores2 = rot.transform(data1=data1, data2=data2) - - -# Reconstruct -# ============================================================================= -def r2_score(x, y, dim=None): - """Compute the R2 score between two DataArrays - - Parameters - ---------- - x : xr.DataArray - Reference data - y : xr.DataArray - Testing data to be compared with the reference data - dim : str or sequence of str, optional - Dimension(s) over which to compute the R2 score (the default is None, which - means that the R2 score is computed over all dimensions) - - Returns - ------- - r2_score : xr.DataArray - R2 score between x and y - - """ - ssres = ((x - y) ** 2).sum(dim) - sstot = ((x - x.mean(dim)) ** 2).sum(dim) - return 1 - (ssres / sstot) - - -# EOF -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -@pytest.mark.parametrize("normalized", [True, False]) -def test_eof_inverse_transform(dim, use_coslat, mock_data_array, normalized): - """Inverse transform produces an approximate reconstruction of the original data""" - data = mock_data_array - model = EOF(n_modes=19, standardize=True, use_coslat=use_coslat) - model.fit(data, dim=dim) - scores = model.scores(normalized=normalized) - data_rec = model.inverse_transform(scores, normalized=normalized) - r2 = r2_score(data, data_rec, dim=dim) - r2 = r2.mean() - # Choose a threshold of 0.95; a bit arbitrary - assert r2 > 0.95, "Inverse transform does not produce a good reconstruction" - - -# Hilbert EOF -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -@pytest.mark.parametrize("normalized", [True, False]) -def test_ceof_inverse_transform(dim, use_coslat, mock_data_array, normalized): - """Inverse transform produces an approximate reconstruction of the original data""" - data = mock_data_array - model = HilbertEOF(n_modes=19, standardize=True, use_coslat=use_coslat) - model.fit(data, dim=dim) - scores = model.scores(normalized=normalized) - data_rec = model.inverse_transform(scores, normalized=normalized).real - r2 = r2_score(data, data_rec, dim=dim) - r2 = r2.mean() - # Choose a threshold of 0.95; a bit arbitrary - assert r2 > 0.95, "Inverse transform does not produce a good reconstruction" - - -# Rotated EOF -@pytest.mark.parametrize( - 
"dim, use_coslat, power", - [ - (("time",), True, 1), - (("lat", "lon"), False, 1), - (("lon", "lat"), False, 1), - (("time",), True, 2), - (("lat", "lon"), False, 2), - (("lon", "lat"), False, 2), - ], -) -@pytest.mark.parametrize("normalized", [True, False]) -def test_reof_inverse_transform(dim, use_coslat, power, mock_data_array, normalized): - """Inverse transform produces an approximate reconstruction of the original data""" - data = mock_data_array - model = EOF(n_modes=19, standardize=True, use_coslat=use_coslat) - model.fit(data, dim=dim) - rot = EOFRotator(n_modes=19, power=power) - rot.fit(model) - scores = rot.scores(normalized=normalized) - data_rec = rot.inverse_transform(scores, normalized=normalized).real - r2 = r2_score(data, data_rec, dim=dim) - r2 = r2.mean() - # Choose a threshold of 0.95; a bit arbitrary - assert ( - r2 > 0.95 - ), f"Inverse transform does not produce a good reconstruction (R2={r2.values:.2f})" - - -# Hilbert Rotated EOF -@pytest.mark.parametrize( - "dim, use_coslat, power", - [ - (("time",), True, 1), - (("lat", "lon"), False, 1), - (("lon", "lat"), False, 1), - (("time",), True, 2), - (("lat", "lon"), False, 2), - (("lon", "lat"), False, 2), - ], -) -@pytest.mark.parametrize("normalized", [True, False]) -def test_creof_inverse_transform(dim, use_coslat, power, mock_data_array, normalized): - """Inverse transform produces an approximate reconstruction of the original data""" - data = mock_data_array - model = HilbertEOF(n_modes=19, standardize=True, use_coslat=use_coslat) - model.fit(data, dim=dim) - rot = HilbertEOFRotator(n_modes=19, power=power) - rot.fit(model) - scores = rot.scores(normalized=normalized) - data_rec = rot.inverse_transform(scores, normalized=normalized).real - r2 = r2_score(data, data_rec, dim=dim) - r2 = r2.mean() - # Choose a threshold of 0.95; a bit arbitrary - assert ( - r2 > 0.95 - ), f"Inverse transform does not produce a good reconstruction (R2={r2.values:.2f})" - - -# MCA -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_mca_inverse_transform(dim, use_coslat, mock_data_array): - """Inverse transform produces an approximate reconstruction of the original data""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = MCA(n_modes=19, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - scores1 = model.data["scores1"] - scores2 = model.data["scores2"] - data1_rec, data2_rec = model.inverse_transform(scores1, scores2) - r2_1 = r2_score(data1, data1_rec, dim=dim) - r2_2 = r2_score(data2, data2_rec, dim=dim) - r2_1 = r2_1.mean() - r2_2 = r2_2.mean() - # Choose a threshold of 0.95; a bit arbitrary - assert ( - r2_1 > 0.95 - ), f"Inverse transform does not produce a good reconstruction of left field (R2={r2_1.values:.2f})" - assert ( - r2_2 > 0.95 - ), f"Inverse transform does not produce a good reconstruction of right field (R2={r2_2.values:.2f})" - - -# Hilbert MCA -@pytest.mark.parametrize( - "dim, use_coslat", - [ - (("time",), True), - (("lat", "lon"), False), - (("lon", "lat"), False), - ], -) -def test_cmca_inverse_transform(dim, use_coslat, mock_data_array): - """Inverse transform produces an approximate reconstruction of the original data""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = HilbertMCA(n_modes=19, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - scores1 = model.data["scores1"] - scores2 = model.data["scores2"] - data1_rec, 
data2_rec = model.inverse_transform(scores1, scores2) - r2_1 = r2_score(data1, data1_rec, dim=dim) - r2_2 = r2_score(data2, data2_rec, dim=dim) - r2_1 = r2_1.mean() - r2_2 = r2_2.mean() - # Choose a threshold of 0.95; a bit arbitrary - assert ( - r2_1 > 0.95 - ), f"Inverse transform does not produce a good reconstruction of left field (R2={r2_1.values:.2f})" - assert ( - r2_2 > 0.95 - ), f"Inverse transform does not produce a good reconstruction of right field (R2={r2_2.values:.2f})" - - -# Rotated MCA -@pytest.mark.parametrize( - "dim, use_coslat, power, squared_loadings", - [ - (("time",), True, 1, False), - (("lat", "lon"), False, 1, False), - (("lon", "lat"), False, 1, False), - (("time",), True, 2, False), - (("lat", "lon"), False, 2, False), - (("lon", "lat"), False, 2, False), - (("time",), True, 1, True), - (("lat", "lon"), False, 1, True), - (("lon", "lat"), False, 1, True), - (("time",), True, 2, True), - (("lat", "lon"), False, 2, True), - (("lon", "lat"), False, 2, True), - ], -) -def test_rmca_inverse_transform( - dim, use_coslat, power, squared_loadings, mock_data_array -): - """Inverse transform produces an approximate reconstruction of the original data""" - data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = MCA(n_modes=10, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - rot = MCARotator(n_modes=10, power=power, squared_loadings=squared_loadings) - rot.fit(model) - scores1 = rot.data["scores1"] - scores2 = rot.data["scores2"] - data1_rec, data2_rec = rot.inverse_transform(scores1, scores2) - r2_1 = r2_score(data1, data1_rec, dim=dim) - r2_2 = r2_score(data2, data2_rec, dim=dim) - r2_1 = r2_1.mean() - r2_2 = r2_2.mean() - # Choose a threshold of 0.90; a bit arbitrary - assert ( - r2_1 > 0.75 - ), f"Inverse transform does not produce a good reconstruction of left field (R2={r2_1.values:.2f})" - assert ( - r2_2 > 0.75 - ), f"Inverse transform does not produce a good reconstruction of right field (R2={r2_2.values:.2f})" - - -# Hilbert Rotated MCA -@pytest.mark.parametrize( - "dim, use_coslat, power, squared_loadings", - [ - (("time",), True, 1, False), - (("lat", "lon"), False, 1, False), - (("lon", "lat"), False, 1, False), - (("time",), True, 2, False), - (("lat", "lon"), False, 2, False), - (("lon", "lat"), False, 2, False), - (("time",), True, 1, True), - (("lat", "lon"), False, 1, True), - (("lon", "lat"), False, 1, True), - (("time",), True, 2, True), - (("lat", "lon"), False, 2, True), - (("lon", "lat"), False, 2, True), - ], -) -def test_crmca_inverse_transform( - dim, use_coslat, power, squared_loadings, mock_data_array -): - """Inverse transform produces an approximate reconstruction of the original data""" - # NOTE: The lobpcg SVD solver for Hilbert matrices requires a small number of modes - # compared to the actual data size. Since we have a small test set here we only use - # 10 modes for the test. Therefore, the threshold for the R2 score is lower than for - # the other tests. 
- data1 = mock_data_array.copy() - data2 = data1.copy() ** 2 - model = HilbertMCA(n_modes=10, standardize=True, use_coslat=use_coslat) - model.fit(data1, data2, dim=dim) - rot = HilbertMCARotator(n_modes=10, power=power, squared_loadings=squared_loadings) - rot.fit(model) - scores1 = rot.data["scores1"] - scores2 = rot.data["scores2"] - data1_rec, data2_rec = rot.inverse_transform(scores1, scores2) - r2_1 = r2_score(data1, data1_rec, dim=dim) - r2_2 = r2_score(data2, data2_rec, dim=dim) - r2_1 = r2_1.mean() - r2_2 = r2_2.mean() - # Choose a threshold of 0.80; a bit arbitrary - assert ( - r2_1 > 0.80 - ), f"Inverse transform does not produce a good reconstruction of left field (R2={r2_1.values:.2f})" - assert ( - r2_2 > 0.80 - ), f"Inverse transform does not produce a good reconstruction of right field (R2={r2_2.values:.2f})" diff --git a/tests/models/test_rotator.py b/tests/models/test_rotator_factory.py similarity index 88% rename from tests/models/test_rotator.py rename to tests/models/test_rotator_factory.py index f092a268..78ade121 100644 --- a/tests/models/test_rotator.py +++ b/tests/models/test_rotator_factory.py @@ -1,16 +1,8 @@ import pytest -from xeofs.models import ( - EOF, - MCA, - EOFRotator, - HilbertEOF, - HilbertEOFRotator, - HilbertMCA, - HilbertMCARotator, - MCARotator, -) -from xeofs.models.rotator_factory import RotatorFactory +from xeofs.cross import MCA, HilbertMCA, HilbertMCARotator, MCARotator +from xeofs.rotator_factory import RotatorFactory +from xeofs.single import EOF, EOFRotator, HilbertEOF, HilbertEOFRotator # RotatorFactory should be imported from its module # from module import RotatorFactory diff --git a/tests/preprocessing/test_pca_preprocessor_dataarray.py b/tests/preprocessing/test_pca_preprocessor_dataarray.py deleted file mode 100644 index 9db17c72..00000000 --- a/tests/preprocessing/test_pca_preprocessor_dataarray.py +++ /dev/null @@ -1,181 +0,0 @@ -import pytest -import numpy as np - -from xeofs.preprocessing.preprocessor import Preprocessor -from ..conftest import generate_synthetic_dataarray -from ..utilities import ( - get_dims_from_data, - data_is_dask, - data_has_multiindex, - assert_expected_dims, - assert_expected_coords, -) - -# ============================================================================= -# GENERALLY VALID TEST CASES -# ============================================================================= -N_SAMPLE_DIMS = [1, 2] -N_FEATURE_DIMS = [1, 2] -INDEX_POLICY = ["index", "multiindex"] -NAN_POLICY = ["no_nan", "fulldim"] -DASK_POLICY = ["no_dask", "dask"] -SEED = [0] - -TEST_DATA_PARAMS = [ - (ns, nf, index, nan, dask) - for ns in N_SAMPLE_DIMS - for nf in N_FEATURE_DIMS - for index in INDEX_POLICY - for nan in NAN_POLICY - for dask in DASK_POLICY -] - -SAMPLE_DIM_NAMES = ["sample", "sample_alternative"] -FEATURE_DIM_NAMES = ["feature", "feature_alternative"] - -VALID_TEST_CASES = [ - (sample_name, feature_name, data_params) - for sample_name in SAMPLE_DIM_NAMES - for feature_name in FEATURE_DIM_NAMES - for data_params in TEST_DATA_PARAMS -] - - -# TESTS -# ============================================================================= -@pytest.mark.parametrize( - "with_std, with_coslat, with_weights", - [ - (True, True, True), - (True, True, False), - (True, False, True), - (True, False, False), - (False, True, True), - (False, True, False), - (False, False, True), - (False, False, False), - ], -) -def test_fit_transform_scalings(with_std, with_coslat, with_weights, mock_data_array): - """fit method should not be implemented.""" 
- prep = Preprocessor(with_std=with_std, with_coslat=with_coslat) - - weights = None - if with_weights: - weights = mock_data_array.mean("time").copy() - weights[:] = 0.5 - - data_trans = prep.fit_transform( - mock_data_array, - weights=weights, - sample_dims=("time",), - ) - - assert hasattr(prep, "scaler") - assert hasattr(prep, "preconverter") - assert hasattr(prep, "stacker") - assert hasattr(prep, "postconverter") - assert hasattr(prep, "sanitizer") - - # Transformed data is centered - assert np.isclose(data_trans.mean("sample"), 0).all() - # Transformed data is standardized - if with_std and not with_coslat: - if with_weights: - assert np.isclose(data_trans.std("sample"), 0.5).all() - else: - assert np.isclose(data_trans.std("sample"), 1).all() - - -@pytest.mark.parametrize( - "index_policy, nan_policy, dask_policy", - [ - ("index", "no_nan", "no_dask"), - ("multiindex", "no_nan", "dask"), - ("index", "fulldim", "no_dask"), - ("multiindex", "fulldim", "dask"), - ], -) -def test_fit_transform_same_dim_names(index_policy, nan_policy, dask_policy): - data = generate_synthetic_dataarray(1, 1, index_policy, nan_policy, dask_policy) - all_dims, sample_dims, feature_dims = get_dims_from_data(data) - - prep = Preprocessor(sample_name="sample0", feature_name="feature0") - transformed = prep.fit_transform(data, sample_dims) - reconstructed = prep.inverse_transform_data(transformed) - - data_is_dask_before = data_is_dask(data) - data_is_dask_interm = data_is_dask(transformed) - data_is_dask_after = data_is_dask(reconstructed) - - assert set(transformed.dims) == set(("sample0", "feature0")) - assert set(reconstructed.dims) == set(("sample0", "feature0")) - assert not data_has_multiindex(transformed) - assert transformed.notnull().all() - assert data_is_dask_before == data_is_dask_interm - assert data_is_dask_before == data_is_dask_after - - -@pytest.mark.parametrize( - "sample_name, feature_name, data_params", - VALID_TEST_CASES, -) -def test_fit_transform(sample_name, feature_name, data_params): - data = generate_synthetic_dataarray(*data_params) - all_dims, sample_dims, feature_dims = get_dims_from_data(data) - - prep = Preprocessor(sample_name=sample_name, feature_name=feature_name) - transformed = prep.fit_transform(data, sample_dims) - - data_is_dask_before = data_is_dask(data) - data_is_dask_after = data_is_dask(transformed) - - assert transformed.dims == (sample_name, feature_name) - assert not data_has_multiindex(transformed) - assert transformed.notnull().all() - assert data_is_dask_before == data_is_dask_after - - -@pytest.mark.parametrize( - "sample_name, feature_name, data_params", - VALID_TEST_CASES, -) -def test_inverse_transform(sample_name, feature_name, data_params): - data = generate_synthetic_dataarray(*data_params) - all_dims, sample_dims, feature_dims = get_dims_from_data(data) - - prep = Preprocessor(sample_name=sample_name, feature_name=feature_name) - transformed = prep.fit_transform(data, sample_dims) - components = transformed.rename({sample_name: "mode"}) - scores = transformed.rename({feature_name: "mode"}) - - reconstructed = prep.inverse_transform_data(transformed) - components = prep.inverse_transform_components(components) - scores = prep.inverse_transform_scores(scores) - - # Reconstructed data has the same dimensions as the original data - assert_expected_dims(data, reconstructed, policy="all") - assert_expected_dims(data, components, policy="feature") - assert_expected_dims(data, scores, policy="sample") - - # Reconstructed data has the same coordinates as the 
original data - assert_expected_coords(data, reconstructed, policy="all") - assert_expected_coords(data, components, policy="feature") - assert_expected_coords(data, scores, policy="sample") - - # Reconstructed data and original data have NaNs in the same FEATURES - # Note: NaNs in the same place is not guaranteed, since isolated NaNs will be propagated - # to all samples in the same feature - features_with_nans_before = data.isnull().any(sample_dims) - features_with_nans_after = reconstructed.isnull().any(sample_dims) - assert features_with_nans_before.equals(features_with_nans_after) - - # Reconstructed data has MultiIndex if and only if original data has MultiIndex - data_has_multiindex_before = data_has_multiindex(data) - data_has_multiindex_after = data_has_multiindex(reconstructed) - assert data_has_multiindex_before == data_has_multiindex_after - - # Reconstructed data is dask if and only if original data is dask - data_is_dask_before = data_is_dask(data) - data_is_dask_after = data_is_dask(reconstructed) - assert data_is_dask_before == data_is_dask_after diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/validation/test_eof_bootstrapper.py b/tests/validation/test_eof_bootstrapper.py index c4501c03..cb6a8ee7 100644 --- a/tests/validation/test_eof_bootstrapper.py +++ b/tests/validation/test_eof_bootstrapper.py @@ -1,8 +1,8 @@ +import numpy as np import pytest import xarray as xr -import numpy as np -from xeofs.models import EOF +from xeofs.single import EOF from xeofs.validation import EOFBootstrapper diff --git a/xeofs/__init__.py b/xeofs/__init__.py index ac3e5d18..b4d99e2e 100644 --- a/xeofs/__init__.py +++ b/xeofs/__init__.py @@ -1,4 +1,5 @@ -from xeofs import models, validation +from xeofs import cross, multi, single, validation from xeofs._version import __version__ +from xeofs.rotator_factory import RotatorFactory -__all__ = ["models", "validation", "__version__"] +__all__ = ["single", "cross", "multi", "RotatorFactory", "validation", "__version__"] diff --git a/xeofs/base_model.py b/xeofs/base_model.py new file mode 100644 index 00000000..7f13899e --- /dev/null +++ b/xeofs/base_model.py @@ -0,0 +1,189 @@ +import warnings +from abc import ABC, abstractmethod +from datetime import datetime +from typing import Any, Literal + +import dask.base +import xarray as xr +from typing_extensions import Self + +from ._version import __version__ +from .utils.data_types import DataArray +from .utils.io import insert_placeholders, open_model_tree, write_model_tree +from .utils.xarray_utils import data_is_dask + +try: + from xarray.core.datatree import DataTree # type: ignore +except ImportError: + from datatree import DataTree + +# Ignore warnings from numpy casting with additional coordinates +warnings.filterwarnings("ignore", message=r"^invalid value encountered in cast*") + +xr.set_options(keep_attrs=True) + + +class BaseModel(ABC): + """ + Abstract base class for an xeofs model. + + Provides basic functionality for lazy model evaluation, serialization, deserialization and saving/loading models. 
+ + """ + + def __init__(self): + # Define model parameters + self._params = {} + + # Define analysis-relevant meta data + self.attrs = {"model": "BaseModel"} + self.attrs.update( + { + "software": "xeofs", + "version": __version__, + "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + } + ) + self.attrs.update(self._params) + + @abstractmethod + def get_serialization_attrs(self) -> dict: + """Get the attributes to serialize.""" + raise NotImplementedError + + def compute(self, **kwargs): + """Compute and load delayed model results. + + Parameters + ---------- + **kwargs + Additional keyword arguments to pass to `dask.compute()`. + """ + # find and compute all dask arrays simultaneously to allow dask to optimize the + # shared graph and avoid duplicate i/o and computations + dt = self.serialize() + + data_objs = { + k: v + for k, v in dt.to_dict().items() + if data_is_dask(v) and v.attrs.get("allow_compute", True) + } + + (data_objs,) = dask.base.compute(data_objs, **kwargs) + + for k, v in data_objs.items(): + dt[k] = DataTree(v) + + # then rebuild the trained model from the computed results + self._deserialize_attrs(dt) + + self._post_compute() + + def _post_compute(self): + pass + + def get_params(self) -> dict[str, Any]: + """Get the model parameters.""" + return self._params + + def serialize(self) -> DataTree: + """Serialize a complete model with its preprocessor.""" + # Create a root node for this object with its params as attrs + ds_root = xr.Dataset(attrs=dict(params=self.get_params())) + dt = DataTree(data=ds_root, name=type(self).__name__) + + # Retrieve the tree representation of each attached object, or set basic attrs + for key, attr in self.get_serialization_attrs().items(): + if hasattr(attr, "serialize"): + dt[key] = attr.serialize() + dt.attrs[key] = "_is_tree" + else: + dt.attrs[key] = attr + + return dt + + def save( + self, + path: str, + overwrite: bool = False, + save_data: bool = False, + engine: Literal["zarr", "netcdf4", "h5netcdf"] = "zarr", + **kwargs, + ): + """Save the model. + + Parameters + ---------- + path : str + Path to save the model. + overwrite: bool, default=False + Whether or not to overwrite the existing path if it already exists. + Ignored unless `engine="zarr"`. + save_data : bool + Whether or not to save the full input data along with the fitted components. + engine : {"zarr", "netcdf4", "h5netcdf"}, default="zarr" + Xarray backend engine to use for writing the saved model. + **kwargs + Additional keyword arguments to pass to `DataTree.to_netcdf()` or `DataTree.to_zarr()`. 
+ + """ + self.compute() + + dt = self.serialize() + + # Remove any raw data arrays at this stage + if not save_data: + dt = insert_placeholders(dt) + + write_model_tree(dt, path, overwrite=overwrite, engine=engine, **kwargs) + + @classmethod + def deserialize(cls, dt: DataTree) -> Self: + """Deserialize the model and its preprocessors from a DataTree.""" + # Recreate the model with parameters set by root level attrs + model = cls(**dt.attrs["params"]) + model._deserialize_attrs(dt) + return model + + def _deserialize_attrs(self, dt: DataTree): + """Set the necessary attributes of the model from a DataTree.""" + for key, attr in dt.attrs.items(): + if key == "params": + continue + elif attr == "_is_tree": + deserialized_obj = getattr(self, str(key)).deserialize(dt[str(key)]) + else: + deserialized_obj = attr + setattr(self, str(key), deserialized_obj) + + @classmethod + def load( + cls, + path: str, + engine: Literal["zarr", "netcdf4", "h5netcdf"] = "zarr", + **kwargs, + ) -> Self: + """Load a saved model. + + Parameters + ---------- + path : str + Path to the saved model. + engine : {"zarr", "netcdf4", "h5netcdf"}, default="zarr" + Xarray backend engine to use for reading the saved model. + **kwargs + Additional keyword arguments to pass to `open_datatree()`. + + Returns + ------- + model : BaseModel + The loaded model. + + """ + dt = open_model_tree(path, engine=engine, **kwargs) + model = cls.deserialize(dt) + return model + + def _validate_loaded_data(self, X: DataArray): + """Optionally check the loaded data for placeholders.""" + pass diff --git a/xeofs/cross/__init__.py b/xeofs/cross/__init__.py new file mode 100644 index 00000000..85fde6ea --- /dev/null +++ b/xeofs/cross/__init__.py @@ -0,0 +1,51 @@ +import warnings + +from .cca import CCA, ComplexCCA, HilbertCCA +from .cpcca import CPCCA, ComplexCPCCA, HilbertCPCCA +from .cpcca_rotator import ComplexCPCCARotator, CPCCARotator, HilbertCPCCARotator +from .mca import MCA, ComplexMCA, HilbertMCA +from .mca_rotator import ComplexMCARotator, HilbertMCARotator, MCARotator +from .rda import RDA, ComplexRDA, HilbertRDA + +__all__ = [ + "CCA", + "MCA", + "RDA", + "CPCCA", + "ComplexCCA", + "ComplexMCA", + "ComplexRDA", + "ComplexCPCCA", + "HilbertCCA", + "HilbertMCA", + "HilbertRDA", + "HilbertCPCCA", + "MCARotator", + "CPCCARotator", + "ComplexMCARotator", + "ComplexCPCCARotator", + "HilbertMCARotator", + "HilbertCPCCARotator", +] + + +DEPRECATED_NAMES = [ + # ("OldClass", "NewClass"), +] + + +def __dir__(): + return sorted(__all__ + [names[0] for names in DEPRECATED_NAMES]) + + +def __getattr__(name): + for old_name, new_name in DEPRECATED_NAMES: + if name == old_name: + msg = ( + f"Class '{old_name}' is deprecated and will be renamed to '{new_name}' in the next major release. " + f"In that release, '{old_name}' will refer to a different class. " + f"Please switch to '{new_name}' to maintain compatibility." 
+ ) + warnings.warn(msg, DeprecationWarning, stacklevel=2) + return globals()[new_name] + raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/xeofs/cross/base_model_cross_set.py b/xeofs/cross/base_model_cross_set.py new file mode 100644 index 00000000..ee801947 --- /dev/null +++ b/xeofs/cross/base_model_cross_set.py @@ -0,0 +1,542 @@ +from abc import abstractmethod +from typing import Any, Hashable, Sequence + +from numpy.random import Generator +from typing_extensions import Self + +from ..base_model import BaseModel +from ..data_container import DataContainer +from ..preprocessing.preprocessor import Preprocessor +from ..preprocessing.whitener import Whitener +from ..utils.data_types import DataArray, DataObject, GenericType +from ..utils.sanity_checks import validate_input_type +from ..utils.xarray_utils import convert_to_dim_type + + +class BaseModelCrossSet(BaseModel): + """ + Abstract base class for cross-decomposition models. + + Parameters + ---------- + n_modes: int + Number of modes to calculate. + center: bool, default=True + Whether to center the input data. + standardize: bool, default=False + Whether to standardize the input data. + use_coslat: bool, default=False + Whether to use cosine of latitude for scaling. + check_nans : bool, default=True + If True, remove full-dimensional NaN features from the data, check to + ensure that NaN features match the original fit data during transform, + and check for isolated NaNs. Note: this forces eager computation of dask + arrays. If False, skip all NaN checks. In this case, NaNs should be + explicitly removed or filled prior to fitting, or SVD will fail. + alpha : float, default=1.0 + Parameter to perform fractional whitening of the data. If 0, the data is + completely whitened. If 1, the data is not whitened. + use_pca : bool, default=True + If True, perform PCA to reduce the dimensionality of the data. + n_pca_modes : int | float | str, default=0.999 + If int, specifies the number of modes to retain. If float, specifies the + fraction of variance in the (whitened) data that should be explained by + the retained modes. If "all", all modes are retained. + pca_init_rank_reduction : float, default=0.3 + Only relevant when `use_pca=True` and `n_modes` is a float, in which + case it denotes the fraction of the initial rank to reduce the data to + via PCA as a first guess before truncating the solution to the desired + fraction of explained variance. This allows for faster computation of + PCA via randomized SVD and avoids the need to compute the full SVD. + compute: bool, default=True + Whether to compute elements of the model eagerly, or to defer + computation. If True, four pieces of the fit will be computed + sequentially: 1) the preprocessor scaler, 2) optional NaN checks, 3) SVD + decomposition, 4) scores and components. + random_state: numpy.random.Generator or int, optional + Seed for the random number generator. + sample_name: str, default="sample" + Name of the new sample dimension. + feature_name: str, default="feature" + Name of the new feature dimension. + solver: {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs: dict[str, Any], default={} + Additional keyword arguments passed to the SVD solver function. 
+ + """ + + def __init__( + self, + n_modes: int, + center: Sequence[bool] | bool = True, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + alpha: Sequence[float] | float = 1.0, + solver: Sequence[str] | str = "auto", + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + random_state: Generator | int | None = None, + solver_kwargs: dict[str, Any] = {}, + ): + super().__init__() + + # Process parameters + center = self._process_parameter("center", center, True) + standardize = self._process_parameter("standardize", standardize, False) + use_coslat = self._process_parameter("use_coslat", use_coslat, False) + check_nans = self._process_parameter("check_nans", check_nans, True) + use_pca = self._process_parameter("use_pca", use_pca, True) + n_pca_modes = self._process_parameter("n_pca_modes", n_pca_modes, 0.999) + pca_init_rank_reduction = self._process_parameter( + "pca_init_rank_reduction", pca_init_rank_reduction, 0.3 + ) + + alpha = self._process_parameter("alpha", alpha, 1.0) + # Ensure that alpha is a float + alpha = [float(a) for a in alpha] + + # Use feature1 and feature2 throughout the model to refer to the two datasets + if isinstance(feature_name, str): + feature_name = [feature_name + str(i + 1) for i in range(2)] + self._check_parameter_number("feature_name", feature_name) + if feature_name[0] == feature_name[1]: + raise ValueError("feature_name must be different for each dataset") + + # Define model parameters + self.sample_name = sample_name + self.feature_name = feature_name + + self._params = { + "n_modes": n_modes, + "center": center, + "standardize": standardize, + "use_coslat": use_coslat, + "check_nans": check_nans, + "use_pca": use_pca, + "n_pca_modes": n_pca_modes, + "pca_init_rank_reduction": pca_init_rank_reduction, + "alpha": alpha, + "sample_name": sample_name, + "feature_name": feature_name, + "random_state": random_state, + "compute": compute, + "solver": solver, + } + + self._decomposer_kwargs = { + "n_modes": n_modes, + "init_rank_reduction": pca_init_rank_reduction, + "solver": solver, + "random_state": random_state, + "compute": compute, + "solver_kwargs": solver_kwargs, + } + + # Define analysis-relevant meta data + self.attrs.update({"model": "BaseModelCrossSet"}) + self.attrs.update(self.get_params()) + + # Initialize preprocessors for dataset X and Y + self.preprocessor1 = Preprocessor( + sample_name=sample_name, + feature_name=feature_name[0], + with_center=center[0], + with_std=standardize[0], + with_coslat=use_coslat[0], + check_nans=check_nans[0], + compute=compute, + ) + + self.preprocessor2 = Preprocessor( + sample_name=sample_name, + feature_name=feature_name[1], + with_center=center[1], + with_std=standardize[1], + with_coslat=use_coslat[1], + check_nans=check_nans[1], + compute=compute, + ) + + self.whitener1 = Whitener( + alpha=alpha[0], + use_pca=use_pca[0], + n_modes=n_pca_modes[0], + init_rank_reduction=pca_init_rank_reduction[0], + sample_name=sample_name, + feature_name=feature_name[0], + ) + self.whitener2 = Whitener( + alpha=alpha[1], + use_pca=use_pca[1], + n_modes=n_pca_modes[1], + init_rank_reduction=pca_init_rank_reduction[1], + sample_name=sample_name, + feature_name=feature_name[1], + ) + + # Initialize the data container that 
stores the results + self.data = DataContainer() + + @abstractmethod + def _fit_algorithm(self, X: DataArray, Y: DataArray) -> Self: + """ + Fit the model to the preprocessed data. This method needs to be implemented in the respective + subclass. + + Parameters + ---------- + X, Y: DataArray + Preprocessed input data of two dimensions: (`sample_name`, `feature_name`) + + """ + raise NotImplementedError + + @abstractmethod + def _transform_algorithm( + self, X: DataArray | None = None, Y: DataArray | None = None, **kwargs + ) -> dict[str, DataArray]: + """ + Transform the preprocessed data. This method needs to be implemented in the respective + subclass. + + Parameters + ---------- + X, Y: DataArray + Preprocessed input data of two dimensions: (`sample_name`, `feature_name`) + + """ + raise NotImplementedError + + @abstractmethod + def _inverse_transform_algorithm( + self, X: DataArray | None = None, Y: DataArray | None = None, **kwargs + ) -> dict[str, DataArray]: + """ + Reconstruct the original data from transformed data. This method needs to be implemented in the respective + subclass. + + Parameters + ---------- + scores1: DataArray + Transformed left field data to be reconstructed. This could be + a subset of the `scores` data of a fitted model, or unseen data. + Must have a 'mode' dimension. + scores2: DataArray + Transformed right field data to be reconstructed. This could be + a subset of the `scores` data of a fitted model, or unseen data. + Must have a 'mode' dimension. + + Returns + ------- + Xrec1: DataArray + Reconstructed data of left field. + Xrec2: DataArray + Reconstructed data of right field. + + """ + raise NotImplementedError + + @abstractmethod + def _predict_algorithm(self, X: DataArray, **kwargs) -> DataArray: + """Predict the right field from the left field. This method needs to be implemented in the respective subclass.""" + raise NotImplementedError + + @abstractmethod + def _get_components(self, **kwargs) -> tuple[DataArray, DataArray]: + """Get the components.""" + raise NotImplementedError + + @abstractmethod + def _get_scores(self, **kwargs) -> tuple[DataArray, DataArray]: + """Get the scores.""" + raise NotImplementedError + + def fit( + self, + X: DataObject, + Y: DataObject, + dim: Hashable | Sequence[Hashable], + weights_X: DataObject | None = None, + weights_Y: DataObject | None = None, + ) -> Self: + """Fit the data to the model. + + Parameters + ---------- + X, Y: DataObject + Data to be fitted. + dim: Hashable | Sequence[Hashable] + Define the sample dimensions. The remaining dimensions + will be treated as feature dimensions. + weights_X, weights_Y: DataObject | None, default=None + Weights for the data. If None, no weights are used. + + Returns + ------- + xeofs MultiSetModel + Fitted model. 
+ + """ + validate_input_type(X) + validate_input_type(Y) + if weights_X is not None: + validate_input_type(weights_X) + if weights_Y is not None: + validate_input_type(weights_Y) + + self.sample_dims = convert_to_dim_type(dim) + # Preprocess data + X = self.preprocessor1.fit_transform(X, self.sample_dims, weights_X) + Y = self.preprocessor2.fit_transform(Y, self.sample_dims, weights_Y) + # Whiten data + X = self.whitener1.fit_transform(X) + Y = self.whitener2.fit_transform(Y) + # Augment data + X, y = self._augment_data(X, Y) + # Fit the model + self._fit_algorithm(X, Y) + + if self.get_params()["compute"]: + self.data.compute() + self._post_compute() + + return self + + def transform( + self, X: DataObject | None = None, Y: DataObject | None = None, normalized=False + ) -> Sequence[DataArray] | DataArray: + """Transform the data. + + Parameters + ---------- + X, Y: DataObject | None + Data to be transformed. At least one of them must be provided. + normalized: bool, default=False + Whether to return L2 normalized scores. + + Returns + ------- + Sequence[DataArray] | DataArray + Transformed data. + + + """ + if X is None and Y is None: + raise ValueError("Either X or Y must be provided.") + + if X is not None: + validate_input_type(X) + # Preprocess X + X = self.preprocessor1.transform(X) + X = self.whitener1.transform(X) + if Y is not None: + validate_input_type(Y) + # Preprocess Y + Y = self.preprocessor2.transform(Y) + Y = self.whitener2.transform(Y) + + data = self._transform_algorithm(X, Y, normalized=normalized) + data_list = [] + if X is not None: + X = self.whitener1.inverse_transform_scores_unseen(data["X"]) + X = self.preprocessor1.inverse_transform_scores_unseen(X) + data_list.append(X) + if Y is not None: + Y = self.whitener2.inverse_transform_scores_unseen(data["Y"]) + Y = self.preprocessor2.inverse_transform_scores_unseen(Y) + data_list.append(Y) + + if len(data_list) == 1: + return data_list[0] + else: + return data_list + + def inverse_transform( + self, X: DataArray | None = None, Y: DataArray | None = None + ) -> Sequence[DataObject] | DataObject: + """Reconstruct the original data from transformed data. + + Parameters + ---------- + X, Y: DataArray | None + Transformed data to be reconstructed. At least one of them must be provided. + + Returns + ------- + Sequence[DataObject] | DataObject + Reconstructed data. + + """ + x_is_given = X is not None + y_is_given = Y is not None + + if (not x_is_given) and (not y_is_given): + raise ValueError("Either X or Y must be provided.") + + if x_is_given: + # Handle scalar mode in xr.dot + if "mode" not in X.dims: + X = X.expand_dims("mode") + + if y_is_given: + # Handle scalar mode in xr.dot + if "mode" not in Y.dims: + Y = Y.expand_dims("mode") + + inv_transformed = self._inverse_transform_algorithm(X, Y) + + results: list[DataObject] = [] + + # Unstack and rescale the data + if x_is_given: + X = inv_transformed["X"] + X = self.whitener1.inverse_transform_data(X) + Xrec = self.preprocessor1.inverse_transform_data(X) + results.append(Xrec) + if y_is_given: + Y = inv_transformed["Y"] + Y = self.whitener2.inverse_transform_data(Y) + Yrec = self.preprocessor2.inverse_transform_data(Y) + results.append(Yrec) + + if len(results) == 1: + return results[0] + else: + return results + + def predict(self, X: DataObject) -> DataArray: + """Predict Y from X. + + Parameters + ---------- + X: DataObject + Data to be used for prediction. + + Returns + ------- + DataArray + Predicted data in transformed space. 
+ + """ + + validate_input_type(X) + + # Preprocess X + X = self.preprocessor1.transform(X) + + # Whiten X + X = self.whitener1.transform(X) + + # Predict Y + Y = self._predict_algorithm(X) + + # Inverse transform Y + Y = self.whitener2.inverse_transform_scores_unseen(Y) + Y = self.preprocessor2.inverse_transform_scores_unseen(Y) + + return Y + + def components(self, normalized=True) -> tuple[DataObject, DataObject]: + """Get the components of the model. + + The components may be referred to differently depending on the model + type. Common terms include canonical vectors, singular vectors, loadings + or spatial patterns. + + Parameters + ---------- + normalized: bool, default=True + Whether to return L2 normalized components. + + Returns + ------- + tuple[DataObject, DataObject] + Components of X and Y. + + """ + Px, Py = self._get_components(normalized=normalized) + + Px = self.whitener1.inverse_transform_components(Px) + Py = self.whitener2.inverse_transform_components(Py) + + Px: DataObject = self.preprocessor1.inverse_transform_components(Px) + Py: DataObject = self.preprocessor2.inverse_transform_components(Py) + return Px, Py + + def scores(self, normalized=False) -> tuple[DataArray, DataArray]: + """Get the scores of the model. + + The component scores may be referred to differently depending on the + model type. Common terms include canonical variates, expansion + coefficents, principal component (scores) or temporal patterns. + + Parameters + ---------- + normalized: bool, default=False + Whether to return L2 normalized scores. + + Returns + ------- + tuple[DataArray, DataArray] + Scores of X and Y. + + """ + Rx, Ry = self._get_scores(normalized=normalized) + + Rx = self.whitener1.inverse_transform_scores(Rx) + Ry = self.whitener2.inverse_transform_scores(Ry) + + Rx: DataArray = self.preprocessor1.inverse_transform_scores(Rx) + Ry: DataArray = self.preprocessor2.inverse_transform_scores(Ry) + return Rx, Ry + + def get_serialization_attrs(self) -> dict: + """Get the attributes needed to serialize the model. + + Returns + ------- + dict + Attributes needed to serialize the model. + + """ + return dict( + data=self.data, + preprocessor1=self.preprocessor1, + preprocessor2=self.preprocessor2, + whitener1=self.whitener1, + whitener2=self.whitener2, + ) + + def _augment_data(self, X: DataArray, Y: DataArray) -> tuple[DataArray, DataArray]: + """Optional method to augment the data before fitting.""" + return X, Y + + def _process_parameter( + self, + parameter_name: str, + parameter: Sequence[GenericType] | GenericType | None, + default: GenericType, + ) -> Sequence[GenericType]: + n_datasets = 2 + if parameter is None: + parameter = [default] * n_datasets + elif not isinstance(parameter, (list, tuple)): + parameter = [parameter] * n_datasets + self._check_parameter_number(parameter_name, parameter) + return parameter + + @staticmethod + def _check_parameter_number( + parameter_name: str, parameter: Sequence[GenericType] + ) -> None: + if len(parameter) != 2: + err_msg = ( + f"Expected 2 items for '{parameter_name}', but got {len(parameter)}." + ) + raise ValueError(err_msg) diff --git a/xeofs/cross/cca.py b/xeofs/cross/cca.py new file mode 100644 index 00000000..210b038a --- /dev/null +++ b/xeofs/cross/cca.py @@ -0,0 +1,356 @@ +from typing import Sequence + +import numpy as np + +from .cpcca import CPCCA, ComplexCPCCA, HilbertCPCCA + + +class CCA(CPCCA): + """Canonical Correlation Analysis (CCA). + + CCA seeks to find paris of coupled patterns that maximize the correlation [1]_ [2]_ . 
+ + This method solves the following optimization problem: + + :math:`\\max_{q_x, q_y} \\left( q_x^T X^T Y q_y \\right)` + + subject to the constraints: + + :math:`q_x^T (X^TX) q_x = 1, \\quad q_y^T (Y^TY) q_y = 1` + + where :math:`X` and :math:`Y` are the input data matrices and :math:`q_x` + and :math:`q_y` are the corresponding pattern vectors. + + Parameters + ---------- + n_modes : int, default=2 + Number of modes to calculate. + standardize : Squence[bool] | bool, default=False + Whether to standardize the input data. Generally not recommended as + standardization can be managed by the degree of whitening. + use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. The cross-covariance matrix is then computed in the reduced + principal component space. + n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. + pca_init_rank_reduction : Sequence[float] | float, default=0.3 + Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the + initial fraction of rank reduction for faster PCA computation via + randomized SVD. + check_nans : Sequence[bool] | bool, default=True + Whether to check for NaNs in the input data. Set to False for lazy model + evaluation. + compute : bool, default=True + Whether to compute the model elements eagerly. If True, the following + are computed sequentially: preprocessor scaler, optional NaN checks, SVD + decomposition, scores, and components. + random_state : numpy.random.Generator | int | None, default=None + Seed for the random number generator. + sample_name : str, default="sample" + Name for the new sample dimension. + feature_name : Sequence[str] | str, default="feature" + Name for the new feature dimension. + solver : {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver function. + + + References + ---------- + .. [1] Bretherton, C., Smith, C., Wallace, J., 1992. An intercomparison of + methods for finding coupled patterns in climate data. Journal of climate + 5, 541–560. + .. [2] Wilks, D. S. Statistical Methods in the Atmospheric Sciences. + (Academic Press, 2019). + doi:https://doi.org/10.1016/B978-0-12-815823-4.00011-0. 
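As an editorial illustration of the optimization problem above (not the library's internal implementation), the CCA solution can be sketched in a few lines of NumPy: fully whiten both centered data matrices, then take the SVD of their cross-covariance; the singular values are the canonical correlations. All array names and shapes below are made up.

```python
# Editorial sketch of the CCA optimization problem; not the library's internals.
import numpy as np

rng = np.random.default_rng(42)
X = rng.standard_normal((200, 5))
Y = rng.standard_normal((200, 4))
X -= X.mean(axis=0)
Y -= Y.mean(axis=0)


def inv_sqrt_cov(A):
    """Return (A^T A / (n - 1))^{-1/2} via an eigendecomposition."""
    C = A.T @ A / (A.shape[0] - 1)
    w, V = np.linalg.eigh(C)
    return V @ np.diag(w**-0.5) @ V.T


Wx, Wy = inv_sqrt_cov(X), inv_sqrt_cov(Y)
C = (X @ Wx).T @ (Y @ Wy) / (X.shape[0] - 1)  # cross-covariance of whitened data
U, s, Vt = np.linalg.svd(C, full_matrices=False)
q_x, q_y = Wx @ U, Wy @ Vt.T                  # pattern vectors q_x, q_y
print(np.round(s, 2))                         # canonical correlations per mode
```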
+ + Examples + -------- + + Perform CCA on two datasets on a regular longitude-latitude grid: + + >>> model = CCA(n_modes=5, use_coslat=True) + >>> model.fit(X, Y, dim="time") + + """ + + def __init__( + self, + n_modes: int = 2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + ): + CPCCA.__init__( + self, + n_modes=n_modes, + alpha=[0.0, 0.0], + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + ) + self.attrs.update({"model": "Canonical Correlation Analysis"}) + # Renove alpha from the inherited CPCCA serialization params because it is hard-coded for CCA + self._params.pop("alpha") + + +class ComplexCCA(ComplexCPCCA, CCA): + """Complex CCA. + + CCA applied to a complex-valued field obtained from a pair of variables such + as the zonal and meridional components, :math:`U` and :math:`V`, of the wind + field. Complex CCA analysis then maximizes the correlation between + two datasets of the form + + .. math:: + Z_x = U_x + iV_x + + and + + .. math:: + Z_y = U_y + iV_y + + into a set of complex-valued components and PC scores. + + + Parameters + ---------- + n_modes : int, default=2 + Number of modes to calculate. + standardize : Squence[bool] | bool, default=False + Whether to standardize the input data. Generally not recommended as + standardization can be managed by the degree of whitening. + use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. The cross-covariance matrix is then computed in the reduced + principal component space. + n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. + pca_init_rank_reduction : Sequence[float] | float, default=0.3 + Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the + initial fraction of rank reduction for faster PCA computation via + randomized SVD. + check_nans : Sequence[bool] | bool, default=True + Whether to check for NaNs in the input data. Set to False for lazy model + evaluation. + compute : bool, default=True + Whether to compute the model elements eagerly. If True, the following + are computed sequentially: preprocessor scaler, optional NaN checks, SVD + decomposition, scores, and components. + random_state : numpy.random.Generator | int | None, default=None + Seed for the random number generator. 
+ sample_name : str, default="sample" + Name for the new sample dimension. + feature_name : Sequence[str] | str, default="feature" + Name for the new feature dimension. + solver : {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver function. + + Examples + -------- + + With two DataArrays `u_i` and `v_i` representing the zonal and meridional + components of the wind field for two different regions :math:`x` and + :math:`y`, construct + + >>> X = u_x + 1j * v_x + >>> Y = u_y + 1j * v_y + + and fit the Complex CCA model: + + >>> model = ComplexCCA(n_modes=5) + >>> model.fit(X, Y, "time") + + + """ + + def __init__( + self, + n_modes: int = 2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + ): + ComplexCPCCA.__init__( + self, + n_modes=n_modes, + alpha=[0.0, 0.0], + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + ) + self.attrs.update({"model": "Complex CCA"}) + # Renove alpha from the inherited CPCCA serialization params because it is hard-coded for CCA + self._params.pop("alpha") + + +class HilbertCCA(HilbertCPCCA, ComplexCCA): + """Hilbert CCA. + + Hilbert CCA extends CCA by examining amplitude-phase relationships. It + augments the input data with its Hilbert transform, creating a + complex-valued field. + + This method solves the following optimization problem: + + :math:`\\max_{q_x, q_y} \\left( q_x^H X^H Y q_y \\right)` + + subject to the constraints: + + :math:`q_x^H (X^HX) q_x = 1, \\quad q_y^H (Y^HY) q_y = 1` + + where :math:`H` denotes the conjugate transpose and :math:`X` and :math:`Y` + are the augmented data matrices. + + An optional padding with exponentially decaying values can be applied prior + to the Hilbert transform in order to mitigate the impact of spectral + leakage. + + + Parameters + ---------- + n_modes : int, default=2 + Number of modes to calculate. + padding : Sequence[str] | str | None, default="exp" + Padding method for the Hilbert transform. Available options are: - None: + no padding - "exp": exponential decay + decay_factor : Sequence[float] | float, default=0.2 + Decay factor for the exponential padding. + standardize : Squence[bool] | bool, default=False + Whether to standardize the input data. Generally not recommended as + standardization can be managed by the degree of whitening. + use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. 
The cross-covariance matrix is computed in the reduced + principal component space. + n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. + pca_init_rank_reduction : Sequence[float] | float, default=0.3 + Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the + initial fraction of rank reduction for faster PCA computation via + randomized SVD. + check_nans : Sequence[bool] | bool, default=True + Whether to check for NaNs in the input data. Set to False for lazy model + evaluation. + compute : bool, default=True + Whether to compute the model elements eagerly. If True, the following + are computed sequentially: preprocessor scaler, optional NaN checks, SVD + decomposition, scores, and components. + random_state : numpy.random.Generator | int | None, default=None + Seed for the random number generator. + sample_name : str, default="sample" + Name for the new sample dimension. + feature_name : Sequence[str] | str, default="feature" + Name for the new feature dimension. + solver : {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver function. + + + + Examples + -------- + >>> model = HilbertCCA(n_modes=5) + >>> model.fit(X, Y, "time") + + """ + + def __init__( + self, + n_modes: int = 2, + padding: Sequence[str] | str | None = "exp", + decay_factor: Sequence[float] | float = 0.2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + ): + HilbertCPCCA.__init__( + self, + n_modes=n_modes, + alpha=[0.0, 0.0], + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + padding=padding, + decay_factor=decay_factor, + ) + self.attrs.update({"model": "Hilbert CCA"}) + # Renove alpha from the inherited CPCCA serialization params because it is hard-coded for CCA + self._params.pop("alpha") diff --git a/xeofs/cross/cpcca.py b/xeofs/cross/cpcca.py new file mode 100644 index 00000000..ec8c0da2 --- /dev/null +++ b/xeofs/cross/cpcca.py @@ -0,0 +1,1458 @@ +import warnings +from typing import Sequence + +import numpy as np +import xarray as xr +from typing_extensions import Self + +from ..linalg._numpy import _fractional_matrix_power +from ..linalg.decomposer import Decomposer +from ..utils.data_types import DataArray, DataObject +from ..utils.hilbert_transform import hilbert_transform +from ..utils.statistics import pearson_correlation +from ..utils.xarray_utils import argsort_dask +from .base_model_cross_set import BaseModelCrossSet + + +class CPCCA(BaseModelCrossSet): + """Continuum Power CCA (CPCCA). 
+ + CPCCA extends continuum power regression to isolate pairs of coupled + patterns, maximizing the squared covariance between partially whitened + variables [1]_ [2]_. + + This method solves the following optimization problem: + + :math:`\\max_{q_x, q_y} \\left( q_x^T X^T Y q_y \\right)` + + subject to the constraints: + + :math:`q_x^T (X^TX)^{1-\\alpha_x} q_x = 1, \\quad q_y^T + (Y^TY)^{1-\\alpha_y} q_y = 1` + + where :math:`\\alpha_x` and :math:`\\alpha_y` control the degree of + whitening applied to the data. + + Parameters + ---------- + n_modes : int, default=2 + Number of modes to calculate. + alpha : Sequence[float] | float, default=0.2 + Degree of whitening applied to the data. If float, the same value is + applied to both data sets. + standardize : Squence[bool] | bool, default=False + Whether to standardize the input data. Generally not recommended as + standardization can be managed by the degree of whitening. + use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. The cross-covariance matrix is then computed in the reduced + principal component space. + n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. + pca_init_rank_reduction : Sequence[float] | float, default=0.3 + Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the + initial fraction of rank reduction for faster PCA computation via + randomized SVD. + check_nans : Sequence[bool] | bool, default=True + Whether to check for NaNs in the input data. Set to False for lazy model + evaluation. + compute : bool, default=True + Whether to compute the model elements eagerly. If True, the following + are computed sequentially: preprocessor scaler, optional NaN checks, SVD + decomposition, scores, and components. + random_state : numpy.random.Generator | int | None, default=None + Seed for the random number generator. + sample_name : str, default="sample" + Name for the new sample dimension. + feature_name : Sequence[str] | str, default="feature" + Name for the new feature dimension. + solver : {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver function. + + Notes + ----- + Canonical Correlation Analysis (CCA), Maximum Covariance Analysis (MCA) and + Redundany Analysis (RDA) are all special cases of CPCCA depending on the + choice of the parameter :math:`\\alpha`. + + References + ---------- + .. [1] Swenson, E. Continuum Power CCA: A Unified Approach for Isolating + Coupled Modes. Journal of Climate 28, 1016–1030 (2015). + .. [2] Wilks, D. S. Statistical Methods in the Atmospheric Sciences. + (Academic Press, 2019). + doi:https://doi.org/10.1016/B978-0-12-815823-4.00011-0. 
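The continuum described in the Notes can be made concrete with a small NumPy sketch of fractional whitening: scaling the data by a fractional power of its covariance matrix interpolates between the fully whitened case (``alpha=0``, CCA-like) and the unwhitened case (``alpha=1``, MCA-like). This is an editorial illustration under simplified assumptions, not the library's ``Whitener`` class.

```python
# Editorial sketch of fractional ("partial") whitening; not the library's Whitener.
import numpy as np


def partially_whiten(A, alpha):
    """Scale the centered data A by (A^T A / (n - 1))^((alpha - 1) / 2)."""
    C = A.T @ A / (A.shape[0] - 1)
    w, V = np.linalg.eigh(C)
    T = V @ np.diag(w ** ((alpha - 1) / 2)) @ V.T
    return A @ T


rng = np.random.default_rng(0)
X = rng.standard_normal((500, 6))
X -= X.mean(axis=0)

X0 = partially_whiten(X, alpha=0.0)  # fully whitened: covariance becomes the identity
X1 = partially_whiten(X, alpha=1.0)  # no whitening: data is left unchanged
print(np.allclose(X0.T @ X0 / 499, np.eye(6)))  # True
print(np.allclose(X1, X))                       # True
```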
+ + Examples + -------- + Perform regular CCA on two data sets: + + >>> model = CPCCA(n_modes=5, alpha=0.0) + >>> model.fit(X, Y) + + Perform regularized CCA on two data sets: + + >>> model = CPCCA(n_modes=5, alpha=0.2) + >>> model.fit(X, Y) + + Perform Maximum Covariance Analysis: + + >>> model = CPCCA(n_modes=5, alpha=1.0) + >>> model.fit(X, Y) + + Perform Redundancy Analysis: + + >>> model = CPCCA(n_modes=5, alpha=[0, 1]) + >>> model.fit(X, Y) + + Make predictions for `Y` given `X`: + + >>> scores_y_pred = model.predict(X) # prediction in "PC" space + >>> Y_pred = model.inverse_transform(Y=scores_y_pred) # prediction in physical space + + + """ + + def __init__( + self, + n_modes: int = 2, + alpha: Sequence[float] | float = 0.2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + check_nans: Sequence[bool] | bool = True, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + **kwargs, + ): + super().__init__( + n_modes=n_modes, + center=True, + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + alpha=alpha, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + ) + self.attrs.update({"model": "Continuum Power CCA"}) + # Remove center from the inherited serialization params because it is hard-coded for CPCCA + self._params.pop("center") + + params = self.get_params() + self.sample_name: str = params["sample_name"] + self.feature_name: tuple[str, str] = params["feature_name"] + + def _fit_algorithm( + self, + X: DataArray, + Y: DataArray, + ) -> Self: + feature_name = self.feature_name + + # Compute the totalsquared covariance from the unwhitened data + C_whitened = self._compute_cross_matrix( + X, + Y, + sample_dim=self.sample_name, + feature_dim_x=feature_name[0], + feature_dim_y=feature_name[1], + method="covariance", + diagonal=False, + ) + + # Initialize the SVD decomposer + decomposer = Decomposer(**self._decomposer_kwargs) + dims = (feature_name[0], feature_name[1]) + decomposer.fit(C_whitened, dims=dims) + + # Store the results + singular_values = decomposer.s_ + Q1 = decomposer.U_ + Q2 = decomposer.V_ + + # Compute total squared variance + total_squared_covariance = self._compute_total_squared_covariance(C_whitened) + + # Index of the sorted covariance explained + idx_sorted_modes = argsort_dask(singular_values, "mode")[::-1] # type: ignore + idx_sorted_modes.coords.update(singular_values.coords) + + # Project the data onto the singular vectors + scores1 = xr.dot(X, Q1, dims=feature_name[0]) + scores2 = xr.dot(Y, Q2, dims=feature_name[1]) + + norm1 = np.sqrt(xr.dot(scores1.conj(), scores1, dims=self.sample_name)).real + norm2 = np.sqrt(xr.dot(scores2.conj(), scores2, dims=self.sample_name)).real + + self.data.add(name="input_data1", data=X, allow_compute=False) + self.data.add(name="input_data2", data=Y, allow_compute=False) + self.data.add(name="components1", data=Q1) + self.data.add(name="components2", data=Q2) + self.data.add(name="scores1", data=scores1) + self.data.add(name="scores2", 
data=scores2) + self.data.add(name="singular_values", data=singular_values) + self.data.add(name="squared_covariance", data=singular_values**2) + self.data.add(name="total_squared_covariance", data=total_squared_covariance) + self.data.add(name="idx_modes_sorted", data=idx_sorted_modes) + self.data.add(name="norm1", data=norm1) + self.data.add(name="norm2", data=norm2) + + # # Assign analysis-relevant meta data + self.data.set_attrs(self.attrs) + return self + + def _transform_algorithm( + self, + X: DataArray | None = None, + Y: DataArray | None = None, + normalized=False, + ) -> dict[str, DataArray]: + results = {} + if X is not None: + # Project data onto singular vectors + comps1 = self.data["components1"] + norm1 = self.data["norm1"] + scores1 = xr.dot(X, comps1) + if normalized: + scores1 = scores1 / norm1 + results["X"] = scores1 + + if Y is not None: + # Project data onto singular vectors + comps2 = self.data["components2"] + norm2 = self.data["norm2"] + scores2 = xr.dot(Y, comps2) + if normalized: + scores2 = scores2 / norm2 + results["Y"] = scores2 + + return results + + def _inverse_transform_algorithm( + self, X: DataArray | None = None, Y: DataArray | None = None + ) -> dict[str, DataArray]: + x_is_given = X is not None + y_is_given = Y is not None + + if (not x_is_given) and (not y_is_given): + raise ValueError("Either X or Y must be given.") + + results = {} + + if x_is_given: + # Singular vectors + comps1 = self.data["components1"].sel(mode=X.mode) + # Reconstruct the data + results["X"] = xr.dot(X, comps1.conj(), dims="mode") + + if y_is_given: + # Singular vectors + comps2 = self.data["components2"].sel(mode=Y.mode) + # Reconstruct the data + results["Y"] = xr.dot(Y, comps2.conj(), dims="mode") + + return results + + def _predict_algorithm(self, X: DataArray) -> DataArray: + sample_name_fit_x = "sample_fit_dim_x" + sample_name_fit_y = "sample_fit_dim_y" + Qx = self.data["components1"] + Rx = self.data["scores1"].rename({self.sample_name: sample_name_fit_x}) + Ry = self.data["scores2"].rename({self.sample_name: sample_name_fit_y}) + + def _predict_numpy(X, Qx, Rx, Ry): + G = Rx.conj().T @ Ry / np.linalg.norm(Rx, axis=0) ** 2 + return X @ Qx @ G + + Ry_pred = xr.apply_ufunc( + _predict_numpy, + X, + Qx, + Rx, + Ry, + input_core_dims=[ + [self.sample_name, self.feature_name[0]], + [self.feature_name[0], "mode"], + [sample_name_fit_x, "mode"], + [sample_name_fit_y, "mode"], + ], + output_core_dims=[[self.sample_name, "mode"]], + dask="allowed", + ) + Ry_pred.name = "pseudo_scores_Y" + return Ry_pred + + def _get_components(self, normalized=True): + comps1 = self.data["components1"] + comps2 = self.data["components2"] + + if not normalized: + comps1 = comps1 * self.data["norm1"] + comps2 = comps2 * self.data["norm2"] + + return comps1, comps2 + + def _get_scores(self, normalized=False): + norm1 = self.data["norm1"] + norm2 = self.data["norm2"] + + scores1 = self.data["scores1"] + scores2 = self.data["scores2"] + + if normalized: + scores1 = scores1 / norm1 + scores2 = scores2 / norm2 + + return scores1, scores2 + + def cross_correlation_coefficients(self): + """Get the cross-correlation coefficients. + + The cross-correlation coefficients between the scores of ``X`` and ``Y`` + are computed as: + + .. 
math:: + c_{xy, i} = \\text{corr} \\left(\\mathbf{r}_{x, i}, \\mathbf{r}_{y, i} \\right) + + where :math:`\\mathbf{r}_{x, i}` and :math:`\\mathbf{r}_{y, i}` are the + `i`th scores of ``X`` and ``Y``, + + Notes + ----- + When :math:`\\alpha=0`, the cross-correlation coefficients are + equivalent to the canonical correlation coefficients. + + """ + + Rx = self.data["scores1"] + Ry = self.data["scores2"] + + cross_corr = self._compute_cross_matrix( + Rx, + Ry, + sample_dim=self.sample_name, + feature_dim_x="mode", + feature_dim_y="mode", + method="correlation", + diagonal=True, + ) + cross_corr = cross_corr.real + cross_corr.name = "cross_correlation_coefficients" + return cross_corr + + def correlation_coefficients_X(self): + """Get the correlation coefficients for the scores of :math:`X`. + + The correlation coefficients of the scores of :math:`X` are given by: + + .. math:: + c_{x, ij} = \\text{corr} \\left(\\mathbf{r}_{x, i}, \\mathbf{r}_{x, j} \\right) + + where :math:`\\mathbf{r}_{x, i}` and :math:`\\mathbf{r}_{x, j}` are the + `i`th and `j`th scores of :math:`X`. + + """ + Rx = self.data["scores1"] + + corr = self._compute_cross_matrix( + Rx, + Rx, + sample_dim=self.sample_name, + feature_dim_x="mode", + feature_dim_y="mode", + method="correlation", + diagonal=False, + ) + corr.name = "correlation_coefficients_X" + return corr + + def correlation_coefficients_Y(self): + """Get the correlation coefficients for the scores of :math:`Y`. + + The correlation coefficients of the scores of :math:`Y` are given by: + + .. math:: + c_{y, ij} = \\text{corr} \\left(\\mathbf{r}_{y, i}, \\mathbf{r}_{y, j} \\right) + + where :math:`\\mathbf{r}_{y, i}` and :math:`\\mathbf{r}_{y, j}` are the + `i`th and `j`th scores of :math:`Y`. + + """ + Ry = self.data["scores2"] + + corr = self._compute_cross_matrix( + Ry, + Ry, + sample_dim=self.sample_name, + feature_dim_x="mode", + feature_dim_y="mode", + method="correlation", + diagonal=False, + ) + corr.name = "correlation_coefficients_Y" + return corr + + def squared_covariance_fraction(self): + """Get the squared covariance fraction (SCF). + + The SCF is computed as a weighted mean-square error (see equation (15) + in Swenson (2015)) : + + .. math:: + SCF_{i} = 1 - \\frac{\\|\\mathbf{d}_{X,i}^T \\mathbf{d}_{Y,i}\\|_F^2}{\\|X^TY\\|_F^2} + + where :math:`\\mathbf{d}_{X,i}` and :math:`\\mathbf{d}_{Y,i}` are the + residuals of the input data :math:`X` and :math:`Y` after reconstruction + by the `ith` scores of :math:`X` and :math:`Y`, respectively. + + References + ---------- + Swenson, E. Continuum Power CCA: A Unified Approach for Isolating + Coupled Modes. Journal of Climate 28, 1016–1030 (2015). 
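As an editorial aside, when the modes come from a plain SVD of the cross-covariance matrix (a simplified stand-in for the fitted, whitened modes), the residual-based SCF above reduces to the familiar ratio of squared singular values. A short NumPy sketch of that reduction, with made-up data:

```python
# Editorial sketch of the residual-based SCF, using a plain SVD of the
# cross-covariance as a stand-in for the fitted modes.
import numpy as np

rng = np.random.default_rng(1)
X = rng.standard_normal((300, 8))
Y = rng.standard_normal((300, 6))
X -= X.mean(axis=0)
Y -= Y.mean(axis=0)

n = X.shape[0]
C = X.T @ Y / (n - 1)
U, s, Vt = np.linalg.svd(C, full_matrices=False)
total = np.linalg.norm(C) ** 2

scf = []
for i in range(s.size):
    dX = X - np.outer(X @ U[:, i], U[:, i])  # residual of X after removing mode i
    dY = Y - np.outer(Y @ Vt[i], Vt[i])      # residual of Y after removing mode i
    resid = np.linalg.norm(dX.T @ dY / (n - 1)) ** 2
    scf.append(1 - resid / total)

print(np.allclose(scf, s**2 / total))  # True: equivalent to s_i^2 / ||C||_F^2
```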
+ + """ + + def _compute_residual_variance_numpy(X, Y, Xrec, Yrec): + dX = X - Xrec + dY = Y - Yrec + + return np.linalg.norm(dX.conj().T @ dY / (dX.shape[0] - 1)) ** 2 + + total_squared_covariance = self.data["total_squared_covariance"] + sample_name_x = "sample_dim_x" + sample_name_y = "sample_dim_y" + + # Get the singular vectors + Q1 = self.data["components1"] + Q2 = self.data["components2"] + + # Get input data + X1 = self.data["input_data1"] + X2 = self.data["input_data2"] + + # Unwhiten the data + X1 = self.whitener1.inverse_transform_data(X1, unwhiten_only=True) + X2 = self.whitener2.inverse_transform_data(X2, unwhiten_only=True) + + # Rename the sample dimension to avoid conflicts for + # different coordinates with same length + X1 = X1.rename({self.sample_name: sample_name_x}) + X2 = X2.rename({self.sample_name: sample_name_y}) + + # Get the component scores + scores1 = self.data["scores1"] + scores2 = self.data["scores2"] + + # Compute the residual variance for each mode + squared_covariance_fraction: list[DataArray] = [] + for mode in scores1.mode.values: + # Reconstruct the data + X1r = xr.dot( + scores1.sel(mode=[mode]), Q1.sel(mode=[mode]).conj().T, dims="mode" + ) + X2r = xr.dot( + scores2.sel(mode=[mode]), Q2.sel(mode=[mode]).conj().T, dims="mode" + ) + + # Unwhitend the reconstructed data + X1r = self.whitener1.inverse_transform_data(X1r, unwhiten_only=True) + X2r = self.whitener2.inverse_transform_data(X2r, unwhiten_only=True) + + # Compute fraction variance explained + X1r = X1r.rename({self.sample_name: sample_name_x}) + X2r = X2r.rename({self.sample_name: sample_name_y}) + res_var: DataArray = xr.apply_ufunc( + _compute_residual_variance_numpy, + X1, + X2, + X1r, + X2r, + input_core_dims=[ + [sample_name_x, self.feature_name[0]], + [sample_name_y, self.feature_name[1]], + [sample_name_x, self.feature_name[0]], + [sample_name_y, self.feature_name[1]], + ], + output_core_dims=[[]], + dask="allowed", + ) + res_var = res_var.expand_dims({"mode": [mode]}) + squared_covariance_fraction.append(1 - res_var / total_squared_covariance) + + scf = xr.concat(squared_covariance_fraction, dim="mode") + scf.name = "squared_covariance_fraction" + + # In theory, the residual can be larger than the total squared covariance + # if a mode is not well defined. In this case, the SCF would be negative. + # We set these values to zero. + scf = xr.where(scf < 0, 0, scf) + return scf + + def fraction_variance_X_explained_by_X(self): + """Get the fraction of variance explained (FVE X). + + The FVE X is the fraction of variance in :math:`X` explained by the + scores of :math:`X`. It is computed as a weighted mean-square error (see + equation (15) in Swenson (2015)) : + + .. math:: + FVE_{X|X,i} = 1 - \\frac{\\|\\mathbf{d}_{X,i}\\|_F^2}{\\|X\\|_F^2} + + where :math:`\\mathbf{d}_{X,i}` are the residuals of the input data + :math:`X` after reconstruction by the `ith` scores of :math:`X`. + + References + ---------- + Swenson, E. Continuum Power CCA: A Unified Approach for Isolating + Coupled Modes. Journal of Climate 28, 1016–1030 (2015). 
+ + """ + # Get the singular vectors + Qx = self.data["components1"] + + # Get input data + X = self.data["input_data1"] + + # Unwhiten the data + X = self.whitener1.inverse_transform_data(X, unwhiten_only=True) + + # Compute the total variance + total_variance: DataArray = self._compute_total_variance(X, self.sample_name) + + # Get the component scores + Rx = self.data["scores1"] + + # Compute the residual variance for each mode + fraction_variance_explained: list[DataArray] = [] + for mode in Rx.mode.values: + # Reconstruct the data + Xr = xr.dot(Rx.sel(mode=[mode]), Qx.sel(mode=[mode]).conj().T, dims="mode") + + # Unwhitend the reconstructed data + Xr = self.whitener1.inverse_transform_data(Xr, unwhiten_only=True) + + # Compute fraction variance explained + residual_variance = self._compute_total_variance(X - Xr, self.sample_name) + residual_variance = residual_variance.expand_dims({"mode": [mode]}) + fraction_variance_explained.append(1 - residual_variance / total_variance) + + fve_xx = xr.concat(fraction_variance_explained, dim="mode") + fve_xx.name = "fraction_variance_X_explained_by_X" + return fve_xx + + def fraction_variance_Y_explained_by_Y(self): + """Get the fraction of variance explained (FVE Y). + + The FVE Y is the fraction of variance in :math:`Y` explained by the + scores of :math:`Y`. It is computed as a weighted mean-square error (see + equation (15) in Swenson (2015)) : + + .. math:: + FVE_{Y|Y,i} = 1 - \\frac{\\|\\mathbf{d}_{Y,i}\\|_F^2}{\\|Y\\|_F^2} + + where :math:`\\mathbf{d}_{Y,i}` are the residuals of the input data + :math:`Y` after reconstruction by the `ith` scores of :math:`Y`. + + References + ---------- + Swenson, E. Continuum Power CCA: A Unified Approach for Isolating + Coupled Modes. Journal of Climate 28, 1016–1030 (2015). + + """ + # Get the singular vectors + Qy = self.data["components2"] + + # Get input data + Y = self.data["input_data2"] + + # Unwhiten the data + Y = self.whitener2.inverse_transform_data(Y, unwhiten_only=True) + + # Compute the total variance + total_variance: DataArray = self._compute_total_variance(Y, self.sample_name) + + # Get the component scores + Ry = self.data["scores2"] + + # Compute the residual variance for each mode + fraction_variance_explained: list[DataArray] = [] + for mode in Ry.mode.values: + # Reconstruct the data + Yr = xr.dot(Ry.sel(mode=[mode]), Qy.sel(mode=[mode]).conj().T, dims="mode") + + # Unwhitend the reconstructed data + Yr = self.whitener2.inverse_transform_data(Yr, unwhiten_only=True) + + # Compute fraction variance explained + residual_variance = self._compute_total_variance(Y - Yr, self.sample_name) + residual_variance = residual_variance.expand_dims({"mode": [mode]}) + fraction_variance_explained.append(1 - residual_variance / total_variance) + + fve_yy = xr.concat(fraction_variance_explained, dim="mode") + fve_yy.name = "fraction_variance_Y_explained_by_Y" + return fve_yy + + def fraction_variance_Y_explained_by_X(self) -> DataArray: + """Get the fraction of variance explained (FVE YX). + + The FVE YX is the fraction of variance in :math:`Y` explained by the + scores of :math:`X`. It is computed as a weighted mean-square error (see + equation (15) in Swenson (2015)) : + + .. 
math:: + FVE_{Y|X,i} = 1 - \\frac{\\|(X^TX)^{-1/2} \\mathbf{d}_{X,i}^T \\mathbf{d}_{Y,i}\\|_F^2}{\\|(X^TX)^{-1/2} X^TY\\|_F^2} + + where :math:`\\mathbf{d}_{X,i}` and :math:`\\mathbf{d}_{Y,i}` are the + residuals of the input data :math:`X` and :math:`Y` after reconstruction + by the `ith` scores of :math:`X` and :math:`Y`, respectively. + + References + ---------- + Swenson, E. Continuum Power CCA: A Unified Approach for Isolating + Coupled Modes. Journal of Climate 28, 1016–1030 (2015). + + """ + + def _compute_total_variance_numpy(X, Y): + Cx = X.conj().T @ X / (X.shape[0] - 1) + Tinv = _fractional_matrix_power(Cx, -0.5) + return np.linalg.norm(Tinv @ X.conj().T @ Y / (X.shape[0] - 1)) ** 2 + + def _compute_residual_variance_numpy(X, Y, Xrec, Yrec): + dX = X - Xrec + dY = Y - Yrec + + Cx = X.conj().T @ X / (X.shape[0] - 1) + Tinv = _fractional_matrix_power(Cx, -0.5) + return np.linalg.norm(Tinv @ dX.conj().T @ dY / (dX.shape[0] - 1)) ** 2 + + sample_name_x = "sample_dim_x" + sample_name_y = "sample_dim_y" + + # Get the singular vectors + Q1 = self.data["components1"] + Q2 = self.data["components2"] + + # Get input data + X1 = self.data["input_data1"] + X2 = self.data["input_data2"] + + # Unwhiten the data + X1 = self.whitener1.inverse_transform_data(X1, unwhiten_only=True) + X2 = self.whitener2.inverse_transform_data(X2, unwhiten_only=True) + + # Compute the total variance + X1 = X1.rename({self.sample_name: sample_name_x}) + X2 = X2.rename({self.sample_name: sample_name_y}) + total_variance: DataArray = xr.apply_ufunc( + _compute_total_variance_numpy, + X1, + X2, + input_core_dims=[ + [sample_name_x, self.feature_name[0]], + [sample_name_y, self.feature_name[1]], + ], + output_core_dims=[[]], + dask="allowed", + ) + + # Get the component scores + scores1 = self.data["scores1"] + scores2 = self.data["scores2"] + + # Compute the residual variance for each mode + fraction_variance_explained: list[DataArray] = [] + for mode in scores1.mode.values: + # Reconstruct the data + X1r = xr.dot( + scores1.sel(mode=[mode]), Q1.sel(mode=[mode]).conj().T, dims="mode" + ) + X2r = xr.dot( + scores2.sel(mode=[mode]), Q2.sel(mode=[mode]).conj().T, dims="mode" + ) + + # Unwhitend the reconstructed data + X1r = self.whitener1.inverse_transform_data(X1r, unwhiten_only=True) + X2r = self.whitener2.inverse_transform_data(X2r, unwhiten_only=True) + + # Compute fraction variance explained + X1r = X1r.rename({self.sample_name: sample_name_x}) + X2r = X2r.rename({self.sample_name: sample_name_y}) + res_var: DataArray = xr.apply_ufunc( + _compute_residual_variance_numpy, + X1, + X2, + X1r, + X2r, + input_core_dims=[ + [sample_name_x, self.feature_name[0]], + [sample_name_y, self.feature_name[1]], + [sample_name_x, self.feature_name[0]], + [sample_name_y, self.feature_name[1]], + ], + output_core_dims=[[]], + dask="allowed", + ) + res_var = res_var.expand_dims({"mode": [mode]}) + fraction_variance_explained.append(1 - res_var / total_variance) + + fve_yx = xr.concat(fraction_variance_explained, dim="mode") + fve_yx.name = "fraction_variance_Y_explained_by_X" + return fve_yx + + def homogeneous_patterns(self, correction=None, alpha=0.05): + """Get the homogeneous correlation patterns. + + The homogeneous correlation patterns are the correlation coefficients + between the input data and the scores. They are defined as: + + .. math:: + H_{X, i} = \\text{corr} \\left(X, \\mathbf{r}_{x,i} \\right) + + .. 
math:: + H_{Y, i} = \\text{corr} \\left(Y, \\mathbf{r}_{y,i} \\right) + + where :math:`X` and :math:`Y` are the input data, and + :math:`\\mathbf{r}_{x,i}` and :math:`\\mathbf{r}_{y,i}` are the `i`th + scores of :math:`X` and :math:`Y`, respectively. + + + Parameters + ---------- + correction: str, default=None + Method to apply a multiple testing correction. If None, no + correction is applied. Available methods are: - bonferroni : + one-step correction - sidak : one-step correction - holm-sidak : + step down method using Sidak adjustments - holm : step-down method + using Bonferroni adjustments - simes-hochberg : step-up method + (independent) - hommel : closed method based on Simes tests + (non-negative) - fdr_bh : Benjamini/Hochberg (non-negative) + (default) - fdr_by : Benjamini/Yekutieli (negative) - fdr_tsbh : two + stage fdr correction (non-negative) - fdr_tsbky : two stage fdr + correction (non-negative) + alpha: float, default=0.05 + The desired family-wise error rate. Not used if `correction` is + None. + + Returns + ------- + tuple[DataObject, DataObject] + Homogenous correlation patterns of `X` and `Y`. + tuple[DataObject, DataObject] + p-values of the homogenous correlation patterns of `X` and `Y`. + + """ + input_data1 = self.data["input_data1"] + input_data2 = self.data["input_data2"] + + input_data1 = self.whitener1.inverse_transform_data(input_data1) + input_data2 = self.whitener2.inverse_transform_data(input_data2) + + scores1 = self.data["scores1"] + scores2 = self.data["scores2"] + + hom_pat1, pvals1 = pearson_correlation( + input_data1, + scores1, + correction=correction, + alpha=alpha, + sample_name=self.sample_name, + feature_name=self.feature_name[0], + ) + hom_pat2, pvals2 = pearson_correlation( + input_data2, + scores2, + correction=correction, + alpha=alpha, + sample_name=self.sample_name, + feature_name=self.feature_name[1], + ) + + hom_pat1.name = "left_homogeneous_patterns" + hom_pat2.name = "right_homogeneous_patterns" + + pvals1.name = "pvalues_of_left_homogeneous_patterns" + pvals2.name = "pvalues_of_right_homogeneous_patterns" + + hom_pat1 = self.preprocessor1.inverse_transform_components(hom_pat1) + hom_pat2 = self.preprocessor2.inverse_transform_components(hom_pat2) + + pvals1 = self.preprocessor1.inverse_transform_components(pvals1) + pvals2 = self.preprocessor2.inverse_transform_components(pvals2) + + return (hom_pat1, hom_pat2), (pvals1, pvals2) + + def heterogeneous_patterns(self, correction=None, alpha=0.05): + """Get the heterogeneous correlation patterns. + + The heterogeneous patterns are the correlation coefficients between the + input data and the scores of the other field: + + .. math:: + G_{X, i} = \\text{corr} \\left(X, \\mathbf{r}_{y,i} \\right) + + .. math:: + G_{Y, i} = \\text{corr} \\left(Y, \\mathbf{r}_{x,i} \\right) + + where :math:`X` and :math:`Y` are the input data, and + :math:`\\mathbf{r}_{x,i}` and :math:`\\mathbf{r}_{y,i}` are the `i`th + scores of :math:`X` and :math:`Y`, respectively. + + Parameters + ---------- + correction: str, default=None + Method to apply a multiple testing correction. If None, no + correction is applied. 
Available methods are: - bonferroni : + one-step correction - sidak : one-step correction - holm-sidak : + step down method using Sidak adjustments - holm : step-down method + using Bonferroni adjustments - simes-hochberg : step-up method + (independent) - hommel : closed method based on Simes tests + (non-negative) - fdr_bh : Benjamini/Hochberg (non-negative) + (default) - fdr_by : Benjamini/Yekutieli (negative) - fdr_tsbh : two + stage fdr correction (non-negative) - fdr_tsbky : two stage fdr + correction (non-negative) + alpha: float, default=0.05 + The desired family-wise error rate. Not used if `correction` is + None. + + Returns + ------- + tuple[DataObject, DataObject] + Heterogenous correlation patterns of `X` and `Y`. + tuple[DataObject, DataObject] + p-values of the heterogenous correlation patterns of `X` and `Y`. + + """ + input_data1 = self.data["input_data1"] + input_data2 = self.data["input_data2"] + + input_data1 = self.whitener1.inverse_transform_data(input_data1) + input_data2 = self.whitener2.inverse_transform_data(input_data2) + + scores1 = self.data["scores1"] + scores2 = self.data["scores2"] + + patterns1, pvals1 = pearson_correlation( + input_data1, + scores2, + correction=correction, + alpha=alpha, + sample_name=self.sample_name, + feature_name=self.feature_name[0], + ) + patterns2, pvals2 = pearson_correlation( + input_data2, + scores1, + correction=correction, + alpha=alpha, + sample_name=self.sample_name, + feature_name=self.feature_name[1], + ) + + patterns1.name = "left_heterogeneous_patterns" + patterns2.name = "right_heterogeneous_patterns" + + pvals1.name = "pvalues_of_left_heterogeneous_patterns" + pvals2.name = "pvalues_of_right_heterogeneous_patterns" + + patterns1 = self.preprocessor1.inverse_transform_components(patterns1) + patterns2 = self.preprocessor2.inverse_transform_components(patterns2) + + pvals1 = self.preprocessor1.inverse_transform_components(pvals1) + pvals2 = self.preprocessor2.inverse_transform_components(pvals2) + + return (patterns1, patterns2), (pvals1, pvals2) + + def _validate_loaded_data(self, data: xr.DataArray): + if data.attrs.get("placeholder"): + warnings.warn( + f"The input data field '{data.name}' was not saved, which will produce" + " empty results when calling `homogeneous_patterns()` or " + "`heterogeneous_patterns()`. To avoid this warning, you can save the" + " model with `save_data=True`, or add the data manually by running" + " it through the model's `preprocessor.transform()` method and then" + " attaching it with `data.add()`." + ) + + def _compute_cross_matrix( + self, + X: DataArray, + Y: DataArray, + sample_dim: str, + feature_dim_x: str, + feature_dim_y: str, + method: str = "covariance", + diagonal: bool = False, + ) -> DataArray: + """Compute the cross matrix of two data objects. + + Assume centered data. + + Parameters + ---------- + X, Y : DataArray + DataArrays to compute the cross matrix from. + sample_dim : str + Name of the sample dimension. + feature_dim_x, feature_dim_y : str + Name of the feature dimensions. If the feature dimensions are the same, they are renamed to avoid conflicts. + method : {"covariance", "correlation"} + Method to compute the cross matrix. + diagonal : bool, default=False + Whether to compute the diagonal of the cross matrix. + + Returns + ------- + DataArray + The cross matrix of the two data objects. 
+ + """ + if feature_dim_x == feature_dim_y: + new_feature_dim_x = feature_dim_x + "_x" + new_feature_dim_y = feature_dim_y + "_y" + X = X.rename({feature_dim_x: new_feature_dim_x}) + Y = Y.rename({feature_dim_y: new_feature_dim_y}) + feature_dim_x = new_feature_dim_x + feature_dim_y = new_feature_dim_y + + # Rename the sample dimension to avoid conflicts for + # different coordinates with same length + sample_dim_x = sample_dim + "_x" + sample_dim_y = sample_dim + "_y" + X = X.rename({sample_dim: sample_dim_x}) + Y = Y.rename({sample_dim: sample_dim_y}) + + if method == "correlation": + X = self._normalize_data(X, sample_dim_x) + Y = self._normalize_data(Y, sample_dim_y) + + if diagonal: + return xr.apply_ufunc( + self._compute_cross_covariance_diagonal_numpy, + X, + Y, + input_core_dims=[ + [sample_dim_x, feature_dim_x], + [sample_dim_y, feature_dim_y], + ], + output_core_dims=[[feature_dim_y]], + dask="allowed", + ).rename({feature_dim_y: feature_dim_y[:-2]}) + else: + return xr.apply_ufunc( + self._compute_cross_covariance_numpy, + X, + Y, + input_core_dims=[ + [sample_dim_x, feature_dim_x], + [sample_dim_y, feature_dim_y], + ], + output_core_dims=[[feature_dim_x, feature_dim_y]], + dask="allowed", + ) + + def _compute_cross_covariance_diagonal_numpy(self, X, Y): + # Assume centered data + return np.diag(self._compute_cross_covariance_numpy(X, Y)) + + def _compute_total_squared_covariance(self, C: DataArray) -> DataArray: + """Compute the total squared covariance. + + Requires the unwhitened covariance matrix which we can obtain by multiplying the whitened covariance matrix with the inverse of the whitening transformation matrix. + """ + C = self.whitener2.inverse_transform_data(C) + C = self.whitener1.inverse_transform_data(C.conj().T) + # Not necessary to conjugate transpose for total squared covariance + # C = C.conj().T + return (abs(C) ** 2).sum() + + @staticmethod + def _compute_total_variance(X: DataArray, dim: str) -> DataArray: + """Compute the total variance of the centered data.""" + return (X * X.conj()).sum() / (X[dim].size - 1) + + @staticmethod + def _compute_cross_covariance_numpy(X, Y): + # Assume centered data + n_samples_x = X.shape[0] + n_samples_y = Y.shape[0] + if n_samples_x != n_samples_y: + err_msg = f"Both data matrices must have the same number of samples but found {n_samples_x} in the first and {n_samples_y} in the second." + raise ValueError(err_msg) + return X.conj().T @ Y / (n_samples_x - 1) + + @staticmethod + def _normalize_data(X, dim): + # Assume centered data + return X / X.std(dim) + + +class ComplexCPCCA(CPCCA): + """Complex CPCCA. + + Complex CPCCA extends classical CPCCA [1]_ by examining amplitude-phase + relationships. It is based on complex-valued fields obtained from a pair of + variables such as the zonal and meridional components, :math:`U` and + :math:`V`, of the wind field, leading to complex-valued data matrices: + + .. math:: + X = U_x + iV_x + + and + + .. math:: + Y = U_y + iV_y + + This method solves the following optimization problem: + + :math:`\\max_{q_x, q_y} \\left( q_x^H X^H Y q_y \\right)` + + subject to the constraints: + + :math:`q_x^H (X^HX)^{1-\\alpha_x} q_x = 1, \\quad q_y^H + (Y^HY)^{1-\\alpha_y} q_y = 1` + + where :math:`H` denotes the conjugate transpose, :math:`X` and :math:`Y` are + the complex-valued data matrices, and :math:`\\alpha_x` and + :math:`\\alpha_y` control the degree of whitening applied to the data. + + Parameters + ---------- + n_modes : int, default=2 + Number of modes to calculate. 
+ alpha : Sequence[float] | float, default=0.2 + Degree of whitening applied to the data. If float, the same value is + applied to both data sets. + padding : Sequence[str] | str | None, default="exp" + Padding method for the Hilbert transform. Available options are: - None: + no padding - "exp": exponential decay + decay_factor : Sequence[float] | float, default=0.2 + Decay factor for the exponential padding. + standardize : Squence[bool] | bool, default=False + Whether to standardize the input data. Generally not recommended as + standardization can be managed by the degree of whitening. + use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. The cross-covariance matrix is computed in the reduced + principal component space. + n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. + pca_init_rank_reduction : Sequence[float] | float, default=0.3 + Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the + initial fraction of rank reduction for faster PCA computation via + randomized SVD. + check_nans : Sequence[bool] | bool, default=True + Whether to check for NaNs in the input data. Set to False for lazy model + evaluation. + compute : bool, default=True + Whether to compute the model elements eagerly. If True, the following + are computed sequentially: preprocessor scaler, optional NaN checks, SVD + decomposition, scores, and components. + random_state : numpy.random.Generator | int | None, default=None + Seed for the random number generator. + sample_name : str, default="sample" + Name for the new sample dimension. + feature_name : Sequence[str] | str, default="feature" + Name for the new feature dimension. + solver : {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver function. + + Examples + -------- + + With two DataArrays :math:`u_i` and :math:`v_i` representing the zonal and + meridional components of the wind field for two different regions :math:`x` + and :math:`y`, construct + + >>> X = u_x + 1j * v_x + >>> Y = u_y + 1j * v_y + + and fit the Complex CPCCA model: + + >>> model = ComplexCPCCA(n_modes=5) + >>> model.fit(X, Y, "time") + + Finally, extract the amplitude and phase patterns: + + >>> amp_x, amp_y = model.components_amplitude() + >>> phase_x, phase_y = model.components_phase() + + + References + ---------- + .. [1] Swenson, E. Continuum Power CCA: A Unified Approach for Isolating + Coupled Modes. Journal of Climate 28, 1016–1030 (2015). 
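Expanding on the Examples section above with self-contained synthetic data; this is only a sketch, assuming `ComplexCPCCA` is exported under `xe.cross` as the rest of this diff suggests, and all field names are placeholders.

```python
import numpy as np
import pandas as pd
import xarray as xr
import xeofs as xe

# Hypothetical zonal/meridional wind components on a small grid.
rng = np.random.default_rng(0)
time = pd.date_range("2000-01", periods=60, freq="MS")
coords = {"time": time, "lon": np.arange(10)}

def make_field() -> xr.DataArray:
    return xr.DataArray(
        rng.standard_normal((60, 10)), coords=coords, dims=("time", "lon")
    )

u_x, v_x, u_y, v_y = (make_field() for _ in range(4))

# Complex-valued input fields as described in the docstring above.
X = u_x + 1j * v_x
Y = u_y + 1j * v_y

model = xe.cross.ComplexCPCCA(n_modes=3)
model.fit(X, Y, dim="time")

amp_x, amp_y = model.components_amplitude()
phase_x, phase_y = model.components_phase()
```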
+ + """ + + def __init__( + self, + n_modes: int = 2, + alpha: Sequence[float] | float = 0.2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + ): + CPCCA.__init__( + self, + n_modes=n_modes, + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + alpha=alpha, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + ) + self.attrs.update({"model": "Complex CPCCA"}) + + def _fit_algorithm(self, X: DataArray, Y: DataArray) -> Self: + if (not np.iscomplexobj(X)) or (not np.iscomplexobj(Y)): + warnings.warn( + "Expected complex-valued data but found real-valued data. For Hilbert model, use corresponding `Hilbert` class." + ) + + return super()._fit_algorithm(X, Y) + + def components_amplitude(self, normalized=True) -> tuple[DataObject, DataObject]: + """Get the amplitude of the components. + + The amplitudes of the components are defined as + + .. math:: + A_{x, ij} = |p_{x, ij}| + .. math:: + A_{y, ij} = |p_{y, ij}| + + where :math:`p_{ij}` is the :math:`i`-th entry of the :math:`j`-th + component and :math:`|\\cdot|` denotes the absolute value. + + Returns + ------- + tuple[DataObject, DataObject] + Component amplitudes of :math:`X` and :math:`Y`. + + """ + Px, Py = self._get_components(normalized=normalized) + + Px = self.whitener1.inverse_transform_components(Px) + Py = self.whitener2.inverse_transform_components(Py) + + Px = abs(Px) + Py = abs(Py) + + Px.name = "components_amplitude_X" + Py.name = "components_amplitude_Y" + + Px = self.preprocessor1.inverse_transform_components(Px) + Py = self.preprocessor2.inverse_transform_components(Py) + + return Px, Py + + def components_phase(self, normalized=True) -> tuple[DataObject, DataObject]: + """Get the phase of the components. + + The phases of the components are defined as + + .. math:: + \\phi_{x, ij} = \\arg(p_{x, ij}) + .. math:: + \\phi_{y, ij} = \\arg(p_{y, ij}) + + where :math:`p_{ij}` is the :math:`i`-th entry of the :math:`j`-th component and + :math:`\\arg(\\cdot)` denotes the argument of a complex number. + + Returns + ------- + tuple[DataObject, DataObject] + Component phases of :math:`X` and :math:`Y`. + + """ + Px, Py = self._get_components(normalized=normalized) + + Px = self.whitener1.inverse_transform_components(Px) + Py = self.whitener2.inverse_transform_components(Py) + + Px = xr.apply_ufunc(np.angle, Px, keep_attrs=True) + Py = xr.apply_ufunc(np.angle, Py, keep_attrs=True) + + Px.name = "components_phase_X" + Py.name = "components_phase_Y" + + Px = self.preprocessor1.inverse_transform_components(Px) + Py = self.preprocessor2.inverse_transform_components(Py) + + return Px, Py + + def scores_amplitude(self, normalized=False) -> tuple[DataArray, DataArray]: + """Get the amplitude of the scores. + + The amplitudes of the scores are defined as + + .. math:: + A_{x, ij} = |r_{y, ij}| + .. 
math:: + A_{y, ij} = |r_{x, ij}| + + where :math:`r_{ij}` is the :math:`i`-th entry of the :math:`j`-th score and + :math:`|\\cdot|` denotes the absolute value. + + Returns + ------- + tuple[DataArray, DataArray] + Score amplitudes of :math:`X` and :math:`Y`. + + """ + Rx, Ry = self._get_scores(normalized=normalized) + + Rx = self.whitener1.inverse_transform_scores(Rx) + Ry = self.whitener2.inverse_transform_scores(Ry) + + Rx = abs(Rx) + Ry = abs(Ry) + + Rx.name = "scores_amplitude_X" + Ry.name = "scores_amplitude_Y" + + Rx = self.preprocessor1.inverse_transform_scores(Rx) + Ry = self.preprocessor2.inverse_transform_scores(Ry) + + return Rx, Ry + + def scores_phase(self, normalized=False) -> tuple[DataArray, DataArray]: + """Get the phase of the scores. + + The phases of the scores are defined as + + .. math:: + \\phi_{x, ij} = \\arg(r_{x, ij}) + .. math:: + \\phi_{y, ij} = \\arg(r_{y, ij}) + + where :math:`r_{ij}` is the :math:`i`-th entry of the :math:`j`-th score and + :math:`\\arg(\\cdot)` denotes the argument of a complex number. + + Returns + ------- + tuple[DataArray, DataArray] + Score phases of :math:`X` and :math:`Y`. + + """ + Rx, Ry = self._get_scores(normalized=normalized) + + Rx = self.whitener1.inverse_transform_scores(Rx) + Ry = self.whitener2.inverse_transform_scores(Ry) + + Rx = xr.apply_ufunc(np.angle, Rx, keep_attrs=True) + Ry = xr.apply_ufunc(np.angle, Ry, keep_attrs=True) + + Rx.name = "scores_phase_X" + Ry.name = "scores_phase_Y" + + Rx = self.preprocessor1.inverse_transform_scores(Rx) + Ry = self.preprocessor2.inverse_transform_scores(Ry) + + return Rx, Ry + + +class HilbertCPCCA(ComplexCPCCA): + """Hilbert CPCCA. + + Hilbert CPCCA extends classical CPCCA [1]_ by examining + amplitude-phase relationships. It augments the input data with its Hilbert + transform, creating a complex-valued field. + + This method solves the following optimization problem: + + :math:`\\max_{q_x, q_y} \\left( q_x^H X^H Y q_y \\right)` + + subject to the constraints: + + :math:`q_x^H (X^HX)^{1-\\alpha_x} q_x = 1, \\quad q_y^H + (Y^HY)^{1-\\alpha_y} q_y = 1` + + where :math:`H` denotes the conjugate transpose, :math:`X` and :math:`Y` are + the augmented data matrices, and :math:`\\alpha_x` and :math:`\\alpha_y` + control the degree of whitening applied to the data. + + Parameters + ---------- + n_modes : int, default=2 + Number of modes to calculate. + alpha : Sequence[float] | float, default=0.2 + Degree of whitening applied to the data. If float, the same value is + applied to both data sets. + padding : Sequence[str] | str | None, default="exp" + Padding method for the Hilbert transform. Available options are: - None: + no padding - "exp": exponential decay + decay_factor : Sequence[float] | float, default=0.2 + Decay factor for the exponential padding. + standardize : Squence[bool] | bool, default=False + Whether to standardize the input data. Generally not recommended as + standardization can be managed by the degree of whitening. + use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. The cross-covariance matrix is computed in the reduced + principal component space. 
+ n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. + pca_init_rank_reduction : Sequence[float] | float, default=0.3 + Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the + initial fraction of rank reduction for faster PCA computation via + randomized SVD. + check_nans : Sequence[bool] | bool, default=True + Whether to check for NaNs in the input data. Set to False for lazy model + evaluation. + compute : bool, default=True + Whether to compute the model elements eagerly. If True, the following + are computed sequentially: preprocessor scaler, optional NaN checks, SVD + decomposition, scores, and components. + random_state : numpy.random.Generator | int | None, default=None + Seed for the random number generator. + sample_name : str, default="sample" + Name for the new sample dimension. + feature_name : Sequence[str] | str, default="feature" + Name for the new feature dimension. + solver : {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver function. + + Examples + -------- + + Perform Hilbert CPCCA on two real-valued datasets `X` and `Y`, using + exponential padding: + + >>> model = HilbertCPCCA(n_modes=5, padding="exp") + >>> model.fit(X, Y) + + References + ---------- + .. [1] Swenson, E. Continuum Power CCA: A Unified Approach for Isolating + Coupled Modes. Journal of Climate 28, 1016–1030 (2015). + + """ + + def __init__( + self, + n_modes: int = 2, + alpha: Sequence[float] | float = 0.2, + padding: Sequence[str] | str | None = "exp", + decay_factor: Sequence[float] | float = 0.2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + ): + ComplexCPCCA.__init__( + self, + n_modes=n_modes, + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + alpha=alpha, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + ) + self.attrs.update({"model": "Hilbert CPCCA"}) + + padding = self._process_parameter("padding", padding, "epx") + decay_factor = self._process_parameter("decay_factor", decay_factor, 0.2) + self._params["padding"] = padding + self._params["decay_factor"] = decay_factor + + def _fit_algorithm(self, X: DataArray, Y: DataArray) -> Self: + CPCCA._fit_algorithm(self, X, Y) + return self + + def transform( + self, X: DataObject | None = None, Y: DataObject | None = None, normalized=False + ) -> Sequence[DataArray]: + """Transform the input data into the component space.""" + raise NotImplementedError("Hilbert models do not support the transform method.") + + def _augment_data(self, X: DataArray, Y: DataArray) -> tuple[DataArray, DataArray]: + """Augment the 
data with the Hilbert transform.""" + params = self.get_params() + padding = params["padding"] + decay_factor = params["decay_factor"] + X = hilbert_transform( + X, + dims=(self.sample_name, self.feature_name[0]), + padding=padding[0], + decay_factor=decay_factor[0], + ) + Y = hilbert_transform( + Y, + dims=(self.sample_name, self.feature_name[1]), + padding=padding[1], + decay_factor=decay_factor[1], + ) + return X, Y diff --git a/xeofs/models/mca_rotator.py b/xeofs/cross/cpcca_rotator.py similarity index 51% rename from xeofs/models/mca_rotator.py rename to xeofs/cross/cpcca_rotator.py index d5d17bac..162157cd 100644 --- a/xeofs/models/mca_rotator.py +++ b/xeofs/cross/cpcca_rotator.py @@ -1,62 +1,68 @@ -from datetime import datetime -from typing import Dict, List +from typing import Sequence import numpy as np import xarray as xr from typing_extensions import Self -from .._version import __version__ +from ..base_model import BaseModel from ..data_container import DataContainer -from ..preprocessing.preprocessor import Preprocessor +from ..linalg.rotation import promax +from ..preprocessing import Preprocessor, Whitener from ..utils.data_types import DataArray, DataObject -from ..utils.rotation import promax from ..utils.xarray_utils import argsort_dask, get_deterministic_sign_multiplier -from .mca import MCA, HilbertMCA +from .cpcca import CPCCA, ComplexCPCCA, HilbertCPCCA -class MCARotator(MCA): - """Rotate a solution obtained from ``xe.models.MCA``. +class CPCCARotator(CPCCA): + """Rotate a solution obtained from ``xe.cross.CPCCA``. - Rotated MCA [1]_ is an extension of the standard MCA that applies an additional rotation - to the computed modes to maximize the variance explained individually by each mode. - This rotation method enhances interpretability by distributing the explained variance more - evenly among the modes, making it easier to discern patterns within the data. + Rotate the obtained components and scores of a CPCCA model to increase + interpretability. The algorithm here is based on the approach of Cheng & + Dunkerton (1995) [1]_ and adapted to the CPCCA framework [2]_. Parameters ---------- n_modes : int, default=10 Specify the number of modes to be rotated. power : int, default=1 - Set the power for the Promax rotation. A ``power`` value of 1 results - in a Varimax rotation. + Set the power for the Promax rotation. A ``power`` value of 1 results in + a Varimax rotation. max_iter : int or None, default=None Determine the maximum number of iterations for the computation of the rotation matrix. If not specified, defaults to 1000 if ``compute=True`` - and 100 if ``compute=False``, since we can't terminate a lazy computation - based using ``rtol``. + and 100 if ``compute=False``, since we can't terminate a lazy + computation based using ``rtol``. rtol : float, default=1e-8 Define the relative tolerance required to achieve convergence and terminate the iterative process. - squared_loadings : bool, default=False - Specify the method for constructing the combined vectors of loadings. If True, - the combined vectors are loaded with the singular values (termed "squared loadings"), - conserving the squared covariance under rotation. This allows estimation of mode importance - after rotation. If False, the combined vectors are loaded with the square root of the - singular values, following the method described by Cheng & Dunkerton. compute : bool, default=True Whether to compute the rotation immediately. References ---------- - .. [1] Cheng, X. & Dunkerton, T. J. 
Orthogonal Rotation of Spatial Patterns Derived from Singular Value Decomposition Analysis. J. Climate 8, 2631–2643 (1995). + .. [1] Cheng, X. & Dunkerton, T. J. Orthogonal Rotation of Spatial Patterns + Derived from Singular Value Decomposition Analysis. J. Climate 8, + 2631–2643 (1995). + .. [2] Swenson, E. Continuum Power CCA: A Unified Approach for Isolating + Coupled Modes. Journal of Climate 28, 1016–1030 (2015). Examples -------- - >>> model = MCA(n_modes=5) - >>> model.fit(da1, da2, dim='time') - >>> rotator = MCARotator(n_modes=5, power=2) + + Perform a CPCCA analysis: + + >>> model = CPCCA(n_modes=10) + >>> model.fit(X, Y, dim='time') + + Then, apply varimax rotation to first 5 components and scores: + + >>> rotator = CPCCARotator(n_modes=5) >>> rotator.fit(model) + + Retrieve the rotated components and scores: + >>> rotator.components() + >>> rotator.scores() """ @@ -66,9 +72,10 @@ def __init__( power: int = 1, max_iter: int | None = None, rtol: float = 1e-8, - squared_loadings: bool = False, compute: bool = True, ): + BaseModel.__init__(self) + if max_iter is None: max_iter = 1000 if compute else 100 @@ -78,123 +85,86 @@ def __init__( "power": power, "max_iter": max_iter, "rtol": rtol, - "squared_loadings": squared_loadings, "compute": compute, } # Define analysis-relevant meta data - self.attrs = {"model": "Rotated MCA"} + self.attrs.update({"model": "Rotated CPCCA"}) self.attrs.update(self._params) - self.attrs.update( - { - "software": "xeofs", - "version": __version__, - "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - } - ) # Attach empty objects self.preprocessor1 = Preprocessor() self.preprocessor2 = Preprocessor() + self.whitener1 = Whitener() + self.whitener2 = Whitener() self.data = DataContainer() - self.model = MCA() + self.model = CPCCA() self.sorted = False - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict( data=self.data, preprocessor1=self.preprocessor1, preprocessor2=self.preprocessor2, + whitener1=self.whitener1, + whitener2=self.whitener2, model=self.model, sorted=self.sorted, ) - def _compute_rot_mat_inv_trans(self, rotation_matrix, input_dims) -> xr.DataArray: - """Compute the inverse transpose of the rotation matrix. - - For orthogonal rotations (e.g., Varimax), the inverse transpose is equivalent - to the rotation matrix itself. For oblique rotations (e.g., Promax), the simplification - does not hold. - - Returns - ------- - rotation_matrix : xr.DataArray - - """ - if self._params["power"] > 1: - # inverse matrix - rotation_matrix = xr.apply_ufunc( - np.linalg.inv, - rotation_matrix, - input_core_dims=[(input_dims)], - output_core_dims=[(input_dims[::-1])], - vectorize=False, - dask="allowed", - ) - # transpose matrix - rotation_matrix = rotation_matrix.conj().transpose(*input_dims) - return rotation_matrix - - def fit(self, model: MCA) -> Self: - """Rotate the solution obtained from ``xe.models.MCA``. - - Parameters - ---------- - model : ``xe.models.MCA`` - The MCA model to be rotated. 
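The `_compute_rot_mat_inv_trans` helper removed here (and re-added further down in the renamed module) rests on a standard linear-algebra fact. A small NumPy sketch, unrelated to the xeofs API itself, of why the orthogonal case needs no extra work while the oblique case does:

```python
import numpy as np

# For an orthogonal (Varimax, power=1) rotation matrix R, the inverse
# transpose equals R itself, so scores can be rotated with R directly.
theta = 0.3
R = np.array([[np.cos(theta), -np.sin(theta)],
              [np.sin(theta),  np.cos(theta)]])
np.testing.assert_allclose(np.linalg.inv(R).T, R)

# For an oblique (Promax, power > 1) rotation the simplification does not
# hold, so the inverse transpose has to be computed explicitly.
R_oblique = np.array([[1.0, 0.4],
                      [0.0, 1.0]])
RinvT = np.linalg.inv(R_oblique).T
assert not np.allclose(RinvT, R_oblique)
```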
- - """ - self._fit_algorithm(model) - - if self._params["compute"]: - self.compute() - - return self - def _fit_algorithm(self, model) -> Self: self.model = model self.preprocessor1 = model.preprocessor1 self.preprocessor2 = model.preprocessor2 + self.whitener1 = model.whitener1 + self.whitener2 = model.whitener2 self.sample_name = self.model.sample_name self.feature_name = self.model.feature_name self.sorted = False + common_feature_dim = "common_feature_dim" + feature_name = self._get_feature_name() + n_modes = self._params["n_modes"] power = self._params["power"] max_iter = self._params["max_iter"] rtol = self._params["rtol"] - use_squared_loadings = self._params["squared_loadings"] # Construct the combined vector of loadings - # NOTE: In the methodology used by Cheng & Dunkerton (CD), the combined vectors are "loaded" or weighted - # with the square root of the singular values, akin to what is done in standard Varimax rotation. This method - # ensures that the total amount of covariance is conserved during the rotation process. - # However, in Maximum Covariance Analysis (MCA), the focus is usually on the squared covariance to determine - # the importance of a given mode. The approach adopted by CD does not preserve the squared covariance under - # rotation, making it impossible to estimate the importance of modes post-rotation. - # To resolve this issue, one possible workaround is to rotate the singular vectors that have been "loaded" - # or weighted with the singular values ("squared loadings"), as opposed to the square root of the singular values. - # In doing so, the squared covariance remains conserved under rotation, allowing for the estimation of the - # modes' importance. - norm1 = self.model.data["norm1"].sel(mode=slice(1, n_modes)) - norm2 = self.model.data["norm2"].sel(mode=slice(1, n_modes)) - if use_squared_loadings: - # Squared loadings approach conserving squared covariance - scaling = norm1 * norm2 - else: - # Cheng & Dunkerton approach conserving covariance - scaling = np.sqrt(norm1 * norm2) - - comps1 = self.model.data["components1"].sel(mode=slice(1, n_modes)) - comps2 = self.model.data["components2"].sel(mode=slice(1, n_modes)) - loadings = xr.concat([comps1, comps2], dim=self.feature_name) * scaling + # NOTE: In the methodology + # used by Cheng & Dunkerton (CD95), the combined vectors are "loaded" or + # weighted with the square root of the singular values, akin to what is + # done in standard Varimax rotation. While this approach ensures that + # the resulting projections are still uncorrelated as in the unrotated + # solution, it does not conserve the squared covariance fraction, i.e. + # the amount of explained squared covariance can be different before and + # after rotation. The authors then introduced a so-called "covariance + # fraction" which is conserved under rotation, but does not have a clear + # interpretation as the term covariance fraction is only correct when + # both data sets X and Y are equal and MCA reduces to PCA. 
+ svalues = self.model.data["singular_values"].sel(mode=slice(1, n_modes)) + scaling = np.sqrt(svalues) + + # Get unrotated singular vectors + Qx = self.model.data["components1"].sel(mode=slice(1, n_modes)) + Qy = self.model.data["components2"].sel(mode=slice(1, n_modes)) + + # Unwhiten and back-transform into physical space + Qx = self.whitener1.inverse_transform_components(Qx) + Qy = self.whitener2.inverse_transform_components(Qy) + + # Rename the feature dimension to a common name so that the combined vectors can be concatenated + Qx = Qx.rename({feature_name[0]: common_feature_dim}) + Qy = Qy.rename({feature_name[1]: common_feature_dim}) + + loadings = xr.concat([Qx, Qy], dim=common_feature_dim) * scaling # Rotate loadings promax_kwargs = {"power": power, "max_iter": max_iter, "rtol": rtol} rot_loadings, rot_matrix, phi_matrix = promax( loadings=loadings, - feature_dim=self.feature_name, + feature_dim=common_feature_dim, compute=self._params["compute"], **promax_kwargs, ) @@ -210,55 +180,67 @@ def _fit_algorithm(self, model) -> Self: ) # Rotated (loaded) singular vectors - comps1_rot = rot_loadings.isel( - {self.feature_name: slice(0, comps1.coords[self.feature_name].size)} + Qx_rot = rot_loadings.isel( + {common_feature_dim: slice(0, Qx.coords[common_feature_dim].size)} ) - comps2_rot = rot_loadings.isel( - {self.feature_name: slice(comps1.coords[self.feature_name].size, None)} + Qy_rot = rot_loadings.isel( + {common_feature_dim: slice(Qx.coords[common_feature_dim].size, None)} ) + # Rename the common feature dimension to the original feature names + Qx_rot = Qx_rot.rename({common_feature_dim: feature_name[0]}) + Qy_rot = Qy_rot.rename({common_feature_dim: feature_name[1]}) + + # For consistency with the unrotated model classes, we transform the pattern vectors + # into the whitened PC space + Qx_rot = self.whitener1.transform_components(Qx_rot) + Qy_rot = self.whitener2.transform_components(Qy_rot) + # Normalization factor of singular vectors norm1_rot = xr.apply_ufunc( np.linalg.norm, - comps1_rot, - input_core_dims=[[self.feature_name, "mode"]], + Qx_rot, + input_core_dims=[[feature_name[0], "mode"]], output_core_dims=[["mode"]], - exclude_dims={self.feature_name}, + exclude_dims={feature_name[0]}, kwargs={"axis": 0}, vectorize=False, dask="allowed", ) norm2_rot = xr.apply_ufunc( np.linalg.norm, - comps2_rot, - input_core_dims=[[self.feature_name, "mode"]], + Qy_rot, + input_core_dims=[[feature_name[1], "mode"]], output_core_dims=[["mode"]], - exclude_dims={self.feature_name}, + exclude_dims={feature_name[1]}, kwargs={"axis": 0}, vectorize=False, dask="allowed", ) # Rotated (normalized) singular vectors - comps1_rot = comps1_rot / norm1_rot - comps2_rot = comps2_rot / norm2_rot + Qx_rot = Qx_rot / norm1_rot + Qy_rot = Qy_rot / norm2_rot - # Remove the squaring introduced by the squared loadings approach - if use_squared_loadings: - norm1_rot = norm1_rot**0.5 - norm2_rot = norm2_rot**0.5 - - # norm1 * norm2 = "singular values" - squared_covariance = (norm1_rot * norm2_rot) ** 2 + # CD95 call the quantity "norm1 * norm2" the "explained covariance" + explained_covariance = norm1_rot * norm2_rot + squared_covariance = explained_covariance**2 # Reorder according to squared covariance - idx_modes_sorted = argsort_dask(squared_covariance, "mode")[::-1] + idx_modes_sorted = argsort_dask(squared_covariance, "mode")[::-1] # type: ignore idx_modes_sorted.coords.update(squared_covariance.coords) # Rotate scores using rotation matrix scores1 = self.model.data["scores1"].sel(mode=slice(1, 
n_modes)) scores2 = self.model.data["scores2"].sel(mode=slice(1, n_modes)) + scores1 = self.whitener1.inverse_transform_scores(scores1) + scores2 = self.whitener2.inverse_transform_scores(scores2) + + # Normalize scores + scores1 = scores1 / scaling + scores2 = scores2 / scaling + RinvT = self._compute_rot_mat_inv_trans( rot_matrix, input_dims=("mode_m", "mode_n") ) @@ -266,13 +248,13 @@ def _fit_algorithm(self, model) -> Self: scores1 = scores1.rename({"mode": "mode_m"}) scores2 = scores2.rename({"mode": "mode_m"}) RinvT = RinvT.rename({"mode_n": "mode"}) - scores1_rot = xr.dot(scores1, RinvT, dims="mode_m") - scores2_rot = xr.dot(scores2, RinvT, dims="mode_m") + scores1_rot = xr.dot(scores1, RinvT, dims="mode_m") * norm1_rot + scores2_rot = xr.dot(scores2, RinvT, dims="mode_m") * norm2_rot # Ensure consitent signs for deterministic output - modes_sign = get_deterministic_sign_multiplier(rot_loadings, self.feature_name) - comps1_rot = comps1_rot * modes_sign - comps2_rot = comps2_rot * modes_sign + modes_sign = get_deterministic_sign_multiplier(rot_loadings, common_feature_dim) + Qx_rot = Qx_rot * modes_sign + Qy_rot = Qy_rot * modes_sign scores1_rot = scores1_rot * modes_sign scores2_rot = scores2_rot * modes_sign @@ -283,8 +265,8 @@ def _fit_algorithm(self, model) -> Self: self.data.add( name="input_data2", data=self.model.data["input_data2"], allow_compute=False ) - self.data.add(name="components1", data=comps1_rot) - self.data.add(name="components2", data=comps2_rot) + self.data.add(name="components1", data=Qx_rot) + self.data.add(name="components2", data=Qy_rot) self.data.add(name="scores1", data=scores1_rot) self.data.add(name="scores2", data=scores2_rot) self.data.add(name="squared_covariance", data=squared_covariance) @@ -292,6 +274,7 @@ def _fit_algorithm(self, model) -> Self: name="total_squared_covariance", data=self.model.data["total_squared_covariance"], ) + self.data.add(name="idx_modes_sorted", data=idx_modes_sorted) self.data.add(name="norm1", data=norm1_rot) self.data.add(name="norm2", data=norm2_rot) @@ -304,43 +287,46 @@ def _fit_algorithm(self, model) -> Self: return self - def _post_compute(self): - """Leave sorting until after compute because it can't be done lazily.""" - self._sort_by_variance() + def fit(self, model: CPCCA) -> Self: + """Rotate the solution obtained from ``xe.cross.CPCCA``. - def _sort_by_variance(self): - """Re-sort the mode dimension of all data variables by variance explained.""" - if not self.sorted: - for key in self.data.keys(): - if "mode" in self.data[key].dims and key != "idx_modes_sorted": - self.data[key] = ( - self.data[key] - .isel(mode=self.data["idx_modes_sorted"].values) - .assign_coords(mode=self.data[key].mode) - ) - self.sorted = True + Parameters + ---------- + model : ``xe.cross.CPCCA`` + The CPCCA model to be rotated. + + """ + self._fit_algorithm(model) + + if self._params["compute"]: + self.compute() + + return self def transform( - self, data1: DataObject | None = None, data2: DataObject | None = None - ) -> DataArray | List[DataArray]: - """Project new "unseen" data onto the rotated singular vectors. + self, + X: DataObject | None = None, + Y: DataObject | None = None, + normalized: bool = False, + ) -> DataArray | list[DataArray]: + """Transform the data. Parameters ---------- - data1 : DataArray | Dataset | List[DataArray] - Data to be projected onto the rotated singular vectors of the first dataset. 
- data2 : DataArray | Dataset | List[DataArray] - Data to be projected onto the rotated singular vectors of the second dataset. + X, Y: DataObject | None + Data to be transformed. At least one of them must be provided. + normalized: bool, default=False + Whether to return L2 normalized scores. Returns ------- - DataArray | List[DataArray] - Projected data. + Sequence[DataArray] | DataArray + Transformed data. """ # raise error if no data is provided - if data1 is None and data2 is None: - raise ValueError("No data provided. Please provide data1 and/or data2.") + if X is None and Y is None: + raise ValueError("No data provided. Please provide X and/or Y.") n_modes = self._params["n_modes"] rot_matrix = self.data["rotation_matrix"] @@ -349,18 +335,21 @@ def transform( ) RinvT = RinvT.rename({"mode_n": "mode"}) + scaling = self.model.data["singular_values"].sel(mode=slice(1, n_modes)) + scaling = np.sqrt(scaling) + results = [] - if data1 is not None: + if X is not None: # Select the (non-rotated) singular vectors of the first dataset comps1 = self.model.data["components1"].sel(mode=slice(1, n_modes)) - norm1 = self.model.data["norm1"].sel(mode=slice(1, n_modes)) # Preprocess the data - data1 = self.preprocessor1.transform(data1) + comps1 = self.whitener1.inverse_transform_components(comps1) + X = self.preprocessor1.transform(X) # Compute non-rotated scores by projecting the data onto non-rotated components - projections1 = xr.dot(data1, comps1) / norm1 + projections1 = xr.dot(X, comps1) / scaling # Rotate the scores projections1 = projections1.rename({"mode": "mode_m"}) projections1 = xr.dot(projections1, RinvT, dims="mode_m") @@ -372,21 +361,25 @@ def transform( # Adapt the sign of the scores projections1 = projections1 * self.data["modes_sign"] + # Unscale the scores + if not normalized: + projections1 = projections1 * self.data["norm1"] + # Unstack the projections projections1 = self.preprocessor1.inverse_transform_scores(projections1) results.append(projections1) - if data2 is not None: + if Y is not None: # Select the (non-rotated) singular vectors of the second dataset comps2 = self.model.data["components2"].sel(mode=slice(1, n_modes)) - norm2 = self.model.data["norm2"].sel(mode=slice(1, n_modes)) # Preprocess the data - data2 = self.preprocessor2.transform(data2) + comps2 = self.whitener2.inverse_transform_components(comps2) + Y = self.preprocessor2.transform(Y) # Compute non-rotated scores by project the data onto non-rotated components - projections2 = xr.dot(data2, comps2) / norm2 + projections2 = xr.dot(Y, comps2) / scaling # Rotate the scores projections2 = projections2.rename({"mode": "mode_m"}) projections2 = xr.dot(projections2, RinvT, dims="mode_m") @@ -398,34 +391,83 @@ def transform( # Determine the sign of the scores projections2 = projections2 * self.data["modes_sign"] + # Unscale the scores + if not normalized: + projections2 = projections2 * self.data["norm2"] + # Unstack the projections projections2 = self.preprocessor2.inverse_transform_scores(projections2) results.append(projections2) if len(results) == 0: - raise ValueError("provide at least one of [`data1`, `data2`]") + raise ValueError("provide at least one of [`X`, `Y`]") elif len(results) == 1: return results[0] else: return results + def _post_compute(self): + """Leave sorting until after compute because it can't be done lazily.""" + self._sort_by_variance() + + def _sort_by_variance(self): + """Re-sort the mode dimension of all data variables by variance explained.""" + if not self.sorted: + for key in 
self.data.keys(): + if "mode" in self.data[key].dims and key != "idx_modes_sorted": + self.data[key] = ( + self.data[key] + .isel(mode=self.data["idx_modes_sorted"].values) + .assign_coords(mode=self.data[key].mode) + ) + self.sorted = True + + def _compute_rot_mat_inv_trans(self, rotation_matrix, input_dims) -> xr.DataArray: + """Compute the inverse transpose of the rotation matrix. -class HilbertMCARotator(MCARotator, HilbertMCA): - """Rotate a solution obtained from ``xe.models.HilbertMCA``. + For orthogonal rotations (e.g., Varimax), the inverse transpose is equivalent + to the rotation matrix itself. For oblique rotations (e.g., Promax), the simplification + does not hold. + + Returns + ------- + rotation_matrix : xr.DataArray + + """ + if self._params["power"] > 1: + # inverse matrix + rotation_matrix = xr.apply_ufunc( + np.linalg.inv, + rotation_matrix, + input_core_dims=[(input_dims)], + output_core_dims=[(input_dims[::-1])], + vectorize=False, + dask="allowed", + ) + # transpose matrix + rotation_matrix = rotation_matrix.conj().transpose(*input_dims) + return rotation_matrix + + def _get_feature_name(self): + return self.model.feature_name + + +class ComplexCPCCARotator(CPCCARotator, ComplexCPCCA): + """Rotate a solution obtained from ``xe.cross.ComplexCPCCA``. + + Rotate the obtained components and scores of a CPCCA model to increase + interpretability. The algorithm here is based on the approach of Cheng & + Dunkerton (1995) [1]_ and adapted to the CPCCA framework [2]_. - Hilbert Rotated MCA [1]_ [2]_ [3]_ extends MCA by incorporating both amplitude and phase information - using a Hilbert transform prior to performing MCA and subsequent Varimax or Promax rotation. - This adds a further layer of dimensionality to the analysis, allowing for a more nuanced interpretation - of complex relationships within the data, particularly useful when analyzing oscillatory data. Parameters ---------- n_modes : int, default=10 Specify the number of modes to be rotated. power : int, default=1 - Set the power for the Promax rotation. A ``power`` value of 1 results - in a Varimax rotation. + Set the power for the Promax rotation. A ``power`` value of 1 results in + a Varimax rotation. max_iter : int, default=1000 Determine the maximum number of iterations for the computation of the rotation matrix. @@ -433,46 +475,119 @@ class HilbertMCARotator(MCARotator, HilbertMCA): Define the relative tolerance required to achieve convergence and terminate the iterative process. squared_loadings : bool, default=False - Specify the method for constructing the combined vectors of loadings. If True, - the combined vectors are loaded with the singular values (termed "squared loadings"), - conserving the squared covariance under rotation. This allows estimation of mode importance - after rotation. If False, the combined vectors are loaded with the square root of the - singular values, following the method described by Cheng & Dunkerton. + Specify the method for constructing the combined vectors of loadings. If + True, the combined vectors are loaded with the singular values (termed + "squared loadings"), conserving the squared covariance under rotation. + This allows estimation of mode importance after rotation. If False, the + combined vectors are loaded with the square root of the singular values, + following the method described by Cheng & Dunkerton. compute: bool, default=True Whether to compute the rotation immediately. References ---------- - .. [1] Cheng, X. & Dunkerton, T. J. 
Orthogonal Rotation of Spatial Patterns Derived from Singular Value Decomposition Analysis. J. Climate 8, 2631–2643 (1995). - .. [2] Elipot, S., Frajka-Williams, E., Hughes, C. W., Olhede, S. & Lankhorst, M. Observed Basin-Scale Response of the North Atlantic Meridional Overturning Circulation to Wind Stress Forcing. Journal of Climate 30, 2029–2054 (2017). - .. [3] Rieger, N., Corral, Á., Olmedo, E. & Turiel, A. Lagged Teleconnections of Climate Variables Identified via Complex Rotated Maximum Covariance Analysis. Journal of Climate 34, 9861–9878 (2021). + .. [1] Cheng, X. & Dunkerton, T. J. Orthogonal Rotation of Spatial Patterns + Derived from Singular Value Decomposition Analysis. J. Climate 8, + 2631–2643 (1995). + .. [3] Swenson, E. Continuum Power CCA: A Unified Approach for Isolating + Coupled Modes. Journal of Climate 28, 1016–1030 (2015). + + Examples + -------- + + Perform a CPCCA analysis: + + >>> model = ComplexCPCCA(n_modes=10) + >>> model.fit(X, Y, dim='time') + + Then, apply varimax rotation to first 5 components and scores: + + >>> rotator = ComplexCPCCARotator(n_modes=5) + >>> rotator.fit(model) + + Retrieve the rotated components and scores: + + >>> rotator.components() + >>> rotator.scores() + + """ + + def __init__(self, **kwargs): + CPCCARotator.__init__(self, **kwargs) + self.attrs.update({"model": "Rotated Complex CPCCA"}) + self.model = ComplexCPCCA() +class HilbertCPCCARotator(ComplexCPCCARotator, HilbertCPCCA): + """Rotate a solution obtained from ``xe.cross.HilbertCPCCA``. + + Rotate the obtained components and scores of a CPCCA model to increase + interpretability. The algorithm here is based on the approach of Cheng & + Dunkerton (1995) [1]_ and adapted to the CPCCA framework [2]_. + + + Parameters + ---------- + n_modes : int, default=10 + Specify the number of modes to be rotated. + power : int, default=1 + Set the power for the Promax rotation. A ``power`` value of 1 results in + a Varimax rotation. + max_iter : int, default=1000 + Determine the maximum number of iterations for the computation of the + rotation matrix. + rtol : float, default=1e-8 + Define the relative tolerance required to achieve convergence and + terminate the iterative process. + squared_loadings : bool, default=False + Specify the method for constructing the combined vectors of loadings. If + True, the combined vectors are loaded with the singular values (termed + "squared loadings"), conserving the squared covariance under rotation. + This allows estimation of mode importance after rotation. If False, the + combined vectors are loaded with the square root of the singular values, + following the method described by Cheng & Dunkerton. + compute: bool, default=True + Whether to compute the rotation immediately. + + References + ---------- + .. [1] Cheng, X. & Dunkerton, T. J. Orthogonal Rotation of Spatial Patterns + Derived from Singular Value Decomposition Analysis. J. Climate 8, + 2631–2643 (1995). + .. [3] Swenson, E. Continuum Power CCA: A Unified Approach for Isolating + Coupled Modes. Journal of Climate 28, 1016–1030 (2015). 
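One practical consequence of the `transform` override shown further below in this class: like `HilbertCPCCA`, the Hilbert rotator cannot project new, unseen data. A hedged, self-contained sketch of the expected behaviour, assuming the classes are exported under `xe.cross`; the synthetic fields are placeholders.

```python
import numpy as np
import pandas as pd
import xarray as xr
import xeofs as xe

# Synthetic stand-in data; names and sizes are purely illustrative.
rng = np.random.default_rng(2)
time = pd.date_range("2000-01", periods=60, freq="MS")
X = xr.DataArray(rng.standard_normal((60, 12)),
                 coords={"time": time, "lon": np.arange(12)}, dims=("time", "lon"))
Y = xr.DataArray(rng.standard_normal((60, 12)),
                 coords={"time": time, "lon": np.arange(12)}, dims=("time", "lon"))

model = xe.cross.HilbertCPCCA(n_modes=5)
model.fit(X, Y, dim="time")

rotator = xe.cross.HilbertCPCCARotator(n_modes=3)
rotator.fit(model)

try:
    rotator.transform(X=X)
except NotImplementedError:
    # Hilbert models do not support projecting unseen data.
    print("transform is not available for Hilbert models")
```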
Examples -------- - >>> model = HilbertMCA(n_modes=5) - >>> model.fit(da1, da2, dim='time') - >>> rotator = HilbertMCARotator(n_modes=5, power=2) + + Perform a CPCCA analysis: + + >>> model = HilbertCPCCA(n_modes=10) + >>> model.fit(X, Y, dim='time') + + Then, apply varimax rotation to first 5 components and scores: + + >>> rotator = HilbertCPCCARotator(n_modes=5) >>> rotator.fit(model) + + Retrieve the rotated components and scores: + >>> rotator.components() + >>> rotator.scores() """ def __init__(self, **kwargs): - super().__init__(**kwargs) - self.attrs.update({"model": "Hilbert Rotated MCA"}) - self.model = HilbertMCA() + ComplexCPCCARotator.__init__(self, **kwargs) + self.attrs.update({"model": "Rotated Hilbert CPCCA"}) + self.model = HilbertCPCCA() - def transform(self, **kwargs): + def transform( + self, X: DataObject | None = None, Y: DataObject | None = None, normalized=False + ) -> Sequence[DataArray]: + """Transform the data.""" # Here we make use of the Method Resolution Order (MRO) to call the - # transform method of the first class in the MRO after `MCARotator` - # that has a transform method. In this case it will be `HilbertMCA`, + # transform method of the first class in the MRO after `CPCCARotator` + # that has a transform method. In this case it will be `HilbertCPCCA`, # which will raise an error because it does not have a transform method. - super(MCARotator, self).transform(**kwargs) - - def homogeneous_patterns(self, **kwargs): - super(MCARotator, self).homogeneous_patterns(**kwargs) - - def heterogeneous_patterns(self, **kwargs): - super(MCARotator, self).homogeneous_patterns(**kwargs) + return super(CPCCARotator, self).transform(X, Y, normalized) diff --git a/xeofs/cross/mca.py b/xeofs/cross/mca.py new file mode 100644 index 00000000..d45d1182 --- /dev/null +++ b/xeofs/cross/mca.py @@ -0,0 +1,464 @@ +import warnings +from typing import Sequence + +import numpy as np + +from ..utils.data_types import DataArray +from .cpcca import CPCCA, ComplexCPCCA, HilbertCPCCA + + +class MCA(CPCCA): + """Maximum Covariance Analysis (MCA). + + MCA seeks to find paris of coupled patterns that maximize the squared + covariance [1]_ [2]_ . + + This method solves the following optimization problem: + + :math:`\\max_{q_x, q_y} \\left( q_x^T X^T Y q_y \\right)` + + subject to the constraints: + + :math:`q_x^T q_x = 1, \\quad q_y^T q_y = 1` + + where :math:`X` and :math:`Y` are the input data matrices and :math:`q_x` + and :math:`q_y` are the corresponding pattern vectors. + + Parameters + ---------- + n_modes : int, default=2 + Number of modes to calculate. + standardize : Squence[bool] | bool, default=False + Whether to standardize the input data. Generally not recommended as + standardization can be managed by the degree of whitening. + use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. The cross-covariance matrix is then computed in the reduced + principal component space. + n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. 
+ pca_init_rank_reduction : Sequence[float] | float, default=0.3 + Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the + initial fraction of rank reduction for faster PCA computation via + randomized SVD. + check_nans : Sequence[bool] | bool, default=True + Whether to check for NaNs in the input data. Set to False for lazy model + evaluation. + compute : bool, default=True + Whether to compute the model elements eagerly. If True, the following + are computed sequentially: preprocessor scaler, optional NaN checks, SVD + decomposition, scores, and components. + random_state : numpy.random.Generator | int | None, default=None + Seed for the random number generator. + sample_name : str, default="sample" + Name for the new sample dimension. + feature_name : Sequence[str] | str, default="feature" + Name for the new feature dimension. + solver : {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver function. + + + References + ---------- + .. [1] Bretherton, C., Smith, C., Wallace, J., 1992. An intercomparison of + methods for finding coupled patterns in climate data. Journal of climate + 5, 541–560. + .. [2] Wilks, D. S. Statistical Methods in the Atmospheric Sciences. + (Academic Press, 2019). + doi:https://doi.org/10.1016/B978-0-12-815823-4.00011-0. + + Examples + -------- + + Perform MCA on two datasets on a regular longitude-latitude grid: + + >>> model = MCA(n_modes=5, use_coslat=True) + >>> model.fit(X, Y, dim="time") + + """ + + def __init__( + self, + n_modes: int = 2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + ): + CPCCA.__init__( + self, + n_modes=n_modes, + alpha=[1.0, 1.0], + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + ) + self.attrs.update({"model": "Maximum Covariance Analysis"}) + # Renove alpha from the inherited CPCCA serialization params because it is hard-coded for MCA + self._params.pop("alpha") + + def covariance_fraction_CD95(self): + """Get the covariance fraction (CF). + + Cheng and Dunkerton (1995) [3]_ define the CF as follows: + + .. math:: + CF_i = \\frac{\\sigma_i}{\\sum_{i=1}^{m} \\sigma_i} + + where `m` is the total number of modes and :math:`\\sigma_i` is the + `ith` singular value of the covariance matrix. + + This implementation estimates the sum of singular values from the first + `n` modes, therefore one should aim to retain as many modes as possible + to get a good estimate of the covariance fraction. + + Note + ---- + In MCA, the focus is on maximizing the *squared* covariance (SC). As a + result, this quantity is preserved during decomposition - meaning the SC + of both datasets remains unchanged before and after decomposition. 
Each + mode explains a fraction of the total SC, and together, all modes can + reconstruct the total SC of the cross-covariance matrix. However, the + (non-squared) covariance is not invariant in MCA; it is not preserved by + the individual modes and cannot be reconstructed from them. + Consequently, the squared covariance fraction (SCF) is invariant in MCA + and is typically used to assess the relative importance of each mode. In + contrast, the convariance fraction (CF) is not invariant. Cheng and + Dunkerton [3]_ introduced the CF to compare the relative importance of + modes before and after Varimax rotation in MCA. Notably, when the data + fields in MCA are identical, the CF corresponds to the explained + variance ratio in Principal Component Analysis (PCA). + + References + ---------- + .. [3] Cheng, X. & Dunkerton, T. J. Orthogonal Rotation of Spatial + Patterns Derived from Singular Value Decomposition Analysis. J. + Climate 8, 2631–2643 (1995). + + """ + # Check how sensitive the CF is to the number of modes + cov_exp = self._covariance_explained_DC95() + tot_var = self._total_covariance() + cf = cov_exp[0] / cov_exp.cumsum() + change_per_mode = cf.shift({"mode": 1}) - cf + change_in_cf_in_last_mode = change_per_mode.isel(mode=-1) + if change_in_cf_in_last_mode > 0.001: + warnings.warn( + "The curent estimate of CF is sensitive to the number of modes retained. Please increase `n_modes` for a better estimate." + ) + cov_frac = cov_exp / tot_var + cov_frac.name = "covariance_fraction" + cov_frac.attrs.update(cov_exp.attrs) + return cov_frac + + def _squared_covariance(self) -> DataArray: + """Get the squared covariance. + + The squared covariance is given by the squared singular values of the + covariance matrix: + + .. math:: + SC_i = \\sigma_i^2 + + where :math:`\\sigma_i` is the `ith` singular value of the covariance + matrix. + + """ + # only true for MCA, for alpha < 1 the sigmas become more and more correlation coefficients + # either remove this one and provide it only for MCA child class, or use error formulation + sc = self.data["squared_covariance"] + sc.name = "squared_covariance" + return sc + + def _covariance_explained_DC95(self) -> DataArray: + """Get the covariance explained (CE) per mode according to CD95. + + References + ---------- + Cheng, X. & Dunkerton, T. J. Orthogonal Rotation of Spatial Patterns + Derived from Singular Value Decomposition Analysis. J. Climate 8, + 2631–2643 (1995). + + """ + cov_exp = self._squared_covariance() ** (0.5) + cov_exp.name = "pseudo_explained_covariance" + return cov_exp + + def _total_covariance(self) -> DataArray: + """Get the total covariance. + + This measure follows the defintion of Cheng and Dunkerton (1995). + Note that this measure is not an invariant in MCA. + + """ + pseudo_tot_cov = self._covariance_explained_DC95().sum() + pseudo_tot_cov.name = "pseudo_total_covariance" + return pseudo_tot_cov + + +class ComplexMCA(ComplexCPCCA, MCA): + """Complex MCA. + + MCA applied to a complex-valued field obtained from a pair of variables such + as the zonal and meridional components, :math:`U` and :math:`V`, of the wind + field. Complex EOF analysis then maximizes the squared covariance between + two datasets of the form + + .. math:: + Z_x = U_x + iV_x + + and + + .. math:: + Z_y = U_y + iV_y + + into a set of complex-valued components and PC scores. + + + Parameters + ---------- + n_modes : int, default=2 + Number of modes to calculate. 
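A small worked example of the quantities discussed above, using hypothetical singular values; it only illustrates the arithmetic behind the squared covariance fraction and the Cheng & Dunkerton covariance fraction, not the xeofs API.

```python
import numpy as np

# Hypothetical singular values of the cross-covariance matrix.
sigma = np.array([5.0, 3.0, 1.0, 0.5])

# Squared covariance fraction (SCF): invariant under the decomposition.
scf = sigma**2 / np.sum(sigma**2)

# Covariance fraction (CF) in the sense of Cheng & Dunkerton (1995),
# estimated from the retained modes only, as covariance_fraction_CD95 does.
cf = sigma / np.sum(sigma)

print(np.round(scf, 3))  # [0.709 0.255 0.028 0.007]
print(np.round(cf, 3))   # [0.526 0.316 0.105 0.053]
```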
+ standardize : Squence[bool] | bool, default=False + Whether to standardize the input data. Generally not recommended as + standardization can be managed by the degree of whitening. + use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. The cross-covariance matrix is then computed in the reduced + principal component space. + n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. + pca_init_rank_reduction : Sequence[float] | float, default=0.3 + Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the + initial fraction of rank reduction for faster PCA computation via + randomized SVD. + check_nans : Sequence[bool] | bool, default=True + Whether to check for NaNs in the input data. Set to False for lazy model + evaluation. + compute : bool, default=True + Whether to compute the model elements eagerly. If True, the following + are computed sequentially: preprocessor scaler, optional NaN checks, SVD + decomposition, scores, and components. + random_state : numpy.random.Generator | int | None, default=None + Seed for the random number generator. + sample_name : str, default="sample" + Name for the new sample dimension. + feature_name : Sequence[str] | str, default="feature" + Name for the new feature dimension. + solver : {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver function. + + Examples + -------- + + With two DataArrays `u_i` and `v_i` representing the zonal and meridional + components of the wind field for two different regions :math:`x` and + :math:`y`, construct + + >>> X = u_x + 1j * v_x + >>> Y = u_y + 1j * v_y + + and fit the Complex MCA model: + + >>> model = ComplexMCA(n_modes=5) + >>> model.fit(X, Y, "time") + + + """ + + def __init__( + self, + n_modes: int = 2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + ): + ComplexCPCCA.__init__( + self, + n_modes=n_modes, + alpha=[1.0, 1.0], + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + ) + self.attrs.update({"model": "Complex MCA"}) + # Renove alpha from the inherited CPCCA serialization params because it is hard-coded for MCA + self._params.pop("alpha") + + +class HilbertMCA(HilbertCPCCA, ComplexMCA): + """Hilbert MCA. 
+ + Hilbert MCA [1]_ (aka Analytical SVD), extends MCA by + examining amplitude-phase relationships. It augments the input data with its + Hilbert transform, creating a complex-valued field. + + This method solves the following optimization problem: + + :math:`\\max_{q_x, q_y} \\left( q_x^H X^H Y q_y \\right)` + + subject to the constraints: + + :math:`q_x^H q_x = 1, \\quad q_y^H q_y = 1` + + where :math:`H` denotes the conjugate transpose and :math:`X` and :math:`Y` + are the augmented data matrices. + + An optional padding with exponentially decaying values can be applied prior + to the Hilbert transform in order to mitigate the impact of spectral + leakage. + + + Parameters + ---------- + n_modes : int, default=2 + Number of modes to calculate. + padding : Sequence[str] | str | None, default="exp" + Padding method for the Hilbert transform. Available options are: - None: + no padding - "exp": exponential decay + decay_factor : Sequence[float] | float, default=0.2 + Decay factor for the exponential padding. + standardize : Squence[bool] | bool, default=False + Whether to standardize the input data. Generally not recommended as + standardization can be managed by the degree of whitening. + use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. The cross-covariance matrix is computed in the reduced + principal component space. + n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. + pca_init_rank_reduction : Sequence[float] | float, default=0.3 + Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the + initial fraction of rank reduction for faster PCA computation via + randomized SVD. + check_nans : Sequence[bool] | bool, default=True + Whether to check for NaNs in the input data. Set to False for lazy model + evaluation. + compute : bool, default=True + Whether to compute the model elements eagerly. If True, the following + are computed sequentially: preprocessor scaler, optional NaN checks, SVD + decomposition, scores, and components. + random_state : numpy.random.Generator | int | None, default=None + Seed for the random number generator. + sample_name : str, default="sample" + Name for the new sample dimension. + feature_name : Sequence[str] | str, default="feature" + Name for the new feature dimension. + solver : {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver function. + + References + ---------- + .. [1] Elipot, S., Frajka-Williams, E., Hughes, C. W., Olhede, S. & + Lankhorst, M. Observed Basin-Scale Response of the North Atlantic + Meridional Overturning Circulation to Wind Stress Forcing. Journal of + Climate 30, 2029–2054 (2017). 
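A minimal, xeofs-independent sketch of the augmentation step described above, using `scipy.signal.hilbert` to form the analytic signal of a toy series; xeofs' own `hilbert_transform` additionally supports the exponential padding mentioned in the parameters, which this sketch omits.

```python
import numpy as np
from scipy.signal import hilbert

# Toy oscillatory series standing in for one feature column of X.
t = np.linspace(0, 10 * np.pi, 500)
x = np.cos(t)

# Analytic signal: the series augmented with its Hilbert transform as the
# imaginary part, z = x + i * H(x).
z = hilbert(x)

amplitude = np.abs(z)            # ~1 everywhere for a pure cosine
phase = np.unwrap(np.angle(z))   # advances monotonically with the oscillation
```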
+ + + + Examples + -------- + >>> model = HilbertMCA(n_modes=5) + >>> model.fit(X, Y, "time") + + """ + + def __init__( + self, + n_modes: int = 2, + padding: Sequence[str] | str | None = "exp", + decay_factor: Sequence[float] | float = 0.2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + ): + HilbertCPCCA.__init__( + self, + n_modes=n_modes, + alpha=[1.0, 1.0], + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + padding=padding, + decay_factor=decay_factor, + ) + self.attrs.update({"model": "Hilbert MCA"}) + # Renove alpha from the inherited CPCCA serialization params because it is hard-coded for MCA + self._params.pop("alpha") diff --git a/xeofs/cross/mca_rotator.py b/xeofs/cross/mca_rotator.py new file mode 100644 index 00000000..a47146c4 --- /dev/null +++ b/xeofs/cross/mca_rotator.py @@ -0,0 +1,229 @@ +from .cpcca_rotator import ComplexCPCCARotator, CPCCARotator, HilbertCPCCARotator +from .mca import MCA, ComplexMCA, HilbertMCA + + +class MCARotator(CPCCARotator, MCA): + """Rotate a solution obtained from ``xe.cross.MCA``. + + Rotate the obtained components and scores of a CPCCA model to increase + interpretability. The algorithm here is based on the approach of Cheng & + Dunkerton (1995) [1]_. + + Parameters + ---------- + n_modes : int, default=10 + Specify the number of modes to be rotated. + power : int, default=1 + Set the power for the Promax rotation. A ``power`` value of 1 results in + a Varimax rotation. + max_iter : int or None, default=None + Determine the maximum number of iterations for the computation of the + rotation matrix. If not specified, defaults to 1000 if ``compute=True`` + and 100 if ``compute=False``, since we can't terminate a lazy + computation based using ``rtol``. + rtol : float, default=1e-8 + Define the relative tolerance required to achieve convergence and + terminate the iterative process. + compute : bool, default=True + Whether to compute the rotation immediately. + + References + ---------- + .. [1] Cheng, X. & Dunkerton, T. J. Orthogonal Rotation of Spatial Patterns + Derived from Singular Value Decomposition Analysis. J. Climate 8, + 2631–2643 (1995). 
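For intuition about the ``power`` parameter, the following plain-NumPy sketch shows the classic Varimax iteration (the ``power=1`` case). It is illustrative only and not the rotation routine used by xeofs, which additionally supports Promax (``power > 1``) and dask-backed arrays:

```python
import numpy as np

def varimax(L, max_iter=1000, rtol=1e-8):
    """Orthogonally rotate a loadings matrix L (features x modes) to maximize the Varimax criterion."""
    m = L.shape[1]
    R = np.eye(m)
    var = 0.0
    for _ in range(max_iter):
        LR = L @ R
        # Gradient of the Varimax objective with respect to the rotation
        grad = L.T @ (LR**3 - LR * (LR**2).mean(axis=0))
        U, s, VT = np.linalg.svd(grad)
        R = U @ VT
        var_new = s.sum()
        if var > 0 and (var_new - var) < rtol * var:
            break
        var = var_new
    return L @ R, R

rng = np.random.default_rng(0)
rotated_loadings, rotation_matrix = varimax(rng.standard_normal((50, 3)))
```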
+ + Examples + -------- + + Perform a MCA: + + >>> model = MCA(n_modes=10) + >>> model.fit(X, Y, dim='time') + + Then, apply varimax rotation to first 5 components and scores: + + >>> rotator = MCARotator(n_modes=5) + >>> rotator.fit(model) + + Retrieve the rotated components and scores: + + >>> rotator.components() + >>> rotator.scores() + + """ + + def __init__( + self, + n_modes: int = 10, + power: int = 1, + max_iter: int | None = None, + rtol: float = 1e-8, + compute: bool = True, + ): + CPCCARotator.__init__( + self, + n_modes=n_modes, + power=power, + max_iter=max_iter, + rtol=rtol, + compute=compute, + ) + + # Define analysis-relevant meta data + self.attrs.update({"model": "Rotated MCA"}) + self.model = MCA() + + +class ComplexMCARotator(ComplexCPCCARotator, ComplexMCA): + """Rotate a solution obtained from ``xe.cross.ComplexMCA``. + + Rotate the obtained components and scores of a CPCCA model to increase + interpretability. The algorithm here is based on the approach of Cheng & + Dunkerton (1995) [1]_, Elipot et al. (2017) [2]_ and Rieger et al. (2021). + + Parameters + ---------- + n_modes : int, default=10 + Specify the number of modes to be rotated. + power : int, default=1 + Set the power for the Promax rotation. A ``power`` value of 1 results in + a Varimax rotation. + max_iter : int, default=1000 + Determine the maximum number of iterations for the computation of the + rotation matrix. + rtol : float, default=1e-8 + Define the relative tolerance required to achieve convergence and + terminate the iterative process. + compute: bool, default=True + Whether to compute the rotation immediately. + + References + ---------- + .. [1] Cheng, X. & Dunkerton, T. J. Orthogonal Rotation of Spatial Patterns + Derived from Singular Value Decomposition Analysis. J. Climate 8, + 2631–2643 (1995). + .. [2] Elipot, S., Frajka-Williams, E., Hughes, C. W., Olhede, S. & + Lankhorst, M. Observed Basin-Scale Response of the North Atlantic + Meridional Overturning Circulation to Wind Stress Forcing. Journal of + Climate 30, 2029–2054 (2017). + .. [3] Rieger, N., Corral, Á., Olmedo, E. & Turiel, A. Lagged + Teleconnections of Climate Variables Identified via Complex Rotated + Maximum Covariance Analysis. Journal of Climate 34, 9861–9878 (2021). + + + Examples + -------- + + Perform a Complex MCA: + + >>> model = ComplexMCA(n_modes=10) + >>> model.fit(X, Y, dim='time') + + Then, apply varimax rotation to first 5 components and scores: + + >>> rotator = ComplexMCARotator(n_modes=5) + >>> rotator.fit(model) + + Retrieve the rotated components and scores: + + >>> rotator.components() + >>> rotator.scores() + + """ + + def __init__( + self, + n_modes: int = 10, + power: int = 1, + max_iter: int | None = None, + rtol: float = 1e-8, + compute: bool = True, + ): + ComplexCPCCARotator.__init__( + self, + n_modes=n_modes, + power=power, + max_iter=max_iter, + rtol=rtol, + compute=compute, + ) + self.attrs.update({"model": "Rotated Complex MCA"}) + self.model = ComplexMCA() + + +class HilbertMCARotator(HilbertCPCCARotator, HilbertMCA): + """Rotate a solution obtained from ``xe.cross.HilbertMCA``. + + Rotate the obtained components and scores of a CPCCA model to increase + interpretability. The algorithm here is based on the approach of Cheng & + Dunkerton (1995) [1]_, Elipot et al. (2017) [2]_ and Rieger et al. (2021). + + Parameters + ---------- + n_modes : int, default=10 + Specify the number of modes to be rotated. + power : int, default=1 + Set the power for the Promax rotation. 
A ``power`` value of 1 results in + a Varimax rotation. + max_iter : int, default=1000 + Determine the maximum number of iterations for the computation of the + rotation matrix. + rtol : float, default=1e-8 + Define the relative tolerance required to achieve convergence and + terminate the iterative process. + compute: bool, default=True + Whether to compute the rotation immediately. + + References + ---------- + .. [1] Cheng, X. & Dunkerton, T. J. Orthogonal Rotation of Spatial Patterns + Derived from Singular Value Decomposition Analysis. J. Climate 8, + 2631–2643 (1995). + .. [2] Elipot, S., Frajka-Williams, E., Hughes, C. W., Olhede, S. & + Lankhorst, M. Observed Basin-Scale Response of the North Atlantic + Meridional Overturning Circulation to Wind Stress Forcing. Journal of + Climate 30, 2029–2054 (2017). + .. [3] Rieger, N., Corral, Á., Olmedo, E. & Turiel, A. Lagged + Teleconnections of Climate Variables Identified via Complex Rotated + Maximum Covariance Analysis. Journal of Climate 34, 9861–9878 (2021). + + + Examples + -------- + + Perform a Hilbert MCA: + + >>> model = HilbertMCA(n_modes=10) + >>> model.fit(X, Y, dim='time') + + Then, apply varimax rotation to first 5 components and scores: + + >>> rotator = HilbertMCARotator(n_modes=5) + >>> rotator.fit(model) + + Retrieve the rotated components and scores: + + >>> rotator.components() + >>> rotator.scores() + + """ + + def __init__( + self, + n_modes: int = 10, + power: int = 1, + max_iter: int | None = None, + rtol: float = 1e-8, + compute: bool = True, + ): + HilbertCPCCARotator.__init__( + self, + n_modes=n_modes, + power=power, + max_iter=max_iter, + rtol=rtol, + compute=compute, + ) + self.attrs.update({"model": "Rotated Hilbert MCA"}) + self.model = HilbertMCA() diff --git a/xeofs/cross/rda.py b/xeofs/cross/rda.py new file mode 100644 index 00000000..ae2abe38 --- /dev/null +++ b/xeofs/cross/rda.py @@ -0,0 +1,356 @@ +from typing import Sequence + +import numpy as np + +from .cpcca import CPCCA, ComplexCPCCA, HilbertCPCCA + + +class RDA(CPCCA): + """Redundancy Analysis (RDA). + + RDA seeks to find paris of coupled patterns that maximize the predictand + variance [1]_ [2]_ . + + This method solves the following optimization problem: + + :math:`\\max_{q_x, q_y} \\left( q_x^T X^T Y q_y \\right)` + + subject to the constraints: + + :math:`q_x^T (X^TX) q_x = 1, \\quad q_y^T q_y = 1` + + where :math:`X` and :math:`Y` are the input data matrices and :math:`q_x` + and :math:`q_y` are the corresponding pattern vectors. + + Parameters + ---------- + n_modes : int, default=2 + Number of modes to calculate. + standardize : Squence[bool] | bool, default=False + Whether to standardize the input data. Generally not recommended as + standardization can be managed by the degree of whitening. + use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. The cross-covariance matrix is then computed in the reduced + principal component space. + n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. 
+    pca_init_rank_reduction : Sequence[float] | float, default=0.3
+        Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the
+        initial fraction of rank reduction for faster PCA computation via
+        randomized SVD.
+    check_nans : Sequence[bool] | bool, default=True
+        Whether to check for NaNs in the input data. Set to False for lazy model
+        evaluation.
+    compute : bool, default=True
+        Whether to compute the model elements eagerly. If True, the following
+        are computed sequentially: preprocessor scaler, optional NaN checks, SVD
+        decomposition, scores, and components.
+    random_state : numpy.random.Generator | int | None, default=None
+        Seed for the random number generator.
+    sample_name : str, default="sample"
+        Name for the new sample dimension.
+    feature_name : Sequence[str] | str, default="feature"
+        Name for the new feature dimension.
+    solver : {"auto", "full", "randomized"}
+        Solver to use for the SVD computation.
+    solver_kwargs : dict, default={}
+        Additional keyword arguments passed to the SVD solver function.
+
+
+    References
+    ----------
+    .. [1] Storch, H. von & Zwiers, F. W. Statistical Analysis in Climate
+        Research. (Cambridge University Press (Virtual Publishing), 2003).
+    .. [2] Wilks, D. S. Statistical Methods in the Atmospheric Sciences.
+        (Academic Press, 2019).
+        doi:https://doi.org/10.1016/B978-0-12-815823-4.00011-0.
+
+    Examples
+    --------
+
+    Perform RDA on two datasets on a regular longitude-latitude grid:
+
+    >>> model = RDA(n_modes=5, use_coslat=True)
+    >>> model.fit(X, Y, dim="time")
+
+    """
+
+    def __init__(
+        self,
+        n_modes: int = 2,
+        standardize: Sequence[bool] | bool = False,
+        use_coslat: Sequence[bool] | bool = False,
+        check_nans: Sequence[bool] | bool = True,
+        use_pca: Sequence[bool] | bool = True,
+        n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999,
+        pca_init_rank_reduction: Sequence[float] | float = 0.3,
+        compute: bool = True,
+        sample_name: str = "sample",
+        feature_name: Sequence[str] | str = "feature",
+        solver: str = "auto",
+        random_state: np.random.Generator | int | None = None,
+        solver_kwargs: dict = {},
+    ):
+        CPCCA.__init__(
+            self,
+            n_modes=n_modes,
+            alpha=[0.0, 1.0],
+            standardize=standardize,
+            use_coslat=use_coslat,
+            check_nans=check_nans,
+            use_pca=use_pca,
+            n_pca_modes=n_pca_modes,
+            pca_init_rank_reduction=pca_init_rank_reduction,
+            compute=compute,
+            sample_name=sample_name,
+            feature_name=feature_name,
+            solver=solver,
+            random_state=random_state,
+            solver_kwargs=solver_kwargs,
+        )
+        self.attrs.update({"model": "Redundancy Analysis"})
+        # Remove alpha from the inherited CPCCA serialization params because it is hard-coded for RDA
+        self._params.pop("alpha")
+
+
+class ComplexRDA(ComplexCPCCA, RDA):
+    """Complex RDA.
+
+    RDA applied to a complex-valued field obtained from a pair of variables such
+    as the zonal and meridional components, :math:`U` and :math:`V`, of the wind
+    field. Complex RDA then decomposes the relationship between two datasets of
+    the form
+
+    .. math::
+        Z_x = U_x + iV_x
+
+    and
+
+    .. math::
+        Z_y = U_y + iV_y
+
+    into a set of complex-valued components and PC scores.
+
+
+    Parameters
+    ----------
+    n_modes : int, default=2
+        Number of modes to calculate.
+    standardize : Sequence[bool] | bool, default=False
+        Whether to standardize the input data. Generally not recommended as
+        standardization can be managed by the degree of whitening.
+ use_coslat : Sequence[bool] | bool, default=False + For data on a longitude-latitude grid, whether to correct for varying + grid cell areas towards the poles by scaling each grid point with the + square root of the cosine of its latitude. + use_pca : Sequence[bool] | bool, default=False + Whether to preprocess each field individually by reducing dimensionality + through PCA. The cross-covariance matrix is then computed in the reduced + principal component space. + n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999 + Number of modes to retain during PCA preprocessing step. If int, + specifies the exact number of modes; if float, specifies the fraction of + variance to retain; if "all", all modes are retained. + pca_init_rank_reduction : Sequence[float] | float, default=0.3 + Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the + initial fraction of rank reduction for faster PCA computation via + randomized SVD. + check_nans : Sequence[bool] | bool, default=True + Whether to check for NaNs in the input data. Set to False for lazy model + evaluation. + compute : bool, default=True + Whether to compute the model elements eagerly. If True, the following + are computed sequentially: preprocessor scaler, optional NaN checks, SVD + decomposition, scores, and components. + random_state : numpy.random.Generator | int | None, default=None + Seed for the random number generator. + sample_name : str, default="sample" + Name for the new sample dimension. + feature_name : Sequence[str] | str, default="feature" + Name for the new feature dimension. + solver : {"auto", "full", "randomized"} + Solver to use for the SVD computation. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver function. + + Examples + -------- + + With two DataArrays `u_i` and `v_i` representing the zonal and meridional + components of the wind field for two different regions :math:`x` and + :math:`y`, construct + + >>> X = u_x + 1j * v_x + >>> Y = u_y + 1j * v_y + + and fit the Complex RDA model: + + >>> model = ComplexRDA(n_modes=5) + >>> model.fit(X, Y, "time") + + + """ + + def __init__( + self, + n_modes: int = 2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + ): + ComplexCPCCA.__init__( + self, + n_modes=n_modes, + alpha=[0.0, 1.0], + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + ) + self.attrs.update({"model": "Complex RDA"}) + # Renove alpha from the inherited CPCCA serialization params because it is hard-coded for RDA + self._params.pop("alpha") + + +class HilbertRDA(HilbertCPCCA, ComplexRDA): + """Hilbert RDA. + + Hilbert RDA extends RDA by examining amplitude-phase relationships. It + augments the input data with its Hilbert transform, creating a + complex-valued field. 
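Before the formal statement below, a usage-level sketch of how this differs from ``ComplexRDA`` may help. Assume ``u_x``, ``v_x``, ``u_y``, ``v_y`` are real-valued wind-component DataArrays and ``X``, ``Y`` are real-valued fields, all sharing a ``time`` dimension; the import path follows the new ``xeofs/cross/rda.py`` module added in this patch:

```python
from xeofs.cross.rda import ComplexRDA, HilbertRDA  # module introduced in this patch

# ComplexRDA: the user builds the complex-valued field explicitly
Z_x = u_x + 1j * v_x
Z_y = u_y + 1j * v_y
ComplexRDA(n_modes=3).fit(Z_x, Z_y, "time")

# HilbertRDA: real-valued input; the analytic signal is constructed internally
HilbertRDA(n_modes=3, padding="exp", decay_factor=0.2).fit(X, Y, "time")
```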
+
+    This method solves the following optimization problem:
+
+        :math:`\\max_{q_x, q_y} \\left( q_x^H X^H Y q_y \\right)`
+
+    subject to the constraints:
+
+        :math:`q_x^H (X^HX) q_x = 1, \\quad q_y^H q_y = 1`
+
+    where :math:`H` denotes the conjugate transpose and :math:`X` and :math:`Y`
+    are the augmented data matrices.
+
+    An optional padding with exponentially decaying values can be applied prior
+    to the Hilbert transform in order to mitigate the impact of spectral
+    leakage.
+
+
+    Parameters
+    ----------
+    n_modes : int, default=2
+        Number of modes to calculate.
+    padding : Sequence[str] | str | None, default="exp"
+        Padding method for the Hilbert transform. Available options are ``None``
+        (no padding) and ``"exp"`` (exponential decay).
+    decay_factor : Sequence[float] | float, default=0.2
+        Decay factor for the exponential padding.
+    standardize : Sequence[bool] | bool, default=False
+        Whether to standardize the input data. Generally not recommended as
+        standardization can be managed by the degree of whitening.
+    use_coslat : Sequence[bool] | bool, default=False
+        For data on a longitude-latitude grid, whether to correct for varying
+        grid cell areas towards the poles by scaling each grid point with the
+        square root of the cosine of its latitude.
+    use_pca : Sequence[bool] | bool, default=False
+        Whether to preprocess each field individually by reducing dimensionality
+        through PCA. The cross-covariance matrix is computed in the reduced
+        principal component space.
+    n_pca_modes : Sequence[int | float | str] | int | float | str, default=0.999
+        Number of modes to retain during PCA preprocessing step. If int,
+        specifies the exact number of modes; if float, specifies the fraction of
+        variance to retain; if "all", all modes are retained.
+    pca_init_rank_reduction : Sequence[float] | float, default=0.3
+        Relevant when `use_pca=True` and `n_pca_modes` is a float. Specifies the
+        initial fraction of rank reduction for faster PCA computation via
+        randomized SVD.
+    check_nans : Sequence[bool] | bool, default=True
+        Whether to check for NaNs in the input data. Set to False for lazy model
+        evaluation.
+    compute : bool, default=True
+        Whether to compute the model elements eagerly. If True, the following
+        are computed sequentially: preprocessor scaler, optional NaN checks, SVD
+        decomposition, scores, and components.
+    random_state : numpy.random.Generator | int | None, default=None
+        Seed for the random number generator.
+    sample_name : str, default="sample"
+        Name for the new sample dimension.
+    feature_name : Sequence[str] | str, default="feature"
+        Name for the new feature dimension.
+    solver : {"auto", "full", "randomized"}
+        Solver to use for the SVD computation.
+    solver_kwargs : dict, default={}
+        Additional keyword arguments passed to the SVD solver function.
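The ``padding="exp"`` option mentioned above can be pictured with a small sketch. The helper below is hypothetical and only mimics the idea of extending the series with exponentially decaying values before the Hilbert transform (to reduce spectral leakage); it is not the implementation used by xeofs:

```python
import numpy as np

def pad_exp(x, decay_factor=0.2):
    # Hypothetical illustration: extend both ends with exponentially decaying
    # copies of the edge values; the padding is cropped again after the transform.
    n_pad = len(x) // 2
    decay = np.exp(-np.arange(1, n_pad + 1) / (n_pad * decay_factor))
    return np.concatenate([x[0] * decay[::-1], x, x[-1] * decay])

x = np.sin(np.linspace(0, 6 * np.pi, 200))
x_padded = pad_exp(x, decay_factor=0.2)
```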
+ + + + Examples + -------- + >>> model = HilbertRDA(n_modes=5) + >>> model.fit(X, Y, "time") + + """ + + def __init__( + self, + n_modes: int = 2, + padding: Sequence[str] | str | None = "exp", + decay_factor: Sequence[float] | float = 0.2, + standardize: Sequence[bool] | bool = False, + use_coslat: Sequence[bool] | bool = False, + check_nans: Sequence[bool] | bool = True, + use_pca: Sequence[bool] | bool = True, + n_pca_modes: Sequence[float | int | str] | float | int | str = 0.999, + pca_init_rank_reduction: Sequence[float] | float = 0.3, + compute: bool = True, + sample_name: str = "sample", + feature_name: Sequence[str] | str = "feature", + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + ): + HilbertCPCCA.__init__( + self, + n_modes=n_modes, + alpha=[0.0, 1.0], + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + use_pca=use_pca, + n_pca_modes=n_pca_modes, + pca_init_rank_reduction=pca_init_rank_reduction, + compute=compute, + sample_name=sample_name, + feature_name=feature_name, + solver=solver, + random_state=random_state, + solver_kwargs=solver_kwargs, + padding=padding, + decay_factor=decay_factor, + ) + self.attrs.update({"model": "Hilbert RDA"}) + # Renove alpha from the inherited CPCCA serialization params because it is hard-coded for RDA + self._params.pop("alpha") diff --git a/xeofs/data_container/data_container.py b/xeofs/data_container/data_container.py index dca4e5f3..9b8f948f 100644 --- a/xeofs/data_container/data_container.py +++ b/xeofs/data_container/data_container.py @@ -1,7 +1,4 @@ -from typing import Dict - import dask -from dask.diagnostics.progress import ProgressBar from typing_extensions import Self try: @@ -52,13 +49,9 @@ def deserialize(cls, dt: DataTree) -> Self: container._allow_compute[key] = node.attrs["allow_compute"] return container - def compute(self, verbose=False, **kwargs): + def compute(self, **kwargs): computed_data = {k: v for k, v in self.items() if self._allow_compute[k]} - if verbose: - with ProgressBar(): - (computed_data,) = dask.compute(computed_data, **kwargs) - else: - (computed_data,) = dask.compute(computed_data, **kwargs) + (computed_data,) = dask.compute(computed_data, **kwargs) for k, v in computed_data.items(): self[k] = v @@ -71,7 +64,7 @@ def _validate_attrs_values(self, value): else: return value - def _validate_attrs(self, attrs: Dict) -> Dict: + def _validate_attrs(self, attrs: dict) -> dict: """Convert any boolean and None values to strings""" for key, value in attrs.items(): if isinstance(value, bool): @@ -83,7 +76,7 @@ def _validate_attrs(self, attrs: Dict) -> Dict: return attrs - def set_attrs(self, attrs: Dict): + def set_attrs(self, attrs: dict): attrs = self._validate_attrs(attrs) for key in self.keys(): self[key].attrs = attrs diff --git a/xeofs/linalg/__init__.py b/xeofs/linalg/__init__.py new file mode 100644 index 00000000..c2cfd896 --- /dev/null +++ b/xeofs/linalg/__init__.py @@ -0,0 +1,3 @@ +from .utils import total_variance + +__all__ = ["total_variance"] diff --git a/xeofs/linalg/_numpy/__init__.py b/xeofs/linalg/_numpy/__init__.py new file mode 100644 index 00000000..cd53e976 --- /dev/null +++ b/xeofs/linalg/_numpy/__init__.py @@ -0,0 +1,5 @@ +from ._rotation import _promax +from ._svd import _SVD +from ._utils import _fractional_matrix_power + +__all__ = ["_fractional_matrix_power", "_promax", "_SVD"] diff --git a/xeofs/utils/rotation.py b/xeofs/linalg/_numpy/_rotation.py similarity index 90% rename from 
xeofs/utils/rotation.py rename to xeofs/linalg/_numpy/_rotation.py index 0f1b10cc..7f7f6f42 100644 --- a/xeofs/utils/rotation.py +++ b/xeofs/linalg/_numpy/_rotation.py @@ -1,25 +1,6 @@ -import dask.array import numpy as np -import xarray as xr - -from .data_types import DataArray - - -def promax(loadings: DataArray, feature_dim, **kwargs): - rotated, rot_mat, phi_mat = xr.apply_ufunc( - _promax, - loadings, - input_core_dims=[[feature_dim, "mode"]], - output_core_dims=[ - [feature_dim, "mode"], - ["mode_m", "mode_n"], - ["mode_m", "mode_n"], - ], - kwargs=kwargs, - dask="allowed", - ) - - return rotated, rot_mat, phi_mat +from dask.array import Array as DaskArray # type: ignore +from dask.array.linalg import svd_compressed def _promax( @@ -154,9 +135,9 @@ def _varimax( X = X.copy() n_samples, n_modes = X.shape - if isinstance(X, dask.array.Array): + if isinstance(X, DaskArray): # Use svd_compressed if dask to allow chunking in both dimensions - svd_func = dask.array.linalg.svd_compressed + svd_func = svd_compressed svd_args = (n_modes,) else: svd_func = np.linalg.svd diff --git a/xeofs/linalg/_numpy/_svd.py b/xeofs/linalg/_numpy/_svd.py new file mode 100644 index 00000000..013997de --- /dev/null +++ b/xeofs/linalg/_numpy/_svd.py @@ -0,0 +1,274 @@ +import warnings + +import numpy as np +from dask.array import Array as DaskArray # type: ignore +from dask.array.linalg import svd_compressed as dask_svd +from dask.graph_manipulation import wait_on +from scipy.sparse.linalg import svds as complex_svd # type: ignore +from sklearn.utils.extmath import randomized_svd + +from ...utils.sanity_checks import sanity_check_n_modes + + +def get_deterministic_sign_multiplier(data, axis: int): + """Compute a sign multiplier that ensures deterministic output using Dask. + + Parameters: + ------------ + data: da.Array + Input data to determine sorting order. + axis: int + Axis along which to compute the sign multiplier. + + Returns: + --------- + sign_multiplier: da.Array + Sign multiplier that ensures deterministic output. + """ + max_vals = np.max(data, axis=axis) + min_vals = np.min(data, axis=axis) + sign_multiplier = np.where(np.abs(max_vals) >= np.abs(min_vals), 1, -1) + return sign_multiplier + + +class _SVD: + """Decomposes a data object using Singular Value Decomposition (SVD). + + The data object will be decomposed like X = U * S * V.T, where U and V are + orthogonal matrices and S is a diagonal matrix with the singular values on + the diagonal. + + Parameters + ---------- + n_modes : int | float | str + Number of components to be computed. If float, it represents the fraction of variance to keep. If "all", all components are kept. + init_rank_reduction : float, default=0.0 + Used only when `n_modes` is given as a float. Specifiy the initial target rank to be computed by randomized SVD before truncating the solution to the desired fraction of explained variance. Must be in the half open interval (0, 1]. Lower values will speed up the computation. If the rank is too low and the fraction of explained variance is not reached, a warning will be raised. + flip_signs : bool, default=True + Whether to flip the sign of the components to ensure deterministic output. + solver : {'auto', 'full', 'randomized'}, default='auto' + The solver is selected by a default policy based on size of `X` and `n_modes`: + if the input data is larger than 500x500 and the number of modes to extract is lower + than 80% of the smallest dimension of the data, then the more efficient + `randomized` method is enabled. 
Otherwise the exact full SVD is computed + and optionally truncated afterwards. + random_state : np.random.Generator | int | None, default=None + Seed for the random number generator. + solver_kwargs : dict, default={} + Additional keyword arguments passed to the SVD solver. + """ + + def __init__( + self, + n_modes: int | float | str, + init_rank_reduction: float = 0.3, + flip_signs: bool = True, + solver: str = "auto", + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + is_complex: bool | str = "auto", + ): + sanity_check_n_modes(n_modes) + self.is_based_on_variance = True if isinstance(n_modes, float) else False + + if self.is_based_on_variance: + if not (0 < init_rank_reduction <= 1.0): + raise ValueError( + "init_rank_reduction must be in the half open interval (0, 1]." + ) + + self.n_modes = n_modes + self.n_modes_precompute = n_modes + self.init_rank_reduction = init_rank_reduction + self.flip_signs = flip_signs + self.solver = solver + self.random_state = random_state + self.solver_kwargs = solver_kwargs + self.is_complex = is_complex + + def _get_n_modes_precompute(self, rank: int) -> int: + if self.is_based_on_variance: + n_modes_precompute = int(rank * self.init_rank_reduction) + if n_modes_precompute < 1: + warnings.warn( + f"`init_rank_reduction={self.init_rank_reduction}` is too low resulting in zero components. One component will be computed instead." + ) + n_modes_precompute = 1 + elif self.n_modes_precompute == "all": + n_modes_precompute = rank + + elif isinstance(self.n_modes_precompute, int): + if self.n_modes_precompute > rank: + raise ValueError( + f"n_modes must be less than or equal to the rank of the dataset (rank = {rank})." + ) + n_modes_precompute = self.n_modes_precompute + return n_modes_precompute + + def fit_transform(self, X): + """Decomposes the data object. + + Parameters + ---------- + X : DataArray + A 2-dimensional data object to be decomposed. + """ + rank = min(X.shape) + self.n_modes_precompute = self._get_n_modes_precompute(rank) + + # Check if data is small enough to use exact SVD + # If not, use randomized SVD + # If data is complex, use scipy sparse SVD + # If data is dask, use dask SVD + # Conditions for using exact SVD follow scitkit-learn's PCA implementation + # Source: https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html + + use_dask = True if isinstance(X, DaskArray) else False + + match self.is_complex: + case bool(): + use_complex = self.is_complex + case "auto": + use_complex = True if np.iscomplexobj(X) else False + case _: + raise ValueError( + f"Unrecognized value for is_complex '{self.is_complex}'. " + "Valid options are True, False, and 'auto'." + ) + + is_small_data = max(X.shape) < 500 + + match self.solver: + # TODO(nicrie): implement more performant case for tall and skinny problems which are best handled by precomputing the covariance matrix. + # if X.shape[1] <= 1_000 and X.shape[0] >= 10 * X.shape[1]: -> covariance_eigh" (see sklearn PCA implementation: https://github.com/scikit-learn/scikit-learn/blob/e87b32a81c70abed8f2e97483758eb64df8255e9/sklearn/decomposition/_pca.py#L526) + case "auto": + has_many_modes = self.n_modes_precompute > int(0.8 * rank) + use_exact = ( + True if is_small_data and has_many_modes and not use_dask else False + ) + case "full": + use_exact = True + case "randomized": + use_exact = False + case _: + raise ValueError( + f"Unrecognized solver '{self.solver}'. " + "Valid options are 'auto', 'full', and 'randomized'." 
+ ) + + # Use exact SVD for small data sets + if use_exact: + U, s, VT = self._svd(X, np.linalg.svd, self.solver_kwargs) + U = U[:, : self.n_modes_precompute] + s = s[: self.n_modes_precompute] + VT = VT[: self.n_modes_precompute, :] + + # Use randomized SVD for large, real-valued data sets + elif (not use_complex) and (not use_dask): + solver_kwargs = self.solver_kwargs | { + "n_components": self.n_modes_precompute, + "random_state": self.random_state, + } + U, s, VT = self._svd(X, randomized_svd, solver_kwargs) + + # Use scipy sparse SVD for large, complex-valued data sets + elif use_complex and (not use_dask): + # Scipy sparse version + solver_kwargs = self.solver_kwargs | { + "k": self.n_modes_precompute, + "solver": "lobpcg", + "random_state": self.random_state, + } + U, s, VT = self._svd(X, complex_svd, solver_kwargs) + idx_sort = np.argsort(s)[::-1] + U = U[:, idx_sort] + s = s[idx_sort] + VT = VT[idx_sort, :] + + # Use dask SVD for large, real-valued, delayed data sets + elif (not use_complex) and use_dask: + solver_kwargs = self.solver_kwargs | { + "k": self.n_modes_precompute, + "seed": self.random_state, + } + solver_kwargs.setdefault("compute", False) + solver_kwargs.setdefault("n_power_iter", 4) + U, s, VT = self._svd(X, dask_svd, solver_kwargs) + else: + err_msg = ( + "Complex data together with dask is currently not implemented. See dask issue 7639 " + "https://github.com/dask/dask/issues/7639" + ) + raise NotImplementedError(err_msg) + + U, s, VT = wait_on(U, s, VT) + + V = VT.conj().T + + # Flip signs of components to ensure deterministic output + if self.flip_signs: + sign_multiplier = get_deterministic_sign_multiplier(V, axis=0) + V *= sign_multiplier + U *= sign_multiplier + + # Truncate the decomposition to the desired number of modes + if self.is_based_on_variance: + # Truncating based on variance requires computation of dask array + # which we prefer to avoid + if use_dask: + err_msg = "Estimating the number of modes to keep based on variance is not supported with dask arrays. Please explicitly specifiy the number of modes to keep by using an integer for the number of modes." + raise ValueError(err_msg) + + # Compute the fraction of explained variance per mode + N = X.shape[0] - 1 + total_variance = X.var(axis=0, ddof=1).sum() + explained_variance = s**2 / N / total_variance + cum_expvar = explained_variance.cumsum() + total_explained_variance = cum_expvar[-1].item() + + n_modes_required = ( + self.n_modes_precompute - (cum_expvar >= self.n_modes).sum() + 1 + ) + if n_modes_required > self.n_modes_precompute: + warnings.warn( + f"Dataset has {self.n_modes_precompute} components, explaining {total_explained_variance:.2%} of the variance. However, {self.n_modes:.2%} explained variance was requested. Please consider increasing `init_rank_reduction`." + ) + n_modes_required = self.n_modes_precompute + + # Truncate solution to the desired fraction of explained variance + U = U[:, :n_modes_required] + s = s[:n_modes_required] + V = V[:, :n_modes_required] + + return U, s, V + + def _svd(self, X, func, kwargs): + """Performs SVD on the data + + Parameters + ---------- + X : DataArray + A 2-dimensional data object to be decomposed. + func : Callable + Method to perform SVD. + kwargs : dict + Additional keyword arguments passed to the SVD solver. + + Returns + ------- + U : DataArray + Left singular vectors. + s : DataArray + Singular values. + VT : DataArray + Right singular vectors. 
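As a quick sanity check of the variance-based truncation implemented in ``fit_transform`` above (used when ``n_modes`` is a float): for a centered matrix, the explained-variance fractions derived from the singular values sum to one. A plain-NumPy sketch mirroring the same formula:

```python
import numpy as np

rng = np.random.default_rng(42)
X = rng.standard_normal((100, 20))
X -= X.mean(axis=0)                      # centered, as the preprocessor would provide

_, s, _ = np.linalg.svd(X, full_matrices=False)
explained = s**2 / (X.shape[0] - 1) / X.var(axis=0, ddof=1).sum()
print(explained.cumsum()[-1])            # ~1.0 for the full decomposition
# With n_modes=0.9, the smallest k with explained.cumsum()[k-1] >= 0.9 would be kept.
```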
+ """ + try: + U, s, VT = func(X, **kwargs) + return U, s, VT + except np.linalg.LinAlgError: + raise np.linalg.LinAlgError( + "SVD failed. This may be due to isolated NaN values in the data. Please consider the following steps:\n" + "1. Check for and remove any isolated NaNs in your dataset.\n" + "2. If the error persists, please raise an issue at https://github.com/xarray-contrib/xeofs/issues." + ) diff --git a/xeofs/utils/linalg.py b/xeofs/linalg/_numpy/_utils.py similarity index 86% rename from xeofs/utils/linalg.py rename to xeofs/linalg/_numpy/_utils.py index dba34954..f5fd50cb 100644 --- a/xeofs/utils/linalg.py +++ b/xeofs/linalg/_numpy/_utils.py @@ -1,7 +1,9 @@ import numpy as np +from ._svd import _SVD -def fractional_matrix_power(C, power): + +def _fractional_matrix_power(C, power, **kwargs): """Compute the fractional matrix power of a symmetric matrix using SVD. Note: This function is a simplified version of the fractional_matrix_power @@ -11,7 +13,8 @@ def fractional_matrix_power(C, power): if C.shape[0] != C.shape[1]: raise ValueError("Matrix must be square.") - V, s, _ = np.linalg.svd(C) + svd = _SVD(n_modes="all", **kwargs) + _, s, V = svd.fit_transform(C) # cut off small singular values is_above_zero = s > np.finfo(s.dtype).eps diff --git a/xeofs/models/decomposer.py b/xeofs/linalg/decomposer.py similarity index 95% rename from xeofs/models/decomposer.py rename to xeofs/linalg/decomposer.py index 27ecda6a..462d1281 100644 --- a/xeofs/models/decomposer.py +++ b/xeofs/linalg/decomposer.py @@ -1,12 +1,10 @@ import warnings -from typing import Optional import dask import numpy as np import xarray as xr from dask.array import Array as DaskArray # type: ignore from dask.array.linalg import svd_compressed as dask_svd -from dask.diagnostics.progress import ProgressBar from scipy.sparse.linalg import svds as complex_svd # type: ignore from sklearn.utils.extmath import randomized_svd @@ -37,10 +35,8 @@ class Decomposer: than 80% of the smallest dimension of the data, then the more efficient `randomized` method is enabled. Otherwise the exact full SVD is computed and optionally truncated afterwards. - random_state : Optional[int], default=None + random_state : np.random.Generator | int | None, default=None Seed for the random number generator. - verbose: bool, default=False - Whether to show a progress bar when computing the decomposition. component_dim_name : str, default='mode' Name of the component dimension in the output DataArrays. 
solver_kwargs : dict, default={} @@ -54,8 +50,7 @@ def __init__( flip_signs: bool = True, compute: bool = True, solver: str = "auto", - random_state: Optional[int] = None, - verbose: bool = False, + random_state: np.random.Generator | int | None = None, component_dim_name: str = "mode", solver_kwargs: dict = {}, ): @@ -73,7 +68,6 @@ def __init__( self.init_rank_reduction = init_rank_reduction self.flip_signs = flip_signs self.compute = compute - self.verbose = verbose self.solver = solver self.random_state = random_state self.component_dim_name = component_dim_name @@ -300,10 +294,5 @@ def _compute_svd_result(self, U, s, VT): case False: pass case True: - match self.verbose: - case True: - with ProgressBar(): - U, s, VT = dask.compute(U, s, VT) - case False: - U, s, VT = dask.compute(U, s, VT) + U, s, VT = dask.compute(U, s, VT) return U, s, VT diff --git a/xeofs/linalg/rotation.py b/xeofs/linalg/rotation.py new file mode 100644 index 00000000..74360ee1 --- /dev/null +++ b/xeofs/linalg/rotation.py @@ -0,0 +1,21 @@ +import xarray as xr + +from ..utils.data_types import DataArray +from ._numpy._rotation import _promax + + +def promax(loadings: DataArray, feature_dim, **kwargs): + rotated, rot_mat, phi_mat = xr.apply_ufunc( + _promax, + loadings, + input_core_dims=[[feature_dim, "mode"]], + output_core_dims=[ + [feature_dim, "mode"], + ["mode_m", "mode_n"], + ["mode_m", "mode_n"], + ], + kwargs=kwargs, + dask="allowed", + ) + + return rotated, rot_mat, phi_mat diff --git a/xeofs/linalg/svd.py b/xeofs/linalg/svd.py new file mode 100644 index 00000000..563ad1e8 --- /dev/null +++ b/xeofs/linalg/svd.py @@ -0,0 +1,81 @@ +import numpy as np +import xarray as xr +from dask.base import compute as dask_compute + +from ..utils.data_types import DataArray +from ._numpy import _SVD + + +class SVD: + def __init__( + self, + n_modes: int | float | str, + is_complex: bool | str = "auto", + init_rank_reduction: float = 0.3, + flip_signs: bool = True, + solver: str = "auto", + compute: bool = True, + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, + sample_name: str = "sample", + feature_name: str = "feature", + ): + self.n_modes = n_modes + self.is_complex = is_complex + self.init_rank_reduction = init_rank_reduction + self.flip_signs = flip_signs + self.solver = solver + self.random_state = random_state + self.solver_kwargs = solver_kwargs + self.compute_svd = compute + + self.sample_name = sample_name + self.feature_name = feature_name + + def fit_transform(self, X: DataArray) -> tuple[DataArray, DataArray, DataArray]: + """Decomposes the data object. + + Parameters + ---------- + X : DataArray + A 2-dimensional data object to be decomposed. + + Returns + ------- + U : DataArray + The left singular vectors of the decomposition. + s : DataArray + The singular values of the decomposition. + V : DataArray + The right singular vectors of the decomposition. 
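A minimal usage sketch of this xarray-level wrapper (an internal helper added in this patch, so treat the import path and API as provisional):

```python
import numpy as np
import xarray as xr

from xeofs.linalg.svd import SVD  # internal helper added in this patch

X = xr.DataArray(
    np.random.default_rng(0).standard_normal((50, 10)),
    dims=("sample", "feature"),
)
U, s, V = SVD(n_modes=3).fit_transform(X)
print(U.dims, s.dims, V.dims)  # ('sample', 'mode') ('mode',) ('feature', 'mode')
```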
+ + """ + svd = _SVD( + n_modes=self.n_modes, + init_rank_reduction=self.init_rank_reduction, + flip_signs=self.flip_signs, + solver=self.solver, + random_state=self.random_state, + is_complex=self.is_complex, + **self.solver_kwargs, + ) + U, s, V = xr.apply_ufunc( + svd.fit_transform, + X, + input_core_dims=[[self.sample_name, self.feature_name]], + output_core_dims=[ + [self.sample_name, "mode"], + ["mode"], + [self.feature_name, "mode"], + ], + dask="allowed", + ) + mode_coords = np.arange(1, U.mode.size + 1) + s = s.assign_coords(mode=mode_coords) + U = U.assign_coords(mode=mode_coords) + V = V.assign_coords(mode=mode_coords) + + if self.compute_svd: + U, s, V = dask_compute(U, s, V) + + return U, s, V diff --git a/xeofs/linalg/utils.py b/xeofs/linalg/utils.py new file mode 100644 index 00000000..418dcf69 --- /dev/null +++ b/xeofs/linalg/utils.py @@ -0,0 +1,6 @@ +from ..utils.data_types import DataArray + + +def total_variance(X: DataArray, dim: str) -> DataArray: + """Compute the total variance of the centered data.""" + return (X * X.conj()).sum() / (X[dim].size - 1) diff --git a/xeofs/models/_base_cross_model.py b/xeofs/models/_base_cross_model.py deleted file mode 100644 index 79eb7ff2..00000000 --- a/xeofs/models/_base_cross_model.py +++ /dev/null @@ -1,497 +0,0 @@ -import warnings -from abc import ABC, abstractmethod -from datetime import datetime -from typing import Dict, Hashable, Literal, Optional, Sequence, Tuple - -import dask -import xarray as xr -from dask.diagnostics.progress import ProgressBar -from typing_extensions import Self - -try: - from xarray.core.datatree import DataTree -except ImportError: - from datatree import DataTree - -from .._version import __version__ -from ..data_container import DataContainer -from ..preprocessing.preprocessor import Preprocessor -from ..utils.data_types import DataArray, DataObject -from ..utils.io import insert_placeholders, open_model_tree, write_model_tree -from ..utils.sanity_checks import validate_input_type -from ..utils.xarray_utils import convert_to_dim_type, data_is_dask -from .eof import EOF - - -class _BaseCrossModel(ABC): - """ - Abstract base class for cross-decomposition models. - - Parameters: - ------------- - n_modes: int, default=10 - Number of modes to calculate. - center: bool, default=True - Whether to center the input data. - standardize: bool, default=False - Whether to standardize the input data. - use_coslat: bool, default=False - Whether to use cosine of latitude for scaling. - check_nans : bool, default=True - If True, remove full-dimensional NaN features from the data, check to ensure - that NaN features match the original fit data during transform, and check - for isolated NaNs. Note: this forces eager computation of dask arrays. - If False, skip all NaN checks. In this case, NaNs should be explicitly removed - or filled prior to fitting, or SVD will fail. - n_pca_modes: int, default=None - Number of PCA modes to calculate. - compute: bool, default=True - Whether to compute elements of the model eagerly, or to defer computation. - If True, four pieces of the fit will be computed sequentially: 1) the - preprocessor scaler, 2) optional NaN checks, 3) SVD decomposition, 4) scores - and components. - sample_name: str, default="sample" - Name of the new sample dimension. - feature_name: str, default="feature" - Name of the new feature dimension. - solver: {"auto", "full", "randomized"}, default="auto" - Solver to use for the SVD computation. 
- solver_kwargs: dict, default={} - Additional keyword arguments to passed to the SVD solver function. - - """ - - def __init__( - self, - n_modes=10, - center=True, - standardize=False, - use_coslat=False, - check_nans=True, - n_pca_modes=None, - compute=True, - verbose=False, - sample_name="sample", - feature_name="feature", - solver="auto", - random_state=None, - solver_kwargs={}, - ): - if verbose: - warnings.warn( - "The 'verbose' parameter is deprecated and will be removed in a future release.", - category=DeprecationWarning, - stacklevel=3, - ) - self.n_modes = n_modes - self.sample_name = sample_name - self.feature_name = feature_name - - # Define model parameters - self._params = { - "n_modes": n_modes, - "center": center, - "standardize": standardize, - "use_coslat": use_coslat, - "check_nans": check_nans, - "n_pca_modes": n_pca_modes, - "compute": compute, - "sample_name": sample_name, - "feature_name": feature_name, - "solver": solver, - "random_state": random_state, - "solver_kwargs": solver_kwargs, - } - - self._decomposer_kwargs = { - "n_modes": n_modes, - "solver": solver, - "random_state": random_state, - "compute": compute, - "verbose": verbose, - "solver_kwargs": solver_kwargs, - } - self._preprocessor_kwargs = { - "sample_name": sample_name, - "feature_name": feature_name, - "with_center": center, - "with_std": standardize, - "with_coslat": use_coslat, - "check_nans": check_nans, - "compute": compute, - } - - # Define analysis-relevant meta data - self.attrs = {"model": "BaseCrossModel"} - self.attrs.update( - { - "software": "xeofs", - "version": __version__, - "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - } - ) - self.attrs.update(self._params) - - # Initialize preprocessors to scale and stack left (1) and right (2) data - self.preprocessor1 = Preprocessor(**self._preprocessor_kwargs) - self.preprocessor2 = Preprocessor(**self._preprocessor_kwargs) - - # Initialize the data container that stores the results - self.data = DataContainer() - - # Initialize PCA objects - self.pca1 = ( - EOF(n_modes=n_pca_modes, compute=self._params["compute"], check_nans=False) - if n_pca_modes - else None - ) - self.pca2 = ( - EOF(n_modes=n_pca_modes, compute=self._params["compute"], check_nans=False) - if n_pca_modes - else None - ) - - def get_serialization_attrs(self) -> Dict: - return dict( - data=self.data, - preprocessor1=self.preprocessor1, - preprocessor2=self.preprocessor2, - ) - - def fit( - self, - data1: DataObject, - data2: DataObject, - dim: Hashable | Sequence[Hashable], - weights1: Optional[DataObject] = None, - weights2: Optional[DataObject] = None, - ) -> Self: - """ - Fit the model to the data. - - Parameters - ---------- - data1: DataArray | Dataset | List[DataArray] - Left input data. - data2: DataArray | Dataset | List[DataArray] - Right input data. - dim: Hashable | Sequence[Hashable] - Define the sample dimensions. The remaining dimensions - will be treated as feature dimensions. - weights1: Optional[DataObject] - Weights to be applied to the left input data. - weights2: Optional[DataObject] - Weights to be applied to the right input data. 
- - """ - validate_input_type(data1) - validate_input_type(data2) - if weights1 is not None: - validate_input_type(weights1) - if weights2 is not None: - validate_input_type(weights2) - - self.sample_dims = convert_to_dim_type(dim) - # Preprocess data1 - data1 = self.preprocessor1.fit_transform(data1, self.sample_dims, weights1) - # Preprocess data2 - data2 = self.preprocessor2.fit_transform(data2, self.sample_dims, weights2) - - self._fit_algorithm(data1, data2) - - if self._params["compute"]: - self.data.compute() - - return self - - def transform( - self, data1: Optional[DataObject] = None, data2: Optional[DataObject] = None - ) -> Sequence[DataArray]: - """ - Abstract method to transform the data. - - - """ - if data1 is None and data2 is None: - raise ValueError("Either data1 or data2 must be provided.") - - if data1 is not None: - validate_input_type(data1) - # Preprocess data1 - data1 = self.preprocessor1.transform(data1) - if data2 is not None: - validate_input_type(data2) - # Preprocess data2 - data2 = self.preprocessor2.transform(data2) - - data = self._transform_algorithm(data1, data2) - data_list = [] - if data1 is not None: - data1 = self.preprocessor1.inverse_transform_scores_unseen(data["data1"]) - data_list.append(data1) - if data2 is not None: - data2 = self.preprocessor2.inverse_transform_scores_unseen(data["data2"]) - data_list.append(data2) - return data_list - - def inverse_transform( - self, scores1: DataArray, scores2: DataArray - ) -> Tuple[DataObject, DataObject]: - """Reconstruct the original data from transformed data. - - Parameters - ---------- - scores1: DataObject - Transformed left field data to be reconstructed. This could be - a subset of the `scores` data of a fitted model, or unseen data. - Must have a 'mode' dimension. - scores2: DataObject - Transformed right field data to be reconstructed. This could be - a subset of the `scores` data of a fitted model, or unseen data. - Must have a 'mode' dimension. - - Returns - ------- - Xrec1: DataArray | Dataset | List[DataArray] - Reconstructed data of left field. - Xrec2: DataArray | Dataset | List[DataArray] - Reconstructed data of right field. - - """ - # Handle scalar mode in xr.dot - if "mode" not in scores1.dims: - scores1 = scores1.expand_dims("mode") - if "mode" not in scores2.dims: - scores2 = scores2.expand_dims("mode") - - data1, data2 = self._inverse_transform_algorithm(scores1, scores2) - - # Unstack and rescale the data - data1 = self.preprocessor1.inverse_transform_data(data1) - data2 = self.preprocessor2.inverse_transform_data(data2) - - return data1, data2 - - @abstractmethod - def _fit_algorithm(self, data1: DataArray, data2: DataArray) -> Self: - """ - Fit the model to the preprocessed data. This method needs to be implemented in the respective - subclass. - - Parameters - ---------- - data1, data2: DataArray - Preprocessed input data of two dimensions: (`sample_name`, `feature_name`) - - """ - raise NotImplementedError - - @abstractmethod - def _transform_algorithm( - self, data1: Optional[DataArray] = None, data2: Optional[DataArray] = None - ) -> Dict[str, DataArray]: - """ - Transform the preprocessed data. This method needs to be implemented in the respective - subclass. 
- - Parameters - ---------- - data1, data2: DataArray - Preprocessed input data of two dimensions: (`sample_name`, `feature_name`) - - """ - raise NotImplementedError - - @abstractmethod - def _inverse_transform_algorithm( - self, scores1: DataArray, scores2: DataArray - ) -> Tuple[DataArray, DataArray]: - """ - Reconstruct the original data from transformed data. This method needs to be implemented in the respective - subclass. - - Parameters - ---------- - scores1: DataArray - Transformed left field data to be reconstructed. This could be - a subset of the `scores` data of a fitted model, or unseen data. - Must have a 'mode' dimension. - scores2: DataArray - Transformed right field data to be reconstructed. This could be - a subset of the `scores` data of a fitted model, or unseen data. - Must have a 'mode' dimension. - - Returns - ------- - Xrec1: DataArray - Reconstructed data of left field. - Xrec2: DataArray - Reconstructed data of right field. - - """ - raise NotImplementedError - - def components(self) -> Tuple[DataObject, DataObject]: - """Get the components.""" - comps1 = self.data["components1"] - comps2 = self.data["components2"] - - components1: DataObject = self.preprocessor1.inverse_transform_components( - comps1 - ) - components2: DataObject = self.preprocessor2.inverse_transform_components( - comps2 - ) - return components1, components2 - - def scores(self) -> Tuple[DataArray, DataArray]: - """Get the scores.""" - scores1 = self.data["scores1"] - scores2 = self.data["scores2"] - - scores1: DataArray = self.preprocessor1.inverse_transform_scores(scores1) - scores2: DataArray = self.preprocessor2.inverse_transform_scores(scores2) - return scores1, scores2 - - def compute(self, verbose: bool = False, **kwargs): - """Compute and load delayed model results. - - Parameters - ---------- - verbose : bool - Whether or not to provide additional information about the computing progress. - **kwargs - Additional keyword arguments to pass to `dask.compute()`. - """ - # find and compute all dask arrays simultaneously to allow dask to optimize the - # shared graph and avoid duplicate i/o and computations - dt = self.serialize() - - data_objs = { - k: v - for k, v in dt.to_dict().items() - if data_is_dask(v) and v.attrs.get("allow_compute", True) - } - - if verbose: - with ProgressBar(): - (data_objs,) = dask.compute(data_objs, **kwargs) - else: - (data_objs,) = dask.compute(data_objs, **kwargs) - - for k, v in data_objs.items(): - dt[k] = DataTree(v) - - # then rebuild the trained model from the computed results - self._deserialize_attrs(dt) - - self._post_compute() - - def _post_compute(self): - pass - - def get_params(self) -> Dict: - """Get the model parameters.""" - return self._params - - def serialize(self) -> DataTree: - """Serialize a complete model with its preprocessors.""" - # Create a root node for this object with its params as attrs - ds_root = xr.Dataset(attrs=dict(params=self.get_params())) - dt = DataTree(data=ds_root, name=type(self).__name__) - - # Retrieve the tree representation of each attached object, or set basic attrs - for key, attr in self.get_serialization_attrs().items(): - if hasattr(attr, "serialize"): - dt[key] = attr.serialize() - dt.attrs[key] = "_is_tree" - else: - dt.attrs[key] = attr - - return dt - - def save( - self, - path: str, - overwrite: bool = False, - save_data: bool = False, - engine: Literal["zarr", "netcdf4", "h5netcdf"] = "zarr", - **kwargs, - ): - """Save the model. - - Parameters - ---------- - path : str - Path to save the model. 
- overwrite: bool, default=False - Whether or not to overwrite the existing path if it already exists. - Ignored unless `engine="zarr"`. - save_data : str - Whether or not to save the full input data along with the fitted components. - engine : {"zarr", "netcdf4", "h5netcdf"}, default="zarr" - Xarray backend engine to use for writing the saved model. - **kwargs - Additional keyword arguments to pass to `DataTree.to_netcdf()` or `DataTree.to_zarr()`. - - """ - self.compute() - - dt = self.serialize() - - # Remove any raw data arrays at this stage - if not save_data: - dt = insert_placeholders(dt) - - write_model_tree(dt, path, overwrite=overwrite, engine=engine, **kwargs) - - @classmethod - def deserialize(cls, dt: DataTree) -> Self: - """Deserialize the model and its preprocessors from a DataTree.""" - # Recreate the model with parameters set by root level attrs - model = cls(**dt.attrs["params"]) - model._deserialize_attrs(dt) - return model - - def _deserialize_attrs(self, dt: DataTree): - """Set the necessary attributes of the model from a DataTree.""" - for key, attr in dt.attrs.items(): - if key == "params": - continue - elif attr == "_is_tree": - deserialized_obj = getattr(self, key).deserialize(dt[key]) - else: - deserialized_obj = attr - setattr(self, key, deserialized_obj) - - @classmethod - def load( - cls, - path: str, - engine: Literal["zarr", "netcdf4", "h5netcdf"] = "zarr", - **kwargs, - ) -> Self: - """Load a saved model. - - Parameters - ---------- - path : str - Path to the saved model. - engine : {"zarr", "netcdf4", "h5netcdf"}, default="zarr" - Xarray backend engine to use for reading the saved model. - **kwargs - Additional keyword arguments to pass to `open_datatree()`. - - Returns - ------- - model : _BaseCrossModel - The loaded model. - - """ - dt = open_model_tree(path, engine=engine, **kwargs) - model = cls.deserialize(dt) - return model - - def _validate_loaded_data(self, data: DataArray): - """Optionally check the loaded data for placeholders.""" - pass diff --git a/xeofs/models/mca.py b/xeofs/models/mca.py deleted file mode 100644 index 1f77d4ea..00000000 --- a/xeofs/models/mca.py +++ /dev/null @@ -1,919 +0,0 @@ -import warnings -from typing import Dict, Optional, Sequence, Tuple - -import numpy as np -import xarray as xr -from typing_extensions import Self - -from ..utils.data_types import DataArray, DataObject -from ..utils.dimension_renamer import DimensionRenamer -from ..utils.hilbert_transform import hilbert_transform -from ..utils.sanity_checks import assert_not_complex -from ..utils.statistics import pearson_correlation -from ..utils.xarray_utils import argsort_dask -from ._base_cross_model import _BaseCrossModel -from .decomposer import Decomposer - - -class MCA(_BaseCrossModel): - """Maximum Covariance Analyis. - - MCA is a statistical method that finds patterns of maximum covariance between two datasets. - - Parameters - ---------- - n_modes: int, default=2 - Number of modes to calculate. - center: bool, default=True - Whether to center the input data. - standardize: bool, default=False - Whether to standardize the input data. - use_coslat: bool, default=False - Whether to use cosine of latitude for scaling. - n_pca_modes: int, default=None - The number of principal components to retain during the PCA preprocessing - step applied to both data sets prior to executing MCA. - If set to None, PCA preprocessing will be bypassed, and the MCA will be performed on the original datasets. 
- Specifying an integer value greater than 0 for `n_pca_modes` will trigger the PCA preprocessing, retaining - only the specified number of principal components. This reduction in dimensionality can be especially beneficial - when dealing with high-dimensional data, where computing the cross-covariance matrix can become computationally - intensive or in scenarios where multicollinearity is a concern. - compute : bool, default=True - Whether to compute elements of the model eagerly, or to defer computation. - If True, four pieces of the fit will be computed sequentially: 1) the - preprocessor scaler, 2) optional NaN checks, 3) SVD decomposition, 4) scores - and components. - sample_name: str, default="sample" - Name of the new sample dimension. - feature_name: str, default="feature" - Name of the new feature dimension. - solver: {"auto", "full", "randomized"}, default="auto" - Solver to use for the SVD computation. - random_state: int, default=None - Seed for the random number generator. - solver_kwargs: dict, default={} - Additional keyword arguments passed to the SVD solver function. - - Notes - ----- - MCA is similar to Principal Component Analysis (PCA) and Canonical Correlation Analysis (CCA), - but while PCA finds modes of maximum variance and CCA finds modes of maximum correlation, - MCA finds modes of maximum covariance. See [1]_ [2]_ for more details. - - References - ---------- - .. [1] Bretherton, C., Smith, C., Wallace, J., 1992. An intercomparison of methods for finding coupled patterns in climate data. Journal of climate 5, 541–560. - .. [2] Cherry, S., 1996. Singular value decomposition analysis and canonical correlation analysis. Journal of Climate 9, 2003–2009. - - Examples - -------- - >>> model = MCA(n_modes=5, standardize=True) - >>> model.fit(data1, data2) - - """ - - def __init__( - self, - n_modes: int = 2, - center: bool = True, - standardize: bool = False, - use_coslat: bool = False, - check_nans: bool = True, - n_pca_modes: Optional[int] = None, - compute: bool = True, - sample_name: str = "sample", - feature_name: str = "feature", - solver: str = "auto", - random_state: Optional[int] = None, - solver_kwargs: Dict = {}, - **kwargs, - ): - super().__init__( - n_modes=n_modes, - center=center, - standardize=standardize, - use_coslat=use_coslat, - check_nans=check_nans, - n_pca_modes=n_pca_modes, - compute=compute, - sample_name=sample_name, - feature_name=feature_name, - solver=solver, - random_state=random_state, - solver_kwargs=solver_kwargs, - **kwargs, - ) - self.attrs.update({"model": "MCA"}) - - def _compute_cross_covariance_matrix(self, X1, X2): - """Compute the cross-covariance matrix of two data objects. - - Note: It is assumed that the data objects are centered. - - """ - sample_name = self.sample_name - n_samples = X1.coords[sample_name].size - if X1.coords[sample_name].size != X2.coords[sample_name].size: - err_msg = "The two data objects must have the same number of samples." 
- raise ValueError(err_msg) - - return xr.dot(X1.conj(), X2, dims=sample_name) / (n_samples - 1) - - def _fit_algorithm( - self, - data1: DataArray, - data2: DataArray, - ) -> Self: - sample_name = self.sample_name - feature_name = self.feature_name - - # Initialize the SVD decomposer - decomposer = Decomposer(**self._decomposer_kwargs) - - # Perform SVD on PCA-reduced data - if (self.pca1 is not None) and (self.pca2 is not None): - # Fit the PCA models - self.pca1.fit(data1, dim=sample_name) - self.pca2.fit(data2, dim=sample_name) - # Get the PCA scores - pca_scores1 = self.pca1.data["scores"] * self.pca1.singular_values() - pca_scores2 = self.pca2.data["scores"] * self.pca2.singular_values() - # Compute the cross-covariance matrix of the PCA scores - pca_scores1 = pca_scores1.rename({"mode": "feature1"}) - pca_scores2 = pca_scores2.rename({"mode": "feature2"}) - cov_matrix = self._compute_cross_covariance_matrix(pca_scores1, pca_scores2) - - # Perform the SVD - decomposer.fit(cov_matrix, dims=("feature1", "feature2")) - V1 = decomposer.U_ # left singular vectors (feature1 x mode) - V2 = decomposer.V_ # right singular vectors (feature2 x mode) - - # left and right PCA eigenvectors (feature x mode) - V1pre = self.pca1.data["components"] - V2pre = self.pca2.data["components"] - - # Compute the singular vectors - V1pre = V1pre.rename({"mode": "feature1"}) - V2pre = V2pre.rename({"mode": "feature2"}) - singular_vectors1 = xr.dot(V1pre, V1, dims="feature1") - singular_vectors2 = xr.dot(V2pre, V2, dims="feature2") - - # Perform SVD directly on data - else: - # Rename feature and associated dimensions of data objects to avoid index conflicts - dim_renamer1 = DimensionRenamer(feature_name, "1") - dim_renamer2 = DimensionRenamer(feature_name, "2") - data1_temp = dim_renamer1.fit_transform(data1) - data2_temp = dim_renamer2.fit_transform(data2) - # Compute the cross-covariance matrix - cov_matrix = self._compute_cross_covariance_matrix(data1_temp, data2_temp) - - # Perform the SVD - decomposer.fit(cov_matrix, dims=("feature1", "feature2")) - singular_vectors1 = decomposer.U_ - singular_vectors2 = decomposer.V_ - - # Rename the singular vectors - singular_vectors1 = dim_renamer1.inverse_transform(singular_vectors1) - singular_vectors2 = dim_renamer2.inverse_transform(singular_vectors2) - - # Store the results - singular_values = decomposer.s_ - - # Compute total squared variance - squared_covariance = singular_values**2 - total_squared_covariance = (abs(cov_matrix) ** 2).sum() - - norm1 = np.sqrt(singular_values) - norm2 = np.sqrt(singular_values) - - # Index of the sorted squared covariance - idx_sorted_modes = argsort_dask(squared_covariance, "mode")[::-1] - idx_sorted_modes.coords.update(squared_covariance.coords) - - # Project the data onto the singular vectors - scores1 = xr.dot(data1, singular_vectors1, dims=feature_name) / norm1 - scores2 = xr.dot(data2, singular_vectors2, dims=feature_name) / norm2 - - self.data.add(name="input_data1", data=data1, allow_compute=False) - self.data.add(name="input_data2", data=data2, allow_compute=False) - self.data.add(name="components1", data=singular_vectors1) - self.data.add(name="components2", data=singular_vectors2) - self.data.add(name="scores1", data=scores1) - self.data.add(name="scores2", data=scores2) - self.data.add(name="squared_covariance", data=squared_covariance) - self.data.add(name="total_squared_covariance", data=total_squared_covariance) - self.data.add(name="idx_modes_sorted", data=idx_sorted_modes) - self.data.add(name="norm1", 
data=norm1) - self.data.add(name="norm2", data=norm2) - - # Assign analysis-relevant meta data - self.data.set_attrs(self.attrs) - return self - - def transform( - self, data1: Optional[DataObject] = None, data2: Optional[DataObject] = None - ) -> Sequence[DataArray]: - """Get the expansion coefficients of "unseen" data. - - The expansion coefficients are obtained by projecting data onto the singular vectors. - - Parameters - ---------- - data1: DataArray | Dataset | List[DataArray] - Left input data. Must be provided if `data2` is not provided. - data2: DataArray | Dataset | List[DataArray] - Right input data. Must be provided if `data1` is not provided. - - Returns - ------- - scores1: DataArray | Dataset | List[DataArray] - Left scores. - scores2: DataArray | Dataset | List[DataArray] - Right scores. - - """ - return super().transform(data1, data2) - - def _transform_algorithm( - self, data1: Optional[DataArray] = None, data2: Optional[DataArray] = None - ) -> Dict[str, DataArray]: - results = {} - if data1 is not None: - # Project data onto singular vectors - comps1 = self.data["components1"] - norm1 = self.data["norm1"] - scores1 = xr.dot(data1, comps1) / norm1 - results["data1"] = scores1 - - if data2 is not None: - # Project data onto singular vectors - comps2 = self.data["components2"] - norm2 = self.data["norm2"] - scores2 = xr.dot(data2, comps2) / norm2 - results["data2"] = scores2 - - return results - - def _inverse_transform_algorithm( - self, scores1: DataArray, scores2: DataArray - ) -> Tuple[DataArray, DataArray]: - """Reconstruct the original data from transformed data. - - Parameters - ---------- - scores1: DataArray - Transformed left field data to be reconstructed. This could be - a subset of the `scores` data of a fitted model, or unseen data. - Must have a 'mode' dimension. - scores2: DataArray - Transformed right field data to be reconstructed. This could be - a subset of the `scores` data of a fitted model, or unseen data. - Must have a 'mode' dimension. - - Returns - ------- - Xrec1: DataArray - Reconstructed data of left field. - Xrec2: DataArray - Reconstructed data of right field. - - """ - # Singular vectors - comps1 = self.data["components1"].sel(mode=scores1.mode) - comps2 = self.data["components2"].sel(mode=scores2.mode) - - # Norms - norm1 = self.data["norm1"].sel(mode=scores1.mode) - norm2 = self.data["norm2"].sel(mode=scores2.mode) - - # Reconstruct the data - data1 = xr.dot(scores1, comps1.conj() * norm1, dims="mode") - data2 = xr.dot(scores2, comps2.conj() * norm2, dims="mode") - - return data1, data2 - - def squared_covariance(self): - """Get the squared covariance. - - The squared covariance corresponds to the explained variance in PCA and is given by the - squared singular values of the covariance matrix. - - """ - return self.data["squared_covariance"] - - def squared_covariance_fraction(self): - """Calculate the squared covariance fraction (SCF). - - The SCF is a measure of the proportion of the total squared covariance that is explained by each mode `i`. It is computed - as follows: - - .. math:: - SCF_i = \\frac{\\sigma_i^2}{\\sum_{i=1}^{m} \\sigma_i^2} - - where `m` is the total number of modes and :math:`\\sigma_i` is the `ith` singular value of the covariance matrix. 
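- 
- As a minimal numerical sketch (the singular values are hypothetical), the SCF of each mode is its squared singular value divided by the total squared covariance:
- 
- >>> import numpy as np
- >>> sigma = np.array([4.0, 2.0, 1.0])  # singular values of the covariance matrix
- >>> scf = sigma**2 / (sigma**2).sum()
- >>> scf.round(3)
- array([0.762, 0.19 , 0.048])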
- - """ - return self.data["squared_covariance"] / self.data["total_squared_covariance"] - - def singular_values(self): - """Get the singular values of the cross-covariance matrix.""" - singular_values = xr.apply_ufunc( - np.sqrt, - self.data["squared_covariance"], - dask="allowed", - vectorize=False, - keep_attrs=True, - ) - singular_values.name = "singular_values" - return singular_values - - def total_covariance(self) -> DataArray: - """Get the total covariance. - - This measure follows the defintion of Cheng and Dunkerton (1995). - Note that this measure is not an invariant in MCA. - - """ - tot_cov = self.singular_values().sum() - tot_cov.attrs.update(self.singular_values().attrs) - tot_cov.name = "total_covariance" - return tot_cov - - def covariance_fraction(self): - """Get the covariance fraction (CF). - - Cheng and Dunkerton (1995) define the CF as follows: - - .. math:: - CF_i = \\frac{\\sigma_i}{\\sum_{i=1}^{m} \\sigma_i} - - where `m` is the total number of modes and :math:`\\sigma_i` is the - `ith` singular value of the covariance matrix. - - In this implementation the sum of singular values is estimated from - the first `n` modes, therefore one should aim to retain as many - modes as possible to get a good estimate of the covariance fraction. - - Note - ---- - It is important to differentiate the CF from the squared covariance fraction (SCF). While the SCF is an invariant quantity in MCA, the CF is not. - Therefore, the SCF is used to assess the relative importance of each mode. Cheng and Dunkerton (1995) introduced the CF in the context of - Varimax-rotated MCA to compare the relative importance of each mode before and after rotation. In the special case of both data fields in MCA being identical, - the CF is equivalent to the explained variance ratio in EOF analysis. - - """ - # Check how sensitive the CF is to the number of modes - svals = self.singular_values() - tot_var = self.total_covariance() - cf = svals[0] / svals.cumsum() - change_per_mode = cf.shift({"mode": 1}) - cf - change_in_cf_in_last_mode = change_per_mode.isel(mode=-1) - if change_in_cf_in_last_mode > 0.001: - print( - "Warning: CF is sensitive to the number of modes retained. Please increase `n_modes` for a better estimate." - ) - cov_frac = svals / tot_var - cov_frac.name = "covariance_fraction" - cov_frac.attrs.update(svals.attrs) - return cov_frac - - def components(self): - """Return the singular vectors of the left and right field. - - Returns - ------- - components1: DataArray | Dataset | List[DataArray] - Left components of the fitted model. - components2: DataArray | Dataset | List[DataArray] - Right components of the fitted model. - - """ - return super().components() - - def scores(self): - """Return the scores of the left and right field. - - The scores in MCA are the projection of the left and right field onto the - left and right singular vector of the cross-covariance matrix. - - Returns - ------- - scores1: DataArray - Left scores. - scores2: DataArray - Right scores. - - """ - return super().scores() - - def homogeneous_patterns(self, correction=None, alpha=0.05): - """Return the homogeneous patterns of the left and right field. - - The homogeneous patterns are the correlation coefficients between the - input data and the scores. - - More precisely, the homogeneous patterns `r_{hom}` are defined as - - .. math:: - r_{hom, x} = corr \\left(X, A_x \\right) - .. 
math:: - r_{hom, y} = corr \\left(Y, A_y \\right) - - where :math:`X` and :math:`Y` are the input data, :math:`A_x` and :math:`A_y` - are the scores of the left and right field, respectively. - - Parameters - ---------- - correction: str, default=None - Method to apply a multiple testing correction. If None, no correction - is applied. Available methods are: - - bonferroni : one-step correction - - sidak : one-step correction - - holm-sidak : step down method using Sidak adjustments - - holm : step-down method using Bonferroni adjustments - - simes-hochberg : step-up method (independent) - - hommel : closed method based on Simes tests (non-negative) - - fdr_bh : Benjamini/Hochberg (non-negative) (default) - - fdr_by : Benjamini/Yekutieli (negative) - - fdr_tsbh : two stage fdr correction (non-negative) - - fdr_tsbky : two stage fdr correction (non-negative) - alpha: float, default=0.05 - The desired family-wise error rate. Not used if `correction` is None. - - Returns - ------- - patterns1: DataArray | Dataset | List[DataArray] - Left homogenous patterns. - patterns2: DataArray | Dataset | List[DataArray] - Right homogenous patterns. - pvals1: DataArray | Dataset | List[DataArray] - Left p-values. - pvals2: DataArray | Dataset | List[DataArray] - Right p-values. - - """ - input_data1 = self.data["input_data1"] - input_data2 = self.data["input_data2"] - - scores1 = self.data["scores1"] - scores2 = self.data["scores2"] - - hom_pat1, pvals1 = pearson_correlation( - input_data1, scores1, correction=correction, alpha=alpha - ) - hom_pat2, pvals2 = pearson_correlation( - input_data2, scores2, correction=correction, alpha=alpha - ) - - hom_pat1.name = "left_homogeneous_patterns" - hom_pat2.name = "right_homogeneous_patterns" - - pvals1.name = "pvalues_of_left_homogeneous_patterns" - pvals2.name = "pvalues_of_right_homogeneous_patterns" - - hom_pat1 = self.preprocessor1.inverse_transform_components(hom_pat1) - hom_pat2 = self.preprocessor2.inverse_transform_components(hom_pat2) - - pvals1 = self.preprocessor1.inverse_transform_components(pvals1) - pvals2 = self.preprocessor2.inverse_transform_components(pvals2) - - return (hom_pat1, hom_pat2), (pvals1, pvals2) - - def heterogeneous_patterns(self, correction=None, alpha=0.05): - """Return the heterogeneous patterns of the left and right field. - - The heterogeneous patterns are the correlation coefficients between the - input data and the scores of the other field. - - More precisely, the heterogeneous patterns `r_{het}` are defined as - - .. math:: - r_{het, x} = corr \\left(X, A_y \\right) - .. math:: - r_{het, y} = corr \\left(Y, A_x \\right) - - where :math:`X` and :math:`Y` are the input data, :math:`A_x` and :math:`A_y` - are the scores of the left and right field, respectively. - - Parameters - ---------- - correction: str, default=None - Method to apply a multiple testing correction. If None, no correction - is applied. Available methods are: - - bonferroni : one-step correction - - sidak : one-step correction - - holm-sidak : step down method using Sidak adjustments - - holm : step-down method using Bonferroni adjustments - - simes-hochberg : step-up method (independent) - - hommel : closed method based on Simes tests (non-negative) - - fdr_bh : Benjamini/Hochberg (non-negative) (default) - - fdr_by : Benjamini/Yekutieli (negative) - - fdr_tsbh : two stage fdr correction (non-negative) - - fdr_tsbky : two stage fdr correction (non-negative) - alpha: float, default=0.05 - The desired family-wise error rate. Not used if `correction` is None. 
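- 
- A minimal usage sketch (assuming a fitted MCA model named ``model``; the output names are illustrative):
- 
- >>> (het1, het2), (pvals1, pvals2) = model.heterogeneous_patterns(
- ...     correction="fdr_bh", alpha=0.05
- ... )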
- - """ - input_data1 = self.data["input_data1"] - input_data2 = self.data["input_data2"] - - scores1 = self.data["scores1"] - scores2 = self.data["scores2"] - - patterns1, pvals1 = pearson_correlation( - input_data1, scores2, correction=correction, alpha=alpha - ) - patterns2, pvals2 = pearson_correlation( - input_data2, scores1, correction=correction, alpha=alpha - ) - - patterns1.name = "left_heterogeneous_patterns" - patterns2.name = "right_heterogeneous_patterns" - - pvals1.name = "pvalues_of_left_heterogeneous_patterns" - pvals2.name = "pvalues_of_right_heterogeneous_patterns" - - patterns1 = self.preprocessor1.inverse_transform_components(patterns1) - patterns2 = self.preprocessor2.inverse_transform_components(patterns2) - - pvals1 = self.preprocessor1.inverse_transform_components(pvals1) - pvals2 = self.preprocessor2.inverse_transform_components(pvals2) - - return (patterns1, patterns2), (pvals1, pvals2) - - def _validate_loaded_data(self, data: xr.DataArray): - if data.attrs.get("placeholder"): - warnings.warn( - f"The input data field '{data.name}' was not saved, which will produce" - " empty results when calling `homogeneous_patterns()` or " - "`heterogeneous_patterns()`. To avoid this warning, you can save the" - " model with `save_data=True`, or add the data manually by running" - " it through the model's `preprocessor.transform()` method and then" - " attaching it with `data.add()`." - ) - - -class HilbertMCA(MCA): - """Hilbert MCA. - - Hilbert MCA, also referred to as Analytical SVD (ASVD) by Elipot et al. (2017) [1]_, - enhances traditional MCA by accommodating both amplitude and phase information. - It achieves this by utilizing the Hilbert transform to preprocess the data, - thus allowing for a more comprehensive analysis in the subsequent MCA computation. - - An optional padding with exponentially decaying values can be applied prior to - the Hilbert transform in order to mitigate the impact of spectral leakage. - - Parameters - ---------- - n_modes: int, default=2 - Number of modes to calculate. - padding : str, optional - Specifies the method used for padding the data prior to applying the Hilbert - transform. This can help to mitigate the effect of spectral leakage. - Currently, only 'exp' for exponential padding is supported. Default is 'exp'. - decay_factor : float, optional - Specifies the decay factor used in the exponential padding. This parameter - is only used if padding='exp'. The recommended value typically ranges between 0.05 to 0.2 - but ultimately depends on the variability in the data. - A smaller value (e.g. 0.05) is recommended for - data with high variability, while a larger value (e.g. 0.2) is recommended - for data with low variability. Default is 0.2. - center: bool, default=True - Whether to center the input data. - standardize: bool, default=False - Whether to standardize the input data. - use_coslat: bool, default=False - Whether to use cosine of latitude for scaling. - n_pca_modes: int, default=None - The number of principal components to retain during the PCA preprocessing - step applied to both data sets prior to executing MCA. - If set to None, PCA preprocessing will be bypassed, and the MCA will be performed on the original datasets. - Specifying an integer value greater than 0 for `n_pca_modes` will trigger the PCA preprocessing, retaining - only the specified number of principal components. 
This reduction in dimensionality can be especially beneficial - when dealing with high-dimensional data, where computing the cross-covariance matrix can become computationally - intensive or in scenarios where multicollinearity is a concern. - compute : bool, default=True - Whether to compute elements of the model eagerly, or to defer computation. - If True, four pieces of the fit will be computed sequentially: 1) the - preprocessor scaler, 2) optional NaN checks, 3) SVD decomposition, 4) scores - and components. - sample_name: str, default="sample" - Name of the new sample dimension. - feature_name: str, default="feature" - Name of the new feature dimension. - solver: {"auto", "full", "randomized"}, default="auto" - Solver to use for the SVD computation. - random_state: int, optional - Random state for randomized SVD solver. - solver_kwargs: dict, default={} - Additional keyword arguments passed to the SVD solver. - - Notes - ----- - Hilbert MCA extends MCA to complex-valued data that contain both magnitude and phase information. - The Hilbert transform is used to transform real-valued data to complex-valued data, from which both - amplitude and phase can be extracted. - - Similar to MCA, Hilbert MCA is used in climate science to identify coupled patterns of variability - between two different climate variables. But unlike MCA, Hilbert MCA can identify coupled patterns - that involve phase shifts. - - References - ---------- - .. [1] Elipot, S., Frajka-Williams, E., Hughes, C. W., Olhede, S. & Lankhorst, M. Observed Basin-Scale Response of the North Atlantic Meridional Overturning Circulation to Wind Stress Forcing. Journal of Climate 30, 2029–2054 (2017). - - - - Examples - -------- - >>> model = HilbertMCA(n_modes=5, standardize=True) - >>> model.fit(data1, data2) - - """ - - def __init__( - self, - n_modes: int = 2, - padding: str = "exp", - decay_factor: float = 0.2, - center: bool = True, - standardize: bool = False, - use_coslat: bool = False, - check_nans: bool = True, - n_pca_modes: Optional[int] = None, - compute: bool = True, - sample_name: str = "sample", - feature_name: str = "feature", - solver: str = "auto", - random_state: Optional[bool] = None, - solver_kwargs: Dict = {}, - **kwargs, - ): - super().__init__( - n_modes=n_modes, - center=center, - standardize=standardize, - use_coslat=use_coslat, - check_nans=check_nans, - n_pca_modes=n_pca_modes, - compute=compute, - sample_name=sample_name, - feature_name=feature_name, - solver=solver, - random_state=random_state, - solver_kwargs=solver_kwargs, - **kwargs, - ) - self.attrs.update({"model": "Hilbert MCA"}) - self._params.update({"padding": padding, "decay_factor": decay_factor}) - - def _fit_algorithm(self, data1: DataArray, data2: DataArray) -> Self: - assert_not_complex(data1) - assert_not_complex(data2) - - sample_name = self.sample_name - feature_name = self.feature_name - - # Settings for Hilbert transform - hilbert_kwargs = { - "padding": self._params["padding"], - "decay_factor": self._params["decay_factor"], - } - - # Initialize the SVD decomposer - decomposer = Decomposer(**self._decomposer_kwargs) - - # Perform SVD on PCA-reduced data - if (self.pca1 is not None) and (self.pca2 is not None): - # Fit the PCA models - self.pca1.fit(data1, sample_name) - self.pca2.fit(data2, sample_name) - # Get the PCA scores - pca_scores1 = self.pca1.data["scores"] * self.pca1.singular_values() - pca_scores2 = self.pca2.data["scores"] * self.pca2.singular_values() - # Apply hilbert transform - pca_scores1 = hilbert_transform( - 
pca_scores1, dims=(sample_name, "mode"), **hilbert_kwargs - ) - pca_scores2 = hilbert_transform( - pca_scores2, dims=(sample_name, "mode"), **hilbert_kwargs - ) - # Compute the cross-covariance matrix of the PCA scores - pca_scores1 = pca_scores1.rename({"mode": "feature_temp1"}) - pca_scores2 = pca_scores2.rename({"mode": "feature_temp2"}) - cov_matrix = self._compute_cross_covariance_matrix(pca_scores1, pca_scores2) - - # Perform the SVD - decomposer.fit(cov_matrix, dims=("feature_temp1", "feature_temp2")) - V1 = decomposer.U_ # left singular vectors (feature_temp1 x mode) - V2 = decomposer.V_ # right singular vectors (feature_temp2 x mode) - - # left and right PCA eigenvectors (feature_name x mode) - V1pre = self.pca1.data["components"] - V2pre = self.pca2.data["components"] - - # Compute the singular vectors - V1pre = V1pre.rename({"mode": "feature_temp1"}) - V2pre = V2pre.rename({"mode": "feature_temp2"}) - singular_vectors1 = xr.dot(V1pre, V1, dims="feature_temp1") - singular_vectors2 = xr.dot(V2pre, V2, dims="feature_temp2") - - # Perform SVD directly on data - else: - # Perform Hilbert transform - data1 = hilbert_transform( - data1, dims=(sample_name, feature_name), **hilbert_kwargs - ) - data2 = hilbert_transform( - data2, dims=(sample_name, feature_name), **hilbert_kwargs - ) - # Rename feature and associated dimensions of data objects to avoid index conflicts - dim_renamer1 = DimensionRenamer(feature_name, "1") - dim_renamer2 = DimensionRenamer(feature_name, "2") - data1_temp = dim_renamer1.fit_transform(data1) - data2_temp = dim_renamer2.fit_transform(data2) - # Compute the cross-covariance matrix - cov_matrix = self._compute_cross_covariance_matrix(data1_temp, data2_temp) - - # Perform the SVD - decomposer.fit(cov_matrix, dims=("feature1", "feature2")) - singular_vectors1 = decomposer.U_ - singular_vectors2 = decomposer.V_ - - # Rename the singular vectors - singular_vectors1 = dim_renamer1.inverse_transform(singular_vectors1) - singular_vectors2 = dim_renamer2.inverse_transform(singular_vectors2) - - # Store the results - singular_values = decomposer.s_ - - # Compute total squared variance - squared_covariance = singular_values**2 - total_squared_covariance = (abs(cov_matrix) ** 2).sum() - - norm1 = np.sqrt(singular_values) - norm2 = np.sqrt(singular_values) - - # Index of the sorted squared covariance - idx_sorted_modes = argsort_dask(squared_covariance, "mode")[::-1] - idx_sorted_modes.coords.update(squared_covariance.coords) - - # Project the data onto the singular vectors - scores1 = xr.dot(data1, singular_vectors1) / norm1 - scores2 = xr.dot(data2, singular_vectors2) / norm2 - - self.data.add(name="input_data1", data=data1, allow_compute=False) - self.data.add(name="input_data2", data=data2, allow_compute=False) - self.data.add(name="components1", data=singular_vectors1) - self.data.add(name="components2", data=singular_vectors2) - self.data.add(name="scores1", data=scores1) - self.data.add(name="scores2", data=scores2) - self.data.add(name="squared_covariance", data=squared_covariance) - self.data.add(name="total_squared_covariance", data=total_squared_covariance) - self.data.add(name="idx_modes_sorted", data=idx_sorted_modes) - self.data.add(name="norm1", data=norm1) - self.data.add(name="norm2", data=norm2) - - # Assign analysis relevant meta data - self.data.set_attrs(self.attrs) - return self - - def components_amplitude(self) -> Tuple[DataObject, DataObject]: - """Compute the amplitude of the components. - - The amplitude of the components are defined as - - .. 
math:: - A_ij = |C_ij| - - where :math:`C_{ij}` is the :math:`i`-th entry of the :math:`j`-th component and - :math:`|\\cdot|` denotes the absolute value. - - Returns - ------- - DataObject - Amplitude of the left components. - DataObject - Amplitude of the left components. - - """ - comps1 = abs(self.data["components1"]) - comps1.name = "left_components_amplitude" - - comps2 = abs(self.data["components2"]) - comps2.name = "right_components_amplitude" - - comps1 = self.preprocessor1.inverse_transform_components(comps1) - comps2 = self.preprocessor2.inverse_transform_components(comps2) - - return (comps1, comps2) - - def components_phase(self) -> Tuple[DataObject, DataObject]: - """Compute the phase of the components. - - The phase of the components are defined as - - .. math:: - \\phi_{ij} = \\arg(C_{ij}) - - where :math:`C_{ij}` is the :math:`i`-th entry of the :math:`j`-th component and - :math:`\\arg(\\cdot)` denotes the argument of a complex number. - - Returns - ------- - DataObject - Phase of the left components. - DataObject - Phase of the right components. - - """ - comps1 = xr.apply_ufunc(np.angle, self.data["components1"], keep_attrs=True) - comps1.name = "left_components_phase" - - comps2 = xr.apply_ufunc(np.angle, self.data["components2"], keep_attrs=True) - comps2.name = "right_components_phase" - - comps1 = self.preprocessor1.inverse_transform_components(comps1) - comps2 = self.preprocessor2.inverse_transform_components(comps2) - - return (comps1, comps2) - - def scores_amplitude(self) -> Tuple[DataArray, DataArray]: - """Compute the amplitude of the scores. - - The amplitude of the scores are defined as - - .. math:: - A_ij = |S_ij| - - where :math:`S_{ij}` is the :math:`i`-th entry of the :math:`j`-th score and - :math:`|\\cdot|` denotes the absolute value. - - Returns - ------- - DataArray - Amplitude of the left scores. - DataArray - Amplitude of the right scores. - - """ - scores1 = abs(self.data["scores1"]) - scores2 = abs(self.data["scores2"]) - - scores1.name = "left_scores_amplitude" - scores2.name = "right_scores_amplitude" - - scores1 = self.preprocessor1.inverse_transform_scores(scores1) - scores2 = self.preprocessor2.inverse_transform_scores(scores2) - return (scores1, scores2) - - def scores_phase(self) -> Tuple[DataArray, DataArray]: - """Compute the phase of the scores. - - The phase of the scores are defined as - - .. math:: - \\phi_{ij} = \\arg(S_{ij}) - - where :math:`S_{ij}` is the :math:`i`-th entry of the :math:`j`-th score and - :math:`\\arg(\\cdot)` denotes the argument of a complex number. - - Returns - ------- - DataArray - Phase of the left scores. - DataArray - Phase of the right scores. 
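- 
- As a minimal sketch of how amplitude and phase relate to a single complex score entry (the value is purely illustrative):
- 
- >>> import numpy as np
- >>> s = 1.0 + 1.0j         # one complex score entry
- >>> amplitude = np.abs(s)  # sqrt(2)
- >>> phase = np.angle(s)    # pi/4 radians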
- - """ - scores1 = xr.apply_ufunc(np.angle, self.data["scores1"], keep_attrs=True) - scores2 = xr.apply_ufunc(np.angle, self.data["scores2"], keep_attrs=True) - - scores1.name = "left_scores_phase" - scores2.name = "right_scores_phase" - - scores1 = self.preprocessor1.inverse_transform_scores(scores1) - scores2 = self.preprocessor2.inverse_transform_scores(scores2) - - return (scores1, scores2) - - def transform(self, data1: DataObject, data2: DataObject): - raise NotImplementedError("Hilbert MCA does not support transform method.") - - def _inverse_transform_algorithm(self, scores1: DataArray, scores2: DataArray): - data1, data2 = super()._inverse_transform_algorithm(scores1, scores2) - - # Enforce real output - return data1.real, data2.real - - def homogeneous_patterns(self, correction=None, alpha=0.05): - raise NotImplementedError( - "Hilbert MCA does not support homogeneous_patterns method." - ) - - def heterogeneous_patterns(self, correction=None, alpha=0.05): - raise NotImplementedError( - "Hilbert MCA does not support heterogeneous_patterns method." - ) diff --git a/xeofs/multi/__init__.py b/xeofs/multi/__init__.py new file mode 100644 index 00000000..c47cd4bc --- /dev/null +++ b/xeofs/multi/__init__.py @@ -0,0 +1,27 @@ +import warnings + +from .cca import CCA + +__all__ = ["CCA"] + + +DEPRECATED_NAMES = [ + # ("OldClass", "NewClass"), +] + + +def __dir__(): + return sorted(__all__ + [names[0] for names in DEPRECATED_NAMES]) + + +def __getattr__(name): + for old_name, new_name in DEPRECATED_NAMES: + if name == old_name: + msg = ( + f"Class '{old_name}' is deprecated and will be renamed to '{new_name}' in the next major release. " + f"In that release, '{old_name}' will refer to a different class. " + f"Please switch to '{new_name}' to maintain compatibility." 
+ ) + warnings.warn(msg, DeprecationWarning, stacklevel=2) + return globals()[new_name] + raise AttributeError(f"module {__name__} has no attribute {name}") diff --git a/xeofs/models/cca.py b/xeofs/multi/cca.py similarity index 97% rename from xeofs/models/cca.py rename to xeofs/multi/cca.py index 658bfbb9..cfb5bea6 100644 --- a/xeofs/models/cca.py +++ b/xeofs/multi/cca.py @@ -9,7 +9,7 @@ from abc import abstractmethod from datetime import datetime -from typing import Hashable, List, Sequence +from typing import Hashable, Sequence import dask.array as da import numpy as np @@ -21,9 +21,9 @@ from .._version import __version__ from ..preprocessing.preprocessor import Preprocessor +from ..single import EOF from ..utils.data_types import DataArray, DataList, DataObject from ..utils.sanity_checks import assert_not_complex -from .eof import EOF def _check_parameter_number(parameter_name: str, parameter, n_views: int): @@ -143,7 +143,7 @@ def fit( Preprocessor(with_coslat=self.use_coslat[i], **self._preprocessor_kwargs) for i in range(self.n_views_) ] - views2D: List[DataArray] = [ + views2D: list[DataArray] = [ preprocessor.fit_transform(data, dim) for preprocessor, data in zip(self.preprocessors, views) ] @@ -215,7 +215,7 @@ def _apply_pca(self, views: DataList): return view_transformed @abstractmethod - def _fit_algorithm(self, views: List[DataArray]) -> Self: + def _fit_algorithm(self, views: list[DataArray]) -> Self: raise NotImplementedError @@ -272,7 +272,7 @@ class CCA(CCABaseModel): Examples -------- - >>> from xe.models import CCA + >>> from xe.cross import CCA >>> model = CCA(n_modes=5) >>> model.fit(data) >>> can_loadings = model.canonical_loadings() @@ -304,7 +304,7 @@ def __init__( self.c = c self.eps = eps - def _fit_algorithm(self, views: List[DataArray]) -> Self: + def _fit_algorithm(self, views: list[DataArray]) -> Self: # Check input data [assert_not_complex(view) for view in views] @@ -620,26 +620,26 @@ def _apply_smallest_eigval(self, D, dims): def _smallest_eigval(self, D): return min(0, np.linalg.eigvalsh(D).min()) - def weights(self) -> List[DataObject]: + def weights(self) -> list[DataObject]: weights = [ prep.inverse_transform_components(wghts) for prep, wghts in zip(self.preprocessors, self.data["weights"]) ] return weights - def _transform(self, views: Sequence[DataArray]) -> List[DataArray]: + def _transform(self, views: Sequence[DataArray]) -> list[DataArray]: transformed_views = [] for i, view in enumerate(views): transformed_view = xr.dot(view, self.data["weights"][i], dims="feature") transformed_views.append(transformed_view) return transformed_views - def transform(self, views: Sequence[DataObject]) -> List[DataArray]: + def transform(self, views: Sequence[DataObject]) -> list[DataArray]: """Transform the input data into the canonical space. 
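
A minimal usage sketch (assuming a CCA model already fitted on two views; the variable names are illustrative):

>>> variates = model.transform([view1, view2])
>>> len(variates)  # one canonical variate DataArray per view
2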
Parameters ---------- - views : List[DataArray | Dataset] + views : list[DataArray | Dataset] Input data to transform """ @@ -655,7 +655,7 @@ def transform(self, views: Sequence[DataObject]) -> List[DataArray]: unstacked_transformed_views.append(unstacked_view) return unstacked_transformed_views - def components(self, normalize: bool = True) -> List[DataObject]: + def components(self, normalize: bool = True) -> list[DataObject]: """Get the canonical loadings for each view.""" can_loads = self.data["canonical_loadings"] input_data = self.data["input_data"] @@ -681,7 +681,7 @@ def components(self, normalize: bool = True) -> List[DataObject]: ] return loadings - def scores(self) -> List[DataArray]: + def scores(self) -> list[DataArray]: """Get the canonical variates for each view.""" variates = [] for i, view in enumerate(self.data["variates"]): @@ -689,11 +689,11 @@ def scores(self) -> List[DataArray]: variates.append(vari) return variates - def explained_variance(self) -> List[DataArray]: + def explained_variance(self) -> list[DataArray]: """Get the explained variance for each view.""" return self.data["explained_variance"] - def explained_variance_ratio(self) -> List[DataArray]: + def explained_variance_ratio(self) -> list[DataArray]: """Get the explained variance ratio for each view.""" return self.data["explained_variance_ratio"] diff --git a/xeofs/preprocessing/__init__.py b/xeofs/preprocessing/__init__.py index 34883ac9..826949bf 100644 --- a/xeofs/preprocessing/__init__.py +++ b/xeofs/preprocessing/__init__.py @@ -1,17 +1,19 @@ from .concatenator import Concatenator from .dimension_renamer import DimensionRenamer from .multi_index_converter import MultiIndexConverter +from .preprocessor import Preprocessor from .sanitizer import Sanitizer from .scaler import Scaler from .stacker import Stacker from .whitener import Whitener __all__ = [ - "Scaler", - "Sanitizer", - "MultiIndexConverter", - "Stacker", "Concatenator", "DimensionRenamer", + "MultiIndexConverter", + "Preprocessor", + "Sanitizer", + "Scaler", + "Stacker", "Whitener", ] diff --git a/xeofs/preprocessing/concatenator.py b/xeofs/preprocessing/concatenator.py index e8e25c69..94368c57 100644 --- a/xeofs/preprocessing/concatenator.py +++ b/xeofs/preprocessing/concatenator.py @@ -1,15 +1,13 @@ -from typing import List, Optional, Dict -from typing_extensions import Self - import numpy as np import xarray as xr +from typing_extensions import Self -from .transformer import Transformer from ..utils.data_types import ( + DataArray, Dims, DimsList, - DataArray, ) +from .transformer import Transformer class Concatenator(Transformer): @@ -22,7 +20,7 @@ def __init__(self, sample_name: str = "sample", feature_name: str = "feature"): self.n_features = [] self.coords_in = {} - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict( n_data=self.n_data, n_features=self.n_features, @@ -31,9 +29,9 @@ def get_serialization_attrs(self) -> Dict: def fit( self, - X: List[DataArray], - sample_dims: Optional[Dims] = None, - feature_dims: Optional[DimsList] = None, + X: list[DataArray], + sample_dims: Dims | None = None, + feature_dims: DimsList | None = None, ) -> Self: # Check that all inputs are DataArrays if not all([isinstance(data, DataArray) for data in X]): @@ -57,14 +55,14 @@ def fit( return self - def transform(self, X: List[DataArray]) -> DataArray: + def transform(self, X: list[DataArray]) -> DataArray: # Test whether the input list has same length as the number of stackers if len(X) != 
self.n_data: raise ValueError( f"Invalid input. Number of DataArrays ({len(X)}) does not match the number of fitted DataArrays ({self.n_data})." ) - reindexed_data_list: List[DataArray] = [] + reindexed_data_list: list[DataArray] = [] idx_range = np.cumsum([0] + self.n_features) for i, data in enumerate(X): @@ -84,15 +82,15 @@ def transform(self, X: List[DataArray]) -> DataArray: def fit_transform( self, - X: List[DataArray], - sample_dims: Optional[Dims] = None, - feature_dims: Optional[DimsList] = None, + X: list[DataArray], + sample_dims: Dims | None = None, + feature_dims: DimsList | None = None, ) -> DataArray: return self.fit(X, sample_dims, feature_dims).transform(X) - def _split_dataarray_into_list(self, data: DataArray) -> List[DataArray]: + def _split_dataarray_into_list(self, data: DataArray) -> list[DataArray]: feature_name = self.feature_name - data_list: List[DataArray] = [] + data_list: list[DataArray] = [] idx_range = np.cumsum([0] + self.n_features) for i, coords in enumerate(self.coords_in.values()): @@ -106,11 +104,11 @@ def _split_dataarray_into_list(self, data: DataArray) -> List[DataArray]: return data_list - def inverse_transform_data(self, X: DataArray) -> List[DataArray]: + def inverse_transform_data(self, X: DataArray) -> list[DataArray]: """Reshape the 2D data (sample x feature) back into its original shape.""" return self._split_dataarray_into_list(X) - def inverse_transform_components(self, X: DataArray) -> List[DataArray]: + def inverse_transform_components(self, X: DataArray) -> list[DataArray]: """Reshape the 2D components (sample x feature) back into its original shape.""" return self._split_dataarray_into_list(X) diff --git a/xeofs/preprocessing/dimension_renamer.py b/xeofs/preprocessing/dimension_renamer.py index 537c5815..52e4389d 100644 --- a/xeofs/preprocessing/dimension_renamer.py +++ b/xeofs/preprocessing/dimension_renamer.py @@ -1,8 +1,7 @@ -from typing import Dict from typing_extensions import Self +from ..utils.data_types import Data, DataArray, DataVarBound, Dims from .transformer import Transformer -from ..utils.data_types import Dims, DataArray, Data, DataVarBound class DimensionRenamer(Transformer): @@ -23,7 +22,7 @@ def __init__(self, base="dim", start=0): self.start = start self.dim_mapping = {} - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict( dim_mapping=self.dim_mapping, ) diff --git a/xeofs/preprocessing/list_processor.py b/xeofs/preprocessing/list_processor.py index 45fed8b3..6dd1972f 100644 --- a/xeofs/preprocessing/list_processor.py +++ b/xeofs/preprocessing/list_processor.py @@ -1,18 +1,19 @@ -from typing import List, TypeVar, Generic, Type, Dict, Any +from typing import Any, Generic, Type, TypeVar + from typing_extensions import Self -from .dimension_renamer import DimensionRenamer -from .scaler import Scaler -from .sanitizer import Sanitizer -from .multi_index_converter import MultiIndexConverter -from .stacker import Stacker from ..utils.data_types import ( Data, - DataVar, DataArray, + DataVar, Dims, DimsList, ) +from .dimension_renamer import DimensionRenamer +from .multi_index_converter import MultiIndexConverter +from .sanitizer import Sanitizer +from .scaler import Scaler +from .stacker import Stacker T = TypeVar( "T", @@ -33,27 +34,27 @@ class GenericListTransformer(Generic[T]): def __init__(self, transformer: Type[T], **kwargs): self.transformer_class = transformer - self.transformers: List[T] = [] + self.transformers: list[T] = [] self.init_kwargs = kwargs def fit( self, 
- X: List[DataVar], + X: list[DataVar], sample_dims: Dims, feature_dims: DimsList, - iter_kwargs: Dict[str, List[Any]] = {}, + iter_kwargs: dict[str, list[Any]] = {}, ) -> Self: """Fit transformer to each data element in the list. Parameters ---------- - X: List[Data] - List of data elements. + X: list[Data] + list of data elements. sample_dims: Dims Sample dimensions. feature_dims: DimsList Feature dimensions. - iter_kwargs: Dict[str, List[Any]] + iter_kwargs: dict[str, list[Any]] Keyword arguments for the transformer that should be iterated over. """ @@ -70,30 +71,30 @@ def fit( self.transformers.append(proc) return self - def transform(self, X: List[Data]) -> List[Data]: - X_transformed: List[Data] = [] + def transform(self, X: list[Data]) -> list[Data]: + X_transformed: list[Data] = [] for x, proc in zip(X, self.transformers): X_transformed.append(proc.transform(x)) # type: ignore return X_transformed def fit_transform( self, - X: List[Data], + X: list[Data], sample_dims: Dims, feature_dims: DimsList, - iter_kwargs: Dict[str, List[Any]] = {}, - ) -> List[Data]: + iter_kwargs: dict[str, list[Any]] = {}, + ) -> list[Data]: return self.fit(X, sample_dims, feature_dims, iter_kwargs).transform(X) # type: ignore - def inverse_transform_data(self, X: List[Data]) -> List[Data]: - X_inverse_transformed: List[Data] = [] + def inverse_transform_data(self, X: list[Data]) -> list[Data]: + X_inverse_transformed: list[Data] = [] for x, proc in zip(X, self.transformers): x_inv_trans = proc.inverse_transform_data(x) # type: ignore X_inverse_transformed.append(x_inv_trans) return X_inverse_transformed - def inverse_transform_components(self, X: List[Data]) -> List[Data]: - X_inverse_transformed: List[Data] = [] + def inverse_transform_components(self, X: list[Data]) -> list[Data]: + X_inverse_transformed: list[Data] = [] for x, proc in zip(X, self.transformers): x_inv_trans = proc.inverse_transform_components(x) # type: ignore X_inverse_transformed.append(x_inv_trans) diff --git a/xeofs/preprocessing/multi_index_converter.py b/xeofs/preprocessing/multi_index_converter.py index 65c01db4..5a20cc39 100644 --- a/xeofs/preprocessing/multi_index_converter.py +++ b/xeofs/preprocessing/multi_index_converter.py @@ -1,9 +1,8 @@ -from typing import Optional, Dict -from typing_extensions import Self import pandas as pd +from typing_extensions import Self +from ..utils.data_types import Data, DataArray, DataVar, DataVarBound, Dims from .transformer import Transformer -from ..utils.data_types import Dims, DataArray, Data, DataVar, DataVarBound class MultiIndexConverter(Transformer): @@ -15,7 +14,7 @@ def __init__(self): self.coords_from_fit = {} self.coords_from_transform = {} - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict( modified_dimensions=self.modified_dimensions, coords_from_fit=self.coords_from_fit, @@ -25,8 +24,8 @@ def get_serialization_attrs(self) -> Dict: def fit( self, X: Data, - sample_dims: Optional[Dims] = None, - feature_dims: Optional[Dims] = None, + sample_dims: Dims | None = None, + feature_dims: Dims | None = None, **kwargs, ) -> Self: # Store original MultiIndexes @@ -82,44 +81,3 @@ def inverse_transform_scores(self, X: DataArray) -> DataArray: def inverse_transform_scores_unseen(self, X: DataArray) -> DataArray: return self._inverse_transform(X, reference="transform") - - -# class DataListMultiIndexConverter(BaseEstimator, TransformerMixin): -# """Converts MultiIndexes to simple indexes and vice versa.""" - -# def __init__(self): -# 
self.converters: List[MultiIndexConverter] = [] - -# def fit(self, X: List[Data], y=None): -# for x in X: -# converter = MultiIndexConverter() -# converter.fit(x) -# self.converters.append(converter) - -# return self - -# def transform(self, X: List[Data]) -> List[Data]: -# X_transformed: List[Data] = [] -# for x, converter in zip(X, self.converters): -# X_transformed.append(converter.transform(x)) - -# return X_transformed - -# def fit_transform(self, X: List[Data], y=None) -> List[Data]: -# return self.fit(X, y).transform(X) - -# def _inverse_transform(self, X: List[Data]) -> List[Data]: -# X_inverse_transformed: List[Data] = [] -# for x, converter in zip(X, self.converters): -# X_inverse_transformed.append(converter._inverse_transform(x)) - -# return X_inverse_transformed - -# def inverse_transform_data(self, X: List[Data]) -> List[Data]: -# return self._inverse_transform(X) - -# def inverse_transform_components(self, X: List[Data]) -> List[Data]: -# return self._inverse_transform(X) - -# def inverse_transform_scores(self, X: DataArray) -> DataArray: -# return self.converters[0].inverse_transform_scores(X) diff --git a/xeofs/preprocessing/pca_preprocessor.py b/xeofs/preprocessing/pca_preprocessor.py deleted file mode 100644 index 8a091e39..00000000 --- a/xeofs/preprocessing/pca_preprocessor.py +++ /dev/null @@ -1,155 +0,0 @@ -from typing import List, Optional, Tuple - -import numpy as np -from typing_extensions import Self - -from ..utils.data_types import ( - Data, - Dims, - DimsList, -) -from .concatenator import Concatenator -from .dimension_renamer import DimensionRenamer -from .multi_index_converter import MultiIndexConverter -from .preprocessor import Preprocessor -from .sanitizer import Sanitizer -from .scaler import Scaler -from .stacker import Stacker -from .whitener import Whitener - - -def extract_new_dim_names(X: List[DimensionRenamer]) -> Tuple[Dims, DimsList]: - """Extract the new dimension names from a list of DimensionRenamer objects. - - Parameters - ---------- - X : list of DimensionRenamer - List of DimensionRenamer objects. - - Returns - ------- - Dims - Sample dimensions - DimsList - Feature dimenions - - """ - new_sample_dims = [] - new_feature_dims: DimsList = [] - for x in X: - new_sample_dims.append(x.sample_dims_after) - new_feature_dims.append(x.feature_dims_after) - new_sample_dims: Dims = tuple(np.unique(np.asarray(new_sample_dims)).tolist()) - return new_sample_dims, new_feature_dims - - -class PCAPreprocessor(Preprocessor): - """Preprocess xarray objects and transform into (whitened) PC space. - - PCA-Preprocesser includes steps from Preprocessor class: - (i) Feature-wise scaling (e.g. removing mean, dividing by standard deviation, applying (latitude) weights - (ii) Renaming dimensions (to avoid conflicts with sample and feature dimensions) - (iii) Converting MultiIndexes to regular Indexes (MultiIndexes cannot be stacked) - (iv) Stacking the data into 2D DataArray - (v) Converting MultiIndexes introduced by stacking into regular Indexes - (vi) Removing NaNs - (vii) Concatenating the 2D DataArrays into one 2D DataArray - (viii) Transform into whitened PC space - - - Parameters - ---------- - n_modes : int | float - If int, specifies the number of modes to retain. If float, specifies the fraction of variance in the whitened data that should be explained by the retained modes. - alpha : float, default=0.0 - Degree of whitening. If 0, the data is completely whitened. If 1, the data is not whitened. 
- init_rank_reduction : float, default=0.3 - Fraction of the initial rank to reduce the data to before applying PCA. - sample_name : str, default="sample" - Name of the sample dimension. - feature_name : str, default="feature" - Name of the feature dimension. - with_center : bool, default=True - If True, the data is centered by subtracting the mean. - with_std : bool, default=True - If True, the data is divided by the standard deviation. - with_coslat : bool, default=False - If True, the data is multiplied by the square root of cosine of latitude weights. - with_weights : bool, default=False - If True, the data is multiplied by additional user-defined weights. - return_list : bool, default=True - If True, inverse_transform methods returns always a list of DataArray(s). - If False, the output is returned as a single DataArray if possible. - check_nans : bool, default=True - If True, remove full-dimensional NaN features from the data, check to ensure - that NaN features match the original fit data during transform, and check - for isolated NaNs. Note: this forces eager computation of dask arrays. - If False, skip all NaN checks. In this case, NaNs should be explicitly removed - or filled prior to fitting, or SVD will fail. - - """ - - def __init__( - self, - n_modes: int | float, - alpha: float = 1.0, - init_rank_reduction: float = 0.3, - sample_name: str = "sample", - feature_name: str = "feature", - with_center: bool = True, - with_std: bool = False, - with_coslat: bool = False, - return_list: bool = True, - check_nans: bool = True, - compute: bool = True, - ): - super().__init__( - sample_name=sample_name, - feature_name=feature_name, - with_center=with_center, - with_std=with_std, - with_coslat=with_coslat, - return_list=return_list, - check_nans=check_nans, - compute=compute, - ) - - # Set parameters - self.n_modes = n_modes - self.alpha = alpha - self.init_rank_reduction = init_rank_reduction - - # 8 | PCA-Whitener - self.whitener = Whitener( - n_modes=self.n_modes, - init_rank_reduction=self.init_rank_reduction, - alpha=self.alpha, - sample_name=self.sample_name, - feature_name=self.feature_name, - ) - - def transformer_types(self): - """Ordered list of transformer operations.""" - return dict( - scaler=Scaler, - renamer=DimensionRenamer, - preconverter=MultiIndexConverter, - stacker=Stacker, - postconverter=MultiIndexConverter, - sanitizer=Sanitizer, - concatenator=Concatenator, - whitener=Whitener, - ) - - def _fit_algorithm( - self, - X: List[Data] | Data, - sample_dims: Dims, - weights: Optional[List[Data] | Data] = None, - ) -> Tuple[Self, Data]: - _, X = super()._fit_algorithm(X, sample_dims, weights) - - # 8 | PCA-Whitening - X = self.whitener.fit_transform(X) # type: ignore - - return self, X diff --git a/xeofs/preprocessing/preprocessor.py b/xeofs/preprocessing/preprocessor.py index 0cbe1388..d2583239 100644 --- a/xeofs/preprocessing/preprocessor.py +++ b/xeofs/preprocessing/preprocessor.py @@ -1,5 +1,3 @@ -from typing import Dict, List, Optional, Tuple - import numpy as np from typing_extensions import Self @@ -31,13 +29,13 @@ from datatree import DataTree -def extract_new_dim_names(X: List[DimensionRenamer]) -> Tuple[Dims, DimsList]: +def extract_new_dim_names(X: list[DimensionRenamer]) -> tuple[Dims, DimsList]: """Extract the new dimension names from a list of DimensionRenamer objects. Parameters ---------- X : list of DimensionRenamer - List of DimensionRenamer objects. + list of DimensionRenamer objects. 
Returns ------- @@ -147,7 +145,7 @@ def __init__( # 7 | Concatenate into one 2D DataArray self.concatenator = Concatenator(**dim_names_as_kwargs) - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict(n_data=self.n_data) def transformer_types(self): @@ -170,9 +168,9 @@ def get_transformers(self, inverse: bool = False): def fit( self, - X: List[Data] | Data, + X: list[Data] | Data, sample_dims: Dims, - weights: Optional[List[Data] | Data] = None, + weights: list[Data] | Data | None = None, ) -> Self: """Fit the preprocessor to the data. @@ -196,10 +194,10 @@ def fit( def _fit_algorithm( self, - X: List[Data] | Data, + X: list[Data] | Data, sample_dims: Dims, - weights: Optional[List[Data] | Data] = None, - ) -> Tuple[Self, Data]: + weights: list[Data] | Data | None = None, + ) -> tuple[Self, Data]: self._set_return_list(X) X = convert_to_list(X) self.n_data = len(X) @@ -235,7 +233,7 @@ def _fit_algorithm( return self, X - def transform(self, X: List[Data] | Data) -> DataArray: + def transform(self, X: list[Data] | Data) -> DataArray: """Transform the data. Parameters @@ -266,16 +264,16 @@ def transform(self, X: List[Data] | Data) -> DataArray: def fit_transform( self, - X: List[Data] | Data, + X: list[Data] | Data, sample_dims: Dims, - weights: Optional[List[Data] | Data] = None, + weights: list[Data] | Data | None = None, ) -> DataArray: # Take advantage of the fact that `.fit()` already transforms the data # to avoid duplicate computation self, X = self._fit_algorithm(X, sample_dims, weights) return X - def inverse_transform_data(self, X: DataArray) -> List[Data] | Data: + def inverse_transform_data(self, X: DataArray) -> list[Data] | Data: """Inverse transform the data. Parameters: @@ -295,7 +293,7 @@ def inverse_transform_data(self, X: DataArray) -> List[Data] | Data: return self._process_output(X_it) - def inverse_transform_components(self, X: DataArray) -> List[Data] | Data: + def inverse_transform_components(self, X: DataArray) -> list[Data] | Data: """Inverse transform the components. 
Parameters: @@ -359,7 +357,7 @@ def inverse_transform_scores_unseen(self, X: DataArray) -> DataArray: return X_it - def _process_output(self, X: List[Data]) -> List[Data] | Data: + def _process_output(self, X: list[Data]) -> list[Data] | Data: if self.return_list: return X else: diff --git a/xeofs/preprocessing/sanitizer.py b/xeofs/preprocessing/sanitizer.py index 1611b17b..674ff9cd 100644 --- a/xeofs/preprocessing/sanitizer.py +++ b/xeofs/preprocessing/sanitizer.py @@ -1,5 +1,3 @@ -from typing import Dict, Optional - import xarray as xr from dask.base import compute from typing_extensions import Self @@ -24,7 +22,7 @@ def __init__(self, sample_name="sample", feature_name="feature", check_nans=True self.sample_coords = xr.DataArray() self.is_valid_feature = xr.DataArray() - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict( feature_coords=self.feature_coords, sample_coords=self.sample_coords, @@ -60,8 +58,8 @@ def _get_valid_features_per_sample(self, X: Data) -> Data: def fit( self, X: Data, - sample_dims: Optional[Dims] = None, - feature_dims: Optional[Dims] = None, + sample_dims: Dims | None = None, + feature_dims: Dims | None = None, **kwargs, ) -> Self: # Check if input is a DataArray diff --git a/xeofs/preprocessing/scaler.py b/xeofs/preprocessing/scaler.py index c2d4284d..ed194bcd 100644 --- a/xeofs/preprocessing/scaler.py +++ b/xeofs/preprocessing/scaler.py @@ -1,13 +1,11 @@ -from typing import Optional, Dict -from typing_extensions import Self - import dask import numpy as np import xarray as xr +from typing_extensions import Self -from .transformer import Transformer -from ..utils.data_types import Dims, DataArray, DataVar, DataVarBound +from ..utils.data_types import DataArray, DataVar, DataVarBound, Dims from ..utils.xarray_utils import compute_sqrt_cos_lat_weights, feature_ones_like +from .transformer import Transformer class Scaler(Transformer): @@ -48,7 +46,7 @@ def __init__( self.coslat_weights_ = xr.DataArray(name="coslat_weights_") self.weights_ = xr.DataArray(name="weights_") - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict( mean_=self.mean_, std_=self.std_, @@ -73,7 +71,7 @@ def fit( X: DataVar, sample_dims: Dims, feature_dims: Dims, - weights: Optional[DataVar] = None, + weights: DataVar | None = None, ) -> Self: """Fit the scaler to the data. @@ -160,7 +158,7 @@ def fit_transform( X: DataVarBound, sample_dims: Dims, feature_dims: Dims, - weights: Optional[DataVarBound] = None, + weights: DataVarBound | None = None, ) -> DataVarBound: return self.fit(X, sample_dims, feature_dims, weights).transform(X) diff --git a/xeofs/preprocessing/stacker.py b/xeofs/preprocessing/stacker.py index 76838bae..7a07243c 100644 --- a/xeofs/preprocessing/stacker.py +++ b/xeofs/preprocessing/stacker.py @@ -1,11 +1,9 @@ -from typing import Dict -from typing_extensions import Self - import pandas as pd import xarray as xr +from typing_extensions import Self +from ..utils.data_types import Data, DataArray, DataSet, DataVar, DataVarBound, Dims from .transformer import Transformer -from ..utils.data_types import Dims, DataArray, DataSet, Data, DataVar, DataVarBound class Stacker(Transformer): @@ -24,15 +22,15 @@ class Stacker(Transformer): The name of the sample dimension (dim=0). feature_name : str The name of the feature dimension (dim=1). - dims_in : Tuple[str] + dims_in : tuple[str] The dimensions of the input data. 
- dims_out : Tuple[str] + dims_out : tuple[str] The dimensions of the output data. - dims_mapping : Dict[str, Tuple[str]] + dims_mapping : dict[str, tuple[str]] The mapping between the input and output dimensions. - coords_in : Dict[str, xr.Coordinates] + coords_in : dict[str, xr.Coordinates] The coordinates of the input data. - coords_out : Dict[str, xr.Coordinates] + coords_out : dict[str, xr.Coordinates] The coordinates of the output data. """ @@ -51,7 +49,7 @@ def __init__( self.coords_out = {} self.data_type = None - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict( dims_in=self.dims_in, dims_out=self.dims_out, diff --git a/xeofs/preprocessing/transformer.py b/xeofs/preprocessing/transformer.py index 5ff39824..6c7935ab 100644 --- a/xeofs/preprocessing/transformer.py +++ b/xeofs/preprocessing/transformer.py @@ -1,18 +1,16 @@ -from abc import ABC -from typing import Optional, Dict -from typing_extensions import Self -from abc import abstractmethod +from abc import ABC, abstractmethod import pandas as pd import xarray as xr from sklearn.base import BaseEstimator, TransformerMixin +from typing_extensions import Self try: from xarray.core.datatree import DataTree except ImportError: from datatree import DataTree -from ..utils.data_types import Dims, DataArray, DataSet, Data +from ..utils.data_types import Data, DataArray, DataSet, Dims class Transformer(BaseEstimator, TransformerMixin, ABC): @@ -30,7 +28,7 @@ def __init__( self.feature_name = feature_name @abstractmethod - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: """Return a dictionary containing the attributes that need to be serialized as part of a saved transformer. @@ -46,8 +44,8 @@ def get_serialization_attrs(self) -> Dict: def fit( self, X: Data, - sample_dims: Optional[Dims] = None, - feature_dims: Optional[Dims] = None, + sample_dims: Dims | None = None, + feature_dims: Dims | None = None, **kwargs, ) -> Self: """Fit transformer to data. @@ -70,8 +68,8 @@ def transform(self, X: Data) -> Data: def fit_transform( self, X: Data, - sample_dims: Optional[Dims] = None, - feature_dims: Optional[Dims] = None, + sample_dims: Dims | None = None, + feature_dims: Dims | None = None, **kwargs, ) -> Data: return self.fit(X, sample_dims, feature_dims, **kwargs).transform(X) diff --git a/xeofs/preprocessing/whitener.py b/xeofs/preprocessing/whitener.py index c81c60e3..4bf76600 100644 --- a/xeofs/preprocessing/whitener.py +++ b/xeofs/preprocessing/whitener.py @@ -1,17 +1,16 @@ import warnings -from typing import Dict, Optional import numpy as np import xarray as xr from typing_extensions import Self -from ..models.decomposer import Decomposer +from ..linalg._numpy import _fractional_matrix_power +from ..linalg.svd import SVD from ..utils.data_types import ( DataArray, Dims, DimsList, ) -from ..utils.linalg import fractional_matrix_power from ..utils.sanity_checks import assert_single_dataarray from .transformer import Transformer @@ -33,11 +32,15 @@ class Whitener(Transformer): If int, number of components to keep. If float, fraction of variance to keep. If `n_modes="all"`, keep all components. init_rank_reduction: float, default=0.3 Used only when `n_modes` is given as a float. Specifiy the initial PCA rank reduction before truncating the solution to the desired fraction of explained variance. Must be in the half open interval ]0, 1]. Lower values will speed up the computation. 
+ compute_svd: bool, default=False + Whether to perform eager or lazy computation. sample_name: str, default="sample" Name of the sample dimension. feature_name: str, default="feature" Name of the feature dimension. - solver_kwargs: Dict + random_state: np.random.Generator | int | None, default=None + Random seed for reproducibility. + solver_kwargs: dict Additional keyword arguments for the SVD solver. """ @@ -48,9 +51,11 @@ def __init__( use_pca: bool = False, n_modes: int | float | str = "all", init_rank_reduction: float = 0.3, + compute_svd: bool = False, sample_name: str = "sample", feature_name: str = "feature", - solver_kwargs: Dict = {}, + random_state: np.random.Generator | int | None = None, + solver_kwargs: dict = {}, ): super().__init__(sample_name, feature_name) @@ -64,6 +69,8 @@ def __init__( self.use_pca = use_pca self.n_modes = n_modes self.init_rank_reduction = init_rank_reduction + self.compute_svd = compute_svd + self.random_state = random_state self.solver_kwargs = solver_kwargs # Check whether Whitener is identity transformation @@ -90,7 +97,7 @@ def _sanity_check_input(self, X) -> None: ) ) - def _get_n_modes(self, X: xr.DataArray) -> int | float: + def _get_n_modes(self, X: DataArray) -> int | float: if isinstance(self.n_modes, str): if self.n_modes == "all": return min(X.shape) @@ -99,7 +106,7 @@ def _get_n_modes(self, X: xr.DataArray) -> int | float: else: return self.n_modes - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict( alpha=self.alpha, n_modes=self.n_modes, @@ -114,9 +121,9 @@ def get_serialization_attrs(self) -> Dict: def fit( self, - X: xr.DataArray, - sample_dims: Optional[Dims] = None, - feature_dims: Optional[DimsList] = None, + X: DataArray, + sample_dims: Dims | None = None, + feature_dims: DimsList | None = None, ) -> Self: self._sanity_check_input(X) n_samples, n_features = X.shape @@ -132,16 +139,18 @@ def fit( # In case of "all" modes to the rank of the input data self.n_modes = self._get_n_modes(X) - decomposer = Decomposer( + svd = SVD( n_modes=self.n_modes, init_rank_reduction=self.init_rank_reduction, + compute=self.compute_svd, + random_state=self.random_state, + sample_name=self.sample_name, + feature_name=self.feature_name, **self.solver_kwargs, ) - decomposer.fit(X, dims=(self.sample_name, self.feature_name)) - s = decomposer.s_ - V = decomposer.V_ + _, s, V = svd.fit_transform(X) - n_c = np.sqrt(n_samples - 1) + n_c: float = np.sqrt(n_samples - 1) self.T: DataArray = V * (s / n_c) ** (self.alpha - 1) self.Tinv = (s / n_c) ** (1 - self.alpha) * V.conj().T self.s = s @@ -158,9 +167,7 @@ def fit( return self - def _compute_whitener_transform( - self, X: xr.DataArray - ) -> tuple[DataArray, DataArray]: + def _compute_whitener_transform(self, X: DataArray) -> tuple[DataArray, DataArray]: T, Tinv = xr.apply_ufunc( self._compute_whitener_transform_numpy, X, @@ -176,27 +183,12 @@ def _compute_whitener_transform_numpy(self, X): nc = X.shape[0] - 1 C = X.conj().T @ X / nc power = (self.alpha - 1) / 2 - T = fractional_matrix_power(C, power) + svd_kwargs = {"random_state": self.random_state} + T = _fractional_matrix_power(C, power, **svd_kwargs) Tinv = np.linalg.inv(T) return T, Tinv - def _fractional_matrix_power(self, C, power): - V, s, _ = np.linalg.svd(C) - - # cut off small singular values - is_above_zero = s > np.finfo(s.dtype).eps - V = V[:, is_above_zero] - s = s[is_above_zero] - - # TODO: use hermitian=True for numpy>=2.0 - # V, s, _ = np.linalg.svd(C, hermitian=True) - C_scaled = V @ 
np.diag(s**power) @ V.conj().T - if np.iscomplexobj(C): - return C_scaled - else: - return C_scaled.real - - def get_Tinv(self, unwhiten_only=False) -> xr.DataArray: + def get_Tinv(self, unwhiten_only=False) -> DataArray: """Get the inverse transformation to unwhiten the data without PC transform. In contrast to `inverse_transform()`, this method returns the inverse transformation matrix without the PC transformation. That is, for PC transormed data this transformation only unwhitens the data without transforming back into the input space. For non-PC transformed data, this transformation is equivalent to the inverse transformation. @@ -208,7 +200,7 @@ def get_Tinv(self, unwhiten_only=False) -> xr.DataArray: np.diag, Tinv, input_core_dims=[["mode"]], - output_core_dims=[[self.feature_name, "mode"]], + output_core_dims=[["mode", self.feature_name]], dask="allowed", ) Tinv = Tinv.assign_coords({self.feature_name: self.s.coords["mode"].data}) @@ -216,7 +208,7 @@ def get_Tinv(self, unwhiten_only=False) -> xr.DataArray: else: return self.Tinv - def transform(self, X: xr.DataArray) -> DataArray: + def transform(self, X: DataArray) -> DataArray: """Transform new data into the fractional whitened PC space.""" self._sanity_check_input(X) @@ -229,9 +221,9 @@ def transform(self, X: xr.DataArray) -> DataArray: def fit_transform( self, - X: xr.DataArray, - sample_dims: Optional[Dims] = None, - feature_dims: Optional[DimsList] = None, + X: DataArray, + sample_dims: Dims | None = None, + feature_dims: DimsList | None = None, ) -> DataArray: return self.fit(X, sample_dims, feature_dims).transform(X) @@ -240,7 +232,7 @@ def inverse_transform_data(self, X: DataArray, unwhiten_only=False) -> DataArray Parameters ---------- - X: xr.DataArray + X: DataArray Data to transform back into original space. unwhiten_only: bool, default=False If True, only unwhiten the data without transforming back into the input space. This is useful when the data was transformed with PCA before whitening and you need the unwhitened data in the PC space. 
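For reference, the fractional whitening applied by `Whitener` can be reproduced in a few lines of NumPy. The sketch below mirrors the helper that this diff relocates to `xeofs.linalg._numpy._fractional_matrix_power`; the toy data, seed, and tolerance are illustrative only.

```python
# Illustrative NumPy sketch of fractional whitening (alpha=0: full whitening,
# alpha=1: no whitening). Not the library code itself.
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((500, 3)) @ rng.standard_normal((3, 3))  # correlated toy data
X -= X.mean(axis=0)

def fractional_matrix_power(C, power):
    # Same idea as the helper removed above: SVD of the covariance matrix,
    # drop near-zero singular values, rescale the spectrum, and rebuild.
    V, s, _ = np.linalg.svd(C)
    keep = s > np.finfo(s.dtype).eps
    V, s = V[:, keep], s[keep]
    return V @ np.diag(s**power) @ V.conj().T

alpha = 0.0                                    # full whitening
C = X.conj().T @ X / (X.shape[0] - 1)          # sample covariance
T = fractional_matrix_power(C, (alpha - 1) / 2)
Z = X @ T                                      # whitened data
print(np.allclose(Z.conj().T @ Z / (X.shape[0] - 1), np.eye(3)))  # True
```

With `alpha=1` (and `use_pca=False`) the exponent is zero and the transform reduces to the identity, which is presumably what the `is_identity` shortcut in the class captures.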
@@ -252,6 +244,18 @@ def inverse_transform_data(self, X: DataArray, unwhiten_only=False) -> DataArray X = X.rename({self.feature_name: "mode"}) return xr.dot(X, T_inv, dims="mode") + def transform_components(self, X: DataArray) -> DataArray: + """Transform 2D components (feature x mode) into whitened PC space.""" + + if self.is_identity: + return X + else: + dummy_dim = "dummy_dim" + VS = self.T.conj().T + VS = VS.rename({"mode": dummy_dim}) + transformed = xr.dot(VS, X, dims=self.feature_name) + return transformed.rename({dummy_dim: self.feature_name}) + def inverse_transform_components(self, X: DataArray) -> DataArray: """Transform 2D components (feature x mode) from whitened PC space back into original space.""" diff --git a/xeofs/models/rotator_factory.py b/xeofs/rotator_factory.py similarity index 90% rename from xeofs/models/rotator_factory.py rename to xeofs/rotator_factory.py index 5a7a04f0..dda8d0ab 100644 --- a/xeofs/models/rotator_factory.py +++ b/xeofs/rotator_factory.py @@ -1,7 +1,5 @@ -from .eof import EOF, HilbertEOF -from .eof_rotator import EOFRotator, HilbertEOFRotator -from .mca import MCA, HilbertMCA -from .mca_rotator import HilbertMCARotator, MCARotator +from .cross import MCA, HilbertMCA, HilbertMCARotator, MCARotator +from .single import EOF, EOFRotator, HilbertEOF, HilbertEOFRotator class RotatorFactory: diff --git a/xeofs/models/__init__.py b/xeofs/single/__init__.py similarity index 65% rename from xeofs/models/__init__.py rename to xeofs/single/__init__.py index b3888949..6acd8c3c 100644 --- a/xeofs/models/__init__.py +++ b/xeofs/single/__init__.py @@ -1,39 +1,30 @@ import warnings -from .cca import CCA from .eeof import ExtendedEOF -from .eof import EOF, HilbertEOF -from .eof_rotator import EOFRotator, HilbertEOFRotator +from .eof import EOF, ComplexEOF, HilbertEOF +from .eof_rotator import ComplexEOFRotator, EOFRotator, HilbertEOFRotator from .gwpca import GWPCA -from .mca import MCA, HilbertMCA -from .mca_rotator import HilbertMCARotator, MCARotator from .opa import OPA -from .rotator_factory import RotatorFactory +from .pop import POP from .sparse_pca import SparsePCA __all__ = [ "EOF", - "HilbertEOF", "ExtendedEOF", - "EOFRotator", - "HilbertEOFRotator", + "SparsePCA", + "POP", "OPA", "GWPCA", - "MCA", - "HilbertMCA", - "MCARotator", - "HilbertMCARotator", - "CCA", - "RotatorFactory", - "SparsePCA", + "ComplexEOF", + "HilbertEOF", + "EOFRotator", + "ComplexEOFRotator", + "HilbertEOFRotator", ] DEPRECATED_NAMES = [ - ("ComplexEOF", "HilbertEOF"), - ("ComplexMCA", "HilbertMCA"), - ("ComplexEOFRotator", "HilbertEOFRotator"), - ("ComplexMCARotator", "HilbertMCARotator"), + # ("OldClass", "NewClass"), ] diff --git a/xeofs/single/_numpy/__init__.py b/xeofs/single/_numpy/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/xeofs/models/_np_classes/_sparse_pca.py b/xeofs/single/_numpy/_sparse_pca.py similarity index 99% rename from xeofs/models/_np_classes/_sparse_pca.py rename to xeofs/single/_numpy/_sparse_pca.py index a8376039..c4e2d867 100644 --- a/xeofs/models/_np_classes/_sparse_pca.py +++ b/xeofs/single/_numpy/_sparse_pca.py @@ -619,9 +619,6 @@ def compute_rspca( tol : float, (default ``tol = 1e-5``). Stopping tolerance for reconstruction error. - verbose : bool ``{'True', 'False'}``, optional (default ``verbose = True``). - Display progress. - oversample : integer, optional (default: 10) Controls the oversampling of column space. Increasing this parameter may improve numerical accuracy. 
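A note on the rewritten `xeofs/single/__init__.py` above: the hunk keeps `import warnings` and a now-empty `DEPRECATED_NAMES` table, but the hook that consumes it falls outside the diff. Such tables are typically wired up through a module-level `__getattr__` (PEP 562); the sketch below is illustrative only, and the message wording is not the library's actual text.

```python
# Hedged sketch of how a DEPRECATED_NAMES table is usually consumed via a
# module-level __getattr__ (PEP 562). Names and message are illustrative.
import warnings

DEPRECATED_NAMES = [
    # ("OldClass", "NewClass"),
]

def __getattr__(name):
    for old, new in DEPRECATED_NAMES:
        if name == old:
            warnings.warn(
                f"{old} has been renamed to {new} and will be removed in a future release.",
                DeprecationWarning,
                stacklevel=2,
            )
            return globals()[new]
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```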
diff --git a/xeofs/models/_base_model.py b/xeofs/single/base_model_single_set.py similarity index 55% rename from xeofs/models/_base_model.py rename to xeofs/single/base_model_single_set.py index ff4e78b7..c4b5fdc9 100644 --- a/xeofs/models/_base_model.py +++ b/xeofs/single/base_model_single_set.py @@ -1,46 +1,25 @@ -import warnings -from abc import ABC, abstractmethod -from datetime import datetime +from abc import abstractmethod from typing import ( - Any, - Dict, Hashable, - List, - Literal, - Optional, Sequence, ) -import dask import xarray as xr -from dask.diagnostics.progress import ProgressBar from typing_extensions import Self -try: - from xarray.core.datatree import DataTree -except ImportError: - from datatree import DataTree - -from .._version import __version__ +from ..base_model import BaseModel from ..data_container import DataContainer from ..preprocessing.preprocessor import Preprocessor -from ..utils.data_types import Data, DataArray, DataObject -from ..utils.io import insert_placeholders, open_model_tree, write_model_tree +from ..utils.data_types import DataArray, DataObject from ..utils.sanity_checks import validate_input_type -from ..utils.xarray_utils import ( - convert_to_dim_type, - data_is_dask, -) - -# Ignore warnings from numpy casting with additional coordinates -warnings.filterwarnings("ignore", message=r"^invalid value encountered in cast*") +from ..utils.xarray_utils import convert_to_dim_type xr.set_options(keep_attrs=True) -class _BaseModel(ABC): +class BaseModelSingleSet(BaseModel): """ - Abstract base class for EOF model. + Abstract base class for single-set models. Parameters ---------- @@ -67,9 +46,7 @@ class _BaseModel(ABC): If True, four pieces of the fit will be computed sequentially: 1) the preprocessor scaler, 2) optional NaN checks, 3) SVD decomposition, 4) scores and components. - verbose: bool, default=False - Whether to show a progress bar when computing the decomposition. - random_state: Optional[int], default=None + random_state: int | None, default=None Seed for the random number generator. solver: {"auto", "full", "randomized"}, default="auto" Solver to use for the SVD computation. 
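The `compute` flag documented in the hunk above controls eager versus deferred evaluation. A hedged usage sketch follows; `X_dask` stands in for any dask-chunked DataArray with a "time" dimension, and it assumes the public `compute()` method removed from this module now lives on the shared `BaseModel`.

```python
# Hedged sketch of deferred computation with dask-backed inputs.
# `X_dask` is a hypothetical chunked xarray DataArray.
import xeofs as xe

model = xe.single.EOF(n_modes=5, compute=False)
model.fit(X_dask, dim="time")  # builds the task graph only; no SVD is evaluated yet
model.compute()                # evaluates all delayed results in one dask pass
scores = model.scores()        # now backed by in-memory arrays
```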
@@ -88,17 +65,12 @@ def __init__( sample_name="sample", feature_name="feature", compute=True, - verbose=False, random_state=None, solver="auto", solver_kwargs={}, ): - if verbose: - warnings.warn( - "The 'verbose' parameter is deprecated and will be removed in a future release.", - category=DeprecationWarning, - stacklevel=3, - ) + super().__init__() + self.n_modes = n_modes self.sample_name = sample_name self.feature_name = feature_name @@ -113,7 +85,6 @@ def __init__( "sample_name": sample_name, "feature_name": feature_name, "random_state": random_state, - "verbose": verbose, "compute": compute, "solver": solver, "solver_kwargs": solver_kwargs, @@ -123,19 +94,11 @@ def __init__( "solver": solver, "random_state": random_state, "compute": compute, - "verbose": verbose, "solver_kwargs": solver_kwargs, } # Define analysis-relevant meta data - self.attrs = {"model": "BaseModel"} - self.attrs.update( - { - "software": "xeofs", - "version": __version__, - "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - } - ) + self.attrs.update({"model": "BaseModelSingleSet"}) self.attrs.update(self._params) # Initialize the Preprocessor to scale and stack the data @@ -151,7 +114,7 @@ def __init__( # Initialize the data container that stores the results self.data = DataContainer() - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict( data=self.data, preprocessor=self.preprocessor, @@ -159,21 +122,21 @@ def get_serialization_attrs(self) -> Dict: def fit( self, - X: List[Data] | Data, + X: DataObject, dim: Sequence[Hashable] | Hashable, - weights: Optional[List[Data] | Data] = None, + weights: DataObject | None = None, ) -> Self: """ Fit the model to the input data. Parameters ---------- - X: DataArray | Dataset | List[DataArray] + X: DataObject Input data. dim: Sequence[Hashable] | Hashable Specify the sample dimensions. The remaining dimensions will be treated as feature dimensions. - weights: Optional[DataArray | Dataset | List[DataArray]] + weights: DataObject | None, default=None Weighting factors for the input data. """ @@ -193,6 +156,7 @@ def fit( if self._params["compute"]: self.data.compute() + self._post_compute() return self @@ -213,14 +177,14 @@ def _fit_algorithm(self, data: DataArray) -> Self: """ raise NotImplementedError - def transform(self, data: List[Data] | Data, normalized=True) -> DataArray: + def transform(self, data: DataObject, normalized=False) -> DataArray: """Project data onto the components. Parameters ---------- - data: DataArray | Dataset | List[DataArray] + data: DataObject Data to be transformed. - normalized: bool, default=True + normalized: bool, default=False Whether to normalize the scores by the L2 norm. Returns @@ -257,9 +221,9 @@ def _transform_algorithm(self, data: DataArray) -> DataArray: def fit_transform( self, - data: List[Data] | Data, + data: DataObject, dim: Sequence[Hashable] | Hashable, - weights: Optional[List[Data] | Data] = None, + weights: DataObject | None = None, **kwargs, ) -> DataArray: """Fit the model to the input data and project the data onto the components. @@ -271,7 +235,7 @@ def fit_transform( dim: Sequence[Hashable] | Hashable Specify the sample dimensions. The remaining dimensions will be treated as feature dimensions. - weights: Optional[DataObject] + weights: DataObject | None, default=None Weighting factors for the input data. **kwargs Additional keyword arguments to pass to the transform method. 
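Note the flipped defaults in the hunks above and below: `transform`, `inverse_transform`, and `scores` now default to `normalized=False`. A hedged usage sketch, with `X` standing in for any DataArray that has a "time" sample dimension:

```python
# Usage sketch of the new `normalized` defaults. `X` is a hypothetical DataArray.
import xeofs as xe

model = xe.single.EOF(n_modes=5)
model.fit(X, dim="time")

raw_scores = model.transform(X)                     # new default: normalized=False
unit_scores = model.transform(X, normalized=True)   # divided by each mode's L2 norm

# Round trips must use matching conventions:
X_rec = model.inverse_transform(raw_scores)                        # defaults now agree
X_rec_unit = model.inverse_transform(unit_scores, normalized=True)
```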
@@ -285,7 +249,7 @@ def fit_transform( return self.fit(data, dim, weights).transform(data, **kwargs) def inverse_transform( - self, scores: DataArray, normalized: bool = True + self, scores: DataArray, normalized: bool = False ) -> DataObject: """Reconstruct the original data from transformed data. @@ -295,12 +259,12 @@ def inverse_transform( Transformed data to be reconstructed. This could be a subset of the `scores` data of a fitted model, or unseen data. Must have a 'mode' dimension. - normalized: bool, default=True + normalized: bool, default=False Whether the scores data have been normalized by the L2 norm. Returns ------- - data: DataArray | Dataset | List[DataArray] + data: DataObject Reconstructed data. """ @@ -322,7 +286,7 @@ def inverse_transform( return self.preprocessor.inverse_transform_data(data_reconstructed) @abstractmethod - def _inverse_transform_algorithm(self, scores: DataObject) -> DataArray: + def _inverse_transform_algorithm(self, scores: DataArray) -> DataArray: """Reconstruct the original data from transformed data. Parameters @@ -340,12 +304,23 @@ def _inverse_transform_algorithm(self, scores: DataObject) -> DataArray: """ raise NotImplementedError - def components(self) -> DataObject: - """Get the components.""" + def components(self, normalized: bool = True) -> DataObject: + """Get the components. + + Parameters + ---------- + normalized: bool, default=True + Whether to normalize the components by the L2 norm. + + """ components = self.data["components"] + if not normalized: + name = components.name + components = components * self.data["norms"] + components.name = name return self.preprocessor.inverse_transform_components(components) - def scores(self, normalized=True) -> DataArray: + def scores(self, normalized: bool = False) -> DataArray: """Get the scores. Parameters @@ -355,151 +330,7 @@ def scores(self, normalized=True) -> DataArray: """ scores = self.data["scores"].copy() if normalized: - attrs = scores.attrs.copy() + name = scores.name scores = scores / self.data["norms"] - scores.attrs.update(attrs) - scores.name = "scores" + scores.name = name return self.preprocessor.inverse_transform_scores(scores) - - def compute(self, verbose: bool = False, **kwargs): - """Compute and load delayed model results. - - Parameters - ---------- - verbose : bool - Whether or not to provide additional information about the computing progress. - **kwargs - Additional keyword arguments to pass to `dask.compute()`. 
- """ - # find and compute all dask arrays simultaneously to allow dask to optimize the - # shared graph and avoid duplicate i/o and computations - dt = self.serialize() - - data_objs = { - k: v - for k, v in dt.to_dict().items() - if data_is_dask(v) and v.attrs.get("allow_compute", True) - } - - if verbose: - with ProgressBar(): - (data_objs,) = dask.compute(data_objs, **kwargs) - else: - (data_objs,) = dask.compute(data_objs, **kwargs) - - for k, v in data_objs.items(): - dt[k] = DataTree(v) - - # then rebuild the trained model from the computed results - self._deserialize_attrs(dt) - - self._post_compute() - - def _post_compute(self): - pass - - def get_params(self) -> Dict[str, Any]: - """Get the model parameters.""" - return self._params - - def serialize(self) -> DataTree: - """Serialize a complete model with its preprocessor.""" - # Create a root node for this object with its params as attrs - ds_root = xr.Dataset(attrs=dict(params=self.get_params())) - dt = DataTree(data=ds_root, name=type(self).__name__) - - # Retrieve the tree representation of each attached object, or set basic attrs - for key, attr in self.get_serialization_attrs().items(): - if hasattr(attr, "serialize"): - dt[key] = attr.serialize() - dt.attrs[key] = "_is_tree" - else: - dt.attrs[key] = attr - - return dt - - def save( - self, - path: str, - overwrite: bool = False, - save_data: bool = False, - engine: Literal["zarr", "netcdf4", "h5netcdf"] = "zarr", - **kwargs, - ): - """Save the model. - - Parameters - ---------- - path : str - Path to save the model. - overwrite: bool, default=False - Whether or not to overwrite the existing path if it already exists. - Ignored unless `engine="zarr"`. - save_data : str - Whether or not to save the full input data along with the fitted components. - engine : {"zarr", "netcdf4", "h5netcdf"}, default="zarr" - Xarray backend engine to use for writing the saved model. - **kwargs - Additional keyword arguments to pass to `DataTree.to_netcdf()` or `DataTree.to_zarr()`. - - """ - self.compute() - - dt = self.serialize() - - # Remove any raw data arrays at this stage - if not save_data: - dt = insert_placeholders(dt) - - write_model_tree(dt, path, overwrite=overwrite, engine=engine, **kwargs) - - @classmethod - def deserialize(cls, dt: DataTree) -> Self: - """Deserialize the model and its preprocessors from a DataTree.""" - # Recreate the model with parameters set by root level attrs - model = cls(**dt.attrs["params"]) - model._deserialize_attrs(dt) - return model - - def _deserialize_attrs(self, dt: DataTree): - """Set the necessary attributes of the model from a DataTree.""" - for key, attr in dt.attrs.items(): - if key == "params": - continue - elif attr == "_is_tree": - deserialized_obj = getattr(self, key).deserialize(dt[key]) - else: - deserialized_obj = attr - setattr(self, key, deserialized_obj) - - @classmethod - def load( - cls, - path: str, - engine: Literal["zarr", "netcdf4", "h5netcdf"] = "zarr", - **kwargs, - ) -> Self: - """Load a saved model. - - Parameters - ---------- - path : str - Path to the saved model. - engine : {"zarr", "netcdf4", "h5netcdf"}, default="zarr" - Xarray backend engine to use for reading the saved model. - **kwargs - Additional keyword arguments to pass to `open_datatree()`. - - Returns - ------- - model : _BaseModel - The loaded model. 
- - """ - dt = open_model_tree(path, engine=engine, **kwargs) - model = cls.deserialize(dt) - return model - - def _validate_loaded_data(self, data: DataArray): - """Optionally check the loaded data for placeholders.""" - pass diff --git a/xeofs/models/eeof.py b/xeofs/single/eeof.py similarity index 97% rename from xeofs/models/eeof.py rename to xeofs/single/eeof.py index 77e6859e..fe007b3e 100644 --- a/xeofs/models/eeof.py +++ b/xeofs/single/eeof.py @@ -1,12 +1,10 @@ -from typing import Optional -from typing_extensions import Self - import numpy as np import xarray as xr +from typing_extensions import Self -from .eof import EOF -from ..utils.data_types import DataArray from ..data_container import DataContainer +from ..utils.data_types import DataArray +from .eof import EOF class ExtendedEOF(EOF): @@ -29,7 +27,7 @@ class ExtendedEOF(EOF): Embedding dimension is the number of dimensions in the delay-coordinate space used to represent the dynamics of the system. It determines the number of delayed copies of the time series that are used to construct the delay-coordinate space. - n_pca_modes : Optional[int] + n_pca_modes : int, optional If provided, the input data is first preprocessed using PCA with the specified number of modes. The EEOF analysis is then performed on the resulting PCA scores. This approach can lead to important computational @@ -45,7 +43,7 @@ class ExtendedEOF(EOF): Examples -------- - >>> from xeofs.models import EEOF + >>> from xeofs.single import EEOF >>> model = EEOF(n_modes=5, tau=1, embedding=20, n_pca_modes=20) >>> model.fit(data, dim=("time")) @@ -64,7 +62,7 @@ def __init__( n_modes: int, tau: int, embedding: int, - n_pca_modes: Optional[int] = None, + n_pca_modes: int | None = None, center: bool = True, standardize: bool = False, use_coslat: bool = False, @@ -73,7 +71,7 @@ def __init__( feature_name: str = "feature", compute: bool = True, solver: str = "auto", - random_state: Optional[int] = None, + random_state: int | None = None, solver_kwargs: dict = {}, **kwargs, ): diff --git a/xeofs/models/eof.py b/xeofs/single/eof.py similarity index 71% rename from xeofs/models/eof.py rename to xeofs/single/eof.py index d100c971..0d94be62 100644 --- a/xeofs/models/eof.py +++ b/xeofs/single/eof.py @@ -1,18 +1,17 @@ -from typing import Dict, Optional +import warnings import numpy as np import xarray as xr from typing_extensions import Self +from ..linalg.decomposer import Decomposer from ..utils.data_types import DataArray, DataObject from ..utils.hilbert_transform import hilbert_transform -from ..utils.sanity_checks import assert_not_complex from ..utils.xarray_utils import total_variance as compute_total_variance -from ._base_model import _BaseModel -from .decomposer import Decomposer +from .base_model_single_set import BaseModelSingleSet -class EOF(_BaseModel): +class EOF(BaseModelSingleSet): """EOF analysis. Empirical Orthogonal Functions (EOF) analysis, more commonly known @@ -37,9 +36,7 @@ class EOF(_BaseModel): If True, four pieces of the fit will be computed sequentially: 1) the preprocessor scaler, 2) optional NaN checks, 3) SVD decomposition, 4) scores and components. - verbose: bool, default=False - Whether to show a progress bar when computing the decomposition. - random_state : Optional[int], default=None + random_state : int, optional Seed for the random number generator. solver: {"auto", "full", "randomized"}, default="auto" Solver to use for the SVD computation. 
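Before the `EOF` hunks continue, a note on the `ExtendedEOF` parameters touched above: `tau` and `embedding` define a delay-coordinate space built from lagged copies of the time series. The helper below is only meant to make those two parameters concrete; it is an illustrative construction, not the library's internal implementation.

```python
# Illustrative sketch of the delay embedding implied by ExtendedEOF's
# `tau` and `embedding` parameters (xeofs builds this internally).
import numpy as np
import xarray as xr

def delay_embed(da: xr.DataArray, tau: int, embedding: int) -> xr.DataArray:
    """Stack `embedding` lagged copies of `da`, each shifted by a multiple of `tau`."""
    lags = [da.shift(time=-k * tau) for k in range(embedding)]
    emb = xr.concat(lags, dim="embedding")
    emb = emb.assign_coords(embedding=np.arange(embedding) * tau)
    # Drop time steps at the end that no longer have all delayed copies
    return emb.dropna("time", how="any")

# e.g. delay_embed(sst_anomalies, tau=1, embedding=20) for a hypothetical
# anomaly field with a "time" dimension
```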
@@ -48,8 +45,8 @@ class EOF(_BaseModel): Examples -------- - >>> model = xe.models.EOF(n_modes=5) - >>> model.fit(data) + >>> model = xe.single.EOF(n_modes=5) + >>> model.fit(X) >>> scores = model.scores() """ @@ -64,10 +61,9 @@ def __init__( sample_name: str = "sample", feature_name: str = "feature", compute: bool = True, - verbose: bool = False, - random_state: Optional[int] = None, + random_state: int | None = None, solver: str = "auto", - solver_kwargs: Dict = {}, + solver_kwargs: dict = {}, **kwargs, ): super().__init__( @@ -79,7 +75,6 @@ def __init__( sample_name=sample_name, feature_name=feature_name, compute=compute, - verbose=verbose, random_state=random_state, solver=solver, solver_kwargs=solver_kwargs, @@ -87,16 +82,19 @@ def __init__( ) self.attrs.update({"model": "EOF analysis"}) - def _fit_algorithm(self, data: DataArray) -> Self: + def _fit_algorithm(self, X: DataArray) -> Self: sample_name = self.sample_name feature_name = self.feature_name + # Augment the data + X = self._augment_data(X) + # Compute the total variance - total_variance = compute_total_variance(data, dim=sample_name) + total_variance = compute_total_variance(X, dim=sample_name) # Decompose the data decomposer = Decomposer(**self._decomposer_kwargs) - decomposer.fit(data, dims=(sample_name, feature_name)) + decomposer.fit(X, dims=(sample_name, feature_name)) singular_values = decomposer.s_ components = decomposer.V_ @@ -104,12 +102,12 @@ def _fit_algorithm(self, data: DataArray) -> Self: scores.name = "scores" # Compute the explained variance per mode - n_samples = data.coords[self.sample_name].size + n_samples = X.coords[self.sample_name].size exp_var = singular_values**2 / (n_samples - 1) exp_var.name = "explained_variance" # Store the results - self.data.add(data, "input_data", allow_compute=False) + self.data.add(X, "input_data", allow_compute=False) self.data.add(components, "components") self.data.add(scores, "scores") self.data.add(singular_values, "norms") @@ -119,13 +117,16 @@ def _fit_algorithm(self, data: DataArray) -> Self: self.data.set_attrs(self.attrs) return self - def _transform_algorithm(self, data: DataObject) -> DataArray: + def _augment_data(self, X: DataArray) -> DataArray: + return X + + def _transform_algorithm(self, X: DataObject) -> DataArray: feature_name = self.preprocessor.feature_name components = self.data["components"] # Project the data - projections = xr.dot(data, components, dims=feature_name) + projections = xr.dot(X, components, dims=feature_name) projections.name = "scores" return projections @@ -142,7 +143,7 @@ def _inverse_transform_algorithm(self, scores: DataArray) -> DataArray: Returns ------- - data: DataArray | Dataset | List[DataArray] + data: DataArray | Dataset | list[DataArray] Reconstructed data. """ @@ -154,21 +155,20 @@ def _inverse_transform_algorithm(self, scores: DataArray) -> DataArray: return reconstructed_data - def components(self) -> DataObject: - """Return the (EOF) components. + def components(self, normalized: bool = True) -> DataObject: + """Return the components. - The components in EOF anaylsis are the eigenvectors of the covariance/correlation matrix. - Other names include the principal components or EOFs. + The components are also refered to as eigenvectors, EOFs or loadings depending on the context. Returns ------- - components: DataArray | Dataset | List[DataArray] + components: DataArray | Dataset | list[DataArray] Components of the fitted model. 
""" - return super().components() + return super().components(normalized=normalized) - def scores(self, normalized: bool = True) -> DataArray: + def scores(self, normalized: bool = False) -> DataArray: """Return the (PC) scores. The scores in EOF anaylsis are the projection of the data matrix onto the @@ -182,7 +182,7 @@ def scores(self, normalized: bool = True) -> DataArray: Returns ------- - components: DataArray | Dataset | List[DataArray] + components: DataArray | Dataset | list[DataArray] Scores of the fitted model. """ @@ -240,31 +240,23 @@ def explained_variance_ratio(self) -> DataArray: return exp_var_ratio -class HilbertEOF(EOF): - """Hilbert EOF analysis. +class ComplexEOF(EOF): + """Complex EOF analysis. - The Hilbert EOF analysis [1]_ [2]_ [3]_ [4]_ (also known as Hilbert EOF analysis) applies a Hilbert transform - to the data before performing the standard EOF analysis. - The Hilbert transform is applied to each feature of the data individually. + EOF analysis applied to a complex-valued field obtained from a pair of + variables such as the zonal and meridional components, :math:`U` and + :math:`V`, of the wind field. Complex EOF analysis then decomposes the + dataset - An optional padding with exponentially decaying values can be applied prior to - the Hilbert transform in order to mitigate the impact of spectral leakage. + .. math:: + Z = U + iV + + into a set of complex-valued components (EOFs) and PC scores [1]_. Parameters ---------- n_modes : int Number of modes to calculate. - padding : str, optional - Specifies the method used for padding the data prior to applying the Hilbert - transform. This can help to mitigate the effect of spectral leakage. - Currently, only 'exp' for exponential padding is supported. Default is 'exp'. - decay_factor : float, optional - Specifies the decay factor used in the exponential padding. This parameter - is only used if padding='exp'. The recommended value typically ranges between 0.05 to 0.2 - but ultimately depends on the variability in the data. - A smaller value (e.g. 0.05) is recommended for - data with high variability, while a larger value (e.g. 0.2) is recommended - for data with low variability. Default is 0.2. center: bool, default=True Whether to center the input data. standardize : bool @@ -276,40 +268,41 @@ class HilbertEOF(EOF): feature_name: str, default="feature" Name of the feature dimension. compute : bool, default=True - Whether to compute elements of the model eagerly, or to defer computation. - If True, four pieces of the fit will be computed sequentially: 1) the - preprocessor scaler, 2) optional NaN checks, 3) SVD decomposition, 4) scores - and components. - verbose: bool, default=False - Whether to show a progress bar when computing the decomposition. - random_state : Optional[int], default=None + Whether to compute elements of the model eagerly, or to defer + computation. If True, four pieces of the fit will be computed + sequentially: 1) the preprocessor scaler, 2) optional NaN checks, 3) SVD + decomposition, 4) scores and components. + random_state : int, optional Seed for the random number generator. solver: {"auto", "full", "randomized"}, default="auto" Solver to use for the SVD computation. solver_kwargs: dict, default={} Additional keyword arguments to be passed to the SVD solver. - solver_kwargs : dict, optional - Additional keyword arguments to be passed to the SVD solver. References ---------- - .. [1] Rasmusson, E. M., Arkin, P. A., Chen, W.-Y. & Jalickee, J. B. 
Biennial variations in surface temperature over the United States as revealed by singular decomposition. Monthly Weather Review 109, 587–598 (1981). - .. [2] Barnett, T. P. Interaction of the Monsoon and Pacific Trade Wind System at Interannual Time Scales Part I: The Equatorial Zone. Monthly Weather Review 111, 756–773 (1983). - .. [3] Horel, J. Complex Principal Component Analysis: Theory and Examples. J. Climate Appl. Meteor. 23, 1660–1673 (1984). - .. [4] Hannachi, A., Jolliffe, I. & Stephenson, D. Empirical orthogonal functions and related techniques in atmospheric science: A review. International Journal of Climatology 27, 1119–1152 (2007). + .. [1] Storch, H. von & Zwiers, F. W. Statistical Analysis in Climate + Research. (Cambridge University Press (Virtual Publishing), 2003). + Examples -------- - >>> model = HilbertEOF(n_modes=5, standardize=True) - >>> model.fit(data) + + With two DataArrays `u` and `v` representing the zonal and meridional + components of the wind field, construct + + >>> X = u + 1j * v + + and fit the Complex EOF model: + + >>> model = ComplexEOF(n_modes=5, standardize=True) + >>> model.fit(X, "time") """ def __init__( self, n_modes: int = 2, - padding: str = "exp", - decay_factor: float = 0.2, center: bool = True, standardize: bool = False, use_coslat: bool = False, @@ -317,10 +310,9 @@ def __init__( sample_name: str = "sample", feature_name: str = "feature", compute: bool = True, - verbose: bool = False, - random_state: Optional[int] = None, + random_state: int | None = None, solver: str = "auto", - solver_kwargs: Dict = {}, + solver_kwargs: dict = {}, **kwargs, ): super().__init__( @@ -332,68 +324,22 @@ def __init__( sample_name=sample_name, feature_name=feature_name, compute=compute, - verbose=verbose, random_state=random_state, solver=solver, solver_kwargs=solver_kwargs, **kwargs, ) - self.attrs.update({"model": "Hilbert EOF analysis"}) - self._params.update({"padding": padding, "decay_factor": decay_factor}) + self.attrs.update({"model": "Complex EOF analysis"}) - def _fit_algorithm(self, data: DataArray) -> Self: - assert_not_complex(data) - - sample_name = self.sample_name - feature_name = self.feature_name + def _fit_algorithm(self, X: DataArray) -> Self: + if not np.iscomplexobj(X): + warnings.warn( + "Expected complex-valued data but found real-valued data. For Hilbert EOF analysis, use `HilbertEOF` model." 
+ ) - # Apply hilbert transform: - padding = self._params["padding"] - decay_factor = self._params["decay_factor"] - data = hilbert_transform( - data, - dims=(sample_name, feature_name), - padding=padding, - decay_factor=decay_factor, - ) + return super()._fit_algorithm(X) - # Compute the total variance - total_variance = compute_total_variance(data, dim=sample_name) - - # Decompose the complex data - decomposer = Decomposer(**self._decomposer_kwargs) - decomposer.fit(data) - - singular_values = decomposer.s_ - components = decomposer.V_ - scores = decomposer.U_ * decomposer.s_ - - # Compute the explained variance per mode - n_samples = data.coords[self.sample_name].size - exp_var = singular_values**2 / (n_samples - 1) - exp_var.name = "explained_variance" - - # Store the results - self.data.add(data, "input_data", allow_compute=False) - self.data.add(components, "components") - self.data.add(scores, "scores") - self.data.add(singular_values, "norms") - self.data.add(exp_var, "explained_variance") - self.data.add(total_variance, "total_variance") - - # Assign analysis-relevant meta data to the results - self.data.set_attrs(self.attrs) - return self - - def _transform_algorithm(self, data: DataArray) -> DataArray: - raise NotImplementedError("Hilbert EOF does not support transform method.") - - def _inverse_transform_algorithm(self, scores: DataArray) -> DataArray: - Xrec = super()._inverse_transform_algorithm(scores) - # Enforce real output - return Xrec.real - - def components_amplitude(self) -> DataObject: + def components_amplitude(self, normalized=True) -> DataObject: """Return the amplitude of the (EOF) components. The amplitude of the components are defined as @@ -404,13 +350,22 @@ def components_amplitude(self) -> DataObject: where :math:`C_{ij}` is the :math:`i`-th entry of the :math:`j`-th component and :math:`|\\cdot|` denotes the absolute value. + Parameters + ---------- + normalized : bool, default=True + Whether to normalize the components by the singular values + Returns ------- - components_amplitude: DataArray | Dataset | List[DataArray] + components_amplitude: DataArray | Dataset | list[DataArray] Amplitude of the components of the fitted model. """ amplitudes = abs(self.data["components"]) + + if not normalized: + amplitudes = amplitudes * self.data["norms"] + amplitudes.name = "components_amplitude" return self.preprocessor.inverse_transform_components(amplitudes) @@ -427,7 +382,7 @@ def components_phase(self) -> DataObject: Returns ------- - components_phase: DataArray | Dataset | List[DataArray] + components_phase: DataArray | Dataset | list[DataArray] Phase of the components of the fitted model. """ @@ -454,7 +409,7 @@ def scores_amplitude(self, normalized=True) -> DataArray: Returns ------- - scores_amplitude: DataArray | Dataset | List[DataArray] + scores_amplitude: DataArray | Dataset | list[DataArray] Amplitude of the scores of the fitted model. """ @@ -479,7 +434,7 @@ def scores_phase(self) -> DataArray: Returns ------- - scores_phase: DataArray | Dataset | List[DataArray] + scores_phase: DataArray | Dataset | list[DataArray] Phase of the scores of the fitted model. """ @@ -487,3 +442,124 @@ def scores_phase(self) -> DataArray: phases = xr.apply_ufunc(np.angle, scores, dask="allowed", keep_attrs=True) phases.name = "scores_phase" return self.preprocessor.inverse_transform_scores(phases) + + +class HilbertEOF(ComplexEOF): + """Hilbert EOF analysis. 
+ + The Hilbert EOF analysis [1]_ [2]_ [3]_ [4]_ (also known as Hilbert EOF analysis) applies a Hilbert transform + to the data before performing the standard EOF analysis. + The Hilbert transform is applied to each feature of the data individually. + + An optional padding with exponentially decaying values can be applied prior to + the Hilbert transform in order to mitigate the impact of spectral leakage. + + Parameters + ---------- + n_modes : int + Number of modes to calculate. + padding : str, optional + Specifies the method used for padding the data prior to applying the Hilbert + transform. This can help to mitigate the effect of spectral leakage. + Currently, only 'exp' for exponential padding is supported. Default is 'exp'. + decay_factor : float, optional + Specifies the decay factor used in the exponential padding. This parameter + is only used if padding='exp'. The recommended value typically ranges between 0.05 to 0.2 + but ultimately depends on the variability in the data. + A smaller value (e.g. 0.05) is recommended for + data with high variability, while a larger value (e.g. 0.2) is recommended + for data with low variability. Default is 0.2. + center: bool, default=True + Whether to center the input data. + standardize : bool + Whether to standardize the input data. + use_coslat : bool + Whether to use cosine of latitude for scaling. + sample_name: str, default="sample" + Name of the sample dimension. + feature_name: str, default="feature" + Name of the feature dimension. + compute : bool, default=True + Whether to compute elements of the model eagerly, or to defer computation. + If True, four pieces of the fit will be computed sequentially: 1) the + preprocessor scaler, 2) optional NaN checks, 3) SVD decomposition, 4) scores + and components. + random_state : int, optional + Seed for the random number generator. + solver: {"auto", "full", "randomized"}, default="auto" + Solver to use for the SVD computation. + solver_kwargs: dict, default={} + Additional keyword arguments to be passed to the SVD solver. + solver_kwargs : dict, optional + Additional keyword arguments to be passed to the SVD solver. + + References + ---------- + .. [1] Rasmusson, E. M., Arkin, P. A., Chen, W.-Y. & Jalickee, J. B. Biennial variations in surface temperature over the United States as revealed by singular decomposition. Monthly Weather Review 109, 587–598 (1981). + .. [2] Barnett, T. P. Interaction of the Monsoon and Pacific Trade Wind System at Interannual Time Scales Part I: The Equatorial Zone. Monthly Weather Review 111, 756–773 (1983). + .. [3] Horel, J. Complex Principal Component Analysis: Theory and Examples. J. Climate Appl. Meteor. 23, 1660–1673 (1984). + .. [4] Hannachi, A., Jolliffe, I. & Stephenson, D. Empirical orthogonal functions and related techniques in atmospheric science: A review. International Journal of Climatology 27, 1119–1152 (2007). 
+ + Examples + -------- + >>> model = HilbertEOF(n_modes=5, standardize=True) + >>> model.fit(X) + + """ + + def __init__( + self, + n_modes: int = 2, + padding: str = "exp", + decay_factor: float = 0.2, + center: bool = True, + standardize: bool = False, + use_coslat: bool = False, + check_nans: bool = True, + sample_name: str = "sample", + feature_name: str = "feature", + compute: bool = True, + random_state: int | None = None, + solver: str = "auto", + solver_kwargs: dict = {}, + **kwargs, + ): + super().__init__( + n_modes=n_modes, + center=center, + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + sample_name=sample_name, + feature_name=feature_name, + compute=compute, + random_state=random_state, + solver=solver, + solver_kwargs=solver_kwargs, + **kwargs, + ) + self.attrs.update({"model": "Hilbert EOF analysis"}) + self._params.update({"padding": padding, "decay_factor": decay_factor}) + + def _augment_data(self, X: DataArray) -> DataArray: + # Apply hilbert transform: + padding = self._params["padding"] + decay_factor = self._params["decay_factor"] + return hilbert_transform( + X, + dims=(self.sample_name, self.feature_name), + padding=padding, + decay_factor=decay_factor, + ) + + def _fit_algorithm(self, X: DataArray) -> Self: + EOF._fit_algorithm(self, X) + return self + + def _transform_algorithm(self, X: DataArray) -> DataArray: + raise NotImplementedError("Hilbert EOF does not support transform method.") + + def _inverse_transform_algorithm(self, scores: DataArray) -> DataArray: + Xrec = super()._inverse_transform_algorithm(scores) + # Enforce real output + return Xrec.real diff --git a/xeofs/models/eof_rotator.py b/xeofs/single/eof_rotator.py similarity index 85% rename from xeofs/models/eof_rotator.py rename to xeofs/single/eof_rotator.py index 67f8351d..b321189c 100644 --- a/xeofs/models/eof_rotator.py +++ b/xeofs/single/eof_rotator.py @@ -1,5 +1,4 @@ from datetime import datetime -from typing import Dict import numpy as np import xarray as xr @@ -7,15 +6,15 @@ from .._version import __version__ from ..data_container import DataContainer -from ..preprocessing.preprocessor import Preprocessor +from ..linalg.rotation import promax +from ..preprocessing import Preprocessor from ..utils.data_types import DataArray -from ..utils.rotation import promax from ..utils.xarray_utils import argsort_dask, get_deterministic_sign_multiplier -from .eof import EOF, HilbertEOF +from .eof import EOF, ComplexEOF, HilbertEOF class EOFRotator(EOF): - """Rotate a solution obtained from ``xe.models.EOF``. + """Rotate a solution obtained from ``xe.single.EOF``. Rotated EOF analysis (e.g. [1]_) is a variation of standard EOF analysis, which uses a rotation technique (Varimax or Promax) on the extracted modes to maximize the variance explained by @@ -47,9 +46,9 @@ class EOFRotator(EOF): Examples -------- - >>> model = xe.models.EOF(n_modes=10) - >>> model.fit(data) - >>> rotator = xe.models.EOFRotator(n_modes=10) + >>> model = xe.single.EOF(n_modes=10) + >>> model.fit(X, "time") + >>> rotator = xe.single.EOFRotator(n_modes=10) >>> rotator.fit(model) >>> rotator.components() @@ -93,7 +92,7 @@ def __init__( self.sorted = False - def get_serialization_attrs(self) -> Dict: + def get_serialization_attrs(self) -> dict: return dict( data=self.data, preprocessor=self.preprocessor, @@ -102,11 +101,11 @@ def get_serialization_attrs(self) -> Dict: ) def fit(self, model) -> Self: - """Rotate the solution obtained from ``xe.models.EOF``. 
+ """Rotate the solution obtained from ``xe.single.EOF``. Parameters ---------- - model : ``xe.models.EOF`` + model : ``xe.single.EOF`` The EOF model to be rotated. """ @@ -222,7 +221,7 @@ def _sort_by_variance(self): ) self.sorted = True - def _transform_algorithm(self, data: DataArray) -> DataArray: + def _transform_algorithm(self, X: DataArray) -> DataArray: n_modes = self._params["n_modes"] svals = self.model.singular_values().sel(mode=slice(1, self._params["n_modes"])) @@ -231,7 +230,7 @@ def _transform_algorithm(self, data: DataArray) -> DataArray: components = self.model.data["components"].sel(mode=slice(1, n_modes)) # Compute non-rotated scores by projecting the data onto non-rotated components - projections = xr.dot(data, components) / svals + projections = xr.dot(X, components) / svals projections.name = "scores" # Rotate the scores @@ -287,8 +286,54 @@ def _compute_rot_mat_inv_trans(self, rotation_matrix, input_dims) -> DataArray: return rotation_matrix +class ComplexEOFRotator(EOFRotator, ComplexEOF): + """Rotate a solution obtained from ``xe.single.ComplexEOF``. + + Parameters + ---------- + n_modes : int, default=2 + Specify the number of modes to be rotated. + power : int, default=1 + Set the power for the Promax rotation. A ``power`` value of 1 results in + a Varimax rotation. + max_iter : int, default=1000 + Determine the maximum number of iterations for the computation of the + rotation matrix. + rtol : float, default=1e-8 + Define the relative tolerance required to achieve convergence and + terminate the iterative process. + compute: bool, default=True + Whether to compute the rotation immediately. + + + Examples + -------- + >>> model = xe.single.ComplexEOF(n_modes=10) + >>> model.fit(data) + >>> rotator = xe.single.ComplexEOFRotator(n_modes=10) + >>> rotator.fit(model) + >>> rotator.components() + + + """ + + def __init__( + self, + n_modes: int = 2, + power: int = 1, + max_iter: int = 1000, + rtol: float = 1e-8, + compute: bool = True, + ): + super().__init__( + n_modes=n_modes, power=power, max_iter=max_iter, rtol=rtol, compute=compute + ) + self.attrs.update({"model": "Rotated Complex EOF analysis"}) + self.model = ComplexEOF() + + class HilbertEOFRotator(EOFRotator, HilbertEOF): - """Rotate a solution obtained from ``xe.models.HilbertEOF``. + """Rotate a solution obtained from ``xe.single.HilbertEOF``. Hilbert Rotated EOF analysis [1]_ [2]_ [3]_ extends EOF analysis by incorporating both amplitude and phase information using a Hilbert transform prior to performing MCA and subsequent Varimax or Promax rotation. 
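The rotators above delegate to `promax` from `xeofs.linalg.rotation`, where `power=1` reduces to a Varimax rotation. As a reference point, the classic Kaiser iteration for Varimax looks like the sketch below; this is a textbook formulation, not the library routine.

```python
# Generic Varimax iteration (Kaiser's algorithm) on a (features x modes)
# loading matrix; textbook sketch, not xeofs.linalg.rotation.promax.
import numpy as np

def varimax(loadings, max_iter=1000, rtol=1e-8):
    """Rotate loadings to maximize the variance of the squared loadings."""
    p, k = loadings.shape
    R = np.eye(k)
    var_old = 0.0
    for _ in range(max_iter):
        L = loadings @ R
        # Gradient of the varimax criterion
        G = loadings.T @ (L**3 - L @ np.diag(np.mean(L**2, axis=0)))
        U, s, Vt = np.linalg.svd(G)
        R = U @ Vt
        var_new = s.sum()
        if var_new < var_old * (1 + rtol):
            break
        var_old = var_new
    return loadings @ R, R
```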
@@ -319,9 +364,9 @@ class HilbertEOFRotator(EOFRotator, HilbertEOF): Examples -------- - >>> model = xe.models.HilbertEOF(n_modes=10) + >>> model = xe.single.HilbertEOF(n_modes=10) >>> model.fit(data) - >>> rotator = xe.models.HilbertEOFRotator(n_modes=10) + >>> rotator = xe.single.HilbertEOFRotator(n_modes=10) >>> rotator.fit(model) >>> rotator.components() diff --git a/xeofs/models/gwpca.py b/xeofs/single/gwpca.py similarity index 99% rename from xeofs/models/gwpca.py rename to xeofs/single/gwpca.py index f9813ce3..df087a3d 100644 --- a/xeofs/models/gwpca.py +++ b/xeofs/single/gwpca.py @@ -1,23 +1,22 @@ +import numpy as np +import xarray as xr from typing_extensions import Self - from xeofs.utils.data_types import DataArray -from ._base_model import _BaseModel -from ..utils.sanity_checks import assert_not_complex + from ..utils.constants import ( VALID_CARTESIAN_X_NAMES, VALID_CARTESIAN_Y_NAMES, VALID_LATITUDE_NAMES, VALID_LONGITUDE_NAMES, ) -import numpy as np -import xarray as xr - from ..utils.distance_metrics import VALID_METRICS from ..utils.kernels import VALID_KERNELS +from ..utils.sanity_checks import assert_not_complex +from .base_model_single_set import BaseModelSingleSet -class GWPCA(_BaseModel): +class GWPCA(BaseModelSingleSet): """Geographically weighted PCA. Geographically weighted PCA (GWPCA) [1]_ uses a geographically weighted approach to perform PCA for diff --git a/xeofs/models/opa.py b/xeofs/single/opa.py similarity index 94% rename from xeofs/models/opa.py rename to xeofs/single/opa.py index 08a4b539..bb0edf34 100644 --- a/xeofs/models/opa.py +++ b/xeofs/single/opa.py @@ -1,17 +1,15 @@ -from typing import Optional, Dict -from typing_extensions import Self - -import xarray as xr import numpy as np +import xarray as xr +from typing_extensions import Self -from ._base_model import _BaseModel -from .eof import EOF -from .decomposer import Decomposer -from ..utils.data_types import DataObject, DataArray +from ..linalg.decomposer import Decomposer +from ..utils.data_types import DataArray, DataObject from ..utils.sanity_checks import assert_not_complex +from .base_model_single_set import BaseModelSingleSet +from .eof import EOF -class OPA(_BaseModel): +class OPA(BaseModelSingleSet): """Optimal Persistence Analysis. 
Optimal Persistence Analysis (OPA) [1]_ [2]_ identifies the patterns with the @@ -53,9 +51,9 @@ class OPA(_BaseModel): Examples -------- - >>> from xeofs.models import OPA + >>> from xeofs.single import OPA >>> model = OPA(n_modes=10, tau_max=50, n_pca_modes=100) - >>> model.fit(data, dim=("time")) + >>> model.fit(X, dim=("time")) Retrieve the optimally persistent patterns (OPP) and their time series: @@ -80,8 +78,8 @@ def __init__( sample_name: str = "sample", feature_name: str = "feature", solver: str = "auto", - random_state: Optional[int] = None, - solver_kwargs: Dict = {}, + random_state: int | None = None, + solver_kwargs: dict = {}, ): if n_modes > n_pca_modes: raise ValueError( @@ -127,8 +125,8 @@ def _compute_matrix_inverse(X, dims): dask="allowed", ) - def _fit_algorithm(self, data: DataArray) -> Self: - assert_not_complex(data) + def _fit_algorithm(self, X: DataArray) -> Self: + assert_not_complex(X) sample_name = self.sample_name feature_name = self.feature_name @@ -146,8 +144,8 @@ def _fit_algorithm(self, data: DataArray) -> Self: check_nans=False, solver_kwargs=self._params["solver_kwargs"], ) - pca.fit(data, dim=sample_name) - n_samples = data.coords[sample_name].size + pca.fit(X, dim=sample_name) + n_samples = X.coords[sample_name].size comps = pca.data["components"] * np.sqrt(n_samples - 1) # -> comps (feature x mode) scores = pca.data["scores"] / np.sqrt(n_samples - 1) @@ -270,7 +268,7 @@ def _fit_algorithm(self, data: DataArray) -> Self: self._C0 = C0 # store C0 for testing purposes of orthogonality return self - def _transform_algorithm(self, data: DataArray) -> DataArray: + def _transform_algorithm(self, X: DataArray) -> DataArray: raise NotImplementedError("OPA does not (yet) support transform()") def _inverse_transform_algorithm(self, scores) -> DataObject: diff --git a/xeofs/single/pop.py b/xeofs/single/pop.py new file mode 100644 index 00000000..c783a964 --- /dev/null +++ b/xeofs/single/pop.py @@ -0,0 +1,497 @@ +import warnings + +import numpy as np +import xarray as xr +from typing_extensions import Self + +from ..linalg import total_variance +from ..preprocessing import Whitener +from ..utils.data_types import DataArray, DataObject +from ..utils.xarray_utils import argsort_dask +from .base_model_single_set import BaseModelSingleSet + + +class POP(BaseModelSingleSet): + """Principal Oscillation Pattern (POP) analysis. + + POP analysis [1]_ [2]_ is a linear multivariate technique used to identify + and describe dominant oscillatory modes in a dynamical system. POP analysis + involves computing the eigenvalues and eigenvectors of the `feedback matrix` + defined as + + .. math:: + A = C_1 C_0^{-1} + + where :math:`C_0` is the covariance matrix and :math:`C_1` is the lag-1 + covariance matrix of the input data. The eigenvectors of the feedback matrix + are the POPs and the eigenvalues are related to the damping times and + periods of the oscillatory modes. + + Parameters + ---------- + n_modes: int, default=10 + Number of modes to calculate. + center: bool, default=True + Whether to center the input data. + standardize: bool, default=False + Whether to standardize the input data. + use_coslat: bool, default=False + Whether to use cosine of latitude for scaling. + use_pca : bool, default=False + If True, perform PCA to reduce the dimensionality of the data. + n_pca_modes : int | float | str, default=0.999 + If int, specifies the number of modes to retain. 
If float, specifies the + fraction of variance in the (whitened) data that should be explained by + the retained modes. If "all", all modes are retained. + init_rank_reduction : float, default=0.3 + Only relevant when `use_pca=True` and `n_modes` is a float, in which + case it denotes the fraction of the initial rank to reduce the data to + via PCA as a first guess before truncating the solution to the desired + fraction of explained variance. This allows for faster computation of + PCA via randomized SVD and avoids the need to compute the full SVD. + sample_name: str, default="sample" + Name of the sample dimension. + feature_name: str, default="feature" + Name of the feature dimension. + check_nans : bool, default=True + If True, remove full-dimensional NaN features from the data, check to + ensure that NaN features match the original fit data during transform, + and check for isolated NaNs. Note: this forces eager computation of dask + arrays. If False, skip all NaN checks. In this case, NaNs should be + explicitly removed or filled prior to fitting, or SVD will fail. + compute : bool, default=True + Whether to compute elements of the model eagerly, or to defer computation. + If True, four pieces of the fit will be computed sequentially: 1) the + preprocessor scaler, 2) optional NaN checks, 3) SVD decomposition, 4) scores + and components. + random_state : int, optional + Seed for the random number generator. + solver: {"auto", "full", "randomized"}, default="auto" + Solver to use for the SVD computation. + solver_kwargs: dict, default={} + Additional keyword arguments to be passed to the SVD solver. + + References + ---------- + .. [1] Hasselmann, K. PIPs and POPs: The reduction of complex dynamical systems using principal interaction and oscillation patterns. J. Geophys. Res. 93, 11015–11021 (1988). + .. [2] von Storch, H., G. Bürger, R. Schnur, and J. von Storch, 1995: + Principal Oscillation Patterns: A Review. J. Climate, 8, 377–400, + https://doi.org/10.1175/1520-0442(1995)008<0377:POPAR>2.0.CO;2. 
+ + + Examples + -------- + + Perform POP analysis in PC space spanned by the first 10 modes: + + >>> pop = xe.single.POP(n_modes="all", use_pca=True, n_pca_modes=10) + >>> pop.fit(X, "time) + + Get the POPs and associated time coefficients: + + >>> patterns = pop.components() + >>> scores = pop.scores() + + Reconstruct the original data using a conjugate pair of POPs: + + >>> pop_pairs = scores.sel(mode=[1, 2]) + >>> X_rec = pop.inverse_transform(pop_pairs) + + """ + + def __init__( + self, + n_modes: int = 2, + center: bool = True, + standardize: bool = False, + use_coslat: bool = False, + use_pca: bool = True, + n_pca_modes: float | int = 0.999, + pca_init_rank_reduction: float = 0.3, + check_nans=True, + sample_name: str = "sample", + feature_name: str = "feature", + compute: bool = True, + random_state: int | None = None, + solver: str = "auto", + solver_kwargs: dict = {}, + **kwargs, + ): + super().__init__( + n_modes=n_modes, + center=center, + standardize=standardize, + use_coslat=use_coslat, + check_nans=check_nans, + sample_name=sample_name, + feature_name=feature_name, + compute=compute, + random_state=random_state, + solver=solver, + solver_kwargs=solver_kwargs, + **kwargs, + ) + self.attrs.update({"model": "Principal Oscillation Pattern analysis"}) + + self.whitener = Whitener( + alpha=1.0, + use_pca=use_pca, + n_modes=n_pca_modes, + init_rank_reduction=pca_init_rank_reduction, + sample_name=sample_name, + feature_name=feature_name, + compute_svd=compute, + random_state=random_state, + solver_kwargs=solver_kwargs, + ) + + self.sorted = False + + def get_serialization_attrs(self) -> dict: + return dict( + data=self.data, + preprocessor=self.preprocessor, + whitener=self.whitener, + sorted=self.sorted, + ) + + def _np_solve_pop_system(self, X): + # Feedack matrix + A = X[1:].conj().T @ X[:-1] @ np.linalg.inv(X[:-1].conj().T @ X[:-1]) + + # Compute POPs + lbda, P = np.linalg.eig(A) + + # e-folding times /damping times + tau = -1 / np.log(abs(lbda)) + + # POP periods + with warnings.catch_warnings(record=True): + warnings.filterwarnings( + "ignore", "divide by zero encountered", RuntimeWarning + ) + + T = 2 * np.pi / np.angle(lbda) + + # POP (time) coefficients (Storch et al. 1995, equation 19) + Z = self._np_compute_pop_coefficients(X, P) + # Reconstruction of original data + # Xrec = Z @ P.T + # It seems that the signs of some columns of Xrec are flipped, probably + + return P, Z, lbda, T, tau + + def _np_compute_pop_coefficients(self, X, P): + # POP (time) coefficients (Storch et al. 
1995, equation 19) + Z = np.empty((X.shape[0], P.shape[1]), dtype=complex) + for i in range(P.shape[1]): + p = P[:, i : i + 1] + pr = p.real + pi = p.imag + + M = np.array([[pr.T @ pr, pr.T @ pi], [pr.T @ pi, pi.T @ pi]]).squeeze() + Minv = np.linalg.pinv(M) + zri = Minv @ np.hstack([X @ pr, X @ pi]).T + z = zri[0] + 1j * zri[1] + Z[:, i] = z + return Z + + def _fit_algorithm(self, X: DataArray) -> Self: + sample_name = self.sample_name + feature_name = self.feature_name + + # Transform in PC space + X = self.whitener.fit_transform(X) + + P, Z, lbda, T, tau = xr.apply_ufunc( + self._np_solve_pop_system, + X, + input_core_dims=[[sample_name, feature_name]], + output_core_dims=[ + [feature_name, "mode"], + [sample_name, "mode"], + ["mode"], + ["mode"], + ["mode"], + ], + dask="allowed", + ) + + mode_coords = np.arange(1, P.mode.size + 1) + P = P.assign_coords(mode=mode_coords) + Z = Z.assign_coords(mode=mode_coords) + lbda = lbda.assign_coords(mode=mode_coords) + T = T.assign_coords(mode=mode_coords) + tau = tau.assign_coords(mode=mode_coords) + + # Compute dynamical importance of each mode + var_Z = Z.var(sample_name) + norms = (var_Z) ** (0.5) + + # Compute total variance + var_tot = total_variance(X, sample_name) + + # Reorder according to variance + idx_modes_sorted = argsort_dask(norms, "mode")[::-1] # type: ignore + idx_modes_sorted.coords.update(norms.coords) + + P = self.whitener.inverse_transform_components(P) + + # Store the results + self.data.add(X, "input_data", allow_compute=False) + self.data.add(P, "components") + self.data.add(Z, "scores") + self.data.add(norms, "norms") + self.data.add(lbda, "eigenvalues") + self.data.add(tau, "damping_times") + self.data.add(T, "periods") + self.data.add(idx_modes_sorted, "idx_modes_sorted") + self.data.add(var_tot, "total_variance") + + self.data.set_attrs(self.attrs) + return self + + def _post_compute(self): + """Leave sorting until after compute because it can't be done lazily.""" + self._sort_by_variance() + + def _sort_by_variance(self): + """Re-sort the mode dimension of all data variables by variance explained.""" + if not self.sorted: + for key in self.data.keys(): + if "mode" in self.data[key].dims and key != "idx_modes_sorted": + self.data[key] = ( + self.data[key] + .isel(mode=self.data["idx_modes_sorted"].values) + .assign_coords(mode=self.data[key].mode) + ) + self.sorted = True + + def _transform_algorithm(self, X: DataArray) -> DataArray: + sample_name = self.sample_name + feature_name = self.feature_name + + P = self.data["components"] + + # Transform into PC spcae + P = self.whitener.transform_components(P) + X = self.whitener.transform(X) + + # Project the data + Z = xr.apply_ufunc( + self._np_compute_pop_coefficients, + X, + P, + input_core_dims=[[sample_name, feature_name], [feature_name, "mode"]], + output_core_dims=[[sample_name, "mode"]], + dask="allowed", + ) + Z.name = "scores" + + Z = self.whitener.inverse_transform_scores(Z) + + return Z + + def _inverse_transform_algorithm(self, scores: DataArray) -> DataArray: + """Reconstruct the original data from transformed data. + + Parameters + ---------- + scores: DataArray + Transformed data to be reconstructed. This could be a subset + of the `scores` data of a fitted model, or unseen data. Must + have a 'mode' dimension. + + Returns + ------- + data: DataObject + Reconstructed data. 
+ + """ + # Reconstruct the data + P = self.data["components"].sel(mode=scores.mode) + + # Transform in PC space + P = self.whitener.transform_components(P) + + reconstructed_data = xr.dot(scores, P, dims="mode") + reconstructed_data.name = "reconstructed_data" + + # Inverse transform the data into physical space + reconstructed_data = self.whitener.inverse_transform_data(reconstructed_data) + + return reconstructed_data + + def components(self) -> DataObject: + """Return the POPs. + + The POPs are the eigenvectors of the feedback matrix. + + Returns + ------- + components: DataObject + Principal Oscillation Patterns (POPs). + + """ + return super().components(normalized=False) + + def scores(self, normalized: bool = False) -> DataArray: + """Return the POP coefficients/scores. + + Parameters + ---------- + normalized : bool, default=True + Whether to normalize the scores by the L2 norm. + + Returns + ------- + components: DataObject + POP coefficients. + + """ + return super().scores(normalized=normalized) + + def eigenvalues(self) -> DataArray: + """Return the eigenvalues of the feedback matrix. + + Returns + ------- + DataArray + Real or complex eigenvalues. + + """ + return self.data["eigenvalues"] + + def damping_times(self) -> DataArray: + """Return the damping times of the feedback matrix. + + The damping times are defined as + + .. math:: + \\tau = -\\frac{1}{\\log(|\\lambda|)} + + where :math:`\\lambda` is the eigenvalue. + + Returns + ------- + DataArray + Damping times. + + """ + return self.data["damping_times"] + + def periods(self) -> DataArray: + """Return the periods of the feedback matrix. + + For complex eigenvalues, the periods are defined as + + .. math:: + T = \\frac{2\\pi}{\\arg(\\lambda)} + + where :math:`\\lambda` is the eigenvalue. For real eigenvalues ``inf`` + is returned. + + Returns + ------- + DataArray + Periods. + + """ + return self.data["periods"] + + def components_amplitude(self) -> DataObject: + """Return the amplitude of the POP components. + + The amplitude of the components are defined as + + .. math:: + A_{ij} = |C_{ij}| + + where :math:`C_{ij}` is the :math:`i`-th entry of the :math:`j`-th component and + :math:`|\\cdot|` denotes the absolute value. + + + Returns + ------- + components_amplitude: DataObject + Amplitude of the components of the fitted model. + + """ + amplitudes = abs(self.data["components"]) + + amplitudes.name = "components_amplitude" + return self.preprocessor.inverse_transform_components(amplitudes) + + def components_phase(self) -> DataObject: + """Return the phase of the POP components. + + The phase of the components are defined as + + .. math:: + \\phi_{ij} = \\arg(C_{ij}) + + where :math:`C_{ij}` is the :math:`i`-th entry of the :math:`j`-th component and + :math:`\\arg(\\cdot)` denotes the argument of a complex number. + + Returns + ------- + components_phase: DataObject + Phase of the components of the fitted model. + + """ + comps = self.data["components"] + comp_phase = xr.apply_ufunc(np.angle, comps, dask="allowed", keep_attrs=True) + comp_phase.name = "components_phase" + return self.preprocessor.inverse_transform_components(comp_phase) + + def scores_amplitude(self, normalized=True) -> DataArray: + """Return the amplitude of the POP coefficients/scores. + + The amplitude of the scores are defined as + + .. math:: + A_{ij} = |S_{ij}| + + where :math:`S_{ij}` is the :math:`i`-th entry of the :math:`j`-th score and + :math:`|\\cdot|` denotes the absolute value. 
+ + Parameters + ---------- + normalized : bool, default=True + Whether to normalize the scores by the singular values. + + Returns + ------- + scores_amplitude: DataObject + Amplitude of the scores of the fitted model. + + """ + scores = self.data["scores"].copy() + if normalized: + scores = scores / self.data["norms"] + + amplitudes = abs(scores) + amplitudes.name = "scores_amplitude" + return self.preprocessor.inverse_transform_scores(amplitudes) + + def scores_phase(self) -> DataArray: + """Return the phase of the POP coefficients/scores. + + The phase of the scores are defined as + + .. math:: + \\phi_{ij} = \\arg(S_{ij}) + + where :math:`S_{ij}` is the :math:`i`-th entry of the :math:`j`-th score and + :math:`\\arg(\\cdot)` denotes the argument of a complex number. + + Returns + ------- + scores_phase: DataObject + Phase of the scores of the fitted model. + + """ + scores = self.data["scores"] + phases = xr.apply_ufunc(np.angle, scores, dask="allowed", keep_attrs=True) + phases.name = "scores_phase" + return self.preprocessor.inverse_transform_scores(phases) diff --git a/xeofs/models/sparse_pca.py b/xeofs/single/sparse_pca.py similarity index 89% rename from xeofs/models/sparse_pca.py rename to xeofs/single/sparse_pca.py index 27c9299d..f4d2e209 100644 --- a/xeofs/models/sparse_pca.py +++ b/xeofs/single/sparse_pca.py @@ -1,5 +1,4 @@ # %% -from typing import Dict, Optional import numpy as np import xarray as xr @@ -9,11 +8,11 @@ from ..utils.sanity_checks import assert_not_complex from ..utils.xarray_utils import get_matrix_rank from ..utils.xarray_utils import total_variance as compute_total_variance -from ._base_model import _BaseModel -from ._np_classes._sparse_pca import compute_rspca, compute_spca +from ._numpy._sparse_pca import compute_rspca, compute_spca +from .base_model_single_set import BaseModelSingleSet -class SparsePCA(_BaseModel): +class SparsePCA(BaseModelSingleSet): """ Sparse PCA via Variable Projection. @@ -68,9 +67,7 @@ class SparsePCA(_BaseModel): 2) optional NaN checks, 3) SVD decomposition, 4) scores and components. - verbose : bool, default=False - Whether to show a progress bar when computing the decomposition. - random_state : Optional[int], default=None + random_state : int, optional Seed for the random number generator. solver : {"auto", "full", "randomized"}, default="randomized" Solver to use for the SVD computation. 
@@ -87,7 +84,7 @@ class SparsePCA(_BaseModel): Examples -------- - >>> model = xe.models.SparsePCA(n_modes=2, alpha=1e-4) + >>> model = xe.single.SparsePCA(n_modes=2, alpha=1e-4) >>> model.fit(data, "time") >>> components = model.components() """ @@ -111,10 +108,9 @@ def __init__( feature_name: str = "feature", check_nans=True, compute: bool = True, - verbose: bool = False, - random_state: Optional[int] = None, + random_state: int | None = None, solver: str = "auto", - solver_kwargs: Dict = {}, + solver_kwargs: dict = {}, **kwargs, ): super().__init__( @@ -126,7 +122,6 @@ def __init__( sample_name=sample_name, feature_name=feature_name, compute=compute, - verbose=verbose, random_state=random_state, solver=solver, solver_kwargs=solver_kwargs, @@ -147,7 +142,7 @@ def __init__( } ) - def _fit_algorithm(self, data: DataArray) -> Self: + def _fit_algorithm(self, X: DataArray) -> Self: sample_name = self.sample_name feature_name = self.feature_name @@ -155,16 +150,16 @@ def _fit_algorithm(self, data: DataArray) -> Self: # NOTE: Complex data is not supported, it's likely possible but current numpy implementation # of sparse_pca needs to be adpated, mainly changing matrix transpose to conjugate transpose. # http://arxiv.org/abs/1804.00341 - assert_not_complex(data) + assert_not_complex(X) # Compute the total variance - total_variance = compute_total_variance(data, dim=sample_name) + total_variance = compute_total_variance(X, dim=sample_name) # Compute matrix rank - rank = get_matrix_rank(data) + rank = get_matrix_rank(X) # Decide whether to use exact or randomized algorithm - is_small_data = max(data.shape) < 500 + is_small_data = max(X.shape) < 500 solver = self._params["solver"] match solver: @@ -213,7 +208,7 @@ def _fit_algorithm(self, data: DataArray) -> Self: # exp_var : eigenvalues components, components_normal, exp_var = xr.apply_ufunc( decomposing_algorithm, - data, + X, input_core_dims=[[sample_name, feature_name]], output_core_dims=[[feature_name, "mode"], [feature_name, "mode"], ["mode"]], dask="allowed", @@ -227,7 +222,7 @@ def _fit_algorithm(self, data: DataArray) -> Self: components.name = "sparse_weight_vectors" components = components.assign_coords( { - feature_name: data.coords[feature_name], + feature_name: X.coords[feature_name], "mode": np.arange(1, self.n_modes + 1), }, ) @@ -235,13 +230,13 @@ def _fit_algorithm(self, data: DataArray) -> Self: components_normal.name = "orthonormal_weight_vectors" components_normal = components_normal.assign_coords( { - feature_name: data.coords[feature_name], + feature_name: X.coords[feature_name], "mode": np.arange(1, self.n_modes + 1), }, ) # Transform the data - scores = xr.dot(data, components, dims=feature_name) + scores = xr.dot(X, components, dims=feature_name) scores.name = "scores" norms = xr.apply_ufunc( @@ -257,7 +252,7 @@ def _fit_algorithm(self, data: DataArray) -> Self: norms.name = "component_norms" # Store the results - self.data.add(data, "input_data", allow_compute=False) + self.data.add(X, "input_data", allow_compute=False) self.data.add(components, "components") self.data.add(components_normal, "components_normal") self.data.add(scores, "scores") @@ -268,13 +263,13 @@ def _fit_algorithm(self, data: DataArray) -> Self: self.data.set_attrs(self.attrs) return self - def _transform_algorithm(self, data: DataObject) -> DataArray: + def _transform_algorithm(self, X: DataObject) -> DataArray: feature_name = self.preprocessor.feature_name components = self.data["components"] # Project the data - projections = xr.dot(data, 
components, dims=feature_name) + projections = xr.dot(X, components, dims=feature_name) projections.name = "scores" return projections @@ -295,13 +290,13 @@ def components(self) -> DataObject: Returns ------- - components: DataArray | Dataset | List[DataArray] + components: DataArray | Dataset | list[DataArray] Components of the fitted model. """ return super().components() - def scores(self, normalized: bool = True) -> DataArray: + def scores(self, normalized: bool = False) -> DataArray: """Return the component scores. The component scores :math:`U` are defined as the projection of the fitted @@ -313,12 +308,12 @@ def scores(self, normalized: bool = True) -> DataArray: Parameters ---------- - normalized : bool, default=True + normalized : bool, default=False Whether to normalize the scores by the L2 norm. Returns ------- - components: DataArray | Dataset | List[DataArray] + components: DataArray | Dataset | list[DataArray] Scores of the fitted model. """ diff --git a/xeofs/utils/data_types.py b/xeofs/utils/data_types.py index 175eb10a..031653c0 100644 --- a/xeofs/utils/data_types.py +++ b/xeofs/utils/data_types.py @@ -1,11 +1,4 @@ -from typing import ( - List, - TypeAlias, - Sequence, - Tuple, - TypeVar, - Hashable, -) +from typing import Hashable, Sequence, TypeAlias, TypeVar import dask.array as da from xarray.core import dataarray as xr_dataarray @@ -17,16 +10,17 @@ DataVar = TypeVar("DataVar", DataArray, DataSet) DataVarBound = TypeVar("DataVarBound", bound=Data) -DataArrayList: TypeAlias = List[DataArray] -DataSetList: TypeAlias = List[DataSet] -DataList: TypeAlias = List[Data] -DataVarList: TypeAlias = List[DataVar] +DataArrayList: TypeAlias = list[DataArray] +DataSetList: TypeAlias = list[DataSet] +DataList: TypeAlias = list[Data] +DataVarList: TypeAlias = list[DataVar] +GenericType = TypeVar("GenericType") DaskArray: TypeAlias = da.Array # type: ignore DataObject: TypeAlias = DataArray | DataSet | DataList Dims: TypeAlias = Sequence[Hashable] -DimsTuple: TypeAlias = Tuple[Dims, ...] -DimsList: TypeAlias = List[Dims] -DimsListTuple: TypeAlias = Tuple[DimsList, ...] +DimsTuple: TypeAlias = tuple[Dims, ...] +DimsList: TypeAlias = list[Dims] +DimsListTuple: TypeAlias = tuple[DimsList, ...] 
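For reviewers of the new `xeofs/single/pop.py` above, the following is a minimal, self-contained NumPy sketch of what `_np_solve_pop_system` and `_np_compute_pop_coefficients` compute: the lag-1 feedback matrix, its eigendecomposition (the POPs), the damping times and periods, and the least-squares POP coefficients (Storch et al. 1995, eq. 19). The synthetic data, variable names, and standalone layout are illustrative assumptions only; in the package these steps run inside `xr.apply_ufunc` on PCA-whitened data.

```python
import numpy as np

rng = np.random.default_rng(42)

# Synthetic, centered (time x feature) matrix standing in for the PC-space data
n_samples, n_features = 300, 4
X = rng.standard_normal((n_samples, n_features))
X -= X.mean(axis=0)

# Feedback matrix A of the lag-1 model x(t+1) ~ A x(t)
A = X[1:].conj().T @ X[:-1] @ np.linalg.inv(X[:-1].conj().T @ X[:-1])

# POPs are the (generally complex) eigenvectors of A
lbda, P = np.linalg.eig(A)

# e-folding (damping) times and oscillation periods; real eigenvalues give inf periods
tau = -1.0 / np.log(np.abs(lbda))
with np.errstate(divide="ignore"):
    T = 2.0 * np.pi / np.angle(lbda)

# POP (time) coefficients z(t): least-squares fit of X onto the real and
# imaginary parts of each POP (Storch et al. 1995, eq. 19)
Z = np.empty((n_samples, P.shape[1]), dtype=complex)
for i in range(P.shape[1]):
    pr, pi = P[:, i : i + 1].real, P[:, i : i + 1].imag
    M = np.array([[pr.T @ pr, pr.T @ pi], [pr.T @ pi, pi.T @ pi]]).squeeze()
    zri = np.linalg.pinv(M) @ np.hstack([X @ pr, X @ pi]).T
    Z[:, i] = zri[0] + 1j * zri[1]

# Modes are then ranked by the variance of their coefficient time series
order = np.argsort(Z.var(axis=0))[::-1]
```

Because eigenvectors are defined only up to an arbitrary complex factor, individual POPs (and reconstructions built from a single mode pair) can differ in sign or phase between runs, which is the ambiguity noted in the code comment above.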
diff --git a/xeofs/utils/sanity_checks.py b/xeofs/utils/sanity_checks.py index 49ea2f23..ba03c608 100644 --- a/xeofs/utils/sanity_checks.py +++ b/xeofs/utils/sanity_checks.py @@ -102,15 +102,18 @@ def assert_not_complex(da: xr.DataArray) -> None: ) -def sanity_check_n_modes(n_modes: int | float) -> None: +def sanity_check_n_modes(n_modes: int | float | str) -> None: """Check if the number of modes is valid.""" match n_modes: case int(): if n_modes < 1: - raise ValueError("n_modes must be greater than 0") + raise ValueError("If integer, n_modes must be greater than 0") case float(): if not (0 < n_modes <= 1.0): - raise ValueError("n_modes must be in the range (0, 1]") + raise ValueError("If float, n_modes must be in the range (0, 1]") + case str(): + if n_modes not in ["all"]: + raise ValueError("If string, n_modes must be 'all'") case _: - raise TypeError("n_modes must be an integer or a float") + raise TypeError("n_modes must be an integer, float or string.") diff --git a/xeofs/utils/statistics.py b/xeofs/utils/statistics.py index 1ac6eca2..3df80ddf 100644 --- a/xeofs/utils/statistics.py +++ b/xeofs/utils/statistics.py @@ -1,11 +1,18 @@ -import xarray as xr import scipy as sc +import xarray as xr from statsmodels.stats.multitest import multipletests as statsmodels_multipletests from .constants import MULTIPLE_TESTS -def pearson_correlation(data1, data2, correction=None, alpha=0.05): +def pearson_correlation( + data1, + data2, + correction=None, + alpha=0.05, + sample_name="sample", + feature_name="feature", +): """Compute Pearson correlation between two xarray objects. Additionally, compute two-sided p-values and adjust them for multiple testing. @@ -40,18 +47,41 @@ def pearson_correlation(data1, data2, correction=None, alpha=0.05): Adjusted p-values for the Pearson correlation. 
""" + + def _correlation_coefficients_numpy(X, Y): + """Compute Pearson correlation coefficients assuming centered data.""" + X = X / X.std(0) + Y = Y / Y.std(0) + return X.conj().T @ Y / X.shape[0] + n_samples = data1.shape[0] # Compute Pearson correlation coefficients - corr = (data1 * data2).mean("sample") / data1.std("sample") / data2.std("sample") + sample_name_x = "sample_dim_x" + sample_name_y = "sample_dim_y" + data1 = data1.rename({sample_name: sample_name_x}) + data2 = data2.rename({sample_name: sample_name_y}) + feature_name_x = data1.dims[1] + feature_name_y = data2.dims[1] + corr = xr.apply_ufunc( + _correlation_coefficients_numpy, + data1, + data2, + input_core_dims=[ + [sample_name_x, feature_name_x], + [sample_name_y, feature_name_y], + ], + output_core_dims=[[feature_name_x, feature_name_y]], + dask="allowed", + ) # Compute two-sided p-values - pvals = _compute_pvalues(corr, n_samples, dims=["feature"]) + pvals = _compute_pvalues(corr, n_samples) if correction is not None: # Adjust p-values for multiple testing rejected, pvals = _multipletests( - pvals, dim="feature", alpha=alpha, method=correction + pvals, dim=feature_name, alpha=alpha, method=correction ) return corr, pvals @@ -59,7 +89,7 @@ def pearson_correlation(data1, data2, correction=None, alpha=0.05): return corr, pvals -def _compute_pvalues(pearsonr, n_samples: int, dims) -> xr.DataArray: +def _compute_pvalues(pearsonr, n_samples: int) -> xr.DataArray: # Compute two-sided p-values # Reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html#r8c6348c62346-1 a = n_samples / 2 - 1 @@ -67,8 +97,8 @@ def _compute_pvalues(pearsonr, n_samples: int, dims) -> xr.DataArray: pvals = 2 * xr.apply_ufunc( dist.cdf, -abs(pearsonr), - input_core_dims=[dims], - output_core_dims=[dims], + input_core_dims=[[]], + output_core_dims=[[]], dask="allowed", vectorize=False, ) diff --git a/xeofs/utils/xarray_utils.py b/xeofs/utils/xarray_utils.py index fcbb2e12..e1775f7e 100644 --- a/xeofs/utils/xarray_utils.py +++ b/xeofs/utils/xarray_utils.py @@ -1,4 +1,4 @@ -from typing import Any, Hashable, List, Sequence, Tuple, TypeVar +from typing import Any, Hashable, Sequence, TypeVar import numpy as np import xarray as xr @@ -19,7 +19,7 @@ T = TypeVar("T") -def unwrap_singleton_list(input_list: List[T]) -> T | List[T]: +def unwrap_singleton_list(input_list: list[T]) -> T | list[T]: if len(input_list) == 1: return input_list[0] else: @@ -47,7 +47,7 @@ def data_is_dask(data: DataArray | DataSet | DataList) -> bool: def process_parameter( parameter_name: str, parameter, default, n_data: int -) -> List[Any]: +) -> list[Any]: if parameter is None: return convert_to_list(default) * n_data elif isinstance(parameter, (list, tuple)): @@ -57,7 +57,7 @@ def process_parameter( return convert_to_list(parameter) * n_data -def convert_to_list(data: T | List[T] | Tuple[T]) -> List[T]: +def convert_to_list(data: T | list[T] | tuple[T]) -> list[T]: if isinstance(data, list): return data elif isinstance(data, tuple): @@ -162,21 +162,21 @@ def extract_latitude_dimension(feature_dims: Dims) -> Hashable: def get_dims( data: DataList, sample_dims: Hashable | Sequence[Hashable], -) -> Tuple[Dims, DimsList]: +) -> tuple[Dims, DimsList]: """Extracts the dimensions of a DataArray or Dataset that are not included in the sample dimensions. Parameters: ------------ data: xr.DataArray or xr.Dataset or list of xr.DataArray Input data. 
- sample_dims: Hashable or Sequence[Hashable] or List[Sequence[Hashable]] + sample_dims: Hashable or Sequence[Hashable] or list[Sequence[Hashable]] Sample dimensions. Returns: --------- - sample_dims: Tuple[Hashable] + sample_dims: tuple[Hashable] Sample dimensions. - feature_dims: Tuple[Hashable] + feature_dims: tuple[Hashable] Feature dimensions. """ @@ -199,12 +199,12 @@ def _get_feature_dims(data: DataArray | DataSet, sample_dims: Dims) -> Dims: ------------ data: xr.DataArray or xr.Dataset Input data. - sample_dims: Tuple[str] + sample_dims: tuple[str] Sample dimensions. Returns: --------- - feature_dims: Tuple[str] + feature_dims: tuple[str] Feature dimensions. """ diff --git a/xeofs/validation/bootstrapper.py b/xeofs/validation/bootstrapper.py index 8a1c78a4..71765978 100644 --- a/xeofs/validation/bootstrapper.py +++ b/xeofs/validation/bootstrapper.py @@ -1,15 +1,15 @@ from abc import ABC, abstractmethod from datetime import datetime -from typing import Any, Dict +from typing import Any import numpy as np import xarray as xr from tqdm import trange -from ..models import EOF +from .._version import __version__ from ..data_container import DataContainer +from ..single import EOF from ..utils.data_types import DataArray -from .._version import __version__ class _BaseBootstrapper(ABC): @@ -22,7 +22,7 @@ def __init__(self, n_bootstraps=20, seed=None): } # Define analysis-relevant meta data - self.attrs: Dict[str, Any] = {"model": "BaseBootstrapper"} + self.attrs: dict[str, Any] = {"model": "BaseBootstrapper"} self.attrs.update(self._params) self.attrs.update( {