cognitedata · 23andreas · Aug 19, 2024 · Aug 4, 2024 · Aug 5, 2024 · Aug 6, 2024
@@ -29,3 +29,11 @@ Reindex
 
    * :ref:`sphx_glr_auto_examples_reindex_plot_pearson_correlation.py`
 
+Reindex scatterplot
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: indsl.resample.reindex_scatter
+
+Reindex scatterplot x-values
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. autofunction:: indsl.resample.reindex_scatter_x
+
@@ -0,0 +1,87 @@
+# Copyright 2024 Cognite AS
+"""
+=======================================================
+Re-indexing to mock a scatter plot
+=======================================================
+
+This shows how we can superimpose a scatter plot on an existing chart
+"""
+
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from datetime import datetime
+
+from indsl.resample.mock_scatter_plot import reindex_scatter, reindex_scatter_x
+
+# Resolve the dataset folder relative to this file (or to the working directory when run as a script)
+base_path = "" if __name__ == "__main__" else os.path.dirname(__file__)
+
+# Read in data for a production choke opening
+filename = os.path.join(base_path, "../../datasets/data/pd_series_HCV.pkl")
+HCV_series = pd.read_pickle(filename)
+
+
+# Creating a mock CV curve using a sine form
+n = 20
+x_values = np.linspace(0, 100, n)
+y_values = (np.sin(x_values / x_values.max() * np.pi - np.pi * 0.5) + 1) * 0.5
+from scipy.interpolate import interp1d
+
+# Calculate the CV value for the different choke openings, using the interpolated CV curve
+interpolator = interp1d(x_values, y_values)
+CV_array = interpolator(HCV_series.values)
+# Create the series for the CV value
+CV_series = pd.Series(CV_array, index=HCV_series.index)
+
+# We normalise the choke opening such that [0,100] covers the entire time range
+x_min_value = 0
+x_max_value = 100
+scatter_y = reindex_scatter(
+    HCV_series, CV_series, x_min_value=x_min_value, x_max_value=x_max_value, align_timesteps=True
+)
+scatter_x = reindex_scatter_x(HCV_series, x_min_value=x_min_value, x_max_value=x_max_value)
+
+
+fig = plt.figure(figsize=(12, 8))
+lns1 = plt.plot(HCV_series.index, HCV_series.values, "-b", label="Choke opening")
+axl = plt.gca()
+axr = axl.twinx()
+lns2 = axr.plot(scatter_y.index, scatter_y.values, ".r", label="CV curve")
+lns3 = axl.plot(scatter_x.index, scatter_x, ".g", label="Choke opening")
+axl.set_xlabel("Time/choke opening")
+axl.set_ylabel("Choke opening [%]")
+axr.set_ylabel("CV [-]")
+
+# Adding both time and choke opening for the x-axis.
+xticks_pos = axl.get_xticks()  # Get the position of the existing ticks
+xtick_labels = axl.get_xticklabels()  # Get the date for the existing ticks
+xticks_pos_epoc = [
+    datetime.strptime(val.get_text(), "%Y-%m-%d").timestamp() for val in xtick_labels
+]  # Convert the dates to timestamp
+
+# Need to convert the timestamp to the corresponding choke opening
+epoc_start = HCV_series.index[0].timestamp()
+epoc_end = HCV_series.index[-1].timestamp()
+d_epoc = epoc_end - epoc_start
+# The scale of HCV_series is [x_min_value,x_max_value]. We now map the epoch time of each tick linearly onto that scale
+xtic_labels_hcv = [
+    (val - epoc_start) / d_epoc * (x_max_value - x_min_value) for val in xticks_pos_epoc
+]  # gives us the HCV value for the corresponding points
+# Create the tick label consisting of the date, and the choke opening value on a new line
+xtick_labels_mod = [
+    val1.get_text() + "\n" + "%1.3g" % (min([max([val2, x_min_value]), x_max_value]))
+    for (val1, val2) in zip(xtick_labels, xtic_labels_hcv)
+]
+# Finally update the x-tick labels
+plt.xticks(xticks_pos, xtick_labels_mod)
+
+# Add the lines from both axes to a single legend
+lns = lns1 + lns2 + lns3
+labs = [l.get_label() for l in lns]
+plt.legend(lns, labs, loc=4)
+plt.xlim([HCV_series.index[0], HCV_series.index[-1]])
+plt.tight_layout()
+plt.show()
@@ -1,12 +1,29 @@
 # Copyright 2023 Cognite AS
 from .group_by import group_by_region
 from .interpolate import interpolate
+from .mock_scatter_plot import reindex_scatter, reindex_scatter_x
 from .reindex import reindex
 from .resample import resample, resample_to_granularity
 
 
 TOOLBOX_NAME = "Resample"
 
-__all__ = ["interpolate", "resample", "resample_to_granularity", "group_by_region", "reindex"]
+__all__ = [
+    "interpolate",
+    "resample",
+    "resample_to_granularity",
+    "group_by_region",
+    "reindex",
+    "reindex_scatter",
+    "reindex_scatter_x",
+]
 
-__cognite__ = ["interpolate", "resample", "resample_to_granularity", "group_by_region", "reindex"]
+__cognite__ = [
+    "interpolate",
+    "resample",
+    "resample_to_granularity",
+    "group_by_region",
+    "reindex",
+    "reindex_scatter",
+    "reindex_scatter_x",
+]
@@ -0,0 +1,111 @@
+# Copyright 2024 Cognite AS
+from datetime import datetime
+from typing import Tuple
+
+import numpy as np
+import pandas as pd
+
+from indsl.resample.auto_align import auto_align
+from indsl.type_check import check_types
+
+
+@check_types
+def __reindex_scatter_core(
+    signal_x: pd.Series, x_min_value: float = 0, x_max_value: float = 1
+) -> Tuple[np.array, np.array]:
+    """Reindex scatterplot core.
+
+    It returns a reindexed array of timestamp. The timestamps are creates such that the values from signal_x
+    are scaled to the range of signal_x.index, and then applied as index.
+    This is a way of creating a scatterplot inside a chart
+
+    Args:
+        signal_x: x-value.
+            The time series where the values are used as the x-value
+        x_min_value: Minimum x value.
+            Minimum x value, used to scale the x values.
+        x_max_value: Maximum x value.
+            Maximum x value, used to scale the x value.
+
+    Returns:
+        np.array: Scatter plot index, Scatter plot sort index
+    """
+    # convert timestamps to epoc
+    epoc = np.array([val.timestamp() for val in signal_x.index])
+    d_epoc = epoc[-1] - epoc[0]
+    # We will now map the values in singla_x to the epoc and then convert it back to datetime
+    sequence_epoc = signal_x.values / (x_max_value - x_min_value) * d_epoc
+    index_x_epoc = sequence_epoc + epoc[0]  # translate
+    index_x = np.array([datetime.fromtimestamp(epoc_) for epoc_ in index_x_epoc])
+    # create a sort index, such that the order is increasing
+    index_sort = np.argsort(index_x_epoc)
+
+    return index_x[index_sort], index_sort
+
+
+@check_types
+def reindex_scatter(
+    signal_x: pd.Series,
+    signal_y: pd.Series,
+    x_min_value: float = 0,
+    x_max_value: float = 1,
+    align_timesteps: bool = False,
+) -> pd.Series:
+    """Reindex scatterplot.
+
+    It returns the values from signal_y with the timestamps as the values from signal_x,
+    where the timestamps has been scaled to the range of timestamps from signal_x.
+    The timestamps are sorted in ascending order, and the values are sorted with the same sort-index
+
+    This is a way of creating a scatterplot inside a chart
+
+    Args:
+        signal_x: x-value.
+            The time series where the values are used as the x-value
+        signal_y: y-value.
+            The time series where the values are used as the y-value
+        x_min_value: Minimum x value.
+            Minimum x value, used to scale the x values.
+        x_max_value: Maximum x value.
+            Maximum x value, used to scale the x value.
+        align_timesteps (bool) : Auto-align
+            Automatically align time stamp  of input time series. Default is False.
+
+    Returns:
+        pandas.Series: Scatter plot
+    """
+    if align_timesteps:
+        signal_x, signal_y = auto_align([signal_x, signal_y], align_timesteps)
+
+    index_x_sorted, index_sort = __reindex_scatter_core(signal_x, x_min_value=x_min_value, x_max_value=x_max_value)
+
+    signal_scatter = pd.Series(signal_y.array[index_sort], index=index_x_sorted)
+
+    return signal_scatter
+
+
+@check_types
+def reindex_scatter_x(signal_x: pd.Series, x_min_value: float = 0, x_max_value: float = 1) -> pd.Series:
+    """Reindex scatterplot x-values.
+
+    It returns the values from signal_y with the timestamps as the values from signal_x,
+    where the timestamps has been scaled to the range of timestamps from signal_x.
+    The timestamps are sorted in ascending order, and the values are sorted with the same sort-index
+    In effect this is a straight line going from x_min_value to x_max_value over the time range of signal_x
+
+    Args:
+        signal_x: x-value.
+            The time series where the values are used as the x-value
+        x_min_value: Minimum x value.
+            Minimum x value, used to scale the x values.
+        x_max_value: Maximum x value.
+            Maximum x value, used to scale the x value.
+
+    Returns:
+        pandas.Series: Scatter plot
+    """
+    index_x_sorted, index_sort = __reindex_scatter_core(signal_x, x_min_value=x_min_value, x_max_value=x_max_value)
+
+    signal_scatter_x = pd.Series(signal_x.array[index_sort], index=index_x_sorted)
+
+    return signal_scatter_x
@@ -0,0 +1,65 @@
+# Copyright 2024 Cognite AS
+import random
+
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from pandas.testing import assert_series_equal
+
+from indsl.exceptions import UserTypeError, UserValueError
+from indsl.resample.mock_scatter_plot import reindex_scatter, reindex_scatter_x
+
+
+# Test for empty data
+@pytest.mark.core
+def test_reindex_scatter():
+
+    HCV_series = pd.read_pickle("./datasets/data/pd_series_HCV.pkl")
+    # defining the interpolation curve
+    n = 20
+    x_values = np.linspace(0, 100, n)
+    y_values = (np.sin(x_values / x_values.max() * np.pi - np.pi * 0.5) + 1) * 0.5
+
+    from scipy.interpolate import interp1d
+
+    interpolator = interp1d(x_values, y_values)
+    CV_array = interpolator(HCV_series.values)
+    # Create the series for the CV value
+    CV_series = pd.Series(CV_array, index=HCV_series.index)
+
+    x_min_value = 0
+    x_max_value = 100
+    signal_scatter = reindex_scatter(
+        HCV_series, CV_series, x_min_value=x_min_value, x_max_value=x_max_value, align_timesteps=True
+    )
+    signal_scatter_x = reindex_scatter_x(HCV_series, x_min_value=x_min_value, x_max_value=x_max_value)
+
+    # Calculate separately
+    # convert timestamps to epoc
+    epoc = np.array([val.timestamp() for val in HCV_series.index])
+    d_epoc = epoc[-1] - epoc[0]
+    # The scale of HCV_series is [0,100]. We will now map it to the epoc and then convert it to datetime
+    sequence_epoc = HCV_series.values / 100 * d_epoc
+    index_cv_epoc = sequence_epoc + epoc[0]  # translate
+    index_cv = np.array([datetime.fromtimestamp(epoc_) for epoc_ in index_cv_epoc])
+    # create a sort index, such that the order is increasing
+    index_sort = np.argsort(index_cv_epoc)
+    CV_series = pd.Series(CV_array[index_sort], index=index_cv[index_sort])
+    CV_series_x = pd.Series(HCV_series.values[index_sort], index=index_cv[index_sort])
+
+    assert_series_equal(signal_scatter, CV_series)
+    assert_series_equal(signal_scatter_x, CV_series_x)
+
+    if False:
+        import pylab as plt
+
+        fig = plt.figure(figsize=(12, 8))
+        plt.plot(HCV_series.index, HCV_series.values, "-b")
+        axl = plt.gca()
+        axr = axl.twinx()
+        axr.plot(signal_scatter.index, signal_scatter.values, "xr")
+        axr.plot(signal_scatter_x.index, signal_scatter_x * 0.01, ".g")
+        plt.show()