# results

#!/usr/bin/env python
"""
Usage: results <tsv>
       results <tsv> --plot

Options:
--plot  Compute pull distributions and plot
"""

from docopt import docopt
from pathlib import Path
import pandas as pd
from rich.table import Table
from sqfactors import console
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgb


def zscore_to_colorstr(zscore: float, absmin_z: float, absmax_z: float) -> str:
    """Map a z-score to the colour string used in the results table markup.

    ``abs(zscore)`` is normalised to ``[0, 1]`` between ``absmin_z`` and
    ``absmax_z`` and pushed through a steep saturating curve
    ``(1 - exp(-40 x)) / (1 - exp(-40))``, so only values very close to the
    minimum stay near the start colour.  The result blends linearly from
    ``#111188`` (at the minimum) to ``#FF2233`` (at the maximum).

    Args:
        zscore: The value to colour; its sign is ignored.
        absmin_z: Smallest absolute z-score in the comparison set.
        absmax_z: Largest absolute z-score in the comparison set.

    Returns:
        ``'yellow'`` if the score (or either bound) is NaN, ``'white'`` if
        the score sits exactly at the minimum, otherwise ``'rgb(r,g,b)'``.
    """
    # Integer channels of '#111188' and '#FF2233'.
    start_rgb = (0x11, 0x11, 0x88)
    end_rgb = (0xFF, 0x22, 0x33)
    steepness = 40.0

    if np.isnan(zscore):
        return 'yellow'

    span = absmax_z - absmin_z
    if span == 0:
        # Every value in the set has the same magnitude, so this one is the
        # minimum by definition; avoid the 0/0 division entirely.
        fraction = 0.0
    else:
        fraction = (abs(zscore) - absmin_z) / span
    if np.isnan(fraction):
        return 'yellow'
    # Keep out-of-range scores from producing channels outside the
    # start/end colours (or overflowing the exponential below).
    fraction = min(max(fraction, 0.0), 1.0)

    scale = 1.0 / (1.0 - np.exp(-steepness))
    blend = scale - scale * np.exp(-steepness * fraction)
    if blend == 0:
        return 'white'

    red, green, blue = (
        int(lo + (hi - lo) * blend) for lo, hi in zip(start_rgb, end_rgb)
    )
    return f'rgb({red},{green},{blue})'

if __name__ == '__main__':
    # Read the command line and load the fit table, keeping only rows whose
    # 'Valid' flag is set.
    args = docopt(__doc__)
    fit_file = Path(args['<tsv>'])
    t = pd.read_table(fit_file)
    t = t[t['Valid']]

    # Count the distinct iterations now, before that column is dropped.
    total_iterations = t.groupby('Iteration').ngroups

    # Drop every bookkeeping column (anything mentioning these tags).
    bookkeeping_tags = ('Error', 'Iteration', 'Valid')
    kept_columns = [
        name
        for name in t.columns
        if not any(tag in name for tag in bookkeeping_tags)
    ]
    t = t[kept_columns]

    # The first remaining row is used as the reference ("truth") values.
    truth = t.iloc[0]

    # Column 0 is skipped in the arithmetic below (presumably the 'Method'
    # label, which the table is grouped by further down).
    reference = truth[1:]

    def f_diff(row):
        """Return the row's values minus the reference row's values."""
        return row[1:] - reference

    def f_abs(x):
        """Return ``abs(x)`` for plain numbers; pass other cells through."""
        return abs(x) if isinstance(x, (int, float)) else x

    # Replace the value columns in place with residuals w.r.t. the reference.
    t.iloc[:, 1:] = t.apply(f_diff, axis=1)

    # Per-method spread of the signed residuals; subtracting the same
    # reference row from every fit leaves the standard deviation unchanged.
    m_diff = t.groupby('Method')
    sigma = m_diff.std()

    # Per-method mean of the absolute residuals.
    m_absdiff = t.map(f_abs).groupby('Method')
    mean = m_absdiff.mean()

    z = mean / sigma
    # Keep every column that is not a method label.
    out = z[[col for col in z.columns if 'Method' not in col]]

    # Display order of the rows; methods absent from the data become NaN rows.
    index_order = [
        'Truth',
        'No Weights Analysis',
        'Sideband Subtraction Analysis',
        'InPlot Analysis',
        'Q-Factor Analysis',
        'Q-Factor Analysis (with t)',
        'Q-Factor Analysis (with g)',
        'Q-Factor Analysis (with t & g)',
        'sPlot Analysis',
        'sQ-Factor Analysis',
        'sQ-Factor Analysis (with t)',
        'sQ-Factor Analysis (with g)',
        'sQ-Factor Analysis (with t & g)',
    ]
    # A horizontal rule is drawn after these rows (positions 2 and 7 of
    # index_order); naming them keeps the rule in place if rows are reordered.
    section_ends = {
        'Sideband Subtraction Analysis',
        'Q-Factor Analysis (with t & g)',
    }
    out = out.reindex(index=index_order)

    # The per-column extremes of |z| are the same for every row, so compute
    # them once up front rather than once per rendered cell.
    abs_out = out.abs()
    col_absmin = abs_out.min()
    col_absmax = abs_out.max()

    tab = Table()
    tab.add_column(f'Method ({total_iterations}x)')
    for col in out.columns:
        tab.add_column(col)

    for method in index_order:
        if method == 'Truth':
            # The reference row is only used for normalisation, not shown.
            continue
        out_row = out.loc[method]
        rich_row = []
        for col in out.columns:
            absmin_z = col_absmin[col]
            color_str = zscore_to_colorstr(out_row[col], absmin_z, col_absmax[col])
            if abs(out_row[col]) == absmin_z:
                # Highlight the smallest |z| in each column.
                color_str += ' bold'
            rich_row.append(f'[{color_str}]{out_row[col]:.3f}[/]')
        tab.add_row(
            str(method).replace(' Analysis', ''),
            *rich_row,
            end_section=method in section_ends,
        )
    console.print(tab)

    def plot_pull(axis, method: str, var: str) -> None:
        """Histogram the pull distribution of ``var`` for one ``method``.

        The pulls are the method's entries in ``t[var]`` (residuals with
        respect to the reference row) divided by their own standard
        deviation.  A thick vertical line marks zero.  When the mean pull
        lies more than three units from zero the full histogram is drawn
        and an inset zooms in on the window around the mean; otherwise the
        histogram is restricted to that window and the x-axis is fixed to
        ``[-3, 3]``.

        Args:
            axis: Matplotlib axes to draw on.
            method: Value of the 'Method' column selecting the rows.
            var: Column of ``t`` to plot.

        Raises:
            KeyError: If ``method`` has no rows in the grouped table.
        """
        rows = m_diff.indices.get(method)
        if rows is None:
            raise KeyError(f'no rows for method {method!r} in the fit table')

        # Look the residuals up once and derive everything else from them.
        residuals = t.iloc[rows][var]
        sig = residuals.std()
        data = residuals.to_numpy()
        pulls = data / sig

        range_max = 3.0
        avg = np.mean(data) / sig
        window = (avg - range_max, avg + range_max)
        poor_fit = np.abs(avg) > range_max

        # Shared drawing; only the histogram range differs between the cases.
        hist_kwargs = {} if poor_fit else {'range': window}
        axis.axvline(0, color='k', lw=3, label=var)
        axis.hist(pulls, label=method.replace(' Analysis', ''), **hist_kwargs)
        axis.legend(loc='upper right')

        if poor_fit:
            # Mean is far from zero: add a zoomed view centred on the mean.
            inset = axis.inset_axes([0.4, 0.7, 0.2, 0.2])
            inset.hist(pulls, range=window)
            inset.set_xlim(*window)
        else:
            axis.set_xlim(-range_max, range_max)

    if args['--plot']:
        # Mosaic panel letter -> method drawn in that panel.  The layout
        # string below places A B C H on the first row, D E F G on the
        # second and I J K L on the third.
        panel_methods = {
            'A': 'No Weights Analysis',
            'B': 'Sideband Subtraction Analysis',
            'C': 'InPlot Analysis',
            'D': 'Q-Factor Analysis',
            'E': 'Q-Factor Analysis (with t)',
            'F': 'Q-Factor Analysis (with g)',
            'G': 'Q-Factor Analysis (with t & g)',
            'H': 'sPlot Analysis',
            'I': 'sQ-Factor Analysis',
            'J': 'sQ-Factor Analysis (with t)',
            'K': 'sQ-Factor Analysis (with g)',
            'L': 'sQ-Factor Analysis (with t & g)',
        }
        # One figure per value column, one panel per method.
        for col in [c for c in t.columns if c != 'Method']:
            _fig, ax = plt.subplot_mosaic("""ABCH\nDEFG\nIJKL""")
            for panel, method in panel_methods.items():
                plot_pull(ax[panel], var=col, method=method)
        # Show all figures together once they have been built.
        plt.show()