diff --git a/.gitignore b/.gitignore index 920a79a..935181c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ build dist *.egg-info .idea +builddir/* +venv/* \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 27c8426..6581113 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,11 @@ language: python +dist: focal + python: - - "3.5" - "3.6" - "3.7" + - "3.8" notifications: email: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b115fe..1a27869 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ ## Change logs +### v0.9.0 +The 0.9.x series drops support for Python 3.5.x. Only Python 3.6+ is now supported. Support has also been added for +Python 3.8. + +Cross-fit estimators have been implemented for better causal inference with machine learning. Cross-fit estimators +include `SingleCrossfitAIPTW`, `DoubleCrossfitAIPTW`, `SingleCrossfitTMLE`, and `DoubleCrossfitTMLE`. Currently, +functionality is limited to treatment and outcome nuisance models only (i.e. no model for missing data). These +estimators also do not accept weighted data (since most of `sklearn` does not support weights). + +Super-learner functionality has been added via `SuperLearner`. Additions also include the empirical mean (`EmpiricalMeanSL`), +generalized linear model (`GLMSL`), and step-wise backward/forward selection via AIC (`StepwiseSL`). These new +estimators are wrappers that are compatible with `SuperLearner` and mimic some of the R SuperLearner functionality. + +Directed Acyclic Graphs have been added via `DirectedAcyclicGraph`. This class analyzes the graph for sufficient +adjustment sets, and can be used to display the graph. It relies on an optional NetworkX dependency. + +`AIPTW` now supports the `custom_model` optional argument for user-input models, the same as `TMLE`. + +The `zipper_plot` function for creating zipper plots has been added. + +Housekeeping: `bound` has been updated to the new procedure, `print_results` displays have been made uniform, a +function to check missingness of input data in causal estimators has been created, a warning regarding ATT and ATU +variance for IPTW has been added, and observation IDs have been added back for `MonteCarloGFormula` + +Future plans: `TimeFixedGFormula` will be deprecated in favor of two estimators with different labels. This will more +clearly delineate ATE versus stochastic effects. The replacement estimators are to be added + ### v0.8.2 `IPSW` and `AIPSW` now natively support adjusting for confounding. Both now have the `treatment_model()` function, which calculates the inverse probability of treatment weights. How weights are handled in `AIPSW` are updated. They @@ -256,9 +283,9 @@ specified ``TMLE`` now allows estimation of risk ratios and odds ratios. Estimation procedure is based on ``tmle.R`` -``TMLE`` variance formula has been modified to match ``tmle.R`` rather than other resources. This is beneficial for future -implementation of missing data adjustment. Also would allow for mediation analysis with TMLE (not a priority for me at -this time). +``TMLE`` variance formula has been modified to match ``tmle.R`` rather than other resources. This is beneficial for +future implementation of missing data adjustment. Also would allow for mediation analysis with TMLE (not a priority +for me at this time). ``TMLE`` now includes an option to place bounds on predicted probabilities using the ``bound`` option. Default is to use all predicted probabilities. Either symmetrical or asymmetrical truncation can be specified. @@ -303,9 +330,14 @@ edition pg340. 
### v0.3.0 **BIG CHANGES**: -To conform with PEP and for clarity, all association/effect measures on a pandas dataframe are now class statements. This makes them distinct from the summary data calculators. Additionally, it allows users to access any part of the results now, unlike the previous implementation. The SD can be pulled from the corresponds results dataframe. Please see the updated webiste for how to use the class statements. +To conform with PEP and for clarity, all association/effect measures on a pandas dataframe are now class statements. +This makes them distinct from the summary data calculators. Additionally, it allows users to access any part of the +results now, unlike the previous implementation. The SD can be pulled from the corresponding results dataframe. Please +see the updated website for how to use the class statements. -Name changes within the calculator branch. With the shift of the dataframe calculations to classes, now these functions are given more descriptive names. Additionally, all functions now return a list of the point estimate, SD, lower CL, upper CL. Please see the website for all the new function names +Name changes within the calculator branch. With the shift of the dataframe calculations to classes, these +functions are now given more descriptive names. Additionally, all functions now return a list of the point estimate, SD, +lower CL, and upper CL. Please see the website for all the new function names Addition of Targeted Maximum Likelihood Estimator as zepid.causal.doublyrobust.TMLE @@ -372,18 +404,21 @@ Addition of IPW for Interference settings. No current timeline but hopefully bef Further conforming to PEP guidelines (my bad) #### 0.1.6 -Removed histogram option from IPTW in favor of kernel density. Since histograms are easy to generate with matplotlib, just dropped the entire option. +Removed histogram option from IPTW in favor of kernel density. Since histograms are easy to generate with matplotlib, +I just dropped the entire option. Created causal branch. IPW functions moved inside this branch -Added depreciation warning to the IPW branch, since this will be removed in 0.2 in favor of the causal branch for organization of future implemented methods +Added a deprecation warning to the IPW branch, since this will be removed in 0.2 in favor of the causal branch for +organization of future implemented methods Added time-fixed g-formula Added simple double-robust estimator (based on Funk et al 2011) #### 0.1.5 -Fix to 0.1.4 and since PyPI does not allow reuse of library versions, I had to create new one. Fixes issue with ipcw_prep() that was a pandas error (tried to drop NoneType from columns) +Fix to 0.1.4; since PyPI does not allow reuse of version numbers, I had to create a new one. Fixes an issue with +ipcw_prep() that was a pandas error (tried to drop NoneType from columns) #### 0.1.4 Updates: Added dynamic risk plot Fixes: Added user option to allow late entries for ipcw_prep() Updates: added ROC curve generator to graphics, allows user-specification of censoring indicator to ipcw, #### 0.1.2 -Original release. Previous versions (0.1.0, 0.1.1) had errors I found when trying to install via PyPI. I forgot to include the `package` statement in `setup` \ No newline at end of file +Original release. Previous versions (0.1.0, 0.1.1) had errors I found when trying to install via PyPI. 
I forgot to +include the `package` statement in `setup` \ No newline at end of file diff --git a/docs/Causal Graphs.rst b/docs/Causal Graphs.rst new file mode 100644 index 0000000..6f83ef0 --- /dev/null +++ b/docs/Causal Graphs.rst @@ -0,0 +1,87 @@ +.. image:: images/zepid_logo_small.png + +------------------------------------- + +Causal Graphs +''''''''''''' + +This page demonstrates analysis for causal diagrams (graphs). These diagrams are meant to help identify the sufficient +adjustment set to identify the causal effect. Currently, only directed acyclic graphs are supported, but single-world +intervention graphs will be added. + +Note that this branch requires installation of ``NetworkX``, since that library is used to analyze the graph objects + +Directed Acyclic Graphs +========================== +Directed acyclic graphs (DAGs) provide an easy graphical tool to determine sufficient adjustment sets to control for all +confounding and identify the causal effect of an exposure on an outcome. DAGs rely on the assumption of d-separation of +the exposure and outcome. Currently, the ``DirectedAcyclicGraph`` class only allows for assessing the d-separation +of the exposure and outcome. Additional support for checking d-separation between missingness, censoring, mediators, +and time-varying exposures will be added in future versions. + +Remember that DAGs should preferably be constructed prior to data collection. Also, the major assumptions that a DAG +makes are the *lack* of arrows and the *lack* of nodes. The assumptions are the items not present within the diagram. + +Let's look at some classical examples of DAGs. + +M-Bias +^^^^^^^^^^^ + +First we will create the "M-bias" DAG. This DAG is named after its distinct shape. + +.. code:: python + + from zepid.causal.causalgraph import DirectedAcyclicGraph + import matplotlib.pyplot as plt + + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + dag.add_arrows((('X', 'Y'), + ('U1', 'X'), ('U1', 'B'), + ('U2', 'B'), ('U2', 'Y') + )) + pos = {"X": [0, 0], "Y": [1, 0], "B": [0.5, 0.5], + "U1": [0, 1], "U2": [1, 1]} + + dag.draw_dag(positions=pos) + plt.tight_layout() + plt.show() + +.. image:: images/zepid_dag_mbias.png + +After creating the DAG, we can determine the sufficient adjustment set + +.. code:: python + + dag.calculate_adjustment_sets() + print(dag.adjustment_sets) + +Since B is a collider, the minimally sufficient adjustment set is the empty set. + +Butterfly-Bias +^^^^^^^^^^^^^^ +Butterfly-bias is an extension of the previous M-bias DAG where we need to adjust for B, but adjusting for B also opens +a backdoor path (specifically, the path on which it is a collider). + +.. code:: python + + dag.add_arrows((('X', 'Y'), + ('U1', 'X'), ('U1', 'B'), + ('U2', 'B'), ('U2', 'Y'), + ('B', 'X'), ('B', 'Y') + )) + + dag.draw_dag(positions=pos) + plt.tight_layout() + plt.show() + +.. image:: images/zepid_dag_bbias.png + +In the case of butterfly-bias, there are three possible adjustment sets + +.. code:: python + + dag.calculate_adjustment_sets() + print(dag.adjustment_sets) + +Remember that DAGs should preferably be constructed prior to data collection. Also, the major assumptions that a DAG +makes are the *lack* of arrows and the *lack* of nodes. The assumptions are the items not present within the diagram \ No newline at end of file diff --git a/docs/Graphics.rst b/docs/Graphics.rst index f66dbec..0de0d46 100644 --- a/docs/Graphics.rst +++ b/docs/Graphics.rst @@ -385,3 +385,29 @@ one more example, In this example, there is additive modification, but *no multiplicative modification*. 
These plots also can have the number of reference lines displayed changed, and support the keyword arguments of `plt.plot()` function. See the function documentation for further details. + + +Zipper Plot +=========== +Zipper plots provide an easy way to visualize the performance of confidence intervals in simulations. Confidence +intervals across simulations are displayed in a single plot, with the option to color the confidence limits by whether +they include the true value. Below is an example of a zipper plot. For ease, I generated the confidence intervals using +some random numbers (you would pull the confidence intervals from the estimators in practice). + +.. code:: python + + import numpy as np + import matplotlib.pyplot as plt + from zepid.graphics import zipper_plot + + lower = np.random.uniform(-0.1, 0.1, size=100) + upper = lower + np.random.uniform(0.1, 0.2, size=100) + + zipper_plot(truth=0, + lcl=lower, + ucl=upper, + colors=('blue', 'green')) + plt.show() + + +.. image:: images/zipper_example.png + +In this example, confidence interval coverage would be considered rather poor (if we are expecting the usual 95% +coverage). \ No newline at end of file diff --git a/docs/Reference/Causal.rst b/docs/Reference/Causal.rst index d30b44d..51a863e 100644 --- a/docs/Reference/Causal.rst +++ b/docs/Reference/Causal.rst @@ -2,6 +2,17 @@ Causal ====== Documentation for each of the causal inference methods implemented in zEpid +Causal Diagrams +--------------------------- + +.. currentmodule:: zepid.causal.causalgraph.dag + +.. autosummary:: + :toctree: generated/ + + DirectedAcyclicGraph + + Inverse Probability Weights --------------------------- @@ -60,6 +71,14 @@ Augmented Inverse Probability Weights AIPTW +.. currentmodule:: zepid.causal.doublyrobust.crossfit + +.. autosummary:: + :toctree: generated/ + + SingleCrossfitAIPTW + DoubleCrossfitAIPTW + Targeted Maximum Likelihood Estimator ------------------------------------- @@ -71,6 +90,14 @@ Targeted Maximum Likelihood Estimator TMLE StochasticTMLE +.. currentmodule:: zepid.causal.doublyrobust.crossfit + +.. autosummary:: + :toctree: generated/ + + SingleCrossfitTMLE + DoubleCrossfitTMLE + G-estimation of SNM ------------------- diff --git a/docs/Reference/Graphics.rst b/docs/Reference/Graphics.rst index 29bb59c..2404e0c 100644 --- a/docs/Reference/Graphics.rst +++ b/docs/Reference/Graphics.rst @@ -24,6 +24,7 @@ Displaying Results pvalue_plot dynamic_risk_plot labbe_plot + zipper_plot .. automodule:: zepid.graphics.graphics diff --git a/docs/Reference/Super Learner.rst b/docs/Reference/Super Learner.rst new file mode 100644 index 0000000..c0b2bb5 --- /dev/null +++ b/docs/Reference/Super Learner.rst @@ -0,0 +1,24 @@ +Super Learner +==================== +Details for super learner and associated candidate estimators +within zEpid. + +Super Learners +-------------- + +.. currentmodule:: zepid.superlearner.stackers + +.. autosummary:: + + SuperLearner + +Candidate Estimators +--------------------- + +.. currentmodule:: zepid.superlearner.estimators + +.. autosummary:: + + EmpiricalMeanSL + GLMSL + StepwiseSL diff --git a/docs/Reference/generated/zepid.base.Diagnostics.rst b/docs/Reference/generated/zepid.base.Diagnostics.rst index 400301b..e4cc3d3 100644 --- a/docs/Reference/generated/zepid.base.Diagnostics.rst +++ b/docs/Reference/generated/zepid.base.Diagnostics.rst @@ -1,14 +1,24 @@ -zepid.base.Diagnostics -========================================= +zepid.base.Diagnostics +====================== .. currentmodule:: zepid.base .. autoclass:: Diagnostics - :members: + + .. 
automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - + + ~Diagnostics.__init__ ~Diagnostics.fit ~Diagnostics.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.base.IncidenceRateDifference.rst b/docs/Reference/generated/zepid.base.IncidenceRateDifference.rst index 251401b..5202cc4 100644 --- a/docs/Reference/generated/zepid.base.IncidenceRateDifference.rst +++ b/docs/Reference/generated/zepid.base.IncidenceRateDifference.rst @@ -1,15 +1,25 @@ -zepid.base.IncidenceRateDifference -========================================= +zepid.base.IncidenceRateDifference +================================== .. currentmodule:: zepid.base .. autoclass:: IncidenceRateDifference - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - + + ~IncidenceRateDifference.__init__ ~IncidenceRateDifference.fit - ~IncidenceRateDifference.summary ~IncidenceRateDifference.plot + ~IncidenceRateDifference.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.base.IncidenceRateRatio.rst b/docs/Reference/generated/zepid.base.IncidenceRateRatio.rst index 334b836..4cf3129 100644 --- a/docs/Reference/generated/zepid.base.IncidenceRateRatio.rst +++ b/docs/Reference/generated/zepid.base.IncidenceRateRatio.rst @@ -1,15 +1,25 @@ -zepid.base.IncidenceRateRatio -========================================= +zepid.base.IncidenceRateRatio +============================= .. currentmodule:: zepid.base .. autoclass:: IncidenceRateRatio - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - + + ~IncidenceRateRatio.__init__ ~IncidenceRateRatio.fit - ~IncidenceRateRatio.summary ~IncidenceRateRatio.plot + ~IncidenceRateRatio.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.base.NNT.rst b/docs/Reference/generated/zepid.base.NNT.rst index 5c9e564..a60ca56 100644 --- a/docs/Reference/generated/zepid.base.NNT.rst +++ b/docs/Reference/generated/zepid.base.NNT.rst @@ -1,14 +1,24 @@ -zepid.base.NNT -========================================= +zepid.base.NNT +============== .. currentmodule:: zepid.base .. autoclass:: NNT - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - + + ~NNT.__init__ ~NNT.fit ~NNT.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.base.OddsRatio.rst b/docs/Reference/generated/zepid.base.OddsRatio.rst index ef84a95..3be492b 100644 --- a/docs/Reference/generated/zepid.base.OddsRatio.rst +++ b/docs/Reference/generated/zepid.base.OddsRatio.rst @@ -1,15 +1,25 @@ -zepid.base.OddsRatio -========================================= +zepid.base.OddsRatio +==================== .. currentmodule:: zepid.base .. autoclass:: OddsRatio - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - + + ~OddsRatio.__init__ ~OddsRatio.fit - ~OddsRatio.summary ~OddsRatio.plot + ~OddsRatio.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.base.RiskDifference.rst b/docs/Reference/generated/zepid.base.RiskDifference.rst index a04e12a..1a676f3 100644 --- a/docs/Reference/generated/zepid.base.RiskDifference.rst +++ b/docs/Reference/generated/zepid.base.RiskDifference.rst @@ -1,15 +1,25 @@ -zepid.base.RiskDifference -========================================= +zepid.base.RiskDifference +========================= .. currentmodule:: zepid.base .. autoclass:: RiskDifference - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. 
autosummary:: - + + ~RiskDifference.__init__ ~RiskDifference.fit - ~RiskDifference.summary ~RiskDifference.plot + ~RiskDifference.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.base.RiskRatio.rst b/docs/Reference/generated/zepid.base.RiskRatio.rst index d1bfab2..47c55e0 100644 --- a/docs/Reference/generated/zepid.base.RiskRatio.rst +++ b/docs/Reference/generated/zepid.base.RiskRatio.rst @@ -1,15 +1,25 @@ -zepid.base.RiskRatio -========================================= +zepid.base.RiskRatio +==================== .. currentmodule:: zepid.base .. autoclass:: RiskRatio - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - + + ~RiskRatio.__init__ ~RiskRatio.fit - ~RiskRatio.summary ~RiskRatio.plot + ~RiskRatio.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.base.Sensitivity.rst b/docs/Reference/generated/zepid.base.Sensitivity.rst index d4be908..7c145b9 100644 --- a/docs/Reference/generated/zepid.base.Sensitivity.rst +++ b/docs/Reference/generated/zepid.base.Sensitivity.rst @@ -1,14 +1,24 @@ -zepid.base.Sensitivity -========================================= +zepid.base.Sensitivity +====================== .. currentmodule:: zepid.base .. autoclass:: Sensitivity - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - + + ~Sensitivity.__init__ ~Sensitivity.fit ~Sensitivity.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.base.Specificity.rst b/docs/Reference/generated/zepid.base.Specificity.rst index d5bdc42..e47c579 100644 --- a/docs/Reference/generated/zepid.base.Specificity.rst +++ b/docs/Reference/generated/zepid.base.Specificity.rst @@ -1,14 +1,24 @@ -zepid.base.Specificity -========================================= +zepid.base.Specificity +====================== .. currentmodule:: zepid.base .. autoclass:: Specificity - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - + + ~Specificity.__init__ ~Specificity.fit ~Specificity.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.base.create_spline_transform.rst b/docs/Reference/generated/zepid.base.create_spline_transform.rst index 312a5a9..f2e3557 100644 --- a/docs/Reference/generated/zepid.base.create_spline_transform.rst +++ b/docs/Reference/generated/zepid.base.create_spline_transform.rst @@ -1,5 +1,5 @@ -zepid.base.create\_spline\_transform -===================================== +zepid.base.create\_spline\_transform +==================================== .. currentmodule:: zepid.base diff --git a/docs/Reference/generated/zepid.base.interaction_contrast.rst b/docs/Reference/generated/zepid.base.interaction_contrast.rst index 38d9831..0d994cc 100644 --- a/docs/Reference/generated/zepid.base.interaction_contrast.rst +++ b/docs/Reference/generated/zepid.base.interaction_contrast.rst @@ -1,5 +1,5 @@ -zepid.base.interaction_contrast -================================= +zepid.base.interaction\_contrast +================================ .. 
currentmodule:: zepid.base diff --git a/docs/Reference/generated/zepid.base.interaction_contrast_ratio.rst b/docs/Reference/generated/zepid.base.interaction_contrast_ratio.rst index c8b7532..933b4c1 100644 --- a/docs/Reference/generated/zepid.base.interaction_contrast_ratio.rst +++ b/docs/Reference/generated/zepid.base.interaction_contrast_ratio.rst @@ -1,5 +1,5 @@ -zepid.base.interaction_contrast_ratio -===================================== +zepid.base.interaction\_contrast\_ratio +======================================= .. currentmodule:: zepid.base diff --git a/docs/Reference/generated/zepid.base.spline.rst b/docs/Reference/generated/zepid.base.spline.rst index 8d2540d..e7efb2a 100644 --- a/docs/Reference/generated/zepid.base.spline.rst +++ b/docs/Reference/generated/zepid.base.spline.rst @@ -1,5 +1,5 @@ -zepid.base.spline -================================= +zepid.base.spline +================= .. currentmodule:: zepid.base diff --git a/docs/Reference/generated/zepid.base.table1_generator.rst b/docs/Reference/generated/zepid.base.table1_generator.rst index 6c87bf7..4292e93 100644 --- a/docs/Reference/generated/zepid.base.table1_generator.rst +++ b/docs/Reference/generated/zepid.base.table1_generator.rst @@ -1,5 +1,5 @@ -zepid.base.table1_generator -================================= +zepid.base.table1\_generator +============================ .. currentmodule:: zepid.base diff --git a/docs/Reference/generated/zepid.calc.utils.rubins_rules.rst b/docs/Reference/generated/zepid.calc.utils.rubins_rules.rst index 97ff71c..450aec8 100644 --- a/docs/Reference/generated/zepid.calc.utils.rubins_rules.rst +++ b/docs/Reference/generated/zepid.calc.utils.rubins_rules.rst @@ -1,6 +1,6 @@ -zepid.calc.utils.rubins\_rules +zepid.calc.utils.rubins\_rules ============================== .. currentmodule:: zepid.calc.utils -.. autofunction:: rubins_rules +.. autofunction:: rubins_rules \ No newline at end of file diff --git a/docs/Reference/generated/zepid.calc.utils.s_value.rst b/docs/Reference/generated/zepid.calc.utils.s_value.rst index 58ccd0c..bd14f0b 100644 --- a/docs/Reference/generated/zepid.calc.utils.s_value.rst +++ b/docs/Reference/generated/zepid.calc.utils.s_value.rst @@ -1,5 +1,5 @@ -zepid.calc.utils.s\_value -============================== +zepid.calc.utils.s\_value +========================= .. currentmodule:: zepid.calc.utils diff --git a/docs/Reference/generated/zepid.causal.causalgraph.dag.DirectedAcyclicGraph.rst b/docs/Reference/generated/zepid.causal.causalgraph.dag.DirectedAcyclicGraph.rst new file mode 100644 index 0000000..d9502d3 --- /dev/null +++ b/docs/Reference/generated/zepid.causal.causalgraph.dag.DirectedAcyclicGraph.rst @@ -0,0 +1,28 @@ +zepid.causal.causalgraph.dag.DirectedAcyclicGraph +================================================= + +.. currentmodule:: zepid.causal.causalgraph.dag + +.. autoclass:: DirectedAcyclicGraph + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. 
autosummary:: + + ~DirectedAcyclicGraph.__init__ + ~DirectedAcyclicGraph.add_arrow + ~DirectedAcyclicGraph.add_arrows + ~DirectedAcyclicGraph.add_from_networkx + ~DirectedAcyclicGraph.assess_misdirections + ~DirectedAcyclicGraph.calculate_adjustment_sets + ~DirectedAcyclicGraph.draw_dag + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.doublyrobust.AIPW.AIPTW.rst b/docs/Reference/generated/zepid.causal.doublyrobust.AIPW.AIPTW.rst index 9e54d70..bd498e9 100644 --- a/docs/Reference/generated/zepid.causal.doublyrobust.AIPW.AIPTW.rst +++ b/docs/Reference/generated/zepid.causal.doublyrobust.AIPW.AIPTW.rst @@ -1,24 +1,32 @@ -zepid.causal.doublyrobust.AIPW.AIPTW +zepid.causal.doublyrobust.AIPW.AIPTW ==================================== .. currentmodule:: zepid.causal.doublyrobust.AIPW .. autoclass:: AIPTW - :members: + + .. automethod:: __init__ + .. rubric:: Methods .. autosummary:: - + + ~AIPTW.__init__ ~AIPTW.exposure_model - ~AIPTW.outcome_model - ~AIPTW.missing_model ~AIPTW.fit - ~AIPTW.summary - ~AIPTW.run_diagnostics - ~AIPTW.positivity - ~AIPTW.standardized_mean_differences + ~AIPTW.missing_model + ~AIPTW.outcome_model ~AIPTW.plot_kde ~AIPTW.plot_love + ~AIPTW.positivity + ~AIPTW.run_diagnostics + ~AIPTW.standardized_mean_differences + ~AIPTW.summary + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.doublyrobust.TMLE.StochasticTMLE.rst b/docs/Reference/generated/zepid.causal.doublyrobust.TMLE.StochasticTMLE.rst index 9428a15..27dc302 100644 --- a/docs/Reference/generated/zepid.causal.doublyrobust.TMLE.StochasticTMLE.rst +++ b/docs/Reference/generated/zepid.causal.doublyrobust.TMLE.StochasticTMLE.rst @@ -1,17 +1,30 @@ -zepid.causal.doublyrobust.TMLE.StochastsicTMLE -============================================== +zepid.causal.doublyrobust.TMLE.StochasticTMLE +============================================= -.. currentmodule:: zepid.causal.doublyrobust.StochasticTMLE +.. currentmodule:: zepid.causal.doublyrobust.TMLE .. autoclass:: StochasticTMLE - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: + + ~StochasticTMLE.__init__ + ~StochasticTMLE.est_conditional_variance + ~StochasticTMLE.est_marginal_variance + ~StochasticTMLE.exposure_model + ~StochasticTMLE.fit + ~StochasticTMLE.outcome_model + ~StochasticTMLE.run_diagnostics + ~StochasticTMLE.summary + ~StochasticTMLE.targeting_step + + - ~TMLE.exposure_model - ~TMLE.outcome_model - ~TMLE.fit - ~TMLE.summary - ~TMLE.run_diagnostics + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.doublyrobust.TMLE.TMLE.rst b/docs/Reference/generated/zepid.causal.doublyrobust.TMLE.TMLE.rst index e26a8fa..a553094 100644 --- a/docs/Reference/generated/zepid.causal.doublyrobust.TMLE.TMLE.rst +++ b/docs/Reference/generated/zepid.causal.doublyrobust.TMLE.TMLE.rst @@ -1,27 +1,32 @@ -zepid.causal.doublyrobust.TMLE.TMLE +zepid.causal.doublyrobust.TMLE.TMLE =================================== .. currentmodule:: zepid.causal.doublyrobust.TMLE .. autoclass:: TMLE - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. 
autosummary:: - + + ~TMLE.__init__ ~TMLE.exposure_model - ~TMLE.outcome_model - ~TMLE.missing_model ~TMLE.fit - ~TMLE.summary - ~TMLE.run_diagnostics - ~TMLE.positivity - ~TMLE.standardized_mean_differences + ~TMLE.missing_model + ~TMLE.outcome_model ~TMLE.plot_kde ~TMLE.plot_love + ~TMLE.positivity + ~TMLE.run_diagnostics + ~TMLE.standardized_mean_differences + ~TMLE.summary + + - - - - + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.DoubleCrossfitAIPTW.rst b/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.DoubleCrossfitAIPTW.rst new file mode 100644 index 0000000..0562877 --- /dev/null +++ b/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.DoubleCrossfitAIPTW.rst @@ -0,0 +1,27 @@ +zepid.causal.doublyrobust.crossfit.DoubleCrossfitAIPTW +====================================================== + +.. currentmodule:: zepid.causal.doublyrobust.crossfit + +.. autoclass:: DoubleCrossfitAIPTW + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~DoubleCrossfitAIPTW.__init__ + ~DoubleCrossfitAIPTW.exposure_model + ~DoubleCrossfitAIPTW.fit + ~DoubleCrossfitAIPTW.outcome_model + ~DoubleCrossfitAIPTW.run_diagnostics + ~DoubleCrossfitAIPTW.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.DoubleCrossfitTMLE.rst b/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.DoubleCrossfitTMLE.rst new file mode 100644 index 0000000..9d96e84 --- /dev/null +++ b/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.DoubleCrossfitTMLE.rst @@ -0,0 +1,27 @@ +zepid.causal.doublyrobust.crossfit.DoubleCrossfitTMLE +===================================================== + +.. currentmodule:: zepid.causal.doublyrobust.crossfit + +.. autoclass:: DoubleCrossfitTMLE + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~DoubleCrossfitTMLE.__init__ + ~DoubleCrossfitTMLE.exposure_model + ~DoubleCrossfitTMLE.fit + ~DoubleCrossfitTMLE.outcome_model + ~DoubleCrossfitTMLE.run_diagnostics + ~DoubleCrossfitTMLE.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.SingleCrossfitAIPTW.rst b/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.SingleCrossfitAIPTW.rst new file mode 100644 index 0000000..0dc9a6e --- /dev/null +++ b/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.SingleCrossfitAIPTW.rst @@ -0,0 +1,27 @@ +zepid.causal.doublyrobust.crossfit.SingleCrossfitAIPTW +====================================================== + +.. currentmodule:: zepid.causal.doublyrobust.crossfit + +.. autoclass:: SingleCrossfitAIPTW + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~SingleCrossfitAIPTW.__init__ + ~SingleCrossfitAIPTW.exposure_model + ~SingleCrossfitAIPTW.fit + ~SingleCrossfitAIPTW.outcome_model + ~SingleCrossfitAIPTW.run_diagnostics + ~SingleCrossfitAIPTW.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.SingleCrossfitTMLE.rst b/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.SingleCrossfitTMLE.rst new file mode 100644 index 0000000..8b4dea2 --- /dev/null +++ b/docs/Reference/generated/zepid.causal.doublyrobust.crossfit.SingleCrossfitTMLE.rst @@ -0,0 +1,27 @@ +zepid.causal.doublyrobust.crossfit.SingleCrossfitTMLE +===================================================== + +.. 
currentmodule:: zepid.causal.doublyrobust.crossfit + +.. autoclass:: SingleCrossfitTMLE + + + .. automethod:: __init__ + + + .. rubric:: Methods + + .. autosummary:: + + ~SingleCrossfitTMLE.__init__ + ~SingleCrossfitTMLE.exposure_model + ~SingleCrossfitTMLE.fit + ~SingleCrossfitTMLE.outcome_model + ~SingleCrossfitTMLE.run_diagnostics + ~SingleCrossfitTMLE.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.generalize.estimators.AIPSW.rst b/docs/Reference/generated/zepid.causal.generalize.estimators.AIPSW.rst index 00959a2..499b65a 100644 --- a/docs/Reference/generated/zepid.causal.generalize.estimators.AIPSW.rst +++ b/docs/Reference/generated/zepid.causal.generalize.estimators.AIPSW.rst @@ -1,17 +1,27 @@ -zepid.causal.generalize.estimators.AIPSW -===================================================== +zepid.causal.generalize.estimators.AIPSW +======================================== .. currentmodule:: zepid.causal.generalize.estimators .. autoclass:: AIPSW - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - - ~AIPSW.sampling_model - ~AIPSW.treatment_model - ~AIPSW.outcome_model + + ~AIPSW.__init__ ~AIPSW.fit + ~AIPSW.outcome_model + ~AIPSW.sampling_model ~AIPSW.summary + ~AIPSW.treatment_model + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.generalize.estimators.GTransportFormula.rst b/docs/Reference/generated/zepid.causal.generalize.estimators.GTransportFormula.rst index 314ae0a..6d36712 100644 --- a/docs/Reference/generated/zepid.causal.generalize.estimators.GTransportFormula.rst +++ b/docs/Reference/generated/zepid.causal.generalize.estimators.GTransportFormula.rst @@ -1,15 +1,25 @@ -zepid.causal.generalize.estimators.GTransportFormula -===================================================== +zepid.causal.generalize.estimators.GTransportFormula +==================================================== .. currentmodule:: zepid.causal.generalize.estimators .. autoclass:: GTransportFormula - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - - ~GTransportFormula.outcome_model + + ~GTransportFormula.__init__ ~GTransportFormula.fit + ~GTransportFormula.outcome_model ~GTransportFormula.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.generalize.estimators.IPSW.rst b/docs/Reference/generated/zepid.causal.generalize.estimators.IPSW.rst index 90b01b9..ff20dfe 100644 --- a/docs/Reference/generated/zepid.causal.generalize.estimators.IPSW.rst +++ b/docs/Reference/generated/zepid.causal.generalize.estimators.IPSW.rst @@ -1,16 +1,26 @@ -zepid.causal.generalize.estimators.IPSW -================================================= +zepid.causal.generalize.estimators.IPSW +======================================= .. currentmodule:: zepid.causal.generalize.estimators .. autoclass:: IPSW - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. 
autosummary:: - - ~IPSW.sampling_model - ~IPSW.treatment_model + + ~IPSW.__init__ ~IPSW.fit + ~IPSW.sampling_model ~IPSW.summary + ~IPSW.treatment_model + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.gformula.TimeFixed.SurvivalGFormula.rst b/docs/Reference/generated/zepid.causal.gformula.TimeFixed.SurvivalGFormula.rst index f62f48e..53505a8 100644 --- a/docs/Reference/generated/zepid.causal.gformula.TimeFixed.SurvivalGFormula.rst +++ b/docs/Reference/generated/zepid.causal.gformula.TimeFixed.SurvivalGFormula.rst @@ -1,15 +1,25 @@ -zepid.causal.gformula.TimeFixed.SurvivalGFormula -================================================= +zepid.causal.gformula.TimeFixed.SurvivalGFormula +================================================ .. currentmodule:: zepid.causal.gformula.TimeFixed .. autoclass:: SurvivalGFormula - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - - ~SurvivalGFormula.outcome_model + + ~SurvivalGFormula.__init__ ~SurvivalGFormula.fit + ~SurvivalGFormula.outcome_model ~SurvivalGFormula.plot + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.gformula.TimeFixed.TimeFixedGFormula.rst b/docs/Reference/generated/zepid.causal.gformula.TimeFixed.TimeFixedGFormula.rst index b55d19f..816d2f9 100644 --- a/docs/Reference/generated/zepid.causal.gformula.TimeFixed.TimeFixedGFormula.rst +++ b/docs/Reference/generated/zepid.causal.gformula.TimeFixed.TimeFixedGFormula.rst @@ -1,20 +1,27 @@ -zepid.causal.gformula.TimeFixed.TimeFixedGFormula +zepid.causal.gformula.TimeFixed.TimeFixedGFormula ================================================= .. currentmodule:: zepid.causal.gformula.TimeFixed .. autoclass:: TimeFixedGFormula - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - - ~TimeFixedGFormula.outcome_model + + ~TimeFixedGFormula.__init__ ~TimeFixedGFormula.fit ~TimeFixedGFormula.fit_stochastic - - - - - + ~TimeFixedGFormula.outcome_model + ~TimeFixedGFormula.plot_kde + ~TimeFixedGFormula.run_diagnostics + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.gformula.TimeVary.IterativeCondGFormula.rst b/docs/Reference/generated/zepid.causal.gformula.TimeVary.IterativeCondGFormula.rst index 56af635..b8f3a86 100644 --- a/docs/Reference/generated/zepid.causal.gformula.TimeVary.IterativeCondGFormula.rst +++ b/docs/Reference/generated/zepid.causal.gformula.TimeVary.IterativeCondGFormula.rst @@ -1,17 +1,24 @@ -zepid.causal.gformula.TimeVary.IterativeCondGFormula -=============================================== +zepid.causal.gformula.TimeVary.IterativeCondGFormula +==================================================== .. currentmodule:: zepid.causal.gformula.TimeVary .. autoclass:: IterativeCondGFormula - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. 
autosummary:: - - ~MonteCarloGFormula.outcome_model - ~MonteCarloGFormula.fit - - - + + ~IterativeCondGFormula.__init__ + ~IterativeCondGFormula.fit + ~IterativeCondGFormula.outcome_model + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.gformula.TimeVary.MonteCarloGFormula.rst b/docs/Reference/generated/zepid.causal.gformula.TimeVary.MonteCarloGFormula.rst index 14e519a..2011e0e 100644 --- a/docs/Reference/generated/zepid.causal.gformula.TimeVary.MonteCarloGFormula.rst +++ b/docs/Reference/generated/zepid.causal.gformula.TimeVary.MonteCarloGFormula.rst @@ -1,22 +1,27 @@ -zepid.causal.gformula.TimeVary.MonteCarloGFormula -=============================================== +zepid.causal.gformula.TimeVary.MonteCarloGFormula +================================================= .. currentmodule:: zepid.causal.gformula.TimeVary .. autoclass:: MonteCarloGFormula - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - - ~MonteCarloGFormula.exposure_model - ~MonteCarloGFormula.outcome_model + + ~MonteCarloGFormula.__init__ ~MonteCarloGFormula.add_covariate_model ~MonteCarloGFormula.censoring_model + ~MonteCarloGFormula.exposure_model ~MonteCarloGFormula.fit + ~MonteCarloGFormula.outcome_model + + - - - - + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.ipw.IPCW.IPCW.rst b/docs/Reference/generated/zepid.causal.ipw.IPCW.IPCW.rst index fd5b540..ee2e3de 100644 --- a/docs/Reference/generated/zepid.causal.ipw.IPCW.IPCW.rst +++ b/docs/Reference/generated/zepid.causal.ipw.IPCW.IPCW.rst @@ -1,20 +1,24 @@ -zepid.causal.ipw.IPCW.IPCW +zepid.causal.ipw.IPCW.IPCW ========================== .. currentmodule:: zepid.causal.ipw.IPCW .. autoclass:: IPCW - :members: + + .. automethod:: __init__ + .. rubric:: Methods .. autosummary:: - - ~IPCW.regression_models + + ~IPCW.__init__ ~IPCW.fit + ~IPCW.regression_models + + - - - - + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.ipw.IPMW.IPMW.rst b/docs/Reference/generated/zepid.causal.ipw.IPMW.IPMW.rst index be7d546..f584ed1 100644 --- a/docs/Reference/generated/zepid.causal.ipw.IPMW.IPMW.rst +++ b/docs/Reference/generated/zepid.causal.ipw.IPMW.IPMW.rst @@ -1,19 +1,24 @@ -zepid.causal.ipw.IPMW.IPMW +zepid.causal.ipw.IPMW.IPMW ========================== .. currentmodule:: zepid.causal.ipw.IPMW .. autoclass:: IPMW - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - - ~IPMW.regression_models + + ~IPMW.__init__ ~IPMW.fit + ~IPMW.regression_models + + - - - - + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.ipw.IPTW.IPTW.rst b/docs/Reference/generated/zepid.causal.ipw.IPTW.IPTW.rst index 4c73483..0ca07ad 100644 --- a/docs/Reference/generated/zepid.causal.ipw.IPTW.IPTW.rst +++ b/docs/Reference/generated/zepid.causal.ipw.IPTW.IPTW.rst @@ -1,22 +1,33 @@ -zepid.causal.ipw.IPTW.IPTW +zepid.causal.ipw.IPTW.IPTW ========================== .. currentmodule:: zepid.causal.ipw.IPTW .. autoclass:: IPTW - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. 
autosummary:: - - ~IPTW.treatment_model + + ~IPTW.__init__ + ~IPTW.fit ~IPTW.marginal_structural_model ~IPTW.missing_model - ~IPTW.fit - ~IPTW.run_diagnostics - ~IPTW.positivity - ~IPTW.standardized_mean_differences ~IPTW.plot_boxplot ~IPTW.plot_kde ~IPTW.plot_love + ~IPTW.positivity + ~IPTW.run_diagnostics + ~IPTW.standardized_mean_differences + ~IPTW.summary + ~IPTW.treatment_model + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.ipw.IPTW.StochasticIPTW.rst b/docs/Reference/generated/zepid.causal.ipw.IPTW.StochasticIPTW.rst index 59c9bcc..fd9446e 100644 --- a/docs/Reference/generated/zepid.causal.ipw.IPTW.StochasticIPTW.rst +++ b/docs/Reference/generated/zepid.causal.ipw.IPTW.StochasticIPTW.rst @@ -1,16 +1,25 @@ -zepid.causal.ipw.IPTW.StochasticIPTW +zepid.causal.ipw.IPTW.StochasticIPTW ==================================== .. currentmodule:: zepid.causal.ipw.IPTW .. autoclass:: StochasticIPTW - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - - ~StochasticIPTW.treatment_model + + ~StochasticIPTW.__init__ ~StochasticIPTW.fit ~StochasticIPTW.summary + ~StochasticIPTW.treatment_model + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.causal.snm.g_estimation.GEstimationSNM.rst b/docs/Reference/generated/zepid.causal.snm.g_estimation.GEstimationSNM.rst index 57d0964..6dad2b2 100644 --- a/docs/Reference/generated/zepid.causal.snm.g_estimation.GEstimationSNM.rst +++ b/docs/Reference/generated/zepid.causal.snm.g_estimation.GEstimationSNM.rst @@ -1,17 +1,27 @@ -zepid.causal.snm.g\_estimation.GEstimationSNM +zepid.causal.snm.g\_estimation.GEstimationSNM ============================================= .. currentmodule:: zepid.causal.snm.g_estimation .. autoclass:: GEstimationSNM - :members: + + .. automethod:: __init__ + + .. rubric:: Methods .. autosummary:: - + + ~GEstimationSNM.__init__ ~GEstimationSNM.exposure_model - ~GEstimationSNM.structural_nested_model - ~GEstimationSNM.missing_model ~GEstimationSNM.fit + ~GEstimationSNM.missing_model + ~GEstimationSNM.structural_nested_model ~GEstimationSNM.summary + + + + + + \ No newline at end of file diff --git a/docs/Reference/generated/zepid.superlearner.estimators.EmpiricalMeanSL.rst b/docs/Reference/generated/zepid.superlearner.estimators.EmpiricalMeanSL.rst new file mode 100644 index 0000000..c71d3c3 --- /dev/null +++ b/docs/Reference/generated/zepid.superlearner.estimators.EmpiricalMeanSL.rst @@ -0,0 +1,14 @@ +zepid.superlearner.estimators.EmpiricalMeanSL +============================================= + +.. currentmodule:: zepid.superlearner.estimators + +.. autoclass:: EmpiricalMeanSL + :members: + + .. rubric:: Methods + + .. autosummary:: + + ~EmpiricalMeanSL.fit + ~EmpiricalMeanSL.predict diff --git a/docs/Reference/generated/zepid.superlearner.estimators.GLMSL.rst b/docs/Reference/generated/zepid.superlearner.estimators.GLMSL.rst new file mode 100644 index 0000000..210669c --- /dev/null +++ b/docs/Reference/generated/zepid.superlearner.estimators.GLMSL.rst @@ -0,0 +1,14 @@ +zepid.superlearner.estimators.GLMSL +============================================= + +.. currentmodule:: zepid.superlearner.estimators + +.. autoclass:: GLMSL + :members: + + .. rubric:: Methods + + .. 
autosummary:: + + ~GLMSL.fit + ~GLMSL.predict \ No newline at end of file diff --git a/docs/Reference/generated/zepid.superlearner.estimators.StepwiseSL.rst b/docs/Reference/generated/zepid.superlearner.estimators.StepwiseSL.rst new file mode 100644 index 0000000..0ff5a99 --- /dev/null +++ b/docs/Reference/generated/zepid.superlearner.estimators.StepwiseSL.rst @@ -0,0 +1,14 @@ +zepid.superlearner.estimators.StepwiseSL +============================================= + +.. currentmodule:: zepid.superlearner.estimators + +.. autoclass:: StepwiseSL + :members: + + .. rubric:: Methods + + .. autosummary:: + + ~StepwiseSL.fit + ~StepwiseSL.predict diff --git a/docs/Reference/generated/zepid.superlearner.stackers.SuperLearner.rst b/docs/Reference/generated/zepid.superlearner.stackers.SuperLearner.rst new file mode 100644 index 0000000..eeca760 --- /dev/null +++ b/docs/Reference/generated/zepid.superlearner.stackers.SuperLearner.rst @@ -0,0 +1,15 @@ +zepid.superlearner.stackers.SuperLearner +======================================== + +.. currentmodule:: zepid.superlearner.stackers + +.. autoclass:: SuperLearner + :members: + + .. rubric:: Methods + + .. autosummary:: + + ~SuperLearner.fit + ~SuperLearner.predict + ~SuperLearner.summary diff --git a/docs/Reference/index.rst b/docs/Reference/index.rst index 45f79ca..bd36916 100644 --- a/docs/Reference/index.rst +++ b/docs/Reference/index.rst @@ -13,6 +13,7 @@ description of the tools, please see the side-bar for each corresponding section Calculations Graphics Causal + Super Learner Sensitivity Data diff --git a/docs/Time-Fixed Exposure.rst b/docs/Time-Fixed Exposure.rst index 94b3dec..937a0ef 100644 --- a/docs/Time-Fixed Exposure.rst +++ b/docs/Time-Fixed Exposure.rst @@ -39,8 +39,8 @@ within defined strata, then g-estimation would be a good choice. Your causal que the list of potential estimators Second, does your question of interest require something not available for all methods? This can also narrow down -estimators, at least ones currently available. For example, only `TimeFixedGFormula` and `StochasticIPTW` allow for -stochastic treatments. See the tutorials +estimators, at least ones currently available. For example, only `TimeFixedGFormula`, `StochasticIPTW`, and +`StochasticTMLE` allow for stochastic treatments. See the tutorials on `Python for Epidemiologists `_ for further details on what each estimator can do. @@ -49,8 +49,8 @@ don't necessarily make one unilaterally better than the other. If all the estima generally be taken as a good sign. It builds some additional confidence in your results. If there are distinctly different results across the estimators, that means that at least one assumption is being substantively broken somewhere. In these situations, I would recommend the doubly robust estimators because they make less restrictive -parametric modeling assumptions. Alternatively, machine learning promises to make less restrictive assumptions regarding -functional forms. However, the lack of agreement between estimators should be noted in your paper. +modeling assumptions. Alternatively, machine learning promises to make less restrictive assumptions regarding +functional forms. However, the lack of agreement between estimators should be noted. Binary Outcome ============================================== @@ -105,8 +105,9 @@ Therefore, the Frechet bounds allow for partial identification of the causal eff values from two unit width (-1 to 1) to unit width (-0.87 to 0.13). However, we don't have point identification. 
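The examples below assume a data set ``df`` with the treatment ``art``, the outcome ``dead``, and the confounder
columns used in the model formulas is already in memory. A minimal setup sketch, assuming zEpid's built-in sample data
(the spline settings here are illustrative choices, not requirements), which also reproduces the crude estimate
discussed next:

.. code:: python

    from zepid import load_sample_data, spline, RiskDifference

    # zEpid's built-in sample data (time-fixed version); includes the columns
    # 'art', 'dead', 'male', 'age0', 'cd40', 'dvl0', and 'cd4_wk45' used below
    df = load_sample_data(timevary=False)

    # Restricted quadratic splines for age and baseline CD4 count
    df[['age_rs1', 'age_rs2']] = spline(df, 'age0', n_knots=3, term=2, restricted=True)
    df[['cd4_rs1', 'cd4_rs2']] = spline(df, 'cd40', n_knots=3, term=2, restricted=True)

    # Crude (unadjusted) risk difference of ART on 45-week death
    rd = RiskDifference()
    rd.fit(df, exposure='art', outcome='dead')
    rd.summary()
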
The following methods allow for point identification under the assumption of conditional exchangeability. -Our unadjusted estimate is -0.05 (-0.13, 0.04), which we could cautiously interpret as: ART is associated with a 4.5% -point reduction (95% CL: -0.128, 0.038) in the probability of death at 45-weeks. +Our unadjusted estimate is -0.05 (-0.13, 0.04), which we could interpret as: ART is associated with a 4.5% +point reduction (95% CL: -0.13, 0.04) in the probability of death at 45-weeks. However, this interpretation implies +that ART is given randomly (which is unlikely to occur in the data). Parametric g-formula ---------------------------------------- @@ -155,9 +156,9 @@ procedure. Below is an example that uses bootstrapped confidence limits. print('95% LCL', riskd - 1.96*se) print('95% UCL', riskd + 1.96*se) -In my run (your results may differ), the estimate 95% confidence limits were -0.148, -0.004. We could interpret our +In my run (your results may differ), the estimated 95% confidence limits were -0.15, 0.00. We could interpret our results as; the 45-week risk of death when everyone was treated with ART at enrollment was 7.6% points -(95% CL: -0.148, -0.004) lower than if no one had been treated with ART at enrollment. For further details and +(95% CL: -0.15, -0.00) lower than if no one had been treated with ART at enrollment. For further details and examples of other usage of this estimator see this `tutorial `_ @@ -195,9 +196,9 @@ model and print the results iptw.fit() iptw.summary() -My results were fairly similar to the g-formula (RD = -0.082; 95% CL: -0.156, -0.007). We would interpret this in a +My results were fairly similar to the g-formula (RD = -0.08; 95% CL: -0.16, -0.01). We would interpret this in a similar way: the 45-week risk of death when everyone was treated with ART at enrollment was 8.2% points -(95% CL: -0.156, -0.007) lower than if no one had been treated with ART at enrollment. +(95% CL: -0.16, -0.01) lower than if no one had been treated with ART at enrollment. To account for data that is missing at random, inverse probability of missing weights can be stacked together with IPTW. As of v0.8.0, this is built into the `IPTW` class. Below is an example with accounting for informative censoring @@ -213,7 +214,7 @@ IPTW. As of v0.8.0, this is built into the `IPTW` class. Below is an example wit iptw.fit() iptw.summary() -When accounting for censoring by the above variables, a similar is obtained (RD = -0.081, 95% CL: -0.156, -0.005). For +When accounting for censoring by the above variables, a similar result is obtained (RD = -0.08, 95% CL: -0.16, -0.01). For further details and examples of other usage of this estimator see this `tutorial `_ @@ -243,10 +244,10 @@ We can calculate the AIPTW estimator through the following code # Printing summary results aipw.summary() -In the printed results, we have an estimated risk difference of -0.085 (95% CL: -0.155, -0.015). Confidence intervals +In the printed results, we have an estimated risk difference of -0.08 (95% CL: -0.15, -0.02). Confidence intervals come from the efficient influence curve. You can also bootstrap confidence intervals. For the risk ratio, you will need to bootstrap the confidence intervals currently. 
Our results can be interpreted as: the 45-week risk of death -when everyone was treated with ART at enrollment was 8.5% points (95% CL: -0.155, -0.015) lower than if no one +when everyone was treated with ART at enrollment was 8.4% points (95% CL: -0.15, -0.02) lower than if no one had been treated with ART at enrollment. Similarly, we can also account for missing outcome data using inverse probability weights. Below is an example @@ -260,7 +261,9 @@ Similarly, we can also account for missing outcome data using inverse probabilit aipw.fit() aipw.summary() -For further details and examples see this +AIPTW can also be paired with machine learning algorithms, particularly super-learner. The use of machine learning with +AIPTW means we are making less restrictive parametric assumptions than the models described above. For further +details, using super-learner / sklearn with AIPTW, and examples see this `tutorial `_ Targeted maximum likelihood estimation ---------------------------------------- @@ -282,8 +285,8 @@ outcome data model (like `AIPTW` and `IPTW`). tmle.fit() tmle.summary() -Using TMLE, we estimate a risk difference of -0.082 (95% CL: -0.152, -0.012). We can interpret this as: the 45-week -risk of death when everyone was treated with ART at enrollment was 8.2% points (95% CL: -0.152, -0.012) lower than if +Using TMLE, we estimate a risk difference of -0.08 (95% CL: -0.15, -0.01). We can interpret this as: the 45-week +risk of death when everyone was treated with ART at enrollment was 8.3% points (95% CL: -0.15, -0.01) lower than if no one had been treated with ART at enrollment. TMLE can also be paired with machine learning algorithms, particularly super-learner. The use of machine learning with @@ -291,8 +294,55 @@ TMLE means we are making less restrictive parametric assumptions than all the mo details, using super-learner / sklearn with TMLE, and examples see this `tutorial `_ -**WARNING**: In v0.9.0, `TMLE` will be losing support of machine learning algorithms due to poor confidence interval -coverage. Instead machine learning algorithms will only be able to be used with crossfit estimators. +Single Cross-fit TMLE +---------------------------------------- +While both AIPTW and TMLE are able to incorporate the use of *some* machine learning algorithms, there are limits. +More specifically, both require that the machine learning algorithms are Donsker. Unfortunately, many flexible +algorithms we may want to use may not be Donsker. In this scenario, confidence interval coverage may be below what +is expected (i.e. the confidence intervals are overly narrow due to over-fitting by the machine learning algorithms). + +Recently, cross-fitting procedures have been proposed as a way to weaken this condition. Cross-fitting allows for +non-Donsker algorithms. For more extensive details on the cross-fitting procedure and why it is necessary, please see my +`paper `_ and the references within. + +*zEpid* supports both single and double cross-fitting for AIPTW and TMLE. The following is a simple example that uses +`SuperLearner` with a single cross-fitting procedure for TMLE. The 10-fold super-learner consists of a GLM, a +step-wise GLM with all first-order interactions, and a Random Forest. + +.. 
code:: + + import statsmodels.api as sm + from sklearn.ensemble import RandomForestClassifier + from zepid.superlearner import GLMSL, StepwiseSL, SuperLearner + from zepid.causal.doublyrobust import SingleCrossfitAIPTW, SingleCrossfitTMLE + + # SuperLearner setup + labels = ["LogR", "Step.int", "RandFor"] + candidates = [GLMSL(sm.families.family.Binomial()), + StepwiseSL(sm.families.family.Binomial(), selection="forward", order_interaction=0), + RandomForestClassifier()] + + # Single cross-fit TMLE + sctmle = SingleCrossfitTMLE(df, exposure='art', outcome='dead') + sctmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + SuperLearner(candidates, labels, folds=10, loss_function="nloglik"), + bound=0.01) + sctmle.outcome_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + SuperLearner(candidates, labels, folds=10, loss_function="nloglik")) + sctmle.fit() + sctmle.summary() + + +Using `SingleCrossfitTMLE`, we estimate a risk difference of -0.08 (95% CL: -0.17, 0.00). We can interpret this as: +the 45-week risk of death when everyone was treated with ART at enrollment was 8.3% points (95% CL: -0.17, 0.00) +lower than if no one had been treated with ART at enrollment. When comparing SingleCrossfitTMLE to the previous TMLE, +you can see the confidence intervals are wider. This is a result of weakening the parametric modeling restrictions +(by including the random forest as a possible option in super learner). + +As these are new procedures, guidelines on their use are still developing. In my experience, I would recommend using at +least 100 different partitions. Additionally, the data set must be fairly large (more than 500 observations) to +take advantage of the flexibility of the cross-fit estimators with machine learning. If the data is not that large, I +recommend using a higher number of folds with SuperLearner (if using it), single cross-fitting, and the +minimal number of required splits. G-estimation of SNM ---------------------------------------- @@ -344,9 +394,9 @@ Similarly, we need to bootstrap our confidence intervals print('95% LCL', psi - 1.96*se) print('95% UCL', psi + 1.96*se) -Overall, the SNM results are similar to the other models (RD = -0.088; 95% CL: -0.172, -0.003). Instead, we interpret +Overall, the SNM results are similar to the other models (RD = -0.09; 95% CL: -0.17, -0.00). Instead, we interpret this estimate as: the 45-week risk of death when everyone was treated with ART at enrollment was 8.8% points -(95% CL: -0.172, -0.003) lower than if no one had been treated with ART at enrollment across all confounder strata. +(95% CL: -0.17, -0.00) lower than if no one had been treated with ART at enrollment across all strata. SNM can be expanded to include additional terms. Below is code to do that. For this SNM, we will assess if there is modification by gender @@ -360,7 +410,7 @@ modification by gender snm.summary() The 45-week risk of death when everyone was treated with ART at enrollment was 17.6% points lower than if no one had -been treated with ART at enrollment, among women. Among men, risk of death with ART treatment at enrollment was +been treated with ART at enrollment, *among women*. Among men, risk of death with ART treatment at enrollment was 6.8% points lower compared to no treatment. Remember, g-estimation of SNM is distinct from these other methods and targets a different estimand. It is a great method to consider when you are interested in effect measure modification. @@ -374,11 +424,12 @@ Below is a figure summarizing the results across methods. 
As we can see, all the methods provided fairly similar answers, even the misspecified structural nested model. This will not always be the case. Differences in model results may indicate parametric model misspecification. In those -scenarios, it may be preferable to use a doubly robust estimator. +scenarios, it may be preferable to use a doubly-robust estimator with machine learning and cross-fitting (when +possible). Additionally, for simplicity we dropped all missing outcome data. We made the assumption that outcome data was missing -complete at random, a strong assumption. We could relax this assumption by pairing the above methods with -inverse-probability-of-missing-weights or using built-in methods (like `TMLE`'s `missing_model`) +completely at random, a strong assumption. We could relax this assumption using built-in methods +(e.g. `missing_model()` functions) Continuous Outcome ============================================== @@ -503,6 +554,42 @@ outcome. Our results are fairly similar to the other models. The mean 45-week CD4 T-cell count if everyone had been given ART at enrollment was 228.35 (95% CL: 118.97, 337.72) higher than the mean if everyone has not been given ART at baseline. +Single Cross-fit TMLE +---------------------------------------- +Similarly, we can pair TMLE with a cross-fitting procedure and machine learning. In this example, we use SuperLearner +with a GLM, a stepwise selection, and a random forest. + +.. code:: + + from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor + + # SuperLearner set-up + labels = ["LogR", "Step.int", "RandFor"] + b_candidates = [GLMSL(sm.families.family.Binomial()), + StepwiseSL(sm.families.family.Binomial(), selection="forward", order_interaction=0), + RandomForestClassifier(random_state=809512)] + c_candidates = [GLMSL(sm.families.family.Gaussian()), + StepwiseSL(sm.families.family.Gaussian(), selection="forward", order_interaction=0), + RandomForestRegressor(random_state=809512)] + + # Single cross-fit TMLE + sctmle = SingleCrossfitTMLE(df, exposure='art', outcome='cd4_wk45') + sctmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + SuperLearner(b_candidates, labels, folds=10, loss_function="nloglik"), + bound=0.01) + sctmle.outcome_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + SuperLearner(c_candidates, labels, folds=10)) + sctmle.fit(n_partitions=3, random_state=201820) + sctmle.summary() + +The mean 45-week CD4 T-cell count if everyone had been given ART at enrollment was 176.9 (95% CL: -37.7, 391.5) +higher than the mean if everyone had not been given ART at baseline. + +The point estimate is similar to other approaches, but the confidence intervals are substantially wider. This is likely +a result of the random forest dominating super-learner and being somewhat dependent on the particular split. This is +the penalty of weaker modeling assumptions (or rather, it showcases the undue confidence that results from assuming +that our particular parametric model is sufficient in other estimators). + G-estimation of SNM ---------------------------------------- Recall that g-estimation of SNM estimate the average causal effect within levels of the confounders, *not* the average causal effect in the population @@ -519,9 +606,9 @@ For illustrative purposes, I will specify a one-parameter SNM. Below is code to snm.fit() snm.summary() -Overall, the SNM results are similar to the other models (ATE = 266.56). 
-this estimate as: the mean 45-week CD T-cell count when everyone was treated with ART at enrollment was 266.56
-higher than if no one had been treated with ART at enrollment across all confounder strata.
+Overall, the SNM results are similar to the other models (ATE = 227.2). Instead, we interpret
+this estimate as: the mean 45-week CD4 T-cell count when everyone was treated with ART at enrollment was 227.2
+higher (95% CL: 134.2, 320.2) than if no one had been treated with ART at enrollment across all strata.

SNM can be expanded to include additional terms. Below is code to do that. For this SNM, we will assess if there is
modification by gender
@@ -534,9 +621,9 @@ modification by gender
    snm.fit()
    snm.summary()

-The mean 45-week CD4 T-cell count when everyone was treated with ART at enrollment was 258.73 higher than if no one had
-been treated with ART at enrollment, among women. Among men, CD4 T-cell count with ART treatment at enrollment was
-268.28 higher compared to no treatment.
+The mean 45-week CD4 T-cell count when everyone was treated with ART at enrollment was 277.1 higher than if no one had
+been treated with ART at enrollment, *among women*. Among men, CD4 T-cell count with ART treatment at enrollment was
+213.8 higher compared to no treatment.

Remember, g-estimation of SNM is distinct from these other methods and targets a different estimand. It is a great
method to consider when you are interested in effect measure modification.
diff --git a/docs/conf.py b/docs/conf.py
index a34fcd8..3300948 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -47,7 +47,7 @@
 # General information about the project.
 project = "zEpid"
 copyright = "2018, Paul Zivich"
-version = "0.5.1"
+version = "0.8.2"

 # The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx" diff --git a/docs/images/survival_gf_cif.png b/docs/images/survival_gf_cif.png index 82d6c04..8d40329 100644 Binary files a/docs/images/survival_gf_cif.png and b/docs/images/survival_gf_cif.png differ diff --git a/docs/images/zepid_ate.png b/docs/images/zepid_ate.png index 218c968..0e87a86 100644 Binary files a/docs/images/zepid_ate.png and b/docs/images/zepid_ate.png differ diff --git a/docs/images/zepid_crr.png b/docs/images/zepid_crr.png index 6fd2018..a038ff0 100644 Binary files a/docs/images/zepid_crr.png and b/docs/images/zepid_crr.png differ diff --git a/docs/images/zepid_dag_bbias.png b/docs/images/zepid_dag_bbias.png new file mode 100644 index 0000000..566da6e Binary files /dev/null and b/docs/images/zepid_dag_bbias.png differ diff --git a/docs/images/zepid_dag_mbias.png b/docs/images/zepid_dag_mbias.png new file mode 100644 index 0000000..2d66b15 Binary files /dev/null and b/docs/images/zepid_dag_mbias.png differ diff --git a/docs/images/zepid_effm.png b/docs/images/zepid_effm.png new file mode 100644 index 0000000..9efabcc Binary files /dev/null and b/docs/images/zepid_effm.png differ diff --git a/docs/images/zepid_effrd.png b/docs/images/zepid_effrd.png index ae64d62..709a820 100644 Binary files a/docs/images/zepid_effrd.png and b/docs/images/zepid_effrd.png differ diff --git a/docs/images/zepid_fform1.png b/docs/images/zepid_fform1.png index 4581e7e..02855dd 100644 Binary files a/docs/images/zepid_fform1.png and b/docs/images/zepid_fform1.png differ diff --git a/docs/images/zepid_fform2.png b/docs/images/zepid_fform2.png index 10f00f4..040b1b5 100644 Binary files a/docs/images/zepid_fform2.png and b/docs/images/zepid_fform2.png differ diff --git a/docs/images/zepid_fform3.png b/docs/images/zepid_fform3.png index 3951cfe..1cfda1a 100644 Binary files a/docs/images/zepid_fform3.png and b/docs/images/zepid_fform3.png differ diff --git a/docs/images/zepid_fform4.png b/docs/images/zepid_fform4.png index be54d6e..c40f82c 100644 Binary files a/docs/images/zepid_fform4.png and b/docs/images/zepid_fform4.png differ diff --git a/docs/images/zepid_labbe1.png b/docs/images/zepid_labbe1.png index bac3014..187bc30 100644 Binary files a/docs/images/zepid_labbe1.png and b/docs/images/zepid_labbe1.png differ diff --git a/docs/images/zepid_labbe2.png b/docs/images/zepid_labbe2.png index f66c154..21fd15f 100644 Binary files a/docs/images/zepid_labbe2.png and b/docs/images/zepid_labbe2.png differ diff --git a/docs/images/zepid_labbe3.png b/docs/images/zepid_labbe3.png index a2214ef..293a58e 100644 Binary files a/docs/images/zepid_labbe3.png and b/docs/images/zepid_labbe3.png differ diff --git a/docs/images/zepid_pvalue1.png b/docs/images/zepid_pvalue1.png index 91f7007..b76effa 100644 Binary files a/docs/images/zepid_pvalue1.png and b/docs/images/zepid_pvalue1.png differ diff --git a/docs/images/zepid_pvalue3.png b/docs/images/zepid_pvalue3.png index 0cd3f95..22a654b 100644 Binary files a/docs/images/zepid_pvalue3.png and b/docs/images/zepid_pvalue3.png differ diff --git a/docs/images/zepid_roc.png b/docs/images/zepid_roc.png index c2de1fe..0386920 100644 Binary files a/docs/images/zepid_roc.png and b/docs/images/zepid_roc.png differ diff --git a/docs/images/zepid_spaghetti.png b/docs/images/zepid_spaghetti.png index 3e5d4fb..e4ce9f8 100644 Binary files a/docs/images/zepid_spaghetti.png and b/docs/images/zepid_spaghetti.png differ diff --git a/docs/images/zepid_trapezoid.png b/docs/images/zepid_trapezoid.png index 0e34d4d..79d939d 100644 Binary files 
a/docs/images/zepid_trapezoid.png and b/docs/images/zepid_trapezoid.png differ diff --git a/docs/images/zepid_tvg1.png b/docs/images/zepid_tvg1.png index 986dd89..d184b01 100644 Binary files a/docs/images/zepid_tvg1.png and b/docs/images/zepid_tvg1.png differ diff --git a/docs/images/zepid_tvg2.png b/docs/images/zepid_tvg2.png index 2eaa5cc..ad36055 100644 Binary files a/docs/images/zepid_tvg2.png and b/docs/images/zepid_tvg2.png differ diff --git a/docs/images/zipper_example.png b/docs/images/zipper_example.png new file mode 100644 index 0000000..2b001e0 Binary files /dev/null and b/docs/images/zipper_example.png differ diff --git a/docs/index.rst b/docs/index.rst index 23e9fa4..4b0ffcf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -46,10 +46,11 @@ Contents: .. toctree:: :maxdepth: 3 + Causal Graphs Time-Fixed Exposure Time-Varying Exposure Generalizability - Missing Data + Missing Data Graphics Sensitivity Analyses Reference/index diff --git a/docs/website_examples/graphs.py b/docs/website_examples/graphs.py new file mode 100644 index 0000000..a959a23 --- /dev/null +++ b/docs/website_examples/graphs.py @@ -0,0 +1,426 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +##################################################################################################################### +# Causal Graphs +##################################################################################################################### +print("Running causal graphs...") + +from zepid.causal.causalgraph import DirectedAcyclicGraph + +dag = DirectedAcyclicGraph(exposure='X', outcome="Y") +dag.add_arrows((('X', 'Y'), + ('U1', 'X'), ('U1', 'B'), + ('U2', 'B'), ('U2', 'Y') + )) +pos = {"X": [0, 0], "Y": [1, 0], "B": [0.5, 0.5], + "U1": [0, 1], "U2": [1, 1]} + +dag.draw_dag(positions=pos) +plt.tight_layout() +plt.savefig("../images/zepid_dag_mbias.png", format='png', dpi=300) +plt.close() + +dag.calculate_adjustment_sets() +print(dag.adjustment_sets) + +dag.add_arrows((('X', 'Y'), + ('U1', 'X'), ('U1', 'B'), + ('U2', 'B'), ('U2', 'Y'), + ('B', 'X'), ('B', 'Y') + )) + +dag.draw_dag(positions=pos) +plt.tight_layout() +plt.savefig("../images/zepid_dag_bbias.png", format='png', dpi=300) +plt.close() + +dag.calculate_adjustment_sets() +print(dag.adjustment_sets) + +##################################################################################################################### +# Time-Fixed Exposure +##################################################################################################################### +print("Running time-fixed exposures...") + +from zepid.graphics import EffectMeasurePlot + +labels = ["Crude", "G-formula", "IPTW", "AIPTW", "TMLE", "SC-TMLE", "G-estimation"] +riskd = [-0.045, -0.076, -0.082, -0.084, -0.083, -0.083, -0.088] +lcl = [-0.128, -0.144, -0.156, -0.153, -0.152, -0.168, -0.172] +ucl = [0.038, -0.008, -0.007, -0.015, -0.013, 0.001, -0.004] + +p = EffectMeasurePlot(label=labels, effect_measure=riskd, lcl=lcl, ucl=ucl) +p.labels(center=0) +p.plot(figsize=(6.5, 3), t_adjuster=0.06, max_value=0.1, min_value=-0.25, decimal=2) +plt.tight_layout() +plt.savefig("../images/zepid_effrd.png", format='png', dpi=300) +plt.close() + +labels = ["G-formula", "IPTW", "AIPTW", "TMLE", "SC-TMLE", "G-estimation"] +ate = [226.90, 188.63, 195.64, 197.67, 176.93, 227.23] +lcl = [128.80, 75.89, 89.23, 102.48, -37.66, 134.23] +ucl = [325.00, 301.38, 302.06, 292.85, 391.52, 320.23] + +p = EffectMeasurePlot(label=labels, effect_measure=ate, lcl=lcl, ucl=ucl) 
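+# center=0 places the vertical reference line at the null value for the ATE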
+p.labels(center=0) +p.plot(figsize=(7, 3), t_adjuster=0.06, max_value=400, min_value=-50, decimal=1) +plt.tight_layout() +plt.savefig("../images/zepid_ate.png", format='png', dpi=300) +plt.close() + +######################################### +# Causal Survival Analysis +from zepid import load_sample_data, spline +from zepid.causal.gformula import SurvivalGFormula + +df = load_sample_data(False).drop(columns=['cd4_wk45']) +df['t'] = np.round(df['t']).astype(int) +df = pd.DataFrame(np.repeat(df.values, df['t'], axis=0), columns=df.columns) +df['t'] = df.groupby('id')['t'].cumcount() + 1 +df.loc[((df['dead'] == 1) & (df['id'] != df['id'].shift(-1))), 'd'] = 1 +df['d'] = df['d'].fillna(0) + +# Spline terms +df[['t_rs1', 't_rs2', 't_rs3']] = spline(df, 't', n_knots=4, term=2, restricted=True) +df[['cd4_rs1', 'cd4_rs2']] = spline(df, 'cd40', n_knots=3, term=2, restricted=True) +df[['age_rs1', 'age_rs2']] = spline(df, 'age0', n_knots=3, term=2, restricted=True) + +sgf = SurvivalGFormula(df.drop(columns=['dead']), idvar='id', exposure='art', outcome='d', time='t') +sgf.outcome_model(model='art + male + age0 + age_rs1 + age_rs2 + cd40 + ' + 'cd4_rs1 + cd4_rs2 + dvl0 + t + t_rs1 + t_rs2 + t_rs3', + print_results=False) + +sgf.fit(treatment='all') +sgf.plot(c='b') +sgf.fit(treatment='none') +sgf.plot(c='r') +plt.ylabel('Probability of death') +plt.tight_layout() +plt.savefig("../images/survival_gf_cif.png", format='png', dpi=300) +plt.close() + +##################################################################################################################### +# Time-Varying Exposure +##################################################################################################################### +print("Running time-varying exposures...") + +import numpy as np +import pandas as pd +from lifelines import KaplanMeierFitter + +from zepid import load_sample_data, spline +from zepid.causal.gformula import MonteCarloGFormula +from zepid.causal.ipw import IPTW, IPCW + +df = load_sample_data(timevary=True) + +# Background variable preparations +df['lag_art'] = df['art'].shift(1) +df['lag_art'] = np.where(df.groupby('id').cumcount() == 0, 0, df['lag_art']) +df['lag_cd4'] = df['cd4'].shift(1) +df['lag_cd4'] = np.where(df.groupby('id').cumcount() == 0, df['cd40'], df['lag_cd4']) +df['lag_dvl'] = df['dvl'].shift(1) +df['lag_dvl'] = np.where(df.groupby('id').cumcount() == 0, df['dvl0'], df['lag_dvl']) +df[['age_rs0', 'age_rs1', 'age_rs2']] = spline(df, 'age0', n_knots=4, term=2, restricted=True) # age spline +df['cd40_sq'] = df['cd40'] ** 2 # cd4 baseline cubic +df['cd40_cu'] = df['cd40'] ** 3 +df['cd4_sq'] = df['cd4'] ** 2 # cd4 current cubic +df['cd4_cu'] = df['cd4'] ** 3 +df['enter_sq'] = df['enter'] ** 2 # entry time cubic +df['enter_cu'] = df['enter'] ** 3 + +mcgf = MonteCarloGFormula(df, # Data set + idvar='id', # ID variable + exposure='art', # Exposure + outcome='dead', # Outcome + time_in='enter', # Start of study period + time_out='out') # End of time per study period +# Pooled Logistic Model: Treatment +exp_m = ('male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + ' + 'cd4 + cd4_sq + cd4_cu + dvl + enter + enter_sq + enter_cu') +mcgf.exposure_model(exp_m, + print_results=False, + restriction="g['lag_art']==0") # Restricts to only untreated (for ITT assumption) +# Pooled Logistic Model: Outcome +out_m = ('art + male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + ' + 'cd4 + cd4_sq + cd4_cu + dvl + enter + enter_sq + enter_cu') 
+mcgf.outcome_model(out_m,
+                   print_results=False,
+                   restriction="g['drop']==0")  # Restricting to only uncensored individuals
+# Pooled Logistic Model: Detectable viral load
+dvl_m = ('male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + '
+         'lag_cd4 + lag_dvl + lag_art + enter + enter_sq + enter_cu')
+mcgf.add_covariate_model(label=1,  # Order to fit time-varying models in
+                         covariate='dvl',  # Time-varying confounder
+                         print_results=False,
+                         model=dvl_m,
+                         var_type='binary')  # Variable type
+# Pooled Logistic Model: CD4 T-cell count
+cd4_m = ('male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + lag_cd4 + '
+         'lag_dvl + lag_art + enter + enter_sq + enter_cu')
+cd4_recode_scheme = ("g['cd4'] = np.maximum(g['cd4'], 1);"
+                     "g['cd4_sq'] = g['cd4']**2;"
+                     "g['cd4_cu'] = g['cd4']**3")
+mcgf.add_covariate_model(label=2,  # Order to fit time-varying models in
+                         covariate='cd4',  # Time-varying confounder
+                         model=cd4_m,
+                         print_results=False,
+                         recode=cd4_recode_scheme,  # Recoding process to use for each iteration of MCMC
+                         var_type='continuous')  # Variable type
+# Pooled Logistic Model: Censoring
+cens_m = ("male + age0 + age_rs0 + age_rs1 + age_rs2 + cd40 + cd40_sq + cd40_cu + dvl0 + lag_cd4 + " +
+          "lag_dvl + lag_art + enter + enter_sq + enter_cu")
+mcgf.censoring_model(cens_m, print_results=False)
+
+mcgf.fit(treatment="((g['art']==1) | (g['lag_art']==1))",  # Treatment plan
+         lags={'art': 'lag_art',  # Lagged variables to create each loop
+               'cd4': 'lag_cd4',
+               'dvl': 'lag_dvl'},
+         in_recode=("g['enter_sq'] = g['enter']**2;"  # Recode statement to execute at the start
+                    "g['enter_cu'] = g['enter']**3"),
+         sample=20000)  # Number of resamples from population (should be large number)
+
+# Accessing predicted outcome values
+gf = mcgf.predicted_outcomes
+
+# Fitting Kaplan Meier to Natural Course
+kmn = KaplanMeierFitter()
+kmn.fit(durations=gf['out'], event_observed=gf['dead'])
+
+# Fitting Kaplan Meier to Observed Data
+kmo = KaplanMeierFitter()
+kmo.fit(durations=df['out'], event_observed=df['dead'], entry=df['enter'])
+
+# Plotting risk functions
+plt.step(kmn.event_table.index, 1 - kmn.survival_function_, c='k', where='post', label='Natural')
+plt.step(kmo.event_table.index, 1 - kmo.survival_function_, c='gray', where='post', label='True')
+plt.legend()
+plt.tight_layout()
+plt.savefig("../images/zepid_tvg1.png", format='png', dpi=300)
+plt.close()
+
+# Treat-all plan
+mcgf.fit(treatment="all",
+         lags={'art': 'lag_art',
+               'cd4': 'lag_cd4',
+               'dvl': 'lag_dvl'},
+         in_recode=("g['enter_sq'] = g['enter']**2;"
+                    "g['enter_cu'] = g['enter']**3"),
+         sample=20000)
+g_all = mcgf.predicted_outcomes
+
+# Treat-none plan
+mcgf.fit(treatment="none",
+         lags={'art': 'lag_art',
+               'cd4': 'lag_cd4',
+               'dvl': 'lag_dvl'},
+         in_recode=("g['enter_sq'] = g['enter']**2;"
+                    "g['enter_cu'] = g['enter']**3"),
+         sample=20000)
+g_none = mcgf.predicted_outcomes
+
+# Custom treatment plan
+mcgf.fit(treatment="g['cd4'] <= 200",
+         lags={'art': 'lag_art',
+               'cd4': 'lag_cd4',
+               'dvl': 'lag_dvl'},
+         in_recode=("g['enter_sq'] = g['enter']**2;"
+                    "g['enter_cu'] = g['enter']**3"),
+         sample=20000,
+         t_max=None)
+g_cd4 = mcgf.predicted_outcomes
+
+# Risk curve under treat-all
+gfs = g_all.loc[g_all['uid_g_zepid'] != g_all['uid_g_zepid'].shift(-1)].copy()
+kma = KaplanMeierFitter()
+kma.fit(durations=gfs['out'], event_observed=gfs['dead'])
+
+# Risk curve under treat-none
+gfs = g_none.loc[g_none['uid_g_zepid'] != g_none['uid_g_zepid'].shift(-1)].copy()
+kmn = KaplanMeierFitter()
+kmn.fit(durations=gfs['out'], event_observed=gfs['dead'])
+
+# Risk curve under the custom CD4 plan
+gfs = g_cd4.loc[g_cd4['uid_g_zepid'] != g_cd4['uid_g_zepid'].shift(-1)].copy()
+kmc = KaplanMeierFitter()
+kmc.fit(durations=gfs['out'], event_observed=gfs['dead'])
+
+# Plotting risk functions
+plt.step(kma.event_table.index, 1 - kma.survival_function_, c='blue', where='post', label='All')
+plt.step(kmn.event_table.index, 1 - kmn.survival_function_, c='red', where='post', label='None')
+plt.step(kmc.event_table.index, 1 - kmc.survival_function_, c='green', where='post', label='CD4 < 200')
+plt.legend()
+plt.tight_layout()
+plt.savefig("../images/zepid_tvg2.png", format='png', dpi=300)
+plt.close()
+
+#####################################################################################################################
+# Graphics
+#####################################################################################################################
+print("Running graphics...")
+
+######################################
+# Functional form assessment
+import zepid as ze
+from zepid.graphics import functional_form_plot
+
+df = ze.load_sample_data(timevary=False)
+df['age0_sq'] = df['age0']**2
+df[['rqs0', 'rqs1']] = ze.spline(df, var='age0', term=2, n_knots=3, knots=[30, 40, 55], restricted=True)
+
+functional_form_plot(df, outcome='dead', var='age0', discrete=True)
+plt.tight_layout()
+plt.savefig("../images/zepid_fform1.png", format='png', dpi=300)
+plt.close()
+
+functional_form_plot(df, outcome='dead', var='age0', discrete=True, points=True)
+plt.tight_layout()
+plt.savefig("../images/zepid_fform2.png", format='png', dpi=300)
+plt.close()
+
+functional_form_plot(df, outcome='dead', var='age0', f_form='age0 + age0_sq', discrete=True)
+plt.tight_layout()
+plt.savefig("../images/zepid_fform3.png", format='png', dpi=300)
+plt.close()
+
+functional_form_plot(df, outcome='dead', var='age0', f_form='age0 + rqs0 + rqs1', discrete=True)
+plt.vlines(30, 0, 0.85, colors='gray', linestyles='--')
+plt.vlines(40, 0, 0.85, colors='gray', linestyles='--')
+plt.vlines(55, 0, 0.85, colors='gray', linestyles='--')
+plt.tight_layout()
+plt.savefig("../images/zepid_fform4.png", format='png', dpi=300)
+plt.close()
+
+######################################
+# P-value plot
+from zepid.graphics import pvalue_plot
+
+pvalue_plot(point=-0.049, sd=0.042)
+plt.tight_layout()
+plt.savefig("../images/zepid_pvalue1.png", format='png', dpi=300)
+plt.close()
+
+from matplotlib.lines import Line2D
+
+pvalue_plot(point=-0.049, sd=0.042, color='b', fill=False)
+pvalue_plot(point=-0.062, sd=0.0231, color='r', fill=False)
+plt.legend([Line2D([0], [0], color='b', lw=2),
+            Line2D([0], [0], color='r', lw=2)],
+           ['Our Study', 'Review'])
+plt.tight_layout()
+plt.savefig("../images/zepid_pvalue3.png", format='png', dpi=300)
+plt.close()
+
+######################################
+# Spaghetti Plot
+from zepid.graphics import spaghetti_plot
+
+df = ze.load_sample_data(timevary=True)
+
+spaghetti_plot(df, idvar='id', variable='cd4', time='enter')
+plt.tight_layout()
+plt.savefig("../images/zepid_spaghetti.png", format='png', dpi=300)
+plt.close()
+
+######################################
+# Effect Measure plot
+import numpy as np
+from zepid.graphics import EffectMeasurePlot
+
+labs = ['Overall', 'Adjusted', '',
+        '2012-2013', 'Adjusted', '',
+        '2013-2014', 'Adjusted', '',
+        '2014-2015', 'Adjusted']
+measure = [np.nan, 0.94, np.nan, np.nan, 1.22, np.nan, np.nan, 0.59, np.nan, np.nan, 1.09]
+lower = [np.nan, 0.77, np.nan, np.nan, '0.80', np.nan,
np.nan, '0.40', np.nan, np.nan, 0.83] +upper = [np.nan, 1.15, np.nan, np.nan, 1.84, np.nan, np.nan, 0.85, np.nan, np.nan, 1.44] + +p = EffectMeasurePlot(label=labs, effect_measure=measure, lcl=lower, ucl=upper) +p.labels(scale='log') +p.plot(figsize=(6.5, 3), t_adjuster=0.02, max_value=2, min_value=0.38) +plt.tight_layout() +plt.savefig("../images/zepid_effm.png", format='png', dpi=300) +plt.close() + +###################################### +# ROC +import matplotlib.pyplot as plt +import statsmodels.api as sm +import statsmodels.formula.api as smf +from statsmodels.genmod.families import family,links +from zepid.graphics import roc + +df = ze.load_sample_data(timevary=False).drop(columns=['cd4_wk45']).dropna() +f = sm.families.family.Binomial(sm.families.links.logit) +df['age0_sq'] = df['age0']**2 +df['cd40sq'] = df['cd40']**2 +model = 'dead ~ art + age0 + age0_sq + cd40 + cd40sq + dvl0 + male' +m = smf.glm(model, df, family=f).fit() +df['predicted'] = m.predict(df) + +roc(df.dropna(), true='dead', threshold='predicted') +plt.tight_layout() +plt.title('Receiver-Operator Curve') +plt.tight_layout() +plt.savefig("../images/zepid_roc.png", format='png', dpi=300) +plt.close() + +###################################### +# L'Abbe +from zepid.graphics import labbe_plot + +labbe_plot() +plt.tight_layout() +plt.savefig("../images/zepid_labbe1.png", format='png', dpi=300) +plt.close() + +labbe_plot(r1=[0.3, 0.5], r0=[0.2, 0.7], color='red') +plt.tight_layout() +plt.savefig("../images/zepid_labbe2.png", format='png', dpi=300) +plt.close() + +labbe_plot(r1=[0.25, 0.5], r0=[0.1, 0.2], color='red') +plt.tight_layout() +plt.savefig("../images/zepid_labbe3.png", format='png', dpi=300) +plt.close() + +###################################### +# Zipper plot +from zepid.graphics import zipper_plot +lower = np.random.RandomState(80412).uniform(-0.1, 0.1, size=100) +upper = lower + np.random.RandomState(192041).uniform(0.1, 0.2, size=100) + +zipper_plot(truth=0, + lcl=lower, + ucl=upper, + colors=('blue', 'green')) +plt.tight_layout() +plt.savefig("../images/zipper_example.png", format='png', dpi=300) +plt.close() + +##################################################################################################################### +# Sensitivity +##################################################################################################################### +print("Running sensitivity...") + +from zepid.sensitivity_analysis import trapezoidal + +plt.hist(trapezoidal(mini=1, mode1=1.5, mode2=3, maxi=3.5, size=250000), bins=100) +plt.tight_layout() +plt.savefig("../images/zepid_trapezoid.png", format='png', dpi=300) +plt.close() + +from zepid.sensitivity_analysis import MonteCarloRR + +mcrr = MonteCarloRR(observed_RR=0.73322, sample=10000) +mcrr.confounder_RR_distribution(trapezoidal(mini=0.9, mode1=1.1, mode2=1.7, maxi=1.8, size=10000)) +mcrr.prop_confounder_exposed(trapezoidal(mini=0.25, mode1=0.28, mode2=0.32, maxi=0.35, size=10000)) +mcrr.prop_confounder_unexposed(trapezoidal(mini=0.55, mode1=0.58, mode2=0.62, maxi=0.65, size=10000)) +mcrr.fit() + +mcrr.plot() +plt.tight_layout() +plt.savefig("../images/zepid_crr.png", format='png', dpi=300) +plt.close() diff --git a/docs/website_examples/t-fixed_point.py b/docs/website_examples/t-fixed_point.py new file mode 100644 index 0000000..af34559 --- /dev/null +++ b/docs/website_examples/t-fixed_point.py @@ -0,0 +1,293 @@ +import warnings +import numpy as np +import pandas as pd +import statsmodels.api as sm + +from zepid import load_sample_data, spline + + 
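+# NOTE: estimator classes are imported within each section below, next to their first use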
+#######################################################################################################################
+# Binary Outcome
+#######################################################################################################################
+
+df = load_sample_data(timevary=False)
+df = df.drop(columns=['cd4_wk45'])
+df[['cd4_rs1', 'cd4_rs2']] = spline(df, 'cd40', n_knots=3, term=2, restricted=True)
+df[['age_rs1', 'age_rs2']] = spline(df, 'age0', n_knots=3, term=2, restricted=True)
+
+#############################
+# Naive Risk Difference
+from zepid import RiskDifference
+
+rd = RiskDifference()
+rd.fit(df, exposure='art', outcome='dead')
+rd.summary()
+
+#############################
+# G-formula
+from zepid.causal.gformula import TimeFixedGFormula
+
+g = TimeFixedGFormula(df, exposure='art', outcome='dead')
+g.outcome_model(model='art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                print_results=False)
+# Estimating marginal effect under treat-all plan
+g.fit(treatment='all')
+r_all = g.marginal_outcome
+# Estimating marginal effect under treat-none plan
+g.fit(treatment='none')
+r_none = g.marginal_outcome
+
+riskd = r_all - r_none
+print('RD:', riskd)
+
+rd_results = []
+for i in range(1000):
+    with warnings.catch_warnings():
+        warnings.simplefilter(action='ignore', category=UserWarning)
+        s = df.sample(n=df.shape[0], replace=True)
+        g = TimeFixedGFormula(s, exposure='art', outcome='dead')
+        g.outcome_model(model='art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                        print_results=False)
+        g.fit(treatment='all')
+        r_all = g.marginal_outcome
+        g.fit(treatment='none')
+        r_none = g.marginal_outcome
+        rd_results.append(r_all - r_none)
+
+se = np.std(rd_results)
+print('95% LCL', riskd - 1.96*se)
+print('95% UCL', riskd + 1.96*se)
+
+#############################
+# IPTW
+from zepid.causal.ipw import IPTW
+
+iptw = IPTW(df, treatment='art', outcome='dead')
+iptw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                     bound=0.01, print_results=False)
+iptw.marginal_structural_model('art')
+iptw.fit()
+iptw.summary()
+
+#############################
+# AIPTW
+from zepid.causal.doublyrobust import AIPTW
+
+aipw = AIPTW(df, exposure='art', outcome='dead')
+# Treatment model
+aipw.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                    print_results=False, bound=0.01)
+# Outcome model
+aipw.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                   print_results=False)
+# Calculating estimate
+aipw.fit()
+# Printing summary results
+aipw.summary()
+
+#############################
+# TMLE
+from zepid.causal.doublyrobust import TMLE
+
+tmle = TMLE(df, exposure='art', outcome='dead')
+tmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                    print_results=False, bound=0.01)
+tmle.missing_model('art + male + age0 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                   print_results=False)
+tmle.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                   print_results=False)
+tmle.fit()
+tmle.summary()
+
+#############################
+# Cross-fitting
+from sklearn.ensemble import RandomForestClassifier
+from zepid.superlearner import GLMSL, StepwiseSL, SuperLearner
+from zepid.causal.doublyrobust import SingleCrossfitTMLE
+
+# SuperLearner set-up
+labels = ["LogR", "Step.int", "RandFor"]
+candidates = [GLMSL(sm.families.family.Binomial()),
+              StepwiseSL(sm.families.family.Binomial(), selection="forward", order_interaction=0),
+              RandomForestClassifier(random_state=809512)]
+
+# Single cross-fit TMLE
+sctmle = SingleCrossfitTMLE(df, exposure='art', outcome='dead')
+sctmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                      SuperLearner(candidates, labels, folds=10, loss_function="nloglik"),
+                      bound=0.01)
+sctmle.outcome_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                     SuperLearner(candidates, labels, folds=10, loss_function="nloglik"))
+sctmle.fit(n_partitions=3, random_state=201820)
+sctmle.summary()
+
+#############################
+# G-estimation
+from zepid.causal.snm import GEstimationSNM
+
+snm = GEstimationSNM(df, exposure='art', outcome='dead')
+# Specify treatment model
+snm.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                   print_results=False)
+# Specify structural nested model
+snm.structural_nested_model('art')
+# G-estimation
+snm.fit()
+snm.summary()
+
+psi = snm.psi
+print('Psi:', psi)
+
+psi_results = []
+for i in range(500):
+    with warnings.catch_warnings():
+        warnings.simplefilter(action='ignore', category=UserWarning)
+        dfs = df.sample(n=df.shape[0], replace=True)
+        snm = GEstimationSNM(dfs, exposure='art', outcome='dead')
+        snm.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                           print_results=False)
+        snm.structural_nested_model('art')
+        snm.fit()
+        psi_results.append(snm.psi)
+
+
+se = np.std(psi_results)
+print('95% LCL', psi - 1.96*se)
+print('95% UCL', psi + 1.96*se)
+
+snm = GEstimationSNM(df, exposure='art', outcome='dead')
+snm.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                   print_results=False)
+snm.structural_nested_model('art + art:male')
+snm.fit()
+snm.summary()
+
+#######################################################################################################################
+# Continuous Outcome
+#######################################################################################################################
+
+df = load_sample_data(timevary=False)
+df = df.drop(columns=['dead']).dropna()
+df[['cd4_rs1', 'cd4_rs2']] = spline(df, 'cd40', n_knots=3, term=2, restricted=True)
+df[['age_rs1', 'age_rs2']] = spline(df, 'age0', n_knots=3, term=2, restricted=True)
+
+#############################
+# G-formula
+g = TimeFixedGFormula(df, exposure='art', outcome='cd4_wk45', outcome_type='normal')
+g.outcome_model(model='art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                print_results=False)
+g.fit(treatment='all')
+r_all = g.marginal_outcome
+g.fit(treatment='none')
+r_none = g.marginal_outcome
+ate = r_all - r_none
+print('ATE:', ate)
+
+ate_results = []
+for i in range(1000):
+    with warnings.catch_warnings():
+        warnings.simplefilter(action='ignore', category=UserWarning)
+        s = df.sample(n=df.shape[0], replace=True)
+        g = TimeFixedGFormula(s, exposure='art', outcome='cd4_wk45', outcome_type='normal')
+        g.outcome_model(model='art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                        print_results=False)
+        g.fit(treatment='all')
+        r_all = g.marginal_outcome
+        g.fit(treatment='none')
+        r_none = g.marginal_outcome
+        ate_results.append(r_all - r_none)
+
+se = np.std(ate_results)
+print('95% LCL', ate - 1.96*se)
+print('95% UCL', ate + 1.96*se)
+
+#############################
+# IPTW
+
+ipw = IPTW(df, treatment='art', outcome='cd4_wk45')
+ipw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
+                    print_results=False,
bound=0.01) +ipw.marginal_structural_model('art') +ipw.fit() +ipw.summary() + +############################# +# AIPTW + +aipw = AIPTW(df, exposure='art', outcome='cd4_wk45') +aipw.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + print_results=False, bound=0.01) +aipw.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + print_results=False) +aipw.fit() +aipw.summary() + +############################# +# TMLE + +tmle = TMLE(df, exposure='art', outcome='cd4_wk45') +tmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + print_results=False, bound=0.01) +tmle.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + print_results=False) +tmle.fit() +tmle.summary() + +############################# +# Cross-fitting +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor + +# SuperLearner set-up +labels = ["LogR", "Step.int", "RandFor"] +b_candidates = [GLMSL(sm.families.family.Binomial()), + StepwiseSL(sm.families.family.Binomial(), selection="forward", order_interaction=0), + RandomForestClassifier(random_state=809512)] +c_candidates = [GLMSL(sm.families.family.Gaussian()), + StepwiseSL(sm.families.family.Gaussian(), selection="forward", order_interaction=0), + RandomForestRegressor(random_state=809512)] + +# Single cross-fit TMLE +sctmle = SingleCrossfitTMLE(df, exposure='art', outcome='cd4_wk45') +sctmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + SuperLearner(b_candidates, labels, folds=10, loss_function="nloglik"), + bound=0.01) +sctmle.outcome_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + SuperLearner(c_candidates, labels, folds=10)) +sctmle.fit(n_partitions=3, random_state=201820) +sctmle.summary() + +############################# +# G-estimation + +snm = GEstimationSNM(df, exposure='art', outcome='cd4_wk45') +snm.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + print_results=False) +snm.structural_nested_model('art') +snm.fit() +snm.summary() + +psi = snm.psi +print('Psi:', psi) + +psi_results = [] +for i in range(500): + with warnings.catch_warnings(): + warnings.simplefilter(action='ignore', category=UserWarning) + dfs = df.sample(n=df.shape[0], replace=True) + snm = GEstimationSNM(dfs, exposure='art', outcome='cd4_wk45') + snm.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + print_results=False) + snm.structural_nested_model('art') + snm.fit() + psi_results.append(snm.psi) + + +se = np.std(psi_results, ddof=1) +print('95% LCL', psi - 1.96*se) +print('95% UCL', psi + 1.96*se) + +snm = GEstimationSNM(df, exposure='art', outcome='cd4_wk45') +snm.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + print_results=False) +snm.structural_nested_model('art + art:male') +snm.fit() +snm.summary() diff --git a/setup.py b/setup.py index 932c9d0..6774cfd 100644 --- a/setup.py +++ b/setup.py @@ -16,28 +16,31 @@ 'zepid.calc', 'zepid.graphics', 'zepid.sensitivity_analysis', + 'zepid.superlearner', 'zepid.causal.ipw', 'zepid.causal.gformula', 'zepid.causal.doublyrobust', 'zepid.causal.generalize', 'zepid.causal.snm', + 'zepid.causal.causalgraph', 'zepid.datasets'], include_package_data=True, license='MIT', author='Paul Zivich', author_email='zepidpy@gmail.com', url='https://github.com/pzivich/zepid', - classifiers=['Programming Language :: Python :: 3.5', - 
'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7'], + classifiers=['Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8'], install_requires=['pandas>=0.18', 'numpy', 'statsmodels>=0.7.0', - 'matplotlib>=2.0,<3.1; python_version<"3.6"', - 'matplotlib>=2.0; python_version>="3.6"', + 'matplotlib>=2.0', 'scipy', 'tabulate', + 'sklearn', 'patsy'], - long_description=descript, + extras_require={"DirectedAcyclicGraph": ["networkx"], }, + long_description=descript, long_description_content_type="text/markdown", ) diff --git a/tests/graphics_manualtest.py b/tests/graphics_manualtest.py index 36656da..cd991d2 100644 --- a/tests/graphics_manualtest.py +++ b/tests/graphics_manualtest.py @@ -9,11 +9,12 @@ from zepid import (load_sample_data, RiskDifference, RiskRatio, OddsRatio, IncidenceRateDifference, IncidenceRateRatio, spline) from zepid.graphics import (EffectMeasurePlot, functional_form_plot, pvalue_plot, spaghetti_plot, - roc, dynamic_risk_plot, labbe_plot) + roc, dynamic_risk_plot, labbe_plot, zipper_plot) from zepid.causal.ipw import IPTW from zepid.causal.gformula import MonteCarloGFormula, SurvivalGFormula, TimeFixedGFormula from zepid.causal.doublyrobust import AIPTW, TMLE from zepid.sensitivity_analysis import MonteCarloRR, trapezoidal +from zepid.causal.causalgraph import DirectedAcyclicGraph def graphics_check(): @@ -97,6 +98,15 @@ def graphics_check(): dynamic_risk_plot(a, b, loess=False, point_color='green', line_color='green') plt.show() + # 7) Check Zipper Plot + lower = np.random.uniform(-0.1, 0.1, size=100) + upper = lower + np.random.uniform(0.1, 0.2, size=100) + zipper_plot(truth=0, + lcl=lower, + ucl=upper, + colors=('blue', 'green')) + plt.show() + def measures_check(): # 7) Check measures plots @@ -146,8 +156,8 @@ def causal_check(): g.run_diagnostics(decimal=3) # Check IPTW plots - ipt = IPTW(data, treatment='art', outcome='dead', stabilized=True) - ipt.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0') + ipt = IPTW(data, treatment='art', outcome='dead') + ipt.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', stabilized=True) ipt.marginal_structural_model('art') ipt.fit() ipt.plot_love() @@ -273,8 +283,18 @@ def mc_gformula_check(): plt.show() +def causalgraph_check(): + dag = DirectedAcyclicGraph(exposure="X", outcome="Y") + dag.add_arrow(source="X", endpoint="Y") + dag.add_arrow(source="V", endpoint="Y") + dag.add_arrows(pairs=(("W", "X"), ("W", "Y"))) + dag.draw_dag() + plt.show() + + # graphics_check() # senstivity_check() # measures_check() # causal_check() +# causalgraph_check() # mc_gformula_check() diff --git a/tests/test_causalgraphs.py b/tests/test_causalgraphs.py new file mode 100644 index 0000000..5e4b4f0 --- /dev/null +++ b/tests/test_causalgraphs.py @@ -0,0 +1,218 @@ +import pytest +import networkx as nx + +from zepid.causal.causalgraph import DirectedAcyclicGraph +from zepid.causal.causalgraph.dag import DAGError + + +@pytest.fixture +def arrow_list_1(): + return (('X', 'Y'), ('Z', 'X'), ('Z', 'Y'), + ('W', 'X'), ('W', 'V'), ('V', 'Y')) + + +@pytest.fixture +def arrow_list_2(): + return (('X', 'Y'), ('X', 'C'), + ('Y', 'C')) + + +@pytest.fixture +def arrow_list_3(): + return (('X', 'Y'), + ('Y', 'V')) + + +@pytest.fixture +def arrow_mbias(): + return (('X', 'Y'), + ('U1', 'X'), ('U1', 'B'), + ('U2', 'B'), ('U2', 'Y'), + ) + + +@pytest.fixture +def arrow_butterfly(): + return (('X', 'Y'), + 
('B', 'X'), ('B', 'Y'), + ('U1', 'X'), ('U1', 'B'), + ('U2', 'B'), ('U2', 'Y')) + + +class TestDirectedAcyclicGraph: + + def test_error_add_cyclic_arrow(self): + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + dag.add_arrows(pairs=(("X", "Y"), ("Y", "C"))) + with pytest.raises(DAGError): + dag.add_arrow("C", "X") + + def test_error_add_from_cyclic_arrow(self): + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + with pytest.raises(DAGError): + dag.add_arrows(pairs=(("X", "Y"), ("Y", "C"), ("C", "X"))) + + def test_error_read_networkx(self): + G = nx.DiGraph() + G.add_edges_from((("X", "Y"), ("Y", "C"), ("C", "X"))) + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + with pytest.raises(DAGError): + dag.add_from_networkx(G) + + def test_error_networkx_noX(self): + G = nx.DiGraph() + G.add_edge("W", "Y") + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + with pytest.raises(DAGError): + dag.add_from_networkx(G) + + def test_error_networkx_noY(self): + G = nx.DiGraph() + G.add_edge("X", "W") + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + with pytest.raises(DAGError): + dag.add_from_networkx(G) + + def test_read_networkx(self): + G = nx.DiGraph() + G.add_edges_from((("X", "Y"), ("C", "Y"), ("C", "X"))) + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + dag.add_from_networkx(G) + + def test_adjustment_set_1(self, arrow_list_1): + correct_set = [{"W", "Z"}, {"V", "Z"}, {"W", "V", "Z"}] + + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + dag.add_arrows(arrow_list_1) + dag.calculate_adjustment_sets() + + # Making sure number of adjustment sets are equal to correct sets + assert len(dag.adjustment_sets) == len(correct_set) + + # Checking no 'double' sets in adjustment sets + assert len(dag.adjustment_sets) == len(set(dag.adjustment_sets)) + + # Checking that all adjustment sets are in the correct + for i in dag.adjustment_sets: + assert set(i) in list(correct_set) + + def test_min_adjustment_set_1(self, arrow_list_1): + correct_set = [{"W", "Z"}, {"V", "Z"}] + + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + dag.add_arrows(arrow_list_1) + dag.calculate_adjustment_sets() + + # Making sure number of adjustment sets are equal to correct sets + assert len(dag.minimal_adjustment_sets) == len(correct_set) + + # Checking no 'double' sets in adjustment sets + assert len(dag.minimal_adjustment_sets) == len(set(dag.minimal_adjustment_sets)) + + # Checking that all adjustment sets are in the correct + for i in dag.minimal_adjustment_sets: + assert set(i) in list(correct_set) + + def test_adjustment_set_2(self, arrow_list_2): + correct_set = [()] + + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + dag.add_arrows(arrow_list_2) + dag.calculate_adjustment_sets() + print(dag.adjustment_sets) + + # Making sure number of adjustment sets are equal to correct sets + assert len(dag.adjustment_sets) == len(correct_set) + + # Checking no 'double' sets in adjustment sets + assert len(dag.adjustment_sets) == len(set(dag.adjustment_sets)) + + # Checking that minimal is the same + assert dag.adjustment_sets == dag.minimal_adjustment_sets + + def test_adjustment_set_3(self, arrow_list_3): + correct_set = [()] + + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + dag.add_arrows(arrow_list_3) + dag.calculate_adjustment_sets() + print(dag.adjustment_sets) + + # Making sure number of adjustment sets are equal to correct sets + assert len(dag.adjustment_sets) == len(correct_set) + + # Checking no 'double' sets in adjustment sets + assert 
len(dag.adjustment_sets) == len(set(dag.adjustment_sets)) + + # Checking that minimal is the same + assert dag.adjustment_sets == dag.minimal_adjustment_sets + + def test_mbias(self, arrow_mbias): + correct_set = [{}, + {'U1',}, {'U2',}, + {'U1', 'B'}, {'U1', 'U2'}, {'B', 'U2'}, + {'U1', 'B', 'U2'}] + + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + dag.add_arrows(arrow_mbias) + dag.calculate_adjustment_sets() + + # Making sure number of adjustment sets are equal to correct sets + assert len(dag.adjustment_sets) == len(correct_set) + + # Checking no 'double' sets in adjustment sets + assert len(dag.adjustment_sets) == len(set(dag.adjustment_sets)) + + # Checking that all adjustment sets are in the correct + for i in dag.adjustment_sets: + if len(i) != 0: + assert set(i) in list(correct_set) + + assert dag.minimal_adjustment_sets == [()] + + def test_butterfly(self, arrow_butterfly): + correct_set = [{'U1', 'B'}, {'B', 'U2'}, {'U1', 'B', 'U2'}] + + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + dag.add_arrows(arrow_butterfly) + dag.calculate_adjustment_sets() + + # Making sure number of adjustment sets are equal to correct sets + assert len(dag.adjustment_sets) == len(correct_set) + + # Checking no 'double' sets in adjustment sets + assert len(dag.adjustment_sets) == len(set(dag.adjustment_sets)) + + # Checking that all adjustment sets are in the correct + for i in dag.adjustment_sets: + assert set(i) in list(correct_set) + + for i in dag.minimal_adjustment_sets: + assert set(i) in [{'U1', 'B'}, {'B', 'U2'}] + + def test_no_mediator(self): + correct_set = [{'W', 'V'}] + + dag = DirectedAcyclicGraph(exposure='X', outcome="Y") + dag.add_arrows((("X", "Y"), + ("W", "X"), + ("W", "Y"), + ("V", "X"), + ("V", "Y"), + ("X", "M"), + ("M", "Y"), + )) + dag.calculate_adjustment_sets() + + # Making sure number of adjustment sets are equal to correct sets + assert len(dag.adjustment_sets) == len(correct_set) + + # Checking no 'double' sets in adjustment sets + assert len(dag.adjustment_sets) == len(set(dag.adjustment_sets)) + + # Checking that all adjustment sets are in the correct + for i in dag.adjustment_sets: + assert set(i) in list(correct_set) + + for i in dag.minimal_adjustment_sets: + assert set(i) in correct_set diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 6792a89..ce72635 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -152,3 +152,14 @@ def test_return_pandas_rct(self): def test_return_pandas_conf(self): df = ze.load_generalize_data(True) assert isinstance(df, type(pd.DataFrame())) + + +class TestZivichBreskin: + + def test_correct_ncols(self): + df = ze.load_zivich_breskin_data() + assert df.shape[1] == 7 + + def test_correct_nobs(self): + df = ze.load_zivich_breskin_data() + assert df.shape[0] == 3000 diff --git a/tests/test_doublyrobust.py b/tests/test_doublyrobust.py index 185be4c..1a61719 100644 --- a/tests/test_doublyrobust.py +++ b/tests/test_doublyrobust.py @@ -3,10 +3,17 @@ import pandas as pd import numpy.testing as npt import pandas.testing as pdt +import statsmodels.api as sm +from scipy.stats import logistic from sklearn.linear_model import LogisticRegression, LinearRegression import zepid as ze -from zepid.causal.doublyrobust import TMLE, AIPTW, StochasticTMLE +from zepid.causal.doublyrobust import (TMLE, AIPTW, StochasticTMLE, + SingleCrossfitAIPTW, DoubleCrossfitAIPTW, + SingleCrossfitTMLE, DoubleCrossfitTMLE, + calculate_joint_estimate) +from zepid.causal.doublyrobust.crossfit import (_sample_split_, 
_treatment_nuisance_, _outcome_nuisance_) +from zepid.superlearner import GLMSL class TestTMLE: @@ -239,6 +246,34 @@ def test_match_r_continuous_poisson(self, cf): npt.assert_allclose(tmle.average_treatment_effect, r_ate, rtol=1e-3) npt.assert_allclose(tmle.average_treatment_effect_ci, r_ci, rtol=1e-3) + def test_custom_model(self, df): + r_rd = -0.08440622 + r_ci = -0.1541104, -0.01470202 + tmle = TMLE(df, exposure='art', outcome='dead') + tmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + custom_model=GLMSL(sm.families.family.Binomial()), + print_results=False) + tmle.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + custom_model=GLMSL(sm.families.family.Binomial()), + print_results=False) + tmle.fit() + npt.assert_allclose(tmle.risk_difference, r_rd) + npt.assert_allclose(tmle.risk_difference_ci, r_ci, rtol=1e-5) + + def test_custom_model_continuous(self, cf): + r_ate = 223.4022 + r_ci = 118.6037, 328.2008 + + tmle = TMLE(cf, exposure='art', outcome='cd4_wk45') + tmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + custom_model=GLMSL(sm.families.family.Binomial()), print_results=False) + tmle.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + custom_model=GLMSL(sm.families.family.Gaussian()), print_results=False) + tmle.fit() + + npt.assert_allclose(tmle.average_treatment_effect, r_ate, rtol=1e-3) + npt.assert_allclose(tmle.average_treatment_effect_ci, r_ci, rtol=1e-3) + def test_sklearn_in_tmle(self, df): log = LogisticRegression(C=1.0, solver='liblinear') tmle = TMLE(df, exposure='art', outcome='dead') @@ -732,7 +767,6 @@ def test_weighted_continuous_outcomes(self, cf): print_results=False) aipw.fit() npt.assert_allclose(aipw.average_treatment_effect, 225.13767, rtol=1e-3) - assert aipw.average_treatment_effect_ci is None def test_bounds(self, df): aipw = AIPTW(df, exposure='art', outcome='dead') @@ -753,5 +787,469 @@ def test_bounds2(self, df): print_results=False) aipw.fit() - npt.assert_allclose(aipw.risk_difference, -0.0700780176) - npt.assert_allclose(aipw.risk_difference_ci, (-0.1277925885, -0.0123634468)) + npt.assert_allclose(aipw.risk_difference, -0.075280886) + npt.assert_allclose(aipw.risk_difference_ci, (-0.1329296715, -0.0176321005)) + + def test_custom_binary(self, df): + aipw = AIPTW(df, exposure='art', outcome='dead') + aipw.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + custom_model=GLMSL(sm.families.family.Binomial()), + print_results=False) + aipw.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + custom_model=GLMSL(sm.families.family.Binomial()), + print_results=False) + aipw.fit() + npt.assert_allclose(aipw.risk_difference, -0.0848510605) + + def test_custom_continuous(self, cf): + aipw = AIPTW(cf, exposure='art', outcome='cd4_wk45') + aipw.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + custom_model=GLMSL(sm.families.family.Binomial()), + print_results=False) + aipw.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', + custom_model=GLMSL(sm.families.family.Gaussian()), + print_results=False) + aipw.fit() + npt.assert_allclose(aipw.average_treatment_effect, 225.13767, rtol=1e-3) + npt.assert_allclose(aipw.average_treatment_effect_ci, [118.64677, 331.62858], rtol=1e-3) + + +class TestCrossfitUtils: + + @pytest.fixture + def data(self): + n = 100 + data = 
pd.DataFrame() + data['W'] = np.random.normal(size=n) + data['A'] = np.random.binomial(n=1, p=logistic.cdf(-0.5 + 0.1 * data['W']), size=n) + data['Y'] = data['W'] + 5 * data['A'] + np.random.normal(size=n) + return data + + def test_split_nonoverlap2(self, data): + splits = _sample_split_(data, n_splits=2) + + split_id_list = [] + id_count = 0 + for s in splits: + id_count += s.shape[0] + split_id_list.extend(list(s.index)) + assert len(set(split_id_list)) == id_count + + def test_split_nonoverlap10(self, data): + splits = _sample_split_(data, n_splits=10) + + split_id_list = [] + id_count = 0 + for s in splits: + id_count += s.shape[0] + split_id_list.extend(list(s.index)) + assert len(set(split_id_list)) == id_count + + def test_treatment_nuisance(self, data): + number_of_splits = 3 + splits = _sample_split_(data, n_splits=number_of_splits) + fnm = _treatment_nuisance_(treatment="A", estimator=LogisticRegression(solver='lbfgs'), + samples=splits, covariates="W") + assert len(fnm) == number_of_splits + for nm in fnm: + assert type(nm) == LogisticRegression + + def test_outcome_nuisance(self, data): + number_of_splits = 3 + splits = _sample_split_(data, n_splits=number_of_splits) + fnm = _outcome_nuisance_(outcome="A", estimator=LogisticRegression(solver='lbfgs'), + samples=splits, covariates="A + W") + assert len(fnm) == number_of_splits + for nm in fnm: + assert type(nm) == LogisticRegression + + +class TestMergeEstimates: + + def test_median_method(self): + truth = (0.5, 0.4183300132670378**2) + calc = calculate_joint_estimate(point_est=np.array([0.5, 0.5, 0.7, 0.9, 0.3, 0.2]), + var_est=np.array([0.1, 0.1, 0.15, 0.2, 0.12, 0.16]), + method="median") + npt.assert_allclose(truth, calc) + + def test_mean_method(self): + truth = (0.5166666666666667, 0.43938087754880223**2) + calc = calculate_joint_estimate(point_est=np.array([0.5, 0.5, 0.7, 0.9, 0.3, 0.2]), + var_est=np.array([0.1, 0.1, 0.15, 0.2, 0.12, 0.16]), + method="mean") + npt.assert_allclose(truth, calc) + + +class TestSingleCrossfitAIPTW: + + @pytest.fixture + def data(self): + n = 100000 + data = pd.DataFrame() + data['W'] = np.random.normal(size=n) + data['A'] = np.random.binomial(n=1, p=logistic.cdf(-0.5 + 0.1 * data['W']), size=n) + data['Y'] = data['W'] + 5 * data['A'] + np.random.normal(size=n) + return data + + @pytest.fixture + def df(self): + df = ze.load_sample_data(False) + df[['cd4_rs1', 'cd4_rs2']] = ze.spline(df, 'cd40', n_knots=3, term=2, restricted=True) + df[['age_rs1', 'age_rs2']] = ze.spline(df, 'age0', n_knots=3, term=2, restricted=True) + return df.drop(columns=['cd4_wk45']).dropna() + + @pytest.fixture + def cf(self): + df = ze.load_sample_data(False) + df[['cd4_rs1', 'cd4_rs2']] = ze.spline(df, 'cd40', n_knots=3, term=2, restricted=True) + df[['age_rs1', 'age_rs2']] = ze.spline(df, 'age0', n_knots=3, term=2, restricted=True) + return df.drop(columns=['dead']).dropna() + + def test_drop_missing_data(self): + df = ze.load_sample_data(False) + aipw = SingleCrossfitAIPTW(df, exposure='art', outcome='dead') + assert df.dropna().shape[0] == aipw.df.shape[0] + + def test_error_when_no_models_specified1(self, df): + aipw = SingleCrossfitAIPTW(df, exposure='art', outcome='dead') + with pytest.raises(ValueError): + aipw.fit() + + def test_error_when_no_models_specified2(self, df): + aipw = SingleCrossfitAIPTW(df, exposure='art', outcome='dead') + aipw.exposure_model('male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + aipw.fit() + + def test_error_when_no_models_specified3(self, df): 
+ aipw = SingleCrossfitAIPTW(df, exposure='art', outcome='dead') + aipw.outcome_model('art + male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + aipw.fit() + + def test_error_invalid_splits(self, df): + aipw = SingleCrossfitAIPTW(df, exposure='art', outcome='dead') + aipw.exposure_model('male + age0 + cd40 + dvl0', LogisticRegression()) + aipw.outcome_model('art + male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + aipw.fit(n_splits=1) + + def test_continuous_flag(self, df, cf): + aipw = SingleCrossfitAIPTW(df, exposure='art', outcome='dead') + assert aipw._continuous_outcome_ is False + + aipw = SingleCrossfitAIPTW(cf, exposure='art', outcome='cd4_wk45') + assert aipw._continuous_outcome_ is True + + def test_estimation_example(self): + d = ze.load_zivich_breskin_data() + g_model = 'diabetes + age + risk_score + ldl_log' + q_model = 'statin + diabetes + age + risk_score + ldl_log' + + scaipw = SingleCrossfitAIPTW(d, exposure='statin', outcome='Y') + scaipw.exposure_model(g_model, GLMSL(sm.families.family.Binomial()), bound=0.01) + scaipw.outcome_model(q_model, GLMSL(sm.families.family.Binomial())) + scaipw.fit(n_splits=2, n_partitions=20, random_state=149528) + + # Comparing Results + npt.assert_allclose(scaipw.risk_difference, -0.08439863429580897) + npt.assert_allclose(scaipw.risk_difference_se, 0.03845818503930426) + npt.assert_allclose(scaipw.risk_difference_ci, + (-0.15977529188362244, -0.009021976707995497)) + + npt.assert_allclose(scaipw.risk_ratio, 0.7207330613683808) + npt.assert_allclose(scaipw.risk_ratio_se, 0.12284661575725178) + npt.assert_allclose(scaipw.risk_ratio_ci, + (0.5665095240048756, 0.9169415936332386)) + + def test_continuous_example(self, cf): + g_model = 'male + age0 + cd40 + dvl0' + q_model = 'art + male + age0 + cd40 + dvl0' + + scaipw = SingleCrossfitAIPTW(cf, exposure='art', outcome='cd4_wk45') + scaipw.exposure_model(g_model, GLMSL(sm.families.family.Binomial()), bound=0.01) + scaipw.outcome_model(q_model, GLMSL(sm.families.family.Gaussian())) + scaipw.fit(n_splits=2, n_partitions=20, random_state=743282) + + # Comparing Results + npt.assert_allclose(scaipw.ace, 248.66381428702869) + npt.assert_allclose(scaipw.ace_se, 59.74608928489513) + npt.assert_allclose(scaipw.ace_ci, + (131.5636310715198, 365.76399750253756)) + + +class TestDoubleCrossfitAIPTW: + + @pytest.fixture + def df(self): + df = ze.load_sample_data(False) + df[['cd4_rs1', 'cd4_rs2']] = ze.spline(df, 'cd40', n_knots=3, term=2, restricted=True) + df[['age_rs1', 'age_rs2']] = ze.spline(df, 'age0', n_knots=3, term=2, restricted=True) + return df.drop(columns=['cd4_wk45']).dropna() + + @pytest.fixture + def cf(self): + df = ze.load_sample_data(False) + df[['cd4_rs1', 'cd4_rs2']] = ze.spline(df, 'cd40', n_knots=3, term=2, restricted=True) + df[['age_rs1', 'age_rs2']] = ze.spline(df, 'age0', n_knots=3, term=2, restricted=True) + return df.drop(columns=['dead']).dropna() + + def test_drop_missing_data(self): + df = ze.load_sample_data(False) + aipw = DoubleCrossfitAIPTW(df, exposure='art', outcome='dead') + assert df.dropna().shape[0] == aipw.df.shape[0] + + def test_error_when_no_models_specified1(self, df): + aipw = DoubleCrossfitAIPTW(df, exposure='art', outcome='dead') + with pytest.raises(ValueError): + aipw.fit() + + def test_error_when_no_models_specified2(self, df): + aipw = DoubleCrossfitAIPTW(df, exposure='art', outcome='dead') + aipw.exposure_model('male + age0 + cd40 + dvl0', LogisticRegression()) + with 
pytest.raises(ValueError): + aipw.fit() + + def test_error_when_no_models_specified3(self, df): + aipw = DoubleCrossfitAIPTW(df, exposure='art', outcome='dead') + aipw.outcome_model('art + male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + aipw.fit() + + def test_error_invalid_splits(self, df): + aipw = DoubleCrossfitAIPTW(df, exposure='art', outcome='dead') + aipw.exposure_model('male + age0 + cd40 + dvl0', LogisticRegression()) + aipw.outcome_model('art + male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + aipw.fit(n_splits=2) + + def test_continuous_flag(self, df, cf): + aipw = DoubleCrossfitAIPTW(df, exposure='art', outcome='dead') + assert aipw._continuous_outcome_ is False + + aipw = DoubleCrossfitAIPTW(cf, exposure='art', outcome='cd4_wk45') + assert aipw._continuous_outcome_ is True + + def test_estimation_example(self): + d = ze.load_zivich_breskin_data() + g_model = 'diabetes + age + risk_score + ldl_log' + q_model = 'statin + diabetes + age + risk_score + ldl_log' + + dcaipw = DoubleCrossfitAIPTW(d, exposure='statin', outcome='Y') + dcaipw.exposure_model(g_model, GLMSL(sm.families.family.Binomial()), bound=0.01) + dcaipw.outcome_model(q_model, GLMSL(sm.families.family.Binomial())) + dcaipw.fit(n_splits=3, n_partitions=20, random_state=149528) + + # Comparing Results + npt.assert_allclose(dcaipw.risk_difference, -0.0876425986321848) + npt.assert_allclose(dcaipw.risk_difference_se, 0.038884182734785167) + npt.assert_allclose(dcaipw.risk_difference_ci, + (-0.16385419636063792, -0.011431000903731697)) + + npt.assert_allclose(dcaipw.risk_ratio, 0.7118710212657574) + npt.assert_allclose(dcaipw.risk_ratio_se, 0.12473305494523035) + npt.assert_allclose(dcaipw.risk_ratio_ci, + (0.557478785513911, 0.9090217674396279)) + + def test_continuous_example(self, cf): + g_model = 'male + age0 + cd40 + dvl0' + q_model = 'art + male + age0 + cd40 + dvl0' + + dcaipw = DoubleCrossfitAIPTW(cf, exposure='art', outcome='cd4_wk45') + dcaipw.exposure_model(g_model, GLMSL(sm.families.family.Binomial()), bound=0.01) + dcaipw.outcome_model(q_model, GLMSL(sm.families.family.Gaussian())) + dcaipw.fit(n_splits=3, n_partitions=20, random_state=743282) + + # Comparing Results + npt.assert_allclose(dcaipw.ace, 264.6411037707777) + npt.assert_allclose(dcaipw.ace_se, 86.05014377568297) + npt.assert_allclose(dcaipw.ace_ci, + (95.98592110594558, 433.29628643560983)) + + +class TestSingleCrossfitTMLE: + + @pytest.fixture + def data(self): + n = 100000 + data = pd.DataFrame() + data['W'] = np.random.normal(size=n) + data['A'] = np.random.binomial(n=1, p=logistic.cdf(-0.5 + 0.1 * data['W']), size=n) + data['Y'] = data['W'] + 5 * data['A'] + np.random.normal(size=n) + return data + + @pytest.fixture + def df(self): + df = ze.load_sample_data(False) + df[['cd4_rs1', 'cd4_rs2']] = ze.spline(df, 'cd40', n_knots=3, term=2, restricted=True) + df[['age_rs1', 'age_rs2']] = ze.spline(df, 'age0', n_knots=3, term=2, restricted=True) + return df.drop(columns=['cd4_wk45']).dropna() + + @pytest.fixture + def cf(self): + df = ze.load_sample_data(False) + df[['cd4_rs1', 'cd4_rs2']] = ze.spline(df, 'cd40', n_knots=3, term=2, restricted=True) + df[['age_rs1', 'age_rs2']] = ze.spline(df, 'age0', n_knots=3, term=2, restricted=True) + return df.drop(columns=['dead']).dropna() + + def test_drop_missing_data(self): + df = ze.load_sample_data(False) + tmle = SingleCrossfitTMLE(df, exposure='art', outcome='dead') + assert df.dropna().shape[0] == tmle.df.shape[0] + + def 
test_error_when_no_models_specified1(self, df): + tmle = SingleCrossfitTMLE(df, exposure='art', outcome='dead') + with pytest.raises(ValueError): + tmle.fit() + + def test_error_when_no_models_specified2(self, df): + tmle = SingleCrossfitTMLE(df, exposure='art', outcome='dead') + tmle.exposure_model('male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + tmle.fit() + + def test_error_when_no_models_specified3(self, df): + tmle = SingleCrossfitTMLE(df, exposure='art', outcome='dead') + tmle.outcome_model('art + male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + tmle.fit() + + def test_error_invalid_splits(self, df): + tmle = SingleCrossfitTMLE(df, exposure='art', outcome='dead') + tmle.exposure_model('male + age0 + cd40 + dvl0', LogisticRegression()) + tmle.outcome_model('art + male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + tmle.fit(n_splits=1) + + def test_continuous_flag(self, df, cf): + tmle = SingleCrossfitTMLE(df, exposure='art', outcome='dead') + assert tmle._continuous_outcome_ is False + + tmle = SingleCrossfitTMLE(cf, exposure='art', outcome='cd4_wk45') + assert tmle._continuous_outcome_ is True + + def test_estimation_example(self): + d = ze.load_zivich_breskin_data() + g_model = 'diabetes + age + risk_score + ldl_log' + q_model = 'statin + diabetes + age + risk_score + ldl_log' + + sctmle = SingleCrossfitTMLE(d, exposure='statin', outcome='Y') + sctmle.exposure_model(g_model, GLMSL(sm.families.family.Binomial()), bound=0.01) + sctmle.outcome_model(q_model, GLMSL(sm.families.family.Binomial())) + sctmle.fit(n_splits=2, n_partitions=20, random_state=149528) + + # Comparing Results + npt.assert_allclose(sctmle.risk_difference, -0.12271324570295966) + npt.assert_allclose(sctmle.risk_difference_se, 0.03050536890832196) + npt.assert_allclose(sctmle.risk_difference_ci, + (-0.18250267009837864, -0.06292382130754068)) + + npt.assert_allclose(sctmle.risk_ratio, 0.6396994156614049) + npt.assert_allclose(sctmle.risk_ratio_se, 0.10240088109353096) + npt.assert_allclose(sctmle.risk_ratio_ci, + (0.5233740263824294, 0.781879347788897)) + + def test_continuous_example(self, cf): + g_model = 'male + age0 + cd40 + dvl0' + q_model = 'art + male + age0 + cd40 + dvl0' + + sctmle = SingleCrossfitTMLE(cf, exposure='art', outcome='cd4_wk45') + sctmle.exposure_model(g_model, GLMSL(sm.families.family.Binomial()), bound=0.01) + sctmle.outcome_model(q_model, GLMSL(sm.families.family.Gaussian())) + sctmle.fit(n_splits=2, n_partitions=20, random_state=743282) + + # Comparing Results + npt.assert_allclose(sctmle.ace, 247.25788152025098) + npt.assert_allclose(sctmle.ace_se, 58.006860005167) + npt.assert_allclose(sctmle.ace_ci, + (133.56652505386677, 360.9492379866352)) + + +class TestDoubleCrossfitTMLE: + + @pytest.fixture + def df(self): + df = ze.load_sample_data(False) + df[['cd4_rs1', 'cd4_rs2']] = ze.spline(df, 'cd40', n_knots=3, term=2, restricted=True) + df[['age_rs1', 'age_rs2']] = ze.spline(df, 'age0', n_knots=3, term=2, restricted=True) + return df.drop(columns=['cd4_wk45']).dropna() + + @pytest.fixture + def cf(self): + df = ze.load_sample_data(False) + df[['cd4_rs1', 'cd4_rs2']] = ze.spline(df, 'cd40', n_knots=3, term=2, restricted=True) + df[['age_rs1', 'age_rs2']] = ze.spline(df, 'age0', n_knots=3, term=2, restricted=True) + return df.drop(columns=['dead']).dropna() + + def test_drop_missing_data(self): + df = ze.load_sample_data(False) + tmle = DoubleCrossfitTMLE(df, exposure='art', 
outcome='dead') + assert df.dropna().shape[0] == tmle.df.shape[0] + + def test_error_when_no_models_specified1(self, df): + tmle = DoubleCrossfitTMLE(df, exposure='art', outcome='dead') + with pytest.raises(ValueError): + tmle.fit() + + def test_error_when_no_models_specified2(self, df): + tmle = DoubleCrossfitTMLE(df, exposure='art', outcome='dead') + tmle.exposure_model('male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + tmle.fit() + + def test_error_when_no_models_specified3(self, df): + tmle = DoubleCrossfitTMLE(df, exposure='art', outcome='dead') + tmle.outcome_model('art + male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + tmle.fit() + + def test_error_invalid_splits(self, df): + tmle = DoubleCrossfitTMLE(df, exposure='art', outcome='dead') + tmle.exposure_model('male + age0 + cd40 + dvl0', LogisticRegression()) + tmle.outcome_model('art + male + age0 + cd40 + dvl0', LogisticRegression()) + with pytest.raises(ValueError): + tmle.fit(n_splits=2) + + def test_continuous_flag(self, df, cf): + tmle = DoubleCrossfitTMLE(df, exposure='art', outcome='dead') + assert tmle._continuous_outcome_ is False + + tmle = DoubleCrossfitTMLE(cf, exposure='art', outcome='cd4_wk45') + assert tmle._continuous_outcome_ is True + + def test_estimation_example(self): + d = ze.load_zivich_breskin_data() + g_model = 'diabetes + age + risk_score + ldl_log' + q_model = 'statin + diabetes + age + risk_score + ldl_log' + + dctmle = DoubleCrossfitTMLE(d, exposure='statin', outcome='Y') + dctmle.exposure_model(g_model, GLMSL(sm.families.family.Binomial()), bound=0.01) + dctmle.outcome_model(q_model, GLMSL(sm.families.family.Binomial())) + dctmle.fit(n_splits=3, n_partitions=20, random_state=149528) + + # Comparing Results + npt.assert_allclose(dctmle.risk_difference, -0.12164253131180072) + npt.assert_allclose(dctmle.risk_difference_se, 0.02740647410798642) + npt.assert_allclose(dctmle.risk_difference_ci, + (-0.17535823350668361, -0.06792682911691783)) + + npt.assert_allclose(dctmle.risk_ratio, 0.6442319481957228) + npt.assert_allclose(dctmle.risk_ratio_se, 0.09584203316755982) + npt.assert_allclose(dctmle.risk_ratio_ci, + (0.5339017835202516, 0.777361709375735)) + + def test_continuous_example(self, cf): + g_model = 'male + age0 + cd40 + dvl0' + q_model = 'art + male + age0 + cd40 + dvl0' + + dctmle = DoubleCrossfitTMLE(cf, exposure='art', outcome='cd4_wk45') + dctmle.exposure_model(g_model, GLMSL(sm.families.family.Binomial()), bound=0.01) + dctmle.outcome_model(q_model, GLMSL(sm.families.family.Gaussian())) + dctmle.fit(n_splits=3, n_partitions=20, random_state=743282) + + # Comparing Results + npt.assert_allclose(dctmle.ace, 238.23203641165946) + npt.assert_allclose(dctmle.ace_se, 69.14615318685628) + npt.assert_allclose(dctmle.ace_ci, + (102.70806649593166, 373.75600632738724)) diff --git a/tests/test_ipw.py b/tests/test_ipw.py index 31adb3c..e8029e2 100644 --- a/tests/test_ipw.py +++ b/tests/test_ipw.py @@ -276,6 +276,10 @@ def test_error_conditional(self, sdata): with pytest.raises(ValueError): sipw.fit(p=[0.8], conditional=["df['male']==1", "df['male']==0"]) + def test_drop_missing(self, cdata): + sipw = StochasticIPTW(cdata, treatment='art', outcome='cd4_wk45') + assert sipw.df.shape[0] == cdata.dropna().shape[0] + def test_uncond_treatment(self, sdata): r_pred = 0.1165162207 diff --git a/tests/test_superlearner.py b/tests/test_superlearner.py new file mode 100644 index 0000000..fe1be0f --- /dev/null +++ b/tests/test_superlearner.py @@ 
-0,0 +1,324 @@
+import pytest
+import numpy as np
+import pandas as pd
+import numpy.testing as npt
+import pandas.testing as pdt
+import statsmodels.api as sm
+import statsmodels.formula.api as smf
+from sklearn.linear_model import LogisticRegression, LinearRegression
+
+from zepid.superlearner import EmpiricalMeanSL, GLMSL, StepwiseSL, SuperLearner
+
+
+@pytest.fixture
+def data():
+    data = pd.DataFrame()
+    data['C'] = [5, 10, 12, 13, -10, 0, 37]
+    data['B'] = [0, 0, 0, 1, 1, 1, 1]
+    data['M'] = [0, 0, 1, np.nan, 0, 1, 1]
+    return data
+
+
+@pytest.fixture
+def data_test():
+    # True Models: y ~ a + w + w*x + N(0, 1)
+    # True Models: Pr(b=1) ~ logit(a + w - w*x)
+    data = pd.DataFrame()
+    data['X'] = [1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0]
+    data['W'] = [-3, 2, -2, -1, 2, -2, 2, -2, -1, -1, 1, 2, -1, 0, -2, -1, -1, -3, -1, 1]
+    data['A'] = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0]
+    data['Y'] = [-6.6, 4.2, -2.0, -0.6, 6.6, -2.2, 1.2, -4.9, -2.2, 0.8, 1.3, 3.4, 0.3, 1.4, -1.8, -2.4, -1.6,
+                 -4.1, -2.5, 2.5]
+    data['B'] = [0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1]
+    return data
+
+
+class TestEmpiricalMeanSL:
+
+    def test_error_missing_data(self, data):
+        empm = EmpiricalMeanSL()
+        with pytest.raises(ValueError, match="missing values in X or y"):
+            empm.fit(np.asarray(data['M']), np.asarray(data['C']))
+
+        with pytest.raises(ValueError, match="missing values in X or y"):
+            empm.fit(np.asarray(data['C']), np.asarray(data['M']))
+
+    def test_error_shapes(self, data):
+        empm = EmpiricalMeanSL()
+        with pytest.raises(ValueError, match="same number of observations"):
+            empm.fit(np.asarray(data['C']), np.array([0, 1, 1]))
+
+    def test_mean_correct(self, data):
+        empm = EmpiricalMeanSL()
+
+        # Continuous
+        empm.fit(X=np.asarray(data['B']), y=np.asarray(data['C']))
+        npt.assert_allclose(empm.empirical_mean, np.mean(data['C']))
+
+        # Binary
+        empm.fit(X=np.asarray(data['C']), y=np.asarray(data['B']))
+        npt.assert_allclose(empm.empirical_mean, np.mean(data['B']))
+
+    def test_predict(self, data):
+        empm = EmpiricalMeanSL()
+
+        # Continuous
+        empm.fit(X=np.asarray(data['B']), y=np.asarray(data['C']))
+        X_pred = np.array([1, 1, 1])
+        pred_y = empm.predict(X=X_pred)
+        assert pred_y.shape[0] == X_pred.shape[0]  # Same shape in output
+        npt.assert_allclose(pred_y,
+                            [np.mean(data['C'])] * X_pred.shape[0])
+
+        # Binary
+        empm.fit(X=np.asarray(data['C']), y=np.asarray(data['B']))
+        X_pred = np.array([1, 1, 1, 0])
+        pred_y = empm.predict(X=X_pred)
+        assert pred_y.shape[0] == X_pred.shape[0]  # Same shape in output
+        npt.assert_allclose(pred_y,
+                            [np.mean(data['B'])] * X_pred.shape[0])
+
+
+class TestGLMSL:
+
+    def test_error_missing_data(self, data):
+        f = sm.families.family.Binomial()
+        glm = GLMSL(f)
+        with pytest.raises(ValueError, match="missing values in X or y"):
+            glm.fit(np.asarray(data['M']), np.asarray(data['C']))
+
+        with pytest.raises(ValueError, match="missing values in X or y"):
+            glm.fit(np.asarray(data['C']), np.asarray(data['M']))
+
+    def test_error_shapes(self, data):
+        f = sm.families.family.Binomial()
+        glm = GLMSL(f)
+        with pytest.raises(ValueError, match="same number of observations"):
+            glm.fit(np.asarray(data['C']), np.array([0, 1, 1]))
+
+    def test_match_statsmodels_continuous(self, data_test):
+        f = sm.families.family.Gaussian()
+        glm = GLMSL(f)
+        glm.fit(np.asarray(data_test[['A', 'W', 'X']]), np.asarray(data_test['Y']))
+
+        # Checking chosen covariates
+        sm_glm = smf.glm("Y ~ A + W + X", data_test,
family=f).fit() + npt.assert_allclose(glm.model.params, + sm_glm.params) + + # Checking predictions from model + step_preds = glm.predict(np.asarray(data_test.loc[0:5, ['A', 'W', 'X']])) + npt.assert_allclose(step_preds, + sm_glm.predict(data_test.loc[0:5, ])) + + def test_match_statsmodels_binary(self, data_test): + f = sm.families.family.Binomial() + glm = GLMSL(f) + glm.fit(np.asarray(data_test[['A', 'W']]), np.asarray(data_test['B'])) + + # Checking chosen covariates + sm_glm = smf.glm("B ~ A + W", data_test, family=f).fit() + npt.assert_allclose(glm.model.params, + sm_glm.params) + + # Checking predictions from model + step_preds = glm.predict(np.asarray(data_test.loc[0:5, ['A', 'W']])) + npt.assert_allclose(step_preds, + sm_glm.predict(data_test.loc[0:5, ])) + + +class TestStepWiseSL: + + def test_error_setup(self): + f = sm.families.family.Binomial() + # Testing selection method error + with pytest.raises(ValueError, match="`method` must be one"): + StepwiseSL(f, selection="wrong") + # Testing interaction_order < 0 + with pytest.raises(ValueError, match="interaction_order"): + StepwiseSL(f, order_interaction=-1) + # Testing interaction_order != int + with pytest.raises(ValueError, match="interaction_order"): + StepwiseSL(f, order_interaction=0.4) + + def test_error_missing_data(self, data): + f = sm.families.family.Binomial() + step = StepwiseSL(f) + with pytest.raises(ValueError, match="missing values in X or y"): + step.fit(np.asarray(data['M']), np.asarray(data['C'])) + + with pytest.raises(ValueError, match="missing values in X or y"): + step.fit(np.asarray(data['C']), np.asarray(data['M'])) + + def test_error_shapes(self, data): + f = sm.families.family.Binomial() + step = StepwiseSL(f) + with pytest.raises(ValueError, match="same number of observations"): + step.fit(np.asarray(data['C']), np.array([0, 1, 1])) + + def test_error_backward_saturated(self, data_test): + f = sm.families.family.Binomial() + step = StepwiseSL(f, selection="backward", order_interaction=1, verbose=True) + with pytest.raises(ValueError, match="Saturated model"): + step.fit(np.asarray(data_test[['A', 'W']]), np.asarray(data_test['B'])) + + def test_forward_continuous(self, data_test): + f = sm.families.family.Gaussian() + step = StepwiseSL(f, selection="forward", order_interaction=1) + step.fit(np.asarray(data_test[['A', 'W', 'X']]), np.asarray(data_test['Y'])) + + # Checking chosen covariates + best_x_indices = np.asarray((1, 5, 4, 3)) # This is the order the AIC's got forward + npt.assert_array_equal(np.asarray(step.cols_optim), + best_x_indices) + + # Checking predictions from model + best_x_preds = np.array([-6.79917101, 5.38279072, -1.86983794, -1.22659046, 5.38279072, -1.86983794]) + step_preds = step.predict(np.asarray(data_test.loc[0:5, ['A', 'W', 'X']])) + npt.assert_allclose(step_preds, + best_x_preds) + + def test_backward_continuous(self, data_test): + f = sm.families.family.Gaussian() + step = StepwiseSL(f, selection="backward", order_interaction=1) + step.fit(np.asarray(data_test[['A', 'W', 'X']]), np.asarray(data_test['Y'])) + + # Checking chosen covariates + best_x_indices = np.asarray((1, 3, 4, 5)) # This is the order the AIC's got backward + npt.assert_array_equal(np.asarray(step.cols_optim), + best_x_indices) + + # Checking predictions from model + best_x_preds = np.array([-6.79917101, 5.38279072, -1.86983794, -1.22659046, 5.38279072, -1.86983794]) + step_preds = step.predict(np.asarray(data_test.loc[0:5, ['A', 'W', 'X']])) + npt.assert_allclose(step_preds, + best_x_preds) + + def 
test_forward_binary(self, data_test): + f = sm.families.family.Binomial() + step = StepwiseSL(f, selection="forward", order_interaction=1) + step.fit(np.asarray(data_test[['A', 'W', 'X']]), np.asarray(data_test['B'])) + + # Checking chosen covariates + best_x_indices = np.asarray((1, 3)) # This is the order the AIC's got backward + npt.assert_array_equal(np.asarray(step.cols_optim), + best_x_indices) + + # Checking predictions from model + best_x_preds = np.array([0.00646765, 0.96985036, 0.7380893, 0.45616085, 0.96985036, 0.7380893]) + step_preds = step.predict(np.asarray(data_test.loc[0:5, ['A', 'W', 'X']])) + npt.assert_allclose(step_preds, + best_x_preds, rtol=1e-5) + + def test_backward_binary(self, data_test): + f = sm.families.family.Binomial() + step = StepwiseSL(f, selection="backward", order_interaction=1) + step.fit(np.asarray(data_test[['A', 'X']]), np.asarray(data_test['B'])) + + # Checking chosen covariates + best_x_indices = np.asarray([]) # This is the order the AIC's got backward + npt.assert_array_equal(np.asarray(step.cols_optim), + best_x_indices) + + # Checking predictions from model + best_x_preds = np.array([0.7, 0.7, 0.7, 0.7, 0.7, 0.7]) + step_preds = step.predict(np.asarray(data_test.loc[0:5, ['A', 'X']])) + npt.assert_allclose(step_preds, + best_x_preds, rtol=1e-5) + + +class TestSuperLearner: + + @pytest.fixture + def load_estimators_continuous(self): + emp = EmpiricalMeanSL() + linr = LinearRegression() + step = StepwiseSL(family=sm.families.family.Gaussian(), selection="forward", order_interaction=1) + return [emp, linr, step] + + @pytest.fixture + def load_estimators_binary(self): + emp = EmpiricalMeanSL() + logr = LogisticRegression() + step = StepwiseSL(family=sm.families.family.Binomial(), selection="forward", order_interaction=1) + return [emp, logr, step] + + def test_error_estimator_length(self, load_estimators_continuous): + with pytest.raises(ValueError, match="estimators and estimator_labels"): + SuperLearner(estimators=load_estimators_continuous, estimator_labels=["wrong", "number"]) + + def test_error_solver(self, load_estimators_continuous): + with pytest.raises(ValueError, match="The solver INVALID_SOLVER is not currently"): + SuperLearner(estimators=load_estimators_continuous, estimator_labels=["Mean", "LineR", "Step"], + solver="INVALID_SOLVER") + + def test_error_lossf(self, load_estimators_continuous): + with pytest.raises(ValueError, match="The loss function INVALID_LOSSF is not currently"): + SuperLearner(estimators=load_estimators_continuous, estimator_labels=["Mean", "LineR", "Step"], + loss_function="INVALID_LOSSF") + + def test_error_shapes(self, data, load_estimators_continuous): + sl = SuperLearner(estimators=load_estimators_continuous, estimator_labels=["Mean", "LineR", "Step"]) + with pytest.raises(ValueError, match="same number of observations"): + sl.fit(np.asarray(data['C']), np.array([0, 1, 1])) + + with pytest.raises(ValueError, match="same number of observations"): + sl.fit(np.array([0, 1, 1]), np.asarray(data['C'])) + + def test_error_nan(self, data, load_estimators_continuous): + sl = SuperLearner(estimators=load_estimators_continuous, estimator_labels=["Mean", "LineR", "Step"], folds=2) + with pytest.raises(ValueError, match="missing values in X or y"): + sl.fit(np.asarray(data['C']), np.asarray(data['M'])) + + with pytest.raises(ValueError, match="missing values in X or y"): + sl.fit(np.asarray(data['M']), np.asarray(data['C'])) + + fsl = sl.fit(np.asarray(data['B']).reshape(-1, 1), np.asarray(data['C'])) + with 
pytest.raises(ValueError, match="missing values in X"): + fsl.predict(np.asarray(data['M'])) + + def test_error_before_fit(self, data, load_estimators_continuous): + sl = SuperLearner(estimators=load_estimators_continuous, estimator_labels=["Mean", "LineR", "Step"]) + with pytest.raises(ValueError, match="must be called before"): + sl.predict(np.asarray(data['C'])) + + with pytest.raises(ValueError, match="must be called before"): + sl.summary() + + def test_warn_lossf(self, data_test, load_estimators_binary): + sl = SuperLearner(estimators=load_estimators_binary, estimator_labels=["Mean", "LineR", "Step"], folds=3) + with pytest.warns(UserWarning, match="looks like your `y` is binary"): + sl.fit(np.asarray(data_test[['A', 'W', 'X']]), np.asarray(data_test['B'])) + + def test_continuous_superlearner(self, data_test, load_estimators_continuous): + sl = SuperLearner(estimators=load_estimators_continuous, estimator_labels=["Mean", "LineR", "Step"], folds=5) + fsl = sl.fit(np.asarray(data_test[['A', 'W', 'X']]), np.asarray(data_test['Y'])) + + # Coefficients and CV-Error + expected = pd.DataFrame.from_records([{"estimator": "Mean", "cv_error": 10.2505625, "coefs": 0.097767}, + {"estimator": "LineR", "cv_error": 1.90231789, "coefs": 0.357968}, + {"estimator": "Step", "cv_error": 1.66769069, "coefs": 0.544265}]) + pdt.assert_frame_equal(fsl.est_performance, + expected) + + # Predicted values + expected = np.array([-5.65558813, 4.45487519, -1.91811241, -1.46252119, 4.45487519, -1.91811241]) + npt.assert_allclose(fsl.predict(np.asarray(data_test.loc[0:5, ["A", "W", "X"]])), + expected) + + def test_binary_superlearner(self, data_test, load_estimators_binary): + sl = SuperLearner(estimators=load_estimators_binary, estimator_labels=["Mean", "LogR", "Step"], + loss_function='nloglik', folds=5) + fsl = sl.fit(np.asarray(data_test[['A', 'X']]), np.asarray(data_test['B'])) + + # Coefficients and CV-Error + expected = pd.DataFrame.from_records([{"estimator": "Mean", "cv_error": -0.049431, "coefs": 0.966449}, + {"estimator": "LogR", "cv_error": -0.030154, "coefs": 0.033551}, + {"estimator": "Step", "cv_error": 1.797190, "coefs": 0.}]) + pdt.assert_frame_equal(fsl.est_performance, + expected) + + # Predicted values + expected = np.array([0.69634645, 0.70191334, 0.70322108, 0.69766808, 0.70191334, 0.70322108]) + npt.assert_allclose(fsl.predict(np.asarray(data_test.loc[0:5, ["A", "X"]])), + expected) diff --git a/travis-requirements.txt b/travis-requirements.txt index 666490f..3a3bfd1 100644 --- a/travis-requirements.txt +++ b/travis-requirements.txt @@ -1,9 +1,10 @@ -numpy<=1.18.5 pandas>=0.18 +numpy>=1.16.5 statsmodels>=0.7.0 matplotlib>=2.1.0 -scipy<=1.4.1 +scipy tabulate patsy -sklearn -lifelines>=0.14.2 \ No newline at end of file +scikit-learn +lifelines>=0.14.2 +networkx \ No newline at end of file diff --git a/zepid/__init__.py b/zepid/__init__.py index 18a0d99..6b1afb4 100644 --- a/zepid/__init__.py +++ b/zepid/__init__.py @@ -29,19 +29,30 @@ See http://zepid.readthedocs.io/en/latest/ for a full guide through all the package features """ +from .version import __version__ + from .base import (RiskRatio, RiskDifference, NNT, OddsRatio, IncidenceRateRatio, IncidenceRateDifference, Sensitivity, Specificity, Diagnostics, interaction_contrast, interaction_contrast_ratio, spline, table1_generator, create_spline_transform) from .datasets import (load_sample_data, load_ewing_sarcoma_data, load_gvhd_data, load_sciatica_data, load_leukemia_data, load_longitudinal_data, load_binge_drinking_data, 
load_case_control_data,
-                        load_monotone_missing_data, load_generalize_data)
+                        load_monotone_missing_data, load_generalize_data, load_zivich_breskin_data)

 import zepid.calc
+
 import zepid.graphics
+
+import zepid.sensitivity_analysis
+
+import zepid.superlearner
+
 import zepid.causal.gformula
 import zepid.causal.ipw
 import zepid.causal.doublyrobust
 import zepid.causal.generalize
-import zepid.sensitivity_analysis
-from .version import __version__
+dags_available = True
+try:
+    import zepid.causal.causalgraph
+except ImportError:
+    dags_available = False
diff --git a/zepid/calc/__init__.py b/zepid/calc/__init__.py
index 55359b5..11ef0b5 100644
--- a/zepid/calc/__init__.py
+++ b/zepid/calc/__init__.py
@@ -1,5 +1,7 @@
-from .utils import (risk_ci, incidence_rate_ci, risk_ratio, risk_difference, number_needed_to_treat, odds_ratio,
-                    incidence_rate_ratio, incidence_rate_difference, attributable_community_risk,
-                    population_attributable_fraction, probability_to_odds, odds_to_probability, counternull_pvalue,
-                    semibayes, sensitivity, specificity, ppv_converter, npv_converter, screening_cost_analyzer,
-                    rubins_rules, s_value)
+from .utils import (risk_ci, risk_ratio, risk_difference, number_needed_to_treat, odds_ratio,
+                    incidence_rate_ci, incidence_rate_ratio, incidence_rate_difference,
+                    attributable_community_risk, population_attributable_fraction,
+                    probability_to_odds, odds_to_probability, logit, inverse_logit, probability_bounds,
+                    counternull_pvalue, s_value,
+                    sensitivity, specificity, ppv_converter, npv_converter, screening_cost_analyzer,
+                    rubins_rules, semibayes)
diff --git a/zepid/calc/utils.py b/zepid/calc/utils.py
index cae95d3..21427b9 100644
--- a/zepid/calc/utils.py
+++ b/zepid/calc/utils.py
@@ -749,6 +749,36 @@ def odds_to_probability(odds):
     return odds / (1 + odds)


+def logit(prob):
+    """Logit transformation of probabilities. Input can be a single probability or an array of probabilities
+
+    Parameters
+    ----------
+    prob : float, array
+        A single probability or an array of probabilities
+
+    Returns
+    -------
+    logit-transformed probabilities
+    """
+    return np.log(prob / (1 - prob))
+
+
+def inverse_logit(logodds):
+    """Inverse logit transformation. Returns probabilities
+
+    Parameters
+    ----------
+    logodds : float, array
+        A single log-odds value or an array of log-odds
+
+    Returns
+    -------
+    inverse-logit transformed results (i.e. probabilities for log-odds)
+    """
+    return 1 / (1 + np.exp(-logodds))
+
+
 def counternull_pvalue(estimate, lcl, ucl, sided='two', alpha=0.05, decimal=3):
     r"""Calculates the counternull p-value. It is useful to prevent over-interpretation of results
@@ -1352,3 +1382,48 @@ def s_value(pvalue):
        Replication Crisis if We Don’t Expect Replication. The American Statistician.
     """
     return -1 * np.log2(np.array(pvalue))
+
+
+def probability_bounds(v, bounds):
+    """Function to generate bounded values for probabilities. Specifically this function is used in multiple
+    estimators to generate bounded probabilities. This is available for both symmetric and asymmetric bounds.
+
+    Parameters
+    ----------
+    v : numpy.array
+        Array of values to bound
+    bounds : float, list, numpy.array
+        Bounds to apply to v. If only a single value is provided, then symmetric bounds are used.
+
+    Returns
+    -------
+    numpy.array of bounded values
+    """
+    v = np.asarray(v)
+    if type(bounds) is float:  # Symmetric Bounding
+        if bounds < 0 or bounds > 1:
+            raise ValueError('Bound value must be between (0, 1)')
+        v[v < bounds] = bounds
+        v[v > 1 - bounds] = 1 - bounds
+
+    elif type(bounds) is str or type(bounds) is int:  # Catching invalid scalar inputs (strings and integers)
+        raise ValueError('Bounds must either be a float between (0, 1), or a collection of floats between (0, 1)')
+
+    else:  # Asymmetric bounds
+        if bounds[0] > bounds[1]:
+            raise ValueError('Bound thresholds must be listed in ascending order')
+        if len(bounds) > 2:
+            warnings.warn('It looks like you specified more than two bounds. Only the first two '
+                          'specified bounds are used by the bound statement. So only ' +
+                          str(bounds[0:2]) + ' will be used', UserWarning)
+        if type(bounds[0]) is str or type(bounds[1]) is str:
+            raise ValueError('Bounds must be floats between (0, 1)')
+        if (bounds[0] < 0 or bounds[0] > 1) or (bounds[1] < 0 or bounds[1] > 1):
+            raise ValueError('Both bound values must be between (0, 1)')
+        v[v < bounds[0]] = bounds[0]
+        v[v > bounds[1]] = bounds[1]
+
+    return v
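A minimal sketch of the truncation `probability_bounds` applies (values chosen purely for illustration): a single float clips both tails symmetrically at `bound` and `1 - bound`, while a two-element collection clips each tail separately.

>>> probability_bounds(np.array([0.001, 0.40, 0.999]), bounds=0.01)
array([0.01, 0.4 , 0.99])
>>> probability_bounds(np.array([0.001, 0.40, 0.999]), bounds=[0.05, 0.90])
array([0.05, 0.4 , 0.9 ])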
diff --git a/zepid/causal/causalgraph/__init__.py b/zepid/causal/causalgraph/__init__.py
new file mode 100644
index 0000000..55b3b1d
--- /dev/null
+++ b/zepid/causal/causalgraph/__init__.py
@@ -0,0 +1 @@
+from .dag import DirectedAcyclicGraph
\ No newline at end of file
diff --git a/zepid/causal/causalgraph/dag.py b/zepid/causal/causalgraph/dag.py
new file mode 100644
index 0000000..4ab139b
--- /dev/null
+++ b/zepid/causal/causalgraph/dag.py
@@ -0,0 +1,283 @@
+import matplotlib.pyplot as plt
+import networkx as nx
+from networkx.algorithms.dag import descendants, ancestors
+from itertools import combinations
+
+
+class DirectedAcyclicGraph:
+    def __init__(self, exposure, outcome):
+        """Constructs a Directed Acyclic Graph (DAG) for determination of adjustment sets
+
+        Parameters
+        ----------
+        exposure : str
+            Exposure of interest in the causal diagram
+        outcome : str
+            Outcome of interest in the causal diagram
+
+        # TODO add other implementations in the future... have as self.mediator, self.censor, self.missing
+
+        Examples
+        --------
+        Setting up environment
+
+        >>> from zepid.causal.causalgraph import DirectedAcyclicGraph
+
+        Creating directed acyclic graph
+
+        >>> dag = DirectedAcyclicGraph(exposure="X", outcome="Y")
+        >>> dag.add_arrow(source="X", endpoint="Y")
+        >>> dag.add_arrow(source="V", endpoint="Y")
+        >>> dag.add_arrows(pairs=(("W", "X"), ("W", "Y")))
+
+        Determining adjustment sets
+
+        >>> dag.calculate_adjustment_sets()
+        >>> dag.adjustment_sets
+        >>> dag.minimal_adjustment_sets
+
+        Plot diagram
+
+        >>> dag.draw_dag()
+        >>> plt.show()
+
+        Assess arrow misdirections that result in the chosen adjustment set being invalid
+
+        >>> dag.assess_misdirections(chosen_adjustment_set=set("W"))
+
+        References
+        ----------
+        Shrier I, & Platt RW. (2008). Reducing bias through directed acyclic graphs.
+        BMC medical research methodology, 8(1), 70.
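As a sketch of the output expected from the Examples above (assuming only the arrows X->Y, V->Y, W->X, and W->Y), the backdoor path X<-W->Y is the only biasing path, so W alone should come back as the minimal sufficient adjustment set:

>>> dag.calculate_adjustment_sets()
>>> dag.minimal_adjustment_sets
[('W',)]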
+ """ + self.exposure = exposure + self.outcome = outcome + + dag = nx.DiGraph() + dag.add_edge(self.exposure, self.outcome) + self.dag = dag + + self.adjustment_sets = None + self.minimal_adjustment_sets = None + self.arrow_misdirections = None + + def add_arrow(self, source, endpoint): + """Add a single arrow to the current causal DAG + + Parameters + ---------- + source : str + Node that arrow originates from + endpoint : str + Node that arrow points to + """ + dag = self.dag.copy() + dag.add_edge(source, endpoint) + if not nx.is_directed_acyclic_graph(dag): + raise DAGError("Cyclic graph detected. Invalid addition for arrow.") + + self.dag = dag + + def add_arrows(self, pairs): + """Add a set of arrows to the current causal DAG + + Parameters + ---------- + pairs : list, set, container + Set of sets of node pairs to add arrows in the DAG, with the first node as the source (node that arrows + originates from) and the second node is the endpoint (node that arrow points to) + """ + dag = self.dag.copy() + dag.add_edges_from(pairs) + if not nx.is_directed_acyclic_graph(dag): + raise DAGError("Cyclic graph detected. Invalid addition for arrow(s).") + + self.dag = dag + + def add_from_networkx(self, network): + # Checking that it is a directed acyclic graph + if not nx.is_directed_acyclic_graph(network): + raise DAGError("Cyclic graph detected. Invalid networkx input.") + + # Checking that exposure and outcome are valid nodes + nodes = list(network.nodes) + if self.exposure not in nodes: + raise DAGError(str(self.exposure)+" is not a node in the DAG") + if self.outcome not in nodes: + raise DAGError(str(self.outcome)+" is not a node in the DAG") + + self.dag = network.copy() + + def draw_dag(self, positions=None, invert=False, fig_size=(6, 5), node_size=1000): + """Draws the current input causal DAG + + Parameters + ---------- + positions : + Option to provide node locations based on other functionalities + invert : bool, optional + To display the (often hidden) assumptions of causal DAG, the invert options displays all arrows assumed + to not exist. The reversal is an easy way to display lower densities of in the original DAG. No direction + is provided for these arrows + fig_size : set + controls the figure size returned + node_size : int, float + Size of nodes in the subsequent plot + """ + if invert: + dag = nx.complement(self.dag.to_undirected()) + else: + dag = self.dag.copy() + + fig = plt.figure(figsize=fig_size) + ax = plt.subplot(1, 1, 1) + + if positions is None: + positions = nx.spectral_layout(self.dag) + + nx.draw_networkx(dag, positions, node_color="#d3d3d3", node_size=node_size, + edge_color='black', linewidths=1.0, width=1.5, + arrowsize=15, ax=ax, font_size=12) + plt.axis('off') + return ax + + def calculate_adjustment_sets(self): + """Determines all sufficient adjustment sets for the causal diagram using the algorithm described in Shrier & + Platt "Reducing bias through directed acyclic graphs" BMC Medical Research Methodology 2008. + + All possible adjustment sets are enumerated and then assessed. We can briefly consider this as a backtracking + algorithm where we assess each possible combination that exists within the data + + # TODO in future should allow for adjustment sets to determine causal, censor, missing sets. 
+
+        Adjustment sets are added as `DirectedAcyclicGraph.adjustment_sets` and
+        `DirectedAcyclicGraph.minimal_adjustment_sets`
+        """
+        # Extracting list of all sets to check
+        sets_to_check = self._define_all_adjustment_sets_(dag=self.dag)
+
+        valid_sets = []
+        for adj_set in sets_to_check:
+            if self._check_valid_adjustment_set_(graph=self.dag, adjustment_set=adj_set):
+                valid_sets.append(adj_set)
+
+        self.adjustment_sets = valid_sets
+        self.minimal_adjustment_sets = [x for x in valid_sets if len(x) == len(min(valid_sets, key=len))]
+
+    def _define_all_adjustment_sets_(self, dag):
+        """Background function to determine all possible adjustment set combinations to explore. Used to
+        explore every possible combination of adjustment sets to assess whether they are valid for
+        d-separation.
+        """
+        # List of all nodes valid for adjustment
+        all_nodes = list(dag.nodes)
+        all_nodes.remove(self.exposure)
+        all_nodes.remove(self.outcome)
+        list_of_sets = []
+        for i in range(0, len(all_nodes) + 1):
+            list_of_sets.extend(combinations(all_nodes, i))
+        return list_of_sets
+
+    def _check_valid_adjustment_set_(self, graph, adjustment_set):
+        """Checks the adjustment set as valid using the following 6 steps
+        Step 1) check no descendants of X are included in adjustment set
+        Step 2) delete variables that are non-ancestors of X, Y, and the adjustment set
+        Step 3) delete all arrows that originate from exposure
+        Step 4) connect all source nodes (to assess for collider stratification)
+        Step 5) convert to undirected graph
+        Step 6) remove the adjustment set and check whether a path exists between exposure & outcome
+        """
+        dag = graph.copy()
+
+        # List of all nodes valid for adjustment
+        all_nodes = list(dag.nodes())
+        all_nodes.remove(self.exposure)
+        all_nodes.remove(self.outcome)
+
+        # Step 1) Check no descendants of X
+        desc_x = descendants(dag, self.exposure)
+        if desc_x & set(adjustment_set):
+            return False
+
+        # Step 2) Delete all variables that are: (a) non-ancestors of X, (b) non-ancestors of Y,
+        # (c) non-ancestors of the adjustment set
+        set_check = set(adjustment_set).union([self.exposure, self.outcome])
+        set_remove = set(dag.nodes)
+        for n in set_check:
+            set_remove = set_remove & (dag.nodes - ancestors(dag, n))
+        set_remove = set_remove - set([self.exposure, self.outcome]) - set(adjustment_set)
+        dag.remove_nodes_from(set_remove)
+
+        # Step 3) Delete all arrows with X as the source
+        for endpoint in list(dag.successors(self.exposure)):
+            dag.remove_edge(self.exposure, endpoint)
+
+        # Step 4) Directly connect all source nodes pointing to same endpoint (for collider assessment)
+        for n in dag:
+            sources = list(dag.predecessors(n))
+            if len(sources) > 1:
+                for s1, s2 in combinations(sources, 2):
+                    if not (dag.has_edge(s2, s1) or dag.has_edge(s1, s2)):
+                        dag.add_edge(s1, s2)
+
+        # Step 5) Remove arrow directionality
+        uag = dag.to_undirected()
+
+        # Step 6) Remove nodes in the adjustment set
+        uag.remove_nodes_from(adjustment_set)
+
+        # Checking whether a path between X and Y exists now
+        if nx.has_path(uag, self.exposure, self.outcome):
+            return False
+        else:
+            return True
+
+    def assess_misdirections(self, chosen_adjustment_set):
+        """Arrow direction can potentially be misspecified. This function checks every possible arrow reversal
+        and determines the relevant sufficient adjustment sets. Those new adjustment sets are compared to the
+        chosen adjustment set for differences.
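As a sketch of the intended call pattern (continuing the class-level Examples; the exact dictionary keys depend on which edge reversals remain acyclic):

>>> dag.assess_misdirections(chosen_adjustment_set={'W'})
>>> dag.arrow_misdirections  # maps each reversed-edge combination to its valid adjustment sets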
+
+        Parameters
+        ----------
+        chosen_adjustment_set : set, list, container
+            The sufficient adjustment set selected for the data analysis
+        """
+        all_edges = list(self.dag.edges())
+        edge_combos = []
+        for i in range(0, len(all_edges) + 1):
+            edge_combos.append([x for x in combinations(all_edges, i)])
+
+        valid_switches = []
+        valid_graphs = []
+        for c in range(1, len(edge_combos)):
+            for s in edge_combos[c]:
+                # Copy graph and remove the selected edges
+                g = self.dag.copy()
+                g.remove_edges_from(s)
+                # Reversing all edges in that set
+                for pair in s:
+                    g.add_edge(pair[1], pair[0])  # Add reversed edge
+                # Keep the switch only if the result is still a DAG
+                if nx.is_directed_acyclic_graph(g):
+                    valid_graphs.append(g)
+                    valid_switches.append(s)
+
+        alternative_adjustment_sets = {}
+        for v, g in zip(valid_switches, valid_graphs):
+            sets_to_check = self._define_all_adjustment_sets_(dag=g)
+            valid_sets = []
+            for adj_set in sets_to_check:
+                if self._check_valid_adjustment_set_(graph=g, adjustment_set=adj_set):
+                    valid_sets.append(adj_set)
+            # Compare as sets so that input type and node order do not matter
+            if set(chosen_adjustment_set) not in [set(vs) for vs in valid_sets]:
+                alternative_adjustment_sets[v] = valid_sets
+
+        self.arrow_misdirections = alternative_adjustment_sets
+
+
+class DAGError(Exception):
+    """Exception raised for errors in Directed Acyclic Graphs not being directed or acyclic
+    """
+
+    def __init__(self, message):
+        super().__init__(message)
diff --git a/zepid/causal/doublyrobust/AIPW.py b/zepid/causal/doublyrobust/AIPW.py
index 119bec9..e8b56de 100644
--- a/zepid/causal/doublyrobust/AIPW.py
+++ b/zepid/causal/doublyrobust/AIPW.py
@@ -1,4 +1,6 @@
+import copy
 import warnings
+import patsy
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -7,9 +9,11 @@
 from statsmodels.stats.weightstats import DescrStatsW
 from scipy.stats import norm

-from zepid.causal.utils import (propensity_score, plot_kde, plot_love, iptw_calculator,
-                                standardized_mean_differences, positivity, _bounding_,
-                                plot_kde_accuracy, outcome_accuracy)
+from zepid.calc import probability_bounds
+from zepid.causal.utils import (check_input_data, propensity_score, plot_kde, plot_love, iptw_calculator,
+                                standardized_mean_differences, positivity,
+                                plot_kde_accuracy, outcome_accuracy, aipw_calculator,
+                                exposure_machine_learner, outcome_machine_learner, missing_machine_learner)


 class AIPTW:
@@ -105,38 +109,24 @@ class AIPTW:
     References
     ----------
-    Funk, M. J., Westreich, D., Wiesen, C., Stürmer, T., Brookhart, M. A., & Davidian, M. (2011). Doubly robust
+    Funk MJ, Westreich D, Wiesen C, Stürmer T, Brookhart MA, & Davidian M. (2011). Doubly robust
     estimation of causal effects. American Journal of Epidemiology, 173(7), 761-767.

     Lunceford JK, Davidian M. (2004). Stratification and weighting via the propensity score in estimation of
     causal treatment effects: a comparative study. Statistics in medicine, 23(19), 2937-2960.
     """
-    def __init__(self, df, exposure, outcome, weights=None, alpha=0.05):
-        if df.dropna(subset=[d for d in df.columns if d != outcome]).shape[0] != df.shape[0]:
-            warnings.warn("There is missing data that is not the outcome in the data set. AIPTW will drop "
-                          "all missing data that is not missing outcome data. AIPTW will fit "
AIPTW will fit " - + str(df.dropna(subset=[d for d in df.columns if d != outcome]).shape[0]) + - ' of ' + str(df.shape[0]) + ' observations', UserWarning) - self.df = df.copy().dropna(subset=[d for d in df.columns if d != outcome]).reset_index() - else: - self.df = df.copy().reset_index() - - # Checking to see if missing outcome data occurs - self._missing_indicator = '__missing_indicator__' - if self.df.dropna(subset=[outcome]).shape[0] != self.df.shape[0]: - self._miss_flag = True - self.df[self._missing_indicator] = np.where(self.df[outcome].isna(), 0, 1) - else: - self._miss_flag = False - self.df[self._missing_indicator] = 1 + def __init__(self, df, exposure, outcome, weights=None, alpha=0.05): self.exposure = exposure self.outcome = outcome - - if df[outcome].dropna().value_counts().index.isin([0, 1]).all(): - self._continuous_outcome = False - else: - self._continuous_outcome = True + self._missing_indicator = '__missing_indicator__' + self.df, self._miss_flag, self._continuous_outcome = check_input_data(data=df, + exposure=exposure, + outcome=outcome, + estimator="AIPTW", + drop_censoring=False, + drop_missing=True, + binary_exposure_only=True) self._weight_ = weights self.alpha = alpha @@ -146,18 +136,24 @@ def __init__(self, df, exposure, outcome, weights=None, alpha=0.05): self.risk_difference_ci = None self.risk_ratio_ci = None self.risk_difference_se = None + self.risk_ratio_se = None self.average_treatment_effect = None self.average_treatment_effect_ci = None self.average_treatment_effect_se = None + self._continuous_type = None self._fit_exposure_ = False + self._exp_model_custom = False self._fit_outcome_ = False + self._out_model_custom = False self._fit_missing_ = False + self._miss_model_custom = False self._exp_model = None self._out_model = None + self._predicted_y_ = None - def exposure_model(self, model, bound=False, print_results=True): + def exposure_model(self, model, custom_model=None, bound=False, print_results=True): r"""Specify the propensity score / inverse probability weight model. Model used to predict the exposure via a logistic regression model. This model estimates @@ -169,6 +165,10 @@ def exposure_model(self, model, bound=False, print_results=True): ---------- model : str Independent variables to predict the exposure. For example, 'var1 + var2 + var3' + custom_model : optional + Input for a custom model that is used in place of the logit model (default). The model must have the + "fit()" and "predict()" attributes. SciKit-Learn style models supported as custom models. In the + background, AIPTW will fit the custom model and generate the predicted probablities bound : float, list, optional Value between 0,1 to truncate predicted probabilities. Helps to avoid near positivity violations. Specifying this argument can improve finite sample performance for random positivity violations. 
@@ -181,20 +181,32 @@
         """
         self.__mweight = model
         self._exp_model = self.exposure + ' ~ ' + model
-        d, n, iptw = iptw_calculator(df=self.df, treatment=self.exposure, model_denom=model, model_numer='1',
-                                     weight=self._weight_, stabilized=False, standardize='population',
-                                     bound=None, print_results=print_results)
-        self.df['_g1_'] = d
-        self.df['_g0_'] = 1 - d
+        if custom_model is None:
+            d, n, iptw = iptw_calculator(df=self.df, treatment=self.exposure, model_denom=model, model_numer='1',
+                                         weight=self._weight_, stabilized=False, standardize='population',
+                                         bound=None, print_results=print_results)
+        else:
+            self._exp_model_custom = True
+            data = patsy.dmatrix(model + ' - 1', self.df)
+            d = exposure_machine_learner(xdata=np.asarray(data),
+                                         ydata=np.asarray(self.df[self.exposure]),
+                                         ml_model=copy.deepcopy(custom_model),
+                                         print_results=print_results)
+
+        g1w = d
+        g0w = 1 - d

+        # Applying bounds AFTER extracting g1 and g0
         if bound:
-            self.df['_g1_'] = _bounding_(self.df['_g1_'], bounds=bound)
-            self.df['_g0_'] = _bounding_(self.df['_g0_'], bounds=bound)
+            g1w = probability_bounds(g1w, bounds=bound)
+            g0w = probability_bounds(g0w, bounds=bound)

+        self.df['_g1_'] = g1w
+        self.df['_g0_'] = g0w
         self._fit_exposure_ = True

-    def missing_model(self, model, bound=False, print_results=True):
+    def missing_model(self, model, custom_model=None, bound=False, print_results=True):
         r"""Estimation of Pr(M=0|A,L), which is the missing data mechanism for the outcome. Predicted
         probabilities are used to create inverse probability of censoring weights to account for informative
         missing data on the outcome.
@@ -214,6 +226,10 @@
         model : str
             Independent variables to predict the exposure. Example) 'var1 + var2 + var3'. The treatment must be
             included for the missing data model
+        custom_model : optional
+            Input for a custom model that is used in place of the logit model (default). The model must have the
+            "fit()" and "predict()" attributes. SciKit-Learn style models are supported as custom models. In the
+            background, AIPTW will fit the custom model and generate the predicted probabilities
         bound : float, list, optional
             Value between 0,1 to truncate predicted probabilities. Helps to avoid near positivity violations.
             Specifying this argument can improve finite sample performance for random positivity violations. However,
@@ -241,23 +257,40 @@
         self._miss_model = self._missing_indicator + ' ~ ' + model
         fitmodel = propensity_score(self.df, self._miss_model, print_results=print_results)

-        dfx = self.df.copy()
-        dfx[self.exposure] = 1
-        self.df['_ipmw_a1_'] = np.where(self.df[self._missing_indicator] == 1,
-                                        fitmodel.predict(dfx), np.nan)
-        dfx = self.df.copy()
-        dfx[self.exposure] = 0
-        self.df['_ipmw_a0_'] = np.where(self.df[self._missing_indicator] == 1,
-                                        fitmodel.predict(dfx), np.nan)
+        if custom_model is None:  # Logistic Regression model for predictions
+            dfx = self.df.copy()
+            dfx[self.exposure] = 1
+            m1w = np.where(self.df[self._missing_indicator] == 1, fitmodel.predict(dfx), np.nan)
+            dfx = self.df.copy()
+            dfx[self.exposure] = 0
+            m0w = np.where(self.df[self._missing_indicator] == 1, fitmodel.predict(dfx), np.nan)
+        else:  # User-Specified model
+            self._miss_model_custom = True
+            data = patsy.dmatrix(model + ' - 1', self.df)
+            dfx = self.df.copy()
+            dfx[self.exposure] = 1
+            adata = patsy.dmatrix(model + ' - 1', dfx)
+            dfx = self.df.copy()
+            dfx[self.exposure] = 0
+            ndata = patsy.dmatrix(model + ' - 1', dfx)
+
+            m1w, m0w = missing_machine_learner(xdata=np.array(data),
+                                               mdata=self.df[self._missing_indicator],
+                                               all_a=adata, none_a=ndata,
+                                               ml_model=copy.deepcopy(custom_model),
+                                               print_results=print_results)

         # If bounds are requested
         if bound:
-            self.df['_ipmw_a1_'] = _bounding_(self.df['_ipmw_a1_'], bounds=bound)
-            self.df['_ipmw_a0_'] = _bounding_(self.df['_ipmw_a0_'], bounds=bound)
+            m1w = probability_bounds(m1w, bounds=bound)
+            m0w = probability_bounds(m0w, bounds=bound)
+
+        self.df['_ipmw_a1_'] = m1w
+        self.df['_ipmw_a0_'] = m0w

         self._fit_missing_ = True

-    def outcome_model(self, model, continuous_distribution='gaussian', print_results=True):
+    def outcome_model(self, model, custom_model=None, continuous_distribution='gaussian', print_results=True):
         r"""Specify the outcome model. Model used to predict the outcome via a regression model. For binary
         outcome data, a logistic regression model is used. For continuous outcomes, either linear or Poisson
         regression are available.
@@ -270,6 +303,10 @@
         ----------
         model : str
             Independent variables to predict the outcome. For example, 'var1 + var2 + var3 + var4'
+        custom_model : optional
+            Input for a custom model that is used in place of the logit model (default). The model must have the
+            "fit()" and "predict()" attributes. SciKit-Learn style models are supported as custom models. In the
+            background, AIPTW will fit the custom model and generate the predicted values
         continuous_distribution : str, optional
             Distribution to use for continuous outcomes.
Options are 'gaussian' for normal distributions and 'poisson' for Poisson distributions @@ -279,40 +316,65 @@ def outcome_model(self, model, continuous_distribution='gaussian', print_results if self.exposure not in model: warnings.warn("It looks like '" + self.exposure + "' is not included in the outcome model.") + if self._miss_flag: + cc = self.df.copy().dropna() + else: + cc = self.df.copy() + self._out_model = self.outcome + ' ~ ' + model - if self._continuous_outcome: - self._continuous_type = continuous_distribution - if (continuous_distribution == 'gaussian') or (continuous_distribution == 'normal'): - f = sm.families.family.Gaussian() - elif continuous_distribution == 'poisson': - f = sm.families.family.Poisson() + if custom_model is None: + if self._continuous_outcome: + self._continuous_type = continuous_distribution + if (continuous_distribution == 'gaussian') or (continuous_distribution == 'normal'): + f = sm.families.family.Gaussian() + elif continuous_distribution == 'poisson': + f = sm.families.family.Poisson() + else: + raise ValueError("Only 'gaussian' and 'poisson' distributions are supported") else: - raise ValueError("Only 'gaussian' and 'poisson' distributions are supported") - else: - f = sm.families.family.Binomial() + f = sm.families.family.Binomial() + + if self._weight_ is None: + log = smf.glm(self._out_model, cc, family=f).fit() + else: + log = smf.glm(self._out_model, cc, freq_weights=cc[self._weight_], family=f).fit() + + if print_results: + print('==============================================================================') + print('Outcome Model') + print(log.summary()) + print('==============================================================================') + + # Predicting under treatment strategies + dfx = self.df.copy() + dfx[self.exposure] = 1 + qa1w = log.predict(dfx) + dfx = self.df.copy() + dfx[self.exposure] = 0 + qa0w = log.predict(dfx) - if self._weight_ is None: - log = smf.glm(self._out_model, self.df, family=f).fit() else: - log = smf.glm(self._out_model, self.df, freq_weights=self.df[self._weight_], family=f).fit() - - if print_results: - print('\n----------------------------------------------------------------') - print('MODEL: ' + self._out_model) - print('-----------------------------------------------------------------') - print(log.summary()) - - # Generating predictions for observed variables - self._predicted_y_ = log.predict(self.df) - - # Predicting under treatment strategies - dfx = self.df.copy() - dfx[self.exposure] = 1 - self.df['_pY1_'] = log.predict(dfx) - dfx = self.df.copy() - dfx[self.exposure] = 0 - self.df['_pY0_'] = log.predict(dfx) + self._out_model_custom = True + data = patsy.dmatrix(model + ' - 1', cc) + + dfx = self.df.copy() + dfx[self.exposure] = 1 + adata = patsy.dmatrix(model + ' - 1', dfx) + dfx = self.df.copy() + dfx[self.exposure] = 0 + ndata = patsy.dmatrix(model + ' - 1', dfx) + + qa1w, qa0w = outcome_machine_learner(xdata=np.asarray(data), + ydata=np.asarray(cc[self.outcome]), + all_a=adata, none_a=ndata, + ml_model=copy.deepcopy(custom_model), + continuous=self._continuous_outcome, + print_results=print_results) + + self.df['_pY1_'] = qa1w + self.df['_pY0_'] = qa0w + self._predicted_y_ = qa1w * self.df[self.exposure] + qa0w * (1 - self.df[self.exposure]) self._fit_outcome_ = True def fit(self): @@ -338,55 +400,52 @@ def fit(self): "function", UserWarning) # Doubly robust estimator under all treated - a_obs = self.df[self.exposure] - y_obs = self.df[self.outcome] - py_a1 = self.df['_pY1_'] - py_a0 = 
self.df['_pY0_'] + a_obs = np.asarray(self.df[self.exposure]) + y_obs = np.asarray(self.df[self.outcome]) + py_a1 = np.asarray(self.df['_pY1_']) + py_a0 = np.asarray(self.df['_pY0_']) if self._fit_missing_: - ps_g1 = self.df['_g1_'] * self.df['_ipmw_a1_'] - ps_g0 = self.df['_g0_'] * self.df['_ipmw_a0_'] + ps_g1 = np.asarray(self.df['_g1_'] * self.df['_ipmw_a1_']) + ps_g0 = np.asarray(self.df['_g0_'] * self.df['_ipmw_a0_']) else: - ps_g1 = self.df['_g1_'] - ps_g0 = self.df['_g0_'] - - # Doubly robust estimator under all treated - dr_a1 = np.where(a_obs == 1, - (y_obs / ps_g1) - ((py_a1 * ps_g0) / ps_g1), - py_a1) + ps_g1 = np.asarray(self.df['_g1_']) + ps_g0 = np.asarray(self.df['_g0_']) - # Doubly robust estimator under all untreated - dr_a0 = np.where(a_obs == 1, - py_a0, - (y_obs / ps_g0 - ((py_a0 * ps_g1) / ps_g0))) + if self._weight_ is None: + w = None + else: + w = self.df[self._weight_] + diff_est, diff_var = aipw_calculator(y=y_obs, a=a_obs, + py_a=py_a1, py_n=py_a0, + pa1=ps_g1, pa0=ps_g0, + difference=True, weights=w, + splits=None, continuous=self._continuous_outcome) # Generating estimates for the risk difference and risk ratio zalpha = norm.ppf(1 - self.alpha / 2, loc=0, scale=1) - if self._weight_ is None: - if self._continuous_outcome: - self.average_treatment_effect = np.nanmean(dr_a1) - np.nanmean(dr_a0) - var_ic = np.nanvar((dr_a1 - dr_a0) - self.average_treatment_effect, ddof=1) / self.df.shape[0] - self.average_treatment_effect_se = np.sqrt(var_ic) - self.average_treatment_effect_ci = [self.average_treatment_effect - zalpha * np.sqrt(var_ic), - self.average_treatment_effect + zalpha * np.sqrt(var_ic)] + if self._continuous_outcome: + self.average_treatment_effect = diff_est + self.average_treatment_effect_se = np.sqrt(diff_var) + self.average_treatment_effect_ci = [self.average_treatment_effect - zalpha * np.sqrt(diff_var), + self.average_treatment_effect + zalpha * np.sqrt(diff_var)] - else: - self.risk_difference = np.nanmean(dr_a1) - np.nanmean(dr_a0) - self.risk_ratio = np.nanmean(dr_a1) / np.nanmean(dr_a0) - var_ic = np.nanvar((dr_a1 - dr_a0) - self.risk_difference, ddof=1) / self.df.shape[0] - self.risk_difference_se = np.sqrt(var_ic) - self.risk_difference_ci = [self.risk_difference - zalpha * np.sqrt(var_ic), - self.risk_difference + zalpha * np.sqrt(var_ic)] else: - dr_m1 = DescrStatsW(dr_a1, weights=self.df[self._weight_]).mean - dr_m0 = DescrStatsW(dr_a0, weights=self.df[self._weight_]).mean - - if self._continuous_outcome: - self.average_treatment_effect = dr_m1 - dr_m0 - else: - self.risk_difference = dr_m1 - dr_m0 - self.risk_ratio = dr_m1 / dr_m0 + self.risk_difference = diff_est + self.risk_difference_se = np.sqrt(diff_var) + self.risk_difference_ci = [self.risk_difference - zalpha * np.sqrt(diff_var), + self.risk_difference + zalpha * np.sqrt(diff_var)] + + rr, ln_rr_var = aipw_calculator(y=y_obs, a=a_obs, + py_a=py_a1, py_n=py_a0, + pa1=ps_g1, pa0=ps_g0, + difference=False, weights=w, + splits=None, continuous=False) + self.risk_ratio = rr + self.risk_ratio_se = np.sqrt(ln_rr_var) + self.risk_ratio_ci = (np.exp(np.log(rr) - zalpha * self.risk_ratio_se), + np.exp(np.log(rr) + zalpha * self.risk_ratio_se)) def summary(self, decimal=3): """Prints a summary of the results for the doubly robust estimator. Confidence intervals are only available for @@ -411,22 +470,29 @@ def summary(self, decimal=3): fmt = 'Outcome: {:<15} No. 
Missing Outcome: {:<20}' print(fmt.format(self.outcome, np.sum(self.df[self.outcome].isnull()))) - fmt = 'g-Model: {:<15} Q-model: {:<20}' - e = 'Logistic' - if self._continuous_outcome: - y = self._continuous_type + fmt = 'g-Model: {:<15} Missing Model: {:<20}' + if self._exp_model_custom: + e = 'User-specified' else: - y = 'Logistic' - - print(fmt.format(e, y)) - - fmt = 'Missing model: {:<15}' - if self._fit_missing_: + e = 'Logistic' + if self._miss_model_custom and self._fit_missing_: + m = 'User-specified' + elif self._fit_missing_: m = 'Logistic' else: m = 'None' - print(fmt.format(m)) + print(fmt.format(e, m)) + + fmt = 'Q-Model: {:<15}' + if self._out_model_custom: + y = 'User-specified' + elif self._continuous_outcome: + y = self._continuous_type + else: + y = 'Logistic' + + print(fmt.format(y)) print('======================================================================') @@ -448,7 +514,12 @@ def summary(self, decimal=3): print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: -') print('----------------------------------------------------------------------') print('Risk Ratio: ', round(float(self.risk_ratio), decimal)) - print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: -') + if self._weight_ is None: + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.risk_ratio_ci[0], decimal)), ',', + str(round(self.risk_ratio_ci[1], decimal)) + ')') + else: + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: -') print('======================================================================') diff --git a/zepid/causal/doublyrobust/TMLE.py b/zepid/causal/doublyrobust/TMLE.py index 1111476..30f0b98 100644 --- a/zepid/causal/doublyrobust/TMLE.py +++ b/zepid/causal/doublyrobust/TMLE.py @@ -1,3 +1,4 @@ +import copy import warnings import patsy import numpy as np @@ -7,10 +8,12 @@ from scipy.stats import logistic, norm from zepid.causal.utils import propensity_score, stochastic_check_conditional -from zepid.calc import probability_to_odds, odds_to_probability +from zepid.causal.doublyrobust.utils import tmle_unit_bounds, tmle_unit_unbound +from zepid.calc import probability_to_odds, odds_to_probability, probability_bounds from zepid.causal.utils import (exposure_machine_learner, outcome_machine_learner, stochastic_outcome_machine_learner, - stochastic_outcome_predict, missing_machine_learner, _bounding_, plot_kde, plot_love, - standardized_mean_differences, positivity, plot_kde_accuracy, outcome_accuracy) + stochastic_outcome_predict, missing_machine_learner, plot_kde, plot_love, + standardized_mean_differences, positivity, plot_kde_accuracy, outcome_accuracy, + check_input_data) class TMLE: @@ -24,21 +27,6 @@ class TMLE: Valid confidence intervals are only attainable with certain machine learning algorithms. These algorithms must be Donsker class for valid confidence intervals. GAM and LASSO are examples of alogorithms that are Donsker class - Parameters - ---------- - df : DataFrame - Pandas dataframe containing the variables of interest - exposure : str - Column label for the exposure of interest - outcome : str - Column label for the outcome of interest - alpha : float, optional - Alpha for confidence interval level. Default is 0.05 - continuous_bound : float, optional - Optional argument to control the bounding feature for continuous outcomes. The bounding process may result - in values of 0,1 which are undefined for logit(x). This parameter adds or substracts from the scenarios of - 0,1 respectively. 
Default value is 0.0005 - Note ---- TMLE is a doubly-robust substitution estimator. TMLE obtains the target estimate in a single step. The @@ -81,6 +69,22 @@ class TMLE: Confidence intervals are constructed using influence curves. + Parameters + ---------- + df : DataFrame + Pandas dataframe containing the variables of interest + exposure : str + Column label for the exposure of interest + outcome : str + Column label for the outcome of interest + alpha : float, optional + Alpha for confidence interval level. Default is 0.05 + continuous_bound : float, optional + Optional argument to control the bounding feature for continuous outcomes. The bounding process may result + in values of 0,1 which are undefined for logit(x). This parameter adds or substracts from the scenarios of + 0,1 respectively. Default value is 0.0005 + + Examples -------- Setting up environment @@ -137,39 +141,26 @@ class TMLE: Gruber S, van der Laan, MJ. (2011). tmle: An R package for targeted maximum likelihood estimation. """ def __init__(self, df, exposure, outcome, alpha=0.05, continuous_bound=0.0005): - # Going through missing data (that is not the outcome) - if df.dropna(subset=[d for d in df.columns if d != outcome]).shape[0] != df.shape[0]: - warnings.warn("There is missing data that is not the outcome in the data set. TMLE will drop " - "all missing data that is not missing outcome data. TMLE will fit " - + str(df.dropna(subset=[d for d in df.columns if d != outcome]).shape[0]) + - ' of ' + str(df.shape[0]) + ' observations', UserWarning) - self.df = df.copy().dropna(subset=[d for d in df.columns if d != outcome]).reset_index() - else: - self.df = df.copy().reset_index() - - # Checking to see if missing outcome data occurs - self._missing_indicator = '__missing_indicator__' - if self.df.dropna(subset=[outcome]).shape[0] != self.df.shape[0]: - self._miss_flag = True - self.df[self._missing_indicator] = np.where(self.df[outcome].isna(), 0, 1) - else: - self._miss_flag = False - self.df[self._missing_indicator] = 1 - - # Detailed steps follow "Targeted Learning" chapter 4, figure 4.2 by van der Laan, Rose self.exposure = exposure self.outcome = outcome + self._missing_indicator = '__missing_indicator__' + self.df, self._miss_flag, self._continuous_outcome = check_input_data(data=df, + exposure=exposure, + outcome=outcome, + estimator="TMLE", + drop_censoring=False, + drop_missing=True, + binary_exposure_only=True) - if df[outcome].dropna().value_counts().index.isin([0, 1]).all(): - self._continuous_outcome = False - self._cb = 0.0 - else: - self._continuous_outcome = True - self._continuous_min = np.min(df[outcome]) - self._continuous_max = np.max(df[outcome]) + # Detailed steps follow "Targeted Learning" chapter 4, figure 4.2 by van der Laan, Rose + if self._continuous_outcome: + self._continuous_min = np.min(self.df[outcome]) + self._continuous_max = np.max(self.df[outcome]) self._cb = continuous_bound - self.df[outcome] = _tmle_unit_bounds_(y=df[outcome], mini=self._continuous_min, - maxi=self._continuous_max, bound=self._cb) + self.df[outcome] = tmle_unit_bounds(y=self.df[outcome], mini=self._continuous_min, + maxi=self._continuous_max, bound=self._cb) + else: + self._cb = 0.0 self._out_model = None self._exp_model = None @@ -213,7 +204,7 @@ def exposure_model(self, model, custom_model=None, bound=False, print_results=Tr Independent variables to predict the exposure. Example) 'var1 + var2 + var3' custom_model : optional Input for a custom model that is used in place of the logit model (default). 
The model must have the - "fit()" and "predict()" attributes. Both sklearn and supylearner are supported as custom models. In the + "fit()" and "predict()" attributes. scikit-learn style models are supported as custom models. In the background, TMLE will fit the custom model and generate the predicted probabilities bound : float, list, optional Value between 0,1 to truncate predicted probabilities. Helps to avoid near positivity violations. @@ -235,18 +226,17 @@ def exposure_model(self, model, custom_model=None, bound=False, print_results=Tr # User-specified prediction model else: - # TODO need to create smart warning system - # warnings.warn("TMLE can result in confidence intervals below nominal coverage when used with " - # "certain machine learning algorithms") self._exp_model_custom = True data = patsy.dmatrix(model + ' - 1', self.df) - self.g1W = exposure_machine_learner(xdata=np.asarray(data), ydata=np.asarray(self.df[self.exposure]), - ml_model=custom_model, print_results=print_results) + self.g1W = exposure_machine_learner(xdata=np.asarray(data), + ydata=np.asarray(self.df[self.exposure]), + ml_model=copy.deepcopy(custom_model), + print_results=print_results) self.g0W = 1 - self.g1W if bound: # Bounding predicted probabilities if requested - self.g1W = _bounding_(self.g1W, bounds=bound) - self.g0W = _bounding_(self.g0W, bounds=bound) + self.g1W = probability_bounds(self.g1W, bounds=bound) + self.g0W = probability_bounds(self.g0W, bounds=bound) self._fit_exposure_model = True @@ -298,10 +288,6 @@ def missing_model(self, model, custom_model=None, bound=False, print_results=Tru # User-specified model else: - # TODO need to create smart warning system - # warnings.warn("TMLE can result in confidence intervals below nominal coverage when used with " - # "certain machine learning algorithms") - self._miss_model_custom = True data = patsy.dmatrix(model + ' - 1', self.df) @@ -315,11 +301,12 @@ def missing_model(self, model, custom_model=None, bound=False, print_results=Tru self.m1W, self.m0W = missing_machine_learner(xdata=np.array(data), mdata=self.df[self._missing_indicator], all_a=adata, none_a=ndata, - ml_model=custom_model, print_results=print_results) + ml_model=copy.deepcopy(custom_model), + print_results=print_results) if bound: # Bounding predicted probabilities if requested - self.m1W = _bounding_(self.m1W, bounds=bound) - self.m0W = _bounding_(self.m0W, bounds=bound) + self.m1W = probability_bounds(self.m1W, bounds=bound) + self.m0W = probability_bounds(self.m0W, bounds=bound) self._fit_missing_model = True @@ -335,7 +322,7 @@ def outcome_model(self, model, custom_model=None, bound=False, print_results=Tru custom_model : optional Input for a custom model that is used in place of the logit model (default). The model must have the "fit()" and "predict()" attributes. Both sklearn and supylearner are supported as custom models. In the - background, TMLE will fit the custom model and generate the predicted probablities + background, TMLE will fit the custom model and generate the predicted values bound : bool, optional This argument should ONLY be used if the outcome is continuous. Value between 0,1 to truncate the bounded predicted outcomes.
Default is `False`, meaning no truncation of predicted outcomes occurs (unless a @@ -373,10 +360,10 @@ def outcome_model(self, model, custom_model=None, bound=False, print_results=Tru log = smf.glm(self._out_model, cc, family=f).fit() if print_results: - print('\n----------------------------------------------------------------') - print('MODEL: ' + self._out_model) - print('-----------------------------------------------------------------') + print('==============================================================================') + print('Outcome Model') print(log.summary()) + print('==============================================================================') # Step 2) Estimation under the scenarios dfx = self.df.copy() @@ -388,9 +375,6 @@ def outcome_model(self, model, custom_model=None, bound=False, print_results=Tru # User-specified model else: - # TODO need to create smart warning system - # warnings.warn("TMLE can result in confidence intervals below nominal coverage when used with " - # "certain machine learning algorithms") self._out_model_custom = True data = patsy.dmatrix(model + ' - 1', cc) @@ -404,7 +388,7 @@ def outcome_model(self, model, custom_model=None, bound=False, print_results=Tru self.QA1W, self.QA0W = outcome_machine_learner(xdata=np.asarray(data), ydata=np.asarray(cc[self.outcome]), all_a=adata, none_a=ndata, - ml_model=custom_model, + ml_model=copy.deepcopy(custom_model), continuous=self._continuous_outcome, print_results=print_results) @@ -412,8 +396,8 @@ def outcome_model(self, model, custom_model=None, bound=False, print_results=Tru bound = self._cb # This bounding step prevents continuous outcomes from being outside the range - self.QA1W = _bounding_(self.QA1W, bounds=bound) - self.QA0W = _bounding_(self.QA0W, bounds=bound) + self.QA1W = probability_bounds(self.QA1W, bounds=bound) + self.QA0W = probability_bounds(self.QA0W, bounds=bound) self.QAW = self.QA1W * self.df[self.exposure] + self.QA0W * (1 - self.df[self.exposure]) self._fit_outcome_model = True @@ -469,13 +453,13 @@ def fit(self): delta = np.where(self.df[self._missing_indicator] == 1, 1, 0) if self._continuous_outcome: # Calculating Average Treatment Effect - Qstar = _tmle_unit_unbound_(Qstar, mini=self._continuous_min, maxi=self._continuous_max) - Qstar1 = _tmle_unit_unbound_(Qstar1, mini=self._continuous_min, maxi=self._continuous_max) - Qstar0 = _tmle_unit_unbound_(Qstar0, mini=self._continuous_min, maxi=self._continuous_max) + Qstar = tmle_unit_unbound(Qstar, mini=self._continuous_min, maxi=self._continuous_max) + Qstar1 = tmle_unit_unbound(Qstar1, mini=self._continuous_min, maxi=self._continuous_max) + Qstar0 = tmle_unit_unbound(Qstar0, mini=self._continuous_min, maxi=self._continuous_max) self.average_treatment_effect = np.nanmean(Qstar1 - Qstar0) # Influence Curve for CL - y_unbound = _tmle_unit_unbound_(self.df[self.outcome], mini=self._continuous_min, maxi=self._continuous_max) + y_unbound = tmle_unit_unbound(self.df[self.outcome], mini=self._continuous_min, maxi=self._continuous_max) ic = np.where(delta == 1, HAW * (y_unbound - Qstar) + (Qstar1 - Qstar0) - self.average_treatment_effect, Qstar1 - Qstar0 - self.average_treatment_effect) @@ -916,33 +900,26 @@ class StochasticTMLE: studies. Springer Science & Business Media, 2011. 
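+ + Examples + -------- + A minimal sketch of the intended workflow, assuming the `load_sample_data` setup used by the other + estimators in this file; the treatment probability `p=0.75` and `samples=100` are purely illustrative values: + + >>> from zepid import load_sample_data + >>> df = load_sample_data(False).drop(columns='cd4_wk45').dropna() + >>> stmle = StochasticTMLE(df, exposure='art', outcome='dead') + >>> stmle.exposure_model('male + age0 + cd40 + dvl0') + >>> stmle.outcome_model('art + male + age0 + cd40 + dvl0') + >>> stmle.fit(p=0.75, samples=100) # p is an illustrative treatment probability + >>> stmle.summary()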
""" def __init__(self, df, exposure, outcome, alpha=0.05, continuous_bound=0.0005, verbose=False): - # Dropping ALL missing data (currently doesn't allow for censored outcomes) - if df.dropna().shape[0] != df.shape[0]: - warnings.warn("There is missing data in the data set. StochasticTMLE will drop all missing data. " - "StochasticTMLE will fit " - + str(df.dropna().shape[0]) + - ' of ' + str(df.shape[0]) + ' observations', UserWarning) - self.df = df.copy().dropna().reset_index() - else: - self.df = df.copy().reset_index() - - if not df[exposure].value_counts().index.isin([0, 1]).all(): - raise ValueError("StochasticTMLE only supports binary exposures currently") + self.exposure = exposure + self.outcome = outcome + self._missing_indicator = '__missing_indicator__' + self.df, self._miss_flag, self._continuous_outcome = check_input_data(data=df, + exposure=exposure, + outcome=outcome, + estimator="StochasticTMLE", + drop_censoring=True, + drop_missing=True, + binary_exposure_only=True) # Manage outcomes - if df[outcome].dropna().value_counts().index.isin([0, 1]).all(): - self._continuous_outcome = False - self._cb = 0.0 - else: - self._continuous_outcome = True - self._continuous_min = np.min(df[outcome]) - self._continuous_max = np.max(df[outcome]) + if self._continuous_outcome: + self._continuous_min = np.min(self.df[outcome]) + self._continuous_max = np.max(self.df[outcome]) self._cb = continuous_bound - self.df[outcome] = _tmle_unit_bounds_(y=df[outcome], mini=self._continuous_min, - maxi=self._continuous_max, bound=self._cb) - - self.exposure = exposure - self.outcome = outcome + self.df[outcome] = tmle_unit_bounds(y=self.df[outcome], mini=self._continuous_min, + maxi=self._continuous_max, bound=self._cb) + else: + self._cb = 0.0 # Output attributes self.epsilon = None @@ -1005,7 +982,7 @@ def exposure_model(self, model, custom_model=None, bound=False): ml_model=custom_model, print_results=self._verbose_) if bound: # Bounding predicted probabilities if requested - pred2 = _bounding_(pred, bounds=bound) + pred2 = probability_bounds(pred, bounds=bound) self._specified_bound_ = np.sum(np.where(pred2 == pred, 0, 1)) pred = pred2 @@ -1050,8 +1027,9 @@ def outcome_model(self, model, custom_model=None, bound=False, continuous_distri self._outcome_model = smf.glm(self._q_model, self.df, family=f).fit() if self._verbose_: print('==============================================================================') - print('Q-model') + print('Outcome model') print(self._outcome_model.summary()) + print('==============================================================================') # Step 2) Estimation under the scenarios self._Qinit_ = self._outcome_model.predict(self.df) @@ -1071,7 +1049,7 @@ def outcome_model(self, model, custom_model=None, bound=False, continuous_distri bound = self._cb # This bounding step prevents continuous outcomes from being outside the range - self._Qinit_ = _bounding_(self._Qinit_, bounds=bound) + self._Qinit_ = probability_bounds(self._Qinit_, bounds=bound) def fit(self, p, conditional=None, samples=100, seed=None): """Calculate the effect from the predicted exposure probabilities and predicted outcome values using the TMLE @@ -1163,12 +1141,18 @@ def fit(self, p, conditional=None, samples=100, seed=None): q_star_list.append(np.mean(q_star)) # Saving E[Y^*] if self._continuous_outcome: - self.marginals_vector = _tmle_unit_unbound_(np.array(q_star_list), - mini=self._continuous_min, maxi=self._continuous_max) - y_ = np.array(_tmle_unit_unbound_(self.df[self.outcome], 
mini=self._continuous_min, - maxi=self._continuous_max)) - yq0_ = _tmle_unit_unbound_(self._Qinit_, mini=self._continuous_min, maxi=self._continuous_max) - yqstar_ = _tmle_unit_unbound_(np.array(q_i_star_list), mini=self._continuous_min, maxi=self._continuous_max) + self.marginals_vector = tmle_unit_unbound(np.array(q_star_list), + mini=self._continuous_min, + maxi=self._continuous_max) + y_ = np.array(tmle_unit_unbound(self.df[self.outcome], + mini=self._continuous_min, + maxi=self._continuous_max)) + yq0_ = tmle_unit_unbound(self._Qinit_, + mini=self._continuous_min, + maxi=self._continuous_max) + yqstar_ = tmle_unit_unbound(np.array(q_i_star_list), + mini=self._continuous_min, + maxi=self._continuous_max) else: self.marginals_vector = q_star_list @@ -1321,17 +1305,3 @@ def est_conditional_variance(haw, y_obs, y_pred): doqg_psi_sq = (haw*(y_obs - y_pred))**2 var_est = np.mean(doqg_psi_sq) return var_est - - -# Functions that all TMLEs can call are below -def _tmle_unit_bounds_(y, mini, maxi, bound): - # bounding for continuous outcomes - v = (y - mini) / (maxi - mini) - v = np.where(np.less(v, bound), bound, v) - v = np.where(np.greater(v, 1-bound), 1-bound, v) - return v - - -def _tmle_unit_unbound_(ystar, mini, maxi): - # unbounding of bounded continuous outcomes - return ystar*(maxi - mini) + mini diff --git a/zepid/causal/doublyrobust/__init__.py b/zepid/causal/doublyrobust/__init__.py index fb76187..7707d55 100644 --- a/zepid/causal/doublyrobust/__init__.py +++ b/zepid/causal/doublyrobust/__init__.py @@ -1,2 +1,5 @@ from .AIPW import AIPTW from .TMLE import TMLE, StochasticTMLE +from .crossfit import (SingleCrossfitAIPTW, DoubleCrossfitAIPTW, + SingleCrossfitTMLE, DoubleCrossfitTMLE, + calculate_joint_estimate) diff --git a/zepid/causal/doublyrobust/crossfit.py b/zepid/causal/doublyrobust/crossfit.py new file mode 100644 index 0000000..34a9c7f --- /dev/null +++ b/zepid/causal/doublyrobust/crossfit.py @@ -0,0 +1,1918 @@ +import copy +import warnings +import patsy +import numpy as np +import pandas as pd +from scipy.stats import logistic, norm +import statsmodels.api as sm +import statsmodels.formula.api as smf +import matplotlib.pyplot as plt +from scipy.stats import gaussian_kde +from numpy.random import RandomState + +from zepid.calc.utils import probability_bounds, probability_to_odds, odds_to_probability +from zepid.causal.utils import check_input_data, aipw_calculator +from zepid.causal.doublyrobust.utils import tmle_unit_unbound, tmle_unit_bounds + + +class SingleCrossfitAIPTW: + """Implementation of the Augmented Inverse Probability Weighting estimator with a cross-fit procedure. The purpose + of the cross-fit procedure is to allow for non-Donsker nuisance function estimators. Some machine learning + algorithms are non-Donsker. In practice this means that confidence interval coverage can be incorrect when certain + nuisance function estimators are used. Additionally, bias may persist as well. Cross-fitting is meant to alleviate + this issue; therefore, cross-fitting with a doubly-robust estimator is recommended when using machine learning. + + `SingleCrossfitAIPTW` uses a single cross-fit procedure, where the data set is partitioned into at least two + non-overlapping splits. The nuisance function estimators are then estimated in each split. The estimated nuisance + functions are then used to predict values in a non-overlapping split.
This decouples the nuisance function estimation + from the data used to estimate it. + + Note + ---- + Because repetitions of the procedure are needed to reduce the variance determined by a particular partition, it + can take a long time to run this code. + + Parameters + ---------- + df : DataFrame + Pandas dataframe containing all necessary variables + exposure : str + Label for treatment column in the pandas data frame + outcome : str + Label for outcome column in the pandas data frame + alpha : float, optional + Alpha for confidence interval level. Default is 0.05 + + Examples + -------- + Setting up environment + + >>> from sklearn.linear_model import LogisticRegression + >>> from zepid import load_sample_data + >>> from zepid.causal.doublyrobust import SingleCrossfitAIPTW + >>> df = load_sample_data(False).drop(columns='cd4_wk45').dropna() + + Estimating the single cross-fit AIPTW + + >>> scaipw = SingleCrossfitAIPTW(df, exposure='art', outcome='dead') + >>> scaipw.exposure_model("male + age0 + cd40 + dvl0", estimator=LogisticRegression(solver='lbfgs')) + >>> scaipw.outcome_model("art + male + age0 + cd40 + dvl0", estimator=LogisticRegression(solver='lbfgs')) + >>> scaipw.fit(n_splits=5, n_partitions=100) + >>> scaipw.summary() + + References + ---------- + Chernozhukov V, Chetverikov D, Demirer M, Duflo E, Hansen C, Newey W, & Robins J. (2018). "Double/debiased machine + learning for treatment and structural parameters". The Econometrics Journal 21:1; pC1–C6 + """ + def __init__(self, df, exposure, outcome, alpha=0.05): + self.exposure = exposure + self.outcome = outcome + self.df, self._miss_flag, self._continuous_outcome_ = check_input_data(data=df, + exposure=exposure, + outcome=outcome, + estimator="SingleCrossfitAIPTW", + drop_censoring=True, + drop_missing=True, + binary_exposure_only=True) + self.alpha = alpha + + self._a_covariates = None + self._y_covariates = None + self._a_estimator = None + self._y_estimator = None + self._fit_treatment_ = False + self._fit_outcome_ = False + self._gbounds = None + self._n_splits_ = 0 + self._n_partitions = 0 + self._combine_method_ = None + + self.ace_vector = None + self.ace_var_vector = None + self.ace = None + self.ace_ci = None + self.ace_se = None + + self.risk_difference_vector = None + self.risk_difference_var_vector = None + self.risk_difference = None + self.risk_difference_ci = None + self.risk_difference_se = None + + self.risk_ratio_vector = None + self.risk_ratio_var_vector = None + self.risk_ratio = None + self.risk_ratio_ci = None + self.risk_ratio_se = None + + def exposure_model(self, covariates, estimator, bound=False): + """Specify the treatment nuisance model variables and estimator(s) to use. These parameters are held + in the background until the .fit() function is called. These approaches are used for each sample split + + Parameters + ---------- + covariates : str + Confounders to include in the propensity score model. Follows patsy notation + estimator : + Estimator to use for prediction of the propensity score + bound : float, list, optional + Whether to bound predicted probabilities. Default is False, which does not bound + """ + self._a_estimator = estimator + self._a_covariates = covariates + self._fit_treatment_ = True + self._gbounds = bound + + def outcome_model(self, covariates, estimator): + """Specify the outcome nuisance model variables and estimator(s) to use. These parameters are held + in the background until the .fit() function is called.
These approaches are used for each sample split + + Parameters + ---------- + covariates : str + Covariates to include in the outcome model. Follows patsy notation + estimator : + Estimator to use for prediction of the outcome + """ + self._y_estimator = estimator + self._y_covariates = covariates + self._fit_outcome_ = True + + def fit(self, n_splits=2, n_partitions=100, method='median', random_state=None): + """Runs the crossfit estimation procedure with the augmented inverse probability weighting estimator. The + estimation process is completed for multiple different splits during the procedure. The final estimate is + defined as either the median or mean of the causal measure from each of the different splits. Median is + used as the default since it is more stable. + + Note + ---- + `n_partitions` should be kept high to reduce dependency of results on the chosen number of splits + + Confidence intervals come from influence curves and incorporate the within-split variance and between-split + variance. + + Parameters + ---------- + n_splits : int + Number of splits to use with a default of 2. The number of splits must be greater than or equal to 2. + n_partitions : int + Number of times to repeat the partition process. The default is 100, which I have seen good performance + with in the past. Note that this algorithm can take a long time to run for high values of this parameter. + It is best to test out run-times on small numbers first. Also if running in parallel, it can be reduced + method : str, optional + Method to obtain point estimates and standard errors. Median method takes the median (which is more robust) + and the mean takes the mean. It has been remarked that the median is preferred, since it is more stable to + extreme outliers, which may happen in finite samples + random_state : None, int, optional + Whether to set a seed for the partitions. Default is None (which does not use a user-set seed). Any valid + NumPy seed can be input. Note that you should also state the random_state of all (applicable) estimators + to ensure replicability. Seeds are chosen by the following procedure. The input random_state is passed to + np.random.choice to select n_partitions seeds between 0 and 5 million. That list of n_partitions seeds is then + passed, one per iteration of the cross-fit procedure, to pandas.DataFrame.sample(random_state).
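+ + Examples + -------- + A minimal sketch, assuming `scaipw` was specified as in the class-level example above; the seed is an + arbitrary illustrative value: + + >>> scaipw.fit(n_splits=2, n_partitions=100, method='median', random_state=201) # illustrative seed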
+ """ + # Checking for various issues + if not self._fit_treatment_: + raise ValueError("exposure_model() must be called before fit()") + if not self._fit_outcome_: + raise ValueError("outcome_model() must be called before fit()") + if n_splits < 2: + raise ValueError("SingleCrossfitAIPTW requires that n_splits > 1") + + # Storing some information + self._n_splits_ = n_splits + self._n_partitions = n_partitions + self._combine_method_ = method + + # Creating blank lists + diff_est, diff_var, ratio_est, ratio_var = [], [], [], [] + + # Conducts the re-sampling procedure + if random_state is None: + random_state = [None] * n_partitions + else: + random_state = RandomState(random_state).choice(range(5000000), size=n_partitions, replace=False) + + for j in range(self._n_partitions): + # Estimating for a particular split (lots of functions happening in the background) + result = self._single_crossfit_(random_state=random_state[j]) + + # Appending results of this particular split combination + diff_est.append(result[0]) + diff_var.append(result[1]) + if not self._continuous_outcome_: + ratio_est.append(result[2]) + ratio_var.append(result[3]) + + # Obtaining overall estimate and (1-alpha)% CL from all splits + zalpha = norm.ppf(1 - self.alpha / 2, loc=0, scale=1) + + est, var = calculate_joint_estimate(diff_est, diff_var, method=method) + if self._continuous_outcome_: + self.ace_vector = diff_est + self.ace_var_vector = diff_var + self.ace = est + self.ace_se = np.sqrt(var) + self.ace_ci = (self.ace - zalpha*self.ace_se, + self.ace + zalpha*self.ace_se) + else: + # Risk Difference + self.risk_difference_vector = diff_est + self.risk_difference_var_vector = diff_var + self.risk_difference = est + self.risk_difference_se = np.sqrt(var) + self.risk_difference_ci = (self.risk_difference - zalpha*self.risk_difference_se, + self.risk_difference + zalpha*self.risk_difference_se) + # Risk Ratio + self.risk_ratio_vector = ratio_est + self.risk_ratio_var_vector = ratio_var + ln_rr, ln_rr_var = calculate_joint_estimate(np.log(self.risk_ratio_vector), + self.risk_ratio_var_vector, method=method) + self.risk_ratio = np.exp(ln_rr) + self.risk_ratio_se = np.sqrt(ln_rr_var) + self.risk_ratio_ci = (np.exp(ln_rr - zalpha*self.risk_ratio_se), + np.exp(ln_rr + zalpha*self.risk_ratio_se)) + + def summary(self, decimal=3): + """Prints summary of model results + + Parameters + ---------- + decimal : int, optional + Number of decimal places to display. Default is 3 + """ + if (self._fit_outcome_ is False) or (self._fit_treatment_ is False): + raise ValueError('exposure_model and outcome_model must be specified before the estimate can ' + 'be generated') + + print('======================================================================') + print(' Single Cross-fit AIPTW ') + print('======================================================================') + fmt = 'Treatment: {:<15} No. Observations: {:<20}' + print(fmt.format(self.exposure, self.df.shape[0])) + fmt = 'Outcome: {:<15} No. of Splits: {:<20}' + print(fmt.format(self.outcome, self._n_splits_)) + fmt = 'Method: {:<15} No. 
of Partitions: {:<20}' + print(fmt.format(self._combine_method_, self._n_partitions)) + + print('======================================================================') + if self._continuous_outcome_: + print('Average Causal Effect: ', round(float(self.ace), decimal)) + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.ace_ci[0], decimal)), ',', + str(round(self.ace_ci[1], decimal)) + ')') + else: + print('Risk Difference: ', round(float(self.risk_difference), decimal)) + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.risk_difference_ci[0], decimal)), ',', + str(round(self.risk_difference_ci[1], decimal)) + ')') + print('----------------------------------------------------------------------') + print('Risk Ratio: ', round(float(self.risk_ratio), decimal)) + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.risk_ratio_ci[0], decimal)), ',', + str(round(self.risk_ratio_ci[1], decimal)) + ')') + + print('======================================================================') + + def run_diagnostics(self, color='gray'): + """Runs available diagnostics for the plots. Currently diagnostics consist of a plot of the different point + estimates and variance estimates across different partitions. Diagnostics for cross-fit estimators is ongoing. + If you have any suggestions, please feel free to contact me on GitHub + + Parameters + ---------- + color : str, optional + Controls color of the plots. Default is gray + + Returns + ------- + Plot to console + """ + # Continuous outcomes have less plots to generate + if self._continuous_outcome_: + _run_diagnostic_(diff=self.ace_vector, diff_var=self.ace_var_vector, + color=color) + + # Binary outcomes have plots for all measures + else: + _run_diagnostic_(diff=self.risk_difference_vector, diff_var=self.risk_difference_var_vector, + rratio=self.risk_ratio_vector, rratio_var=self.risk_ratio_var_vector, + color=color) + + def _single_crossfit_(self, random_state): + """Background function that runs a single crossfit of the split samples + """ + # Dividing into s different splits + sample_split = _sample_split_(self.df, n_splits=self._n_splits_, random_state=random_state) + + # Determining pairings to use for each sample split and each combination + pairing_exposure = [i - 1 for i in range(self._n_splits_)] + pairing_outcome = pairing_exposure + + # Estimating treatment nuisance model + a_models = _treatment_nuisance_(treatment=self.exposure, estimator=self._a_estimator, + samples=sample_split, covariates=self._a_covariates) + # Estimating outcome nuisance model + y_models = _outcome_nuisance_(outcome=self.outcome, estimator=self._y_estimator, + samples=sample_split, covariates=self._y_covariates) + + # Generating predictions based on set pairs for cross-fit procedure + predictions = [] + y_obs, a_obs = np.array([]), np.array([]) + split_index = [] + for id, ep, op in zip(range(self._n_splits_), pairing_exposure, pairing_outcome): + predictions.append(self._generate_predictions_(sample_split[id], + a_model_v=a_models[ep], + y_model_v=y_models[op])) + # Generating vector of Y in correct order + y_obs = np.append(y_obs, np.asarray(sample_split[id][self.outcome])) + # Generating vector of A in correct order + a_obs = np.append(a_obs, np.asarray(sample_split[id][self.exposure])) + # Generating index for splits + split_index.extend([id]*sample_split[id].shape[0]) + + # Stacking Predicted Pr(A=1), Y(a=1), Y(a=0) + pred_a_array, pred_y1_array, pred_y0_array 
= np.array([]), np.array([]), np.array([]) + for preds in predictions: + pred_a_array = np.append(pred_a_array, preds[0]) + pred_y1_array = np.append(pred_y1_array, preds[1]) + pred_y0_array = np.append(pred_y0_array, preds[2]) + + # Applying bounds if requested + if self._gbounds: # Bounding g-model if requested + pred_a_array = probability_bounds(pred_a_array, bounds=self._gbounds) + + # Calculating point estimates + difference, var_diff = aipw_calculator(y=y_obs, a=a_obs, + py_a=pred_y1_array, py_n=pred_y0_array, + pa1=pred_a_array, pa0=1-pred_a_array, + splits=np.asarray(split_index), + difference=True, continuous=self._continuous_outcome_) + if self._continuous_outcome_: + return difference, var_diff + else: + ratio, var_ratio = aipw_calculator(y=y_obs, a=a_obs, + py_a=pred_y1_array, py_n=pred_y0_array, + pa1=pred_a_array, pa0=1 - pred_a_array, + splits=np.asarray(split_index), + difference=False, continuous=False) + return difference, var_diff, ratio, var_ratio + + def _generate_predictions_(self, sample, a_model_v, y_model_v): + """Generates predictions from fitted functions (in the background of _single_crossfit_()) + """ + s = sample.copy() + + # Predicting Pr(A=1|L) + xdata = np.asarray(patsy.dmatrix(self._a_covariates + ' - 1', s)) + a_pred = _ml_predictor(xdata, fitted_algorithm=a_model_v) + + # Predicting E(Y|A=1, L) + s[self.exposure] = 1 + xdata = np.asarray(patsy.dmatrix(self._y_covariates + ' - 1', s)) + y_treat = _ml_predictor(xdata, fitted_algorithm=y_model_v) + + # Predicting E(Y|A=0, L) + s[self.exposure] = 0 + xdata = np.asarray(patsy.dmatrix(self._y_covariates + ' - 1', s)) + y_none = _ml_predictor(xdata, fitted_algorithm=y_model_v) + + return a_pred, y_treat, y_none + + +class DoubleCrossfitAIPTW: + """Implementation of the augmented inverse probability weighted estimator with a double cross-fit procedure. The + purpose of the cross-fit procedure is to allow for non-Donsker nuisance function estimators. Some machine learning + algorithms are non-Donsker. In practice this means that confidence interval coverage can be incorrect when certain + nuisance function estimators are used. Additionally, bias may persist as well. Cross-fitting is meant to alleviate + this issue; therefore, cross-fitting with a doubly-robust estimator is recommended when using machine learning. + + `DoubleCrossfitAIPTW` allows for double cross-fitting, where the data set is partitioned into at least three + non-overlapping splits. The nuisance function estimators are then estimated in each split. The estimated nuisance + functions are then used to predict values in the opposing split. Different splits are used for each nuisance + function. A double cross-fit procedure further de-couples the nuisance function estimation compared to single + cross-fit procedures. + + Note + ---- + Because repetitions of the procedure are needed to reduce the variance determined by a particular partition, it + can take a long time to run this code. On a data set of 3000 observations with 100 different partitions it takes + about an hour. The advantage is that the code can be run in parallel. See the documentation for an example. + + Parameters + ---------- + df : DataFrame + Pandas dataframe containing all necessary variables + exposure : str + Label for treatment column in the pandas data frame + outcome : str + Label for outcome column in the pandas data frame + alpha : float, optional + Alpha for confidence interval level.
Default is 0.05 + + Examples + -------- + Setting up environment + + >>> from sklearn.linear_model import LogisticRegression + >>> from zepid import load_sample_data + >>> from zepid.causal.doublyrobust import DoubleCrossfitAIPTW + >>> df = load_sample_data(False).drop(columns='cd4_wk45').dropna() + + Estimating the double cross-fit AIPTW + + >>> dcaipw = DoubleCrossfitAIPTW(df, exposure='art', outcome='dead') + >>> dcaipw.exposure_model("male + age0 + cd40 + dvl0", estimator=LogisticRegression(solver='lbfgs')) + >>> dcaipw.outcome_model("art + male + age0 + cd40 + dvl0", estimator=LogisticRegression(solver='lbfgs')) + >>> dcaipw.fit(n_splits=5, n_partitions=100) + >>> dcaipw.summary() + + References + ---------- + Newey WK, Robins JR. (2018) "Cross-fitting and fast remainder rates for semiparametric estimation". + arXiv:1801.09138 + + Zivich PN, & Breskin A. (2020). Machine learning for causal inference: on the use of cross-fit estimators. + arXiv preprint arXiv:2004.10337. + + Chernozhukov V, Chetverikov D, Demirer M, Duflo E, Hansen C, Newey W, & Robins J. (2018). "Double/debiased machine + learning for treatment and structural parameters". The Econometrics Journal 21:1; pC1–C6 + """ + def __init__(self, df, exposure, outcome, alpha=0.05): + self.exposure = exposure + self.outcome = outcome + self.df, self._miss_flag, self._continuous_outcome_ = check_input_data(data=df, + exposure=exposure, + outcome=outcome, + estimator="DoubleCrossfitAIPTW", + drop_censoring=True, + drop_missing=True, + binary_exposure_only=True) + self.alpha = alpha + + self._a_covariates = None + self._y_covariates = None + self._a_estimator = None + self._y_estimator = None + self._fit_treatment_ = False + self._fit_outcome_ = False + self._gbounds = None + self._n_splits_ = 0 + self._n_partitions = 0 + self._combine_method_ = None + + self.ace_vector = None + self.ace_var_vector = None + self.ace = None + self.ace_ci = None + self.ace_se = None + + self.risk_difference_vector = None + self.risk_difference_var_vector = None + self.risk_difference = None + self.risk_difference_ci = None + self.risk_difference_se = None + + self.risk_ratio_vector = None + self.risk_ratio_var_vector = None + self.risk_ratio = None + self.risk_ratio_ci = None + self.risk_ratio_se = None + + def exposure_model(self, covariates, estimator, bound=False): + """Specify the treatment nuisance model variables and estimator(s) to use. These parameters are held + in the background until the .fit() function is called. These approaches are used for each sample split + + Parameters + ---------- + covariates : str + Confounders to include in the propensity score model. Follows patsy notation + estimator : + Estimator to use for prediction of the propensity score + bound : float, list, optional + Whether to bound predicted probabilities. Default is False, which does not bound + """ + self._a_estimator = estimator + self._a_covariates = covariates + self._fit_treatment_ = True + self._gbounds = bound + + def outcome_model(self, covariates, estimator): + """Specify the outcome nuisance model variables and estimator(s) to use. These parameters are held + in the background until the .fit() function is called. These approaches are used for each sample split + + Parameters + ---------- + covariates : str + Covariates to include in the outcome model.
Follows patsy notation + estimator : + Estimator to use for prediction of the outcome + """ + self._y_estimator = estimator + self._y_covariates = covariates + self._fit_outcome_ = True + + def fit(self, n_splits=3, n_partitions=100, method='median', random_state=None): + """Runs the crossfit estimation procedure with the augmented inverse probability weighted estimator. The + estimation process is completed for multiple different splits during the procedure. The final estimate is + defined as either the median or mean of the average causal effect from each of the different splits. Median is + used as the default since it is more stable. + + Note + ---- + `n_partitions` should be kept high to reduce dependency of results on the chosen number of splits + + Confidence intervals come from influence curves and incorporate the within-split variance and between-split + variance. + + Parameters + ---------- + n_splits : int + Number of splits to use. The default is 3, which is valid for both single cross-fit and double cross-fit. + Single cross-fit is also compatible with 2 as the number of splits + n_partitions : int + Number of times to repeat the partition process. The default is 100, which I have seen good performance + with in the past. Note that this algorithm can take a long time to run for high values of this parameter. + It is best to test out run-times on small numbers first. Also if running in parallel, it can be reduced + method : str, optional + Method to obtain point estimates and standard errors. Median method takes the median (which is more robust) + and the mean takes the mean. It has been remarked that the median is preferred, since it is more stable to + extreme outliers, which may happen in finite samples + random_state : None, int, optional + Whether to set a seed for the partitions. Default is None (which does not use a user-set seed). Any valid + NumPy seed can be input. Note that you should also state the random_state of all (applicable) estimators + to ensure replicability. Seeds are chosen by the following procedure. The input random_state is passed to + np.random.choice to select n_partitions seeds between 0 and 5 million. That list of n_partitions seeds is then + passed, one per iteration of the cross-fit procedure, to pandas.DataFrame.sample(random_state).
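+ + Examples + -------- + A minimal sketch, assuming `dcaipw` was specified as in the class-level example above; note that the + double cross-fit procedure requires at least three splits: + + >>> dcaipw.fit(n_splits=3, n_partitions=100, method='median')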
+ """ + # Checking for various issues + if not self._fit_treatment_: + raise ValueError("exposure_model() must be called before fit()") + if not self._fit_outcome_: + raise ValueError("outcome_model() must be called before fit()") + if n_splits < 3: + raise ValueError("DoubleCrossfitAIPTW requires that n_splits >= 3") + + # Storing some information + self._n_splits_ = n_splits + self._n_partitions = n_partitions + self._combine_method_ = method + + # Creating blank lists + diff_est, diff_var, ratio_est, ratio_var = [], [], [], [] + + # Conducts the re-sampling procedure + if random_state is None: + random_state = [None] * n_partitions + else: + random_state = RandomState(random_state).choice(range(5000000), size=n_partitions, replace=False) + for j in range(self._n_partitions): + # Estimating for a particular split (lots of functions happening in the background) + result = self._single_crossfit_(random_state=random_state[j]) + + # Appending results of this particular split combination + diff_est.append(result[0]) + diff_var.append(result[1]) + if not self._continuous_outcome_: + ratio_est.append(result[2]) + ratio_var.append(result[3]) + + # Obtaining overall estimate and (1-alpha)% CL from all splits + zalpha = norm.ppf(1 - self.alpha / 2, loc=0, scale=1) + + est, var = calculate_joint_estimate(diff_est, diff_var, method=method) + if self._continuous_outcome_: + self.ace_vector = diff_est + self.ace_var_vector = diff_var + self.ace = est + self.ace_se = np.sqrt(var) + self.ace_ci = (self.ace - zalpha*self.ace_se, + self.ace + zalpha*self.ace_se) + else: + # Risk Difference + self.risk_difference_vector = diff_est + self.risk_difference_var_vector = diff_var + self.risk_difference = est + self.risk_difference_se = np.sqrt(var) + self.risk_difference_ci = (self.risk_difference - zalpha*self.risk_difference_se, + self.risk_difference + zalpha*self.risk_difference_se) + # Risk Ratio + self.risk_ratio_vector = ratio_est + self.risk_ratio_var_vector = ratio_var + ln_rr, ln_rr_var = calculate_joint_estimate(np.log(self.risk_ratio_vector), + self.risk_ratio_var_vector, method=method) + self.risk_ratio = np.exp(ln_rr) + self.risk_ratio_se = np.sqrt(ln_rr_var) + self.risk_ratio_ci = (np.exp(ln_rr - zalpha*self.risk_ratio_se), + np.exp(ln_rr + zalpha*self.risk_ratio_se)) + + def summary(self, decimal=3): + """Prints summary of model results + + Parameters + ---------- + decimal : int, optional + Number of decimal places to display. Default is 3 + """ + if (self._fit_outcome_ is False) or (self._fit_treatment_ is False): + raise ValueError('exposure_model and outcome_model must be specified before the estimate can ' + 'be generated') + + print('======================================================================') + print(' Double Cross-fit AIPTW ') + print('======================================================================') + fmt = 'Treatment: {:<15} No. Observations: {:<20}' + print(fmt.format(self.exposure, self.df.shape[0])) + fmt = 'Outcome: {:<15} No. of Splits: {:<20}' + print(fmt.format(self.outcome, self._n_splits_)) + fmt = 'Method: {:<15} No. 
of Partitions: {:<20}' + print(fmt.format(self._combine_method_, self._n_partitions)) + + print('======================================================================') + if self._continuous_outcome_: + print('Average Causal Effect: ', round(float(self.ace), decimal)) + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.ace_ci[0], decimal)), ',', + str(round(self.ace_ci[1], decimal)) + ')') + else: + print('Risk Difference: ', round(float(self.risk_difference), decimal)) + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.risk_difference_ci[0], decimal)), ',', + str(round(self.risk_difference_ci[1], decimal)) + ')') + print('----------------------------------------------------------------------') + print('Risk Ratio: ', round(float(self.risk_ratio), decimal)) + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.risk_ratio_ci[0], decimal)), ',', + str(round(self.risk_ratio_ci[1], decimal)) + ')') + + print('======================================================================') + + def run_diagnostics(self, color='gray'): + """Runs available diagnostics for the plots. Currently diagnostics consist of a plot of the different point + estimates and variance estimates across different partitions. Diagnostics for cross-fit estimators is ongoing. + If you have any suggestions, please feel free to contact me on GitHub + + Parameters + ---------- + color : str, optional + Controls color of the plots. Default is gray + + Returns + ------- + Plot to console + """ + # Continuous outcomes have less plots to generate + if self._continuous_outcome_: + _run_diagnostic_(diff=self.ace_vector, diff_var=self.ace_var_vector, + color=color) + + # Binary outcomes have plots for all measures + else: + _run_diagnostic_(diff=self.risk_difference_vector, diff_var=self.risk_difference_var_vector, + rratio=self.risk_ratio_vector, rratio_var=self.risk_ratio_var_vector, + color=color) + + def _single_crossfit_(self, random_state): + """Background function that runs a single crossfit of the split samples + """ + # Dividing into s different splits + sample_split = _sample_split_(self.df, n_splits=self._n_splits_, random_state=random_state) + + # Determining pairings to use for each sample split and each combination + pairing_exposure = [i - 1 for i in range(self._n_splits_)] + pairing_outcome = [i - 2 for i in range(self._n_splits_)] + + # Estimating treatment nuisance model + a_models = _treatment_nuisance_(treatment=self.exposure, estimator=self._a_estimator, + samples=sample_split, covariates=self._a_covariates) + # Estimating outcome nuisance model + y_models = _outcome_nuisance_(outcome=self.outcome, estimator=self._y_estimator, + samples=sample_split, covariates=self._y_covariates) + + # Generating predictions based on set pairs for cross-fit procedure + predictions = [] + y_obs, a_obs = np.array([]), np.array([]) + split_index = [] + for id, ep, op in zip(range(self._n_splits_), pairing_exposure, pairing_outcome): + predictions.append(self._generate_predictions_(sample_split[id], + a_model_v=a_models[ep], + y_model_v=y_models[op])) + # Generating vector of Y in correct order + y_obs = np.append(y_obs, np.asarray(sample_split[id][self.outcome])) + # Generating vector of A in correct order + a_obs = np.append(a_obs, np.asarray(sample_split[id][self.exposure])) + # Generating index for splits + split_index.extend([id]*sample_split[id].shape[0]) + + # Stacking Predicted Pr(A=1), Y(a=1), Y(a=0) + pred_a_array, 
pred_y1_array, pred_y0_array = np.array([]), np.array([]), np.array([]) + for preds in predictions: + pred_a_array = np.append(pred_a_array, preds[0]) + pred_y1_array = np.append(pred_y1_array, preds[1]) + pred_y0_array = np.append(pred_y0_array, preds[2]) + + # Applying bounds if requested + if self._gbounds: # Bounding g-model if requested + pred_a_array = probability_bounds(pred_a_array, bounds=self._gbounds) + + # Calculating point estimates + difference, var_diff = aipw_calculator(y=y_obs, a=a_obs, + py_a=pred_y1_array, py_n=pred_y0_array, + pa1=pred_a_array, pa0=1-pred_a_array, + splits=np.asarray(split_index), + difference=True, continuous=self._continuous_outcome_) + if self._continuous_outcome_: + return difference, var_diff + else: + ratio, var_ratio = aipw_calculator(y=y_obs, a=a_obs, + py_a=pred_y1_array, py_n=pred_y0_array, + pa1=pred_a_array, pa0=1 - pred_a_array, + splits=np.asarray(split_index), + difference=False, continuous=False) + return difference, var_diff, ratio, var_ratio + + def _generate_predictions_(self, sample, a_model_v, y_model_v): + """Generates predictions from fitted functions (in the background of _single_crossfit_()) + """ + s = sample.copy() + + # Predicting Pr(A=1|L) + xdata = np.asarray(patsy.dmatrix(self._a_covariates + ' - 1', s)) + a_pred = _ml_predictor(xdata, fitted_algorithm=a_model_v) + + # Predicting E(Y|A=1, L) + s[self.exposure] = 1 + xdata = np.asarray(patsy.dmatrix(self._y_covariates + ' - 1', s)) + y_treat = _ml_predictor(xdata, fitted_algorithm=y_model_v) + + # Predicting E(Y|A=0, L) + s[self.exposure] = 0 + xdata = np.asarray(patsy.dmatrix(self._y_covariates + ' - 1', s)) + y_none = _ml_predictor(xdata, fitted_algorithm=y_model_v) + + return a_pred, y_treat, y_none + + +class SingleCrossfitTMLE: + """Implementation of the Targeted Maximum Likelihood Estimator with a single cross-fit procedure. The purpose + of the cross-fit procedure is to allow for non-Donsker nuisance function estimators. Some machine learning + algorithms are non-Donsker. In practice this means that confidence interval coverage can be incorrect when certain + nuisance function estimators are used. Additionally, bias may persist as well. Cross-fitting is meant to alleviate + this issue; therefore, cross-fitting with a doubly-robust estimator is recommended when using machine learning. + + `SingleCrossfitTMLE` uses a single cross-fit, where the data set is partitioned into at least two non-overlapping + splits. The nuisance function estimators are then estimated in each split. The estimated nuisance functions are + then used to predict values in a non-overlapping split. This decouples the nuisance function estimation from the + data used to estimate it. + + Note + ---- + Because repetitions of the procedure are needed to reduce the variance determined by a particular partition, it + can take a long time to run this code. + + Parameters + ---------- + df : DataFrame + Pandas dataframe containing all necessary variables + exposure : str + Label for treatment column in the pandas data frame + outcome : str + Label for outcome column in the pandas data frame + alpha : float, optional + Alpha for confidence interval level. Default is 0.05 + continuous_bound : float, optional + Optional argument to control the bounding feature for continuous outcomes. The bounding process may result + in values of 0,1 which are undefined for logit(x). This parameter adds or subtracts from the scenarios of + 0,1 respectively.
Default value is 0.0005 + + Examples + -------- + Setting up environment + + >>> from sklearn.linear_model import LogisticRegression + >>> from zepid import load_sample_data + >>> from zepid.causal.doublyrobust import SingleCrossfitTMLE + >>> df = load_sample_data(False).drop(columns='cd4_wk45').dropna() + + Estimating the single cross-fit TMLE + + >>> sctmle = SingleCrossfitTMLE(df, exposure='art', outcome='dead') + >>> sctmle.exposure_model("male + age0 + cd40 + dvl0", estimator=LogisticRegression(solver='lbfgs')) + >>> sctmle.outcome_model("art + male + age0 + cd40 + dvl0", estimator=LogisticRegression(solver='lbfgs')) + >>> sctmle.fit(n_splits=5, n_partitions=100) + >>> sctmle.summary() + + References + ---------- + Chernozhukov V, Chetverikov D, Demirer M, Duflo E, Hansen C, Newey W, & Robins J. (2018). "Double/debiased machine + learning for treatment and structural parameters". The Econometrics Journal 21:1; pC1–C6 + """ + def __init__(self, df, exposure, outcome, alpha=0.05, continuous_bound=0.0005): + self.exposure = exposure + self.outcome = outcome + self.df, self._miss_flag, self._continuous_outcome_ = check_input_data(data=df, + exposure=exposure, + outcome=outcome, + estimator="SingleCrossfitTMLE", + drop_censoring=True, + drop_missing=True, + binary_exposure_only=True) + self.alpha = alpha + + # bounding for continuous Y + if self._continuous_outcome_: + self._continuous_min = np.min(self.df[outcome]) + self._continuous_max = np.max(self.df[outcome]) + self._cb = continuous_bound + self.df[outcome] = tmle_unit_bounds(y=self.df[outcome], mini=self._continuous_min, + maxi=self._continuous_max, bound=self._cb) + else: + self._cb = 0.0 + + self._a_covariates = None + self._y_covariates = None + self._a_estimator = None + self._y_estimator = None + self._fit_treatment_ = False + self._fit_outcome_ = False + self._gbounds = None + self._n_splits_ = 0 + self._n_partitions = 0 + self._combine_method_ = None + + self.ace_vector = None + self.ace_var_vector = None + self.ace = None + self.ace_ci = None + self.ace_se = None + + self.risk_difference_vector = None + self.risk_difference_var_vector = None + self.risk_difference = None + self.risk_difference_ci = None + self.risk_difference_se = None + + self.risk_ratio_vector = None + self.risk_ratio_var_vector = None + self.risk_ratio = None + self.risk_ratio_ci = None + self.risk_ratio_se = None + + self.odds_ratio_vector = None + self.odds_ratio_var_vector = None + self.odds_ratio = None + self.odds_ratio_se = None + self.odds_ratio_ci = None + + def exposure_model(self, covariates, estimator, bound=False): + """Specify the treatment nuisance model variables and estimator(s) to use. These parameters are held + in the background until the .fit() function is called. These approaches are used for each sample split + + Parameters + ---------- + covariates : str + Confounders to include in the propensity score model. Follows patsy notation + estimator : + Estimator to use for prediction of the propensity score + bound : float, list, optional + Whether to bound predicted probabilities. Default is False, which does not bound + """ + self._a_estimator = estimator + self._a_covariates = covariates + self._fit_treatment_ = True + self._gbounds = bound + + def outcome_model(self, covariates, estimator): + """Specify the outcome nuisance model variables and estimator(s) to use. These parameters are held + in the background until the .fit() function is called.
These approaches are used for each sample split + + Parameters + ---------- + covariates : str + Covariates to include in the outcome model. Follows patsy notation + estimator : + Estimator to use for prediction of the outcome + """ + self._y_estimator = estimator + self._y_covariates = covariates + self._fit_outcome_ = True + + def fit(self, n_splits=2, n_partitions=100, method='median', random_state=None): + """Runs the crossfit estimation procedure with the targeted maximum likelihood estimator. The estimation + process is completed for multiple different splits during the procedure. The final estimate is defined as + either the median or mean of the causal measure from each of the different splits. Median is used as the + default since it is more stable. + + Note + ---- + `n_partitions` should be kept high to reduce dependency of results on the chosen number of splits + + Confidence intervals come from influence curves and incorporate the within-split variance and between-split + variance. + + Parameters + ---------- + n_splits : int + Number of splits to use with a default of 2. The number of splits must be greater than or equal to 2. + n_partitions : int + Number of times to repeat the partition process. The default is 100, which I have seen good performance + with in the past. Note that this algorithm can take a long time to run for high values of this parameter. + It is best to test out run-times on small numbers first. Also if running in parallel, it can be reduced + method : str, optional + Method to obtain point estimates and standard errors. Median method takes the median (which is more robust) + and the mean takes the mean. It has been remarked that the median is preferred, since it is more stable to + extreme outliers, which may happen in finite samples + random_state : None, int, optional + Whether to set a seed for the partitions. Default is None (which does not use a user-set seed). Any valid + NumPy seed can be input. Note that you should also state the random_state of all (applicable) estimators + to ensure replicability. Seeds are chosen by the following procedure. The input random_state is passed to + np.random.choice to select n_partitions seeds between 0 and 5 million. That list of n_partitions seeds is then + passed, one per iteration of the cross-fit procedure, to pandas.DataFrame.sample(random_state).
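+ + Examples + -------- + A minimal sketch, assuming `sctmle` was specified as in the class-level example above; `method='mean'` + is shown only to contrast with the default median: + + >>> sctmle.fit(n_splits=2, n_partitions=100, method='mean') # 'mean' is illustrative; 'median' is default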
+ """ + # Checking for various issues + if not self._fit_treatment_: + raise ValueError("exposure_model() must be called before fit()") + if not self._fit_outcome_: + raise ValueError("outcome_model() must be called before fit()") + if n_splits < 2: + raise ValueError("SingleCrossfitTMLE requires that n_splits >= 2") + + # Storing some information + self._n_splits_ = n_splits + self._n_partitions = n_partitions + self._combine_method_ = method + + # Creating blank lists + diff_est, diff_var, rratio_est, rratio_var, oratio_est, oratio_var = [], [], [], [], [], [] + + # Conducts the re-sampling procedure + if random_state is None: + random_state = [None] * n_partitions + else: + random_state = RandomState(random_state).choice(range(5000000), size=n_partitions, replace=False) + for j in range(self._n_partitions): + # Estimating for a particular split (lots of functions happening in the background) + result = self._single_crossfit_(random_state=random_state[j]) + + # Appending results of this particular split combination + diff_est.append(result[0]) + diff_var.append(result[1]) + if not self._continuous_outcome_: + rratio_est.append(result[2]) + rratio_var.append(result[3]) + oratio_est.append(result[4]) + oratio_var.append(result[5]) + + # Obtaining overall estimate and (1-alpha)% CL from all splits + zalpha = norm.ppf(1 - self.alpha / 2, loc=0, scale=1) + + est, var = calculate_joint_estimate(diff_est, diff_var, method=method) + if self._continuous_outcome_: + self.ace_vector = diff_est + self.ace_var_vector = diff_var + self.ace = est + self.ace_se = np.sqrt(var) + self.ace_ci = (self.ace - zalpha*self.ace_se, + self.ace + zalpha*self.ace_se) + else: + # Risk Difference + self.risk_difference_vector = diff_est + self.risk_difference_var_vector = diff_var + self.risk_difference = est + self.risk_difference_se = np.sqrt(var) + self.risk_difference_ci = (self.risk_difference - zalpha*self.risk_difference_se, + self.risk_difference + zalpha*self.risk_difference_se) + # Risk Ratio + self.risk_ratio_vector = rratio_est + self.risk_ratio_var_vector = rratio_var + ln_rr, ln_rr_var = calculate_joint_estimate(np.log(self.risk_ratio_vector), + self.risk_ratio_var_vector, method=method) + self.risk_ratio = np.exp(ln_rr) + self.risk_ratio_se = np.sqrt(ln_rr_var) + self.risk_ratio_ci = (np.exp(ln_rr - zalpha*self.risk_ratio_se), + np.exp(ln_rr + zalpha*self.risk_ratio_se)) + # Odds Ratio + self.odds_ratio_vector = oratio_est + self.odds_ratio_var_vector = oratio_var + ln_or, ln_or_var = calculate_joint_estimate(np.log(self.odds_ratio_vector), + self.odds_ratio_var_vector, method=method) + self.odds_ratio = np.exp(ln_or) + self.odds_ratio_se = np.sqrt(ln_or_var) + self.odds_ratio_ci = (np.exp(ln_or - zalpha*self.odds_ratio_se), + np.exp(ln_or + zalpha*self.odds_ratio_se)) + + def summary(self, decimal=3): + """Prints summary of model results + + Parameters + ---------- + decimal : int, optional + Number of decimal places to display. Default is 3 + """ + if (self._fit_outcome_ is False) or (self._fit_treatment_ is False): + raise ValueError('exposure_model and outcome_model must be specified before the estimate can ' + 'be generated') + + print('======================================================================') + print(' Single Cross-fit TMLE ') + print('======================================================================') + fmt = 'Treatment: {:<15} No. Observations: {:<20}' + print(fmt.format(self.exposure, self.df.shape[0])) + fmt = 'Outcome: {:<15} No. 
of Splits: {:<20}' + print(fmt.format(self.outcome, self._n_splits_)) + fmt = 'Method: {:<15} No. of Partitions: {:<20}' + print(fmt.format(self._combine_method_, self._n_partitions)) + + print('======================================================================') + if self._continuous_outcome_: + print('Average Causal Effect: ', round(float(self.ace), decimal)) + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.ace_ci[0], decimal)), ',', + str(round(self.ace_ci[1], decimal)) + ')') + else: + print('Risk Difference: ', round(float(self.risk_difference), decimal)) + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.risk_difference_ci[0], decimal)), ',', + str(round(self.risk_difference_ci[1], decimal)) + ')') + print('----------------------------------------------------------------------') + print('Risk Ratio: ', round(float(self.risk_ratio), decimal)) + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.risk_ratio_ci[0], decimal)), ',', + str(round(self.risk_ratio_ci[1], decimal)) + ')') + print('----------------------------------------------------------------------') + print('Odds Ratio: ', round(float(self.odds_ratio), decimal)) + print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' + + str(round(self.odds_ratio_ci[0], decimal)), ',', + str(round(self.odds_ratio_ci[1], decimal)) + ')') + print('======================================================================') + + def run_diagnostics(self, color='gray'): + """Runs available diagnostics for the plots. Currently diagnostics consist of a plot of the different point + estimates and variance estimates across different partitions. Diagnostics for cross-fit estimators is ongoing. + If you have any suggestions, please feel free to contact me on GitHub + + Parameters + ---------- + color : str, optional + Controls color of the plots. 
Default is gray + + Returns + ------- + Plot to console + """ + # Continuous outcomes have less plots to generate + if self._continuous_outcome_: + _run_diagnostic_(diff=self.ace_vector, diff_var=self.ace_var_vector, + color=color) + + # Binary outcomes have plots for all measures + else: + _run_diagnostic_(diff=self.risk_difference_vector, diff_var=self.risk_difference_var_vector, + rratio=self.risk_ratio_vector, rratio_var=self.risk_ratio_var_vector, + oratio=self.odds_ratio_vector, oratio_var=self.odds_ratio_var_vector, + color=color) + + def _single_crossfit_(self, random_state): + """Background function that runs a single crossfit of the split samples + """ + # Dividing into s different splits + sample_split = _sample_split_(self.df, n_splits=self._n_splits_, random_state=random_state) + + # Determining pairings to use for each sample split and each combination + pairing_exposure = [i - 1 for i in range(self._n_splits_)] + pairing_outcome = pairing_exposure + + # Estimating treatment nuisance model + a_models = _treatment_nuisance_(treatment=self.exposure, estimator=self._a_estimator, + samples=sample_split, covariates=self._a_covariates) + # Estimating outcome nuisance model + y_models = _outcome_nuisance_(outcome=self.outcome, estimator=self._y_estimator, + samples=sample_split, covariates=self._y_covariates) + + # Generating predictions based on set pairs for cross-fit procedure + predictions = [] + y_obs, a_obs = np.array([]), np.array([]) + split_index = [] + for id, ep, op in zip(range(self._n_splits_), pairing_exposure, pairing_outcome): + predictions.append(self._generate_predictions_(sample_split[id], + a_model_v=a_models[ep], + y_model_v=y_models[op])) + # Generating vector of Y in correct order + y_obs = np.append(y_obs, np.asarray(sample_split[id][self.outcome])) + # Generating vector of A in correct order + a_obs = np.append(a_obs, np.asarray(sample_split[id][self.exposure])) + # Generating index for splits + split_index.extend([id]*sample_split[id].shape[0]) + + # Stacking Predicted Pr(A=1), Y(a=1), Y(a=0) + pred_a_array, pred_y1_array, pred_y0_array = np.array([]), np.array([]), np.array([]) + for preds in predictions: + pred_a_array = np.append(pred_a_array, preds[0]) + pred_y1_array = np.append(pred_y1_array, preds[1]) + pred_y0_array = np.append(pred_y0_array, preds[2]) + + # Applying bounds if requested + if self._gbounds: # Bounding g-model if requested + pred_a_array = probability_bounds(pred_a_array, bounds=self._gbounds) + + # Calculating point estimates + targeted_vals = targeting_step(y=y_obs, a=a_obs, + py_a=pred_y1_array, py_n=pred_y0_array, + pa1=pred_a_array, pa0=1-pred_a_array, + splits=np.asarray(split_index)) + + if self._continuous_outcome_: + difference, var_diff = tmle_calculator(y=y_obs, + ystar1=targeted_vals[0], ystar0=targeted_vals[1], + ystara=targeted_vals[2], + h1w=targeted_vals[3], h0w=targeted_vals[4], haw=targeted_vals[5], + splits=np.asarray(split_index), + measure='ate', + lower_bound=self._continuous_min, upper_bound=self._continuous_max) + return difference, var_diff + else: + difference, var_diff = tmle_calculator(y=y_obs, + ystar1=targeted_vals[0], ystar0=targeted_vals[1], + ystara=targeted_vals[2], + h1w=targeted_vals[3], h0w=targeted_vals[4], haw=targeted_vals[5], + splits=np.asarray(split_index), + measure='risk_difference') + rratio, var_rratio = tmle_calculator(y=y_obs, + ystar1=targeted_vals[0], ystar0=targeted_vals[1], + ystara=targeted_vals[2], + h1w=targeted_vals[3], h0w=targeted_vals[4], haw=targeted_vals[5], + 
splits=np.asarray(split_index),
+                                                 measure='risk_ratio')
+            oratio, var_oratio = tmle_calculator(y=y_obs,
+                                                 ystar1=targeted_vals[0], ystar0=targeted_vals[1],
+                                                 ystara=targeted_vals[2],
+                                                 h1w=targeted_vals[3], h0w=targeted_vals[4], haw=targeted_vals[5],
+                                                 splits=np.asarray(split_index),
+                                                 measure='odds_ratio')
+            return difference, var_diff, rratio, var_rratio, oratio, var_oratio
+
+    def _generate_predictions_(self, sample, a_model_v, y_model_v):
+        """Generates predictions from the fitted nuisance functions (in the background of _single_crossfit_())
+        """
+        s = sample.copy()
+
+        # Predicting Pr(A=1|L)
+        xdata = np.asarray(patsy.dmatrix(self._a_covariates + ' - 1', s))
+        a_pred = _ml_predictor(xdata, fitted_algorithm=a_model_v)
+
+        # Predicting E(Y|A=1, L)
+        s[self.exposure] = 1
+        xdata = np.asarray(patsy.dmatrix(self._y_covariates + ' - 1', s))
+        y_treat = _ml_predictor(xdata, fitted_algorithm=y_model_v)
+
+        # Predicting E(Y|A=0, L)
+        s[self.exposure] = 0
+        xdata = np.asarray(patsy.dmatrix(self._y_covariates + ' - 1', s))
+        y_none = _ml_predictor(xdata, fitted_algorithm=y_model_v)
+
+        return a_pred, y_treat, y_none
+
+
+class DoubleCrossfitTMLE:
+    """Implementation of the Targeted Maximum Likelihood Estimator with a double cross-fit procedure. The purpose
+    of the cross-fit procedure is to allow for non-Donsker nuisance function estimators. Some machine learning
+    algorithms are non-Donsker. In practice, this means that confidence interval coverage can be incorrect (and
+    bias can persist) when certain nuisance function estimators are used. Cross-fitting is meant to alleviate
+    this issue, so cross-fitting with a doubly-robust estimator is recommended when using machine learning.
+
+    `DoubleCrossfitTMLE` uses a double cross-fit, where the data set is partitioned into at least three
+    non-overlapping splits. The nuisance function estimators are then fit in each split, and the fitted nuisance
+    functions are used to predict values in a different, non-overlapping split. This decouples the nuisance
+    function estimation from the data used to generate the predictions
+
+    Note
+    ----
+    Because repetitions of the procedure are needed to reduce the variance determined by a particular partition, it
+    can take a long time to run this code.
+
+    Parameters
+    ----------
+    df : DataFrame
+        Pandas dataframe containing all necessary variables
+    exposure : str
+        Label for treatment column in the pandas data frame
+    outcome : str
+        Label for outcome column in the pandas data frame
+    alpha : float, optional
+        Alpha for confidence interval level. Default is 0.05
+    continuous_bound : float, optional
+        Optional argument to control the bounding feature for continuous outcomes. The bounding process may result
+        in values of 0 or 1, which are undefined for logit(x). This parameter adds the specified value to 0 and
+        subtracts it from 1, respectively. Default value is 0.0005
+
+    Examples
+    --------
+    Setting up the environment
+
+    >>> from sklearn.linear_model import LogisticRegression
+    >>> from zepid import load_sample_data
+    >>> from zepid.causal.doublyrobust import DoubleCrossfitTMLE
+    >>> df = load_sample_data(False).drop(columns='cd4_wk45').dropna()
+
+    Estimating the double cross-fit TMLE
+
+    >>> dctmle = DoubleCrossfitTMLE(df, exposure='art', outcome='dead')
+    >>> dctmle.exposure_model("male + age0 + cd40 + dvl0", estimator=LogisticRegression(solver='lbfgs'))
+    >>> dctmle.outcome_model("art + male + age0 + cd40 + dvl0", estimator=LogisticRegression(solver='lbfgs'))
+    >>> dctmle.fit(n_splits=5, n_partitions=100)
+    >>> dctmle.summary()
+
+    References
+    ----------
+    Zivich PN, & Breskin A. (2020). Machine learning for causal inference: on the use of cross-fit estimators.
+    arXiv preprint arXiv:2004.10337.
+
+    Newey WK, Robins JR. (2018) "Cross-fitting and fast remainder rates for semiparametric estimation".
+    arXiv:1801.09138
+
+    Chernozhukov V, Chetverikov D, Demirer M, Duflo E, Hansen C, Newey W, & Robins J. (2018). "Double/debiased machine
+    learning for treatment and structural parameters". The Econometrics Journal 21:1; C1-C68
+    """
+    def __init__(self, df, exposure, outcome, alpha=0.05, continuous_bound=0.0005):
+        self.exposure = exposure
+        self.outcome = outcome
+        self.df, self._miss_flag, self._continuous_outcome_ = check_input_data(data=df,
+                                                                               exposure=exposure,
+                                                                               outcome=outcome,
+                                                                               estimator="DoubleCrossfitTMLE",
+                                                                               drop_censoring=True,
+                                                                               drop_missing=True,
+                                                                               binary_exposure_only=True)
+        self.alpha = alpha
+
+        # bounding for continuous Y
+        if self._continuous_outcome_:
+            self._continuous_min = np.min(self.df[outcome])
+            self._continuous_max = np.max(self.df[outcome])
+            self._cb = continuous_bound
+            self.df[outcome] = tmle_unit_bounds(y=self.df[outcome], mini=self._continuous_min,
+                                                maxi=self._continuous_max, bound=self._cb)
+        else:
+            self._cb = 0.0
+
+        self._a_covariates = None
+        self._y_covariates = None
+        self._a_estimator = None
+        self._y_estimator = None
+        self._fit_treatment_ = False
+        self._fit_outcome_ = False
+        self._gbounds = None
+        self._n_splits_ = 0
+        self._n_partitions = 0
+        self._combine_method_ = None
+
+        self.ace_vector = None
+        self.ace_var_vector = None
+        self.ace = None
+        self.ace_ci = None
+        self.ace_se = None
+
+        self.risk_difference_vector = None
+        self.risk_difference_var_vector = None
+        self.risk_difference = None
+        self.risk_difference_ci = None
+        self.risk_difference_se = None
+
+        self.risk_ratio_vector = None
+        self.risk_ratio_var_vector = None
+        self.risk_ratio = None
+        self.risk_ratio_ci = None
+        self.risk_ratio_se = None
+
+        self.odds_ratio_vector = None
+        self.odds_ratio_var_vector = None
+        self.odds_ratio = None
+        self.odds_ratio_se = None
+        self.odds_ratio_ci = None
+
+    def exposure_model(self, covariates, estimator, bound=False):
+        """Specify the treatment nuisance model variables and estimator(s) to use. These parameters are held
+        in the background until the .fit() function is called. These approaches are used for each sample split
+
+        Parameters
+        ----------
+        covariates : str
+            Confounders to include in the propensity score model. Follows patsy notation
+        estimator :
+            Estimator to use for prediction of the propensity score
+        bound : float, list, optional
+            Whether to bound predicted probabilities. Default is False, which does not bound
+        """
+        self._a_estimator = estimator
+        self._a_covariates = covariates
+        self._fit_treatment_ = True
+        self._gbounds = bound
+
+    def outcome_model(self, covariates, estimator):
+        """Specify the outcome nuisance model variables and estimator(s) to use. These parameters are held
+        in the background until the .fit() function is called. These approaches are used for each sample split
+
+        Parameters
+        ----------
+        covariates : str
+            Covariates to include in the outcome model. Follows patsy notation
+        estimator :
+            Estimator to use for prediction of the outcome
+        """
+        self._y_estimator = estimator
+        self._y_covariates = covariates
+        self._fit_outcome_ = True
+
+    def fit(self, n_splits=3, n_partitions=100, method='median', random_state=None):
+        """Runs the cross-fit estimation procedure with the targeted maximum likelihood estimator. The
+        estimation process is completed for multiple different partitions during the procedure. The final estimate
+        is defined as either the median or mean of the causal measure from each of the different partitions. Median
+        is used as the default since it is more stable.
+
+        Note
+        ----
+        `n_partitions` should be kept high to reduce the dependency of the results on any particular partition of
+        the data
+
+        Confidence intervals come from the influence curves and incorporate both the within-split variance and the
+        between-split variance.
+
+        Parameters
+        ----------
+        n_splits : int
+            Number of splits to use with a default of 3. The number of splits must be greater than or equal to 3.
+        n_partitions : int
+            Number of times to repeat the partition process. The default is 100, which I have seen good performance
+            with in the past. Note that this algorithm can take a long time to run for high values of this parameter.
+            It is best to test out run-times on small numbers first. If running the partitions in parallel, this
+            value can also be reduced
+        method : str, optional
+            Method to obtain the point estimates and standard errors: either the median or the mean across the
+            partitions. The median is preferred, since it is more stable against the extreme outliers that can
+            occur in finite samples
+        random_state : None, int, optional
+            Whether to set a seed for the partitions. Default is None (which does not use a user-set seed). Any valid
+            NumPy seed can be input. Note that you should also set the random_state of all (applicable) estimators
+            to ensure replicability. Seeds are chosen by the following procedure: the input random_state is passed
+            to numpy.random.RandomState.choice, which selects n_partitions seeds between 0 and 5 million. That list
+            of n_partitions seeds is then passed, one per partition, to pandas.DataFrame.sample(random_state) within
+            the cross-fitting procedure.
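+
+        Examples
+        --------
+        A seeded call might look like the following (the argument values here are illustrative only):
+
+        >>> dctmle.fit(n_splits=3, n_partitions=100, method='median', random_state=201820)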
+ """ + # Checking for various issues + if not self._fit_treatment_: + raise ValueError("exposure_model() must be called before fit()") + if not self._fit_outcome_: + raise ValueError("outcome_model() must be called before fit()") + if n_splits < 3: + raise ValueError("DoubleCrossfitTMLE requires that n_splits >= 3") + + # Storing some information + self._n_splits_ = n_splits + self._n_partitions = n_partitions + self._combine_method_ = method + + # Creating blank lists + diff_est, diff_var, rratio_est, rratio_var, oratio_est, oratio_var = [], [], [], [], [], [] + + # Conducts the re-sampling procedure + if random_state is None: + random_state = [None] * n_partitions + else: + random_state = RandomState(random_state).choice(range(5000000), size=n_partitions, replace=False) + for j in range(self._n_partitions): + # Estimating for a particular split (lots of functions happening in the background) + result = self._single_crossfit_(random_state=random_state[j]) + + # Appending results of this particular split combination + diff_est.append(result[0]) + diff_var.append(result[1]) + if not self._continuous_outcome_: + rratio_est.append(result[2]) + rratio_var.append(result[3]) + oratio_est.append(result[4]) + oratio_var.append(result[5]) + + # Obtaining overall estimate and (1-alpha)% CL from all splits + zalpha = norm.ppf(1 - self.alpha / 2, loc=0, scale=1) + + est, var = calculate_joint_estimate(diff_est, diff_var, method=method) + if self._continuous_outcome_: + self.ace_vector = diff_est + self.ace_var_vector = diff_var + self.ace = est + self.ace_se = np.sqrt(var) + self.ace_ci = (self.ace - zalpha*self.ace_se, + self.ace + zalpha*self.ace_se) + else: + # Risk Difference + self.risk_difference_vector = diff_est + self.risk_difference_var_vector = diff_var + self.risk_difference = est + self.risk_difference_se = np.sqrt(var) + self.risk_difference_ci = (self.risk_difference - zalpha*self.risk_difference_se, + self.risk_difference + zalpha*self.risk_difference_se) + # Risk Ratio + self.risk_ratio_vector = rratio_est + self.risk_ratio_var_vector = rratio_var + ln_rr, ln_rr_var = calculate_joint_estimate(np.log(self.risk_ratio_vector), + self.risk_ratio_var_vector, method=method) + self.risk_ratio = np.exp(ln_rr) + self.risk_ratio_se = np.sqrt(ln_rr_var) + self.risk_ratio_ci = (np.exp(ln_rr - zalpha*self.risk_ratio_se), + np.exp(ln_rr + zalpha*self.risk_ratio_se)) + # Odds Ratio + self.odds_ratio_vector = oratio_est + self.odds_ratio_var_vector = oratio_var + ln_or, ln_or_var = calculate_joint_estimate(np.log(self.odds_ratio_vector), + self.odds_ratio_var_vector, method=method) + self.odds_ratio = np.exp(ln_or) + self.odds_ratio_se = np.sqrt(ln_or_var) + self.odds_ratio_ci = (np.exp(ln_or - zalpha*self.odds_ratio_se), + np.exp(ln_or + zalpha*self.odds_ratio_se)) + + def summary(self, decimal=3): + """Prints summary of model results + + Parameters + ---------- + decimal : int, optional + Number of decimal places to display. Default is 3 + """ + if (self._fit_outcome_ is False) or (self._fit_treatment_ is False): + raise ValueError('exposure_model and outcome_model must be specified before the estimate can ' + 'be generated') + + print('======================================================================') + print(' Double Cross-fit TMLE ') + print('======================================================================') + fmt = 'Treatment: {:<15} No. Observations: {:<20}' + print(fmt.format(self.exposure, self.df.shape[0])) + fmt = 'Outcome: {:<15} No. 
of Splits: {:<20}'
+        print(fmt.format(self.outcome, self._n_splits_))
+        fmt = 'Method: {:<15} No. of Partitions: {:<20}'
+        print(fmt.format(self._combine_method_, self._n_partitions))
+
+        print('======================================================================')
+        if self._continuous_outcome_:
+            print('Average Causal Effect: ', round(float(self.ace), decimal))
+            print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' +
+                  str(round(self.ace_ci[0], decimal)), ',',
+                  str(round(self.ace_ci[1], decimal)) + ')')
+        else:
+            print('Risk Difference: ', round(float(self.risk_difference), decimal))
+            print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' +
+                  str(round(self.risk_difference_ci[0], decimal)), ',',
+                  str(round(self.risk_difference_ci[1], decimal)) + ')')
+            print('----------------------------------------------------------------------')
+            print('Risk Ratio: ', round(float(self.risk_ratio), decimal))
+            print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' +
+                  str(round(self.risk_ratio_ci[0], decimal)), ',',
+                  str(round(self.risk_ratio_ci[1], decimal)) + ')')
+            print('----------------------------------------------------------------------')
+            print('Odds Ratio: ', round(float(self.odds_ratio), decimal))
+            print(str(round(100 * (1 - self.alpha), 1)) + '% two-sided CI: (' +
+                  str(round(self.odds_ratio_ci[0], decimal)), ',',
+                  str(round(self.odds_ratio_ci[1], decimal)) + ')')
+        print('======================================================================')
+
+    def run_diagnostics(self, color='gray'):
+        """Runs the available diagnostics. Currently, diagnostics consist of plots of the point estimates and
+        variance estimates across the different partitions. Diagnostics for cross-fit estimators are an ongoing
+        area of development; if you have any suggestions, please feel free to contact me on GitHub
+
+        Parameters
+        ----------
+        color : str, optional
+            Controls color of the plots. 
Default is gray + + Returns + ------- + Plot to console + """ + # Continuous outcomes have less plots to generate + if self._continuous_outcome_: + _run_diagnostic_(diff=self.ace_vector, diff_var=self.ace_var_vector, + color=color) + + # Binary outcomes have plots for all measures + else: + _run_diagnostic_(diff=self.risk_difference_vector, diff_var=self.risk_difference_var_vector, + rratio=self.risk_ratio_vector, rratio_var=self.risk_ratio_var_vector, + oratio=self.odds_ratio_vector, oratio_var=self.odds_ratio_var_vector, + color=color) + + def _single_crossfit_(self, random_state): + """Background function that runs a single crossfit of the split samples + """ + # Dividing into s different splits + sample_split = _sample_split_(self.df, n_splits=self._n_splits_, random_state=random_state) + + # Determining pairings to use for each sample split and each combination + pairing_exposure = [i - 1 for i in range(self._n_splits_)] + pairing_outcome = [i - 2 for i in range(self._n_splits_)] + + # Estimating treatment nuisance model + a_models = _treatment_nuisance_(treatment=self.exposure, estimator=self._a_estimator, + samples=sample_split, covariates=self._a_covariates) + # Estimating outcome nuisance model + y_models = _outcome_nuisance_(outcome=self.outcome, estimator=self._y_estimator, + samples=sample_split, covariates=self._y_covariates) + + # Generating predictions based on set pairs for cross-fit procedure + predictions = [] + y_obs, a_obs = np.array([]), np.array([]) + split_index = [] + for id, ep, op in zip(range(self._n_splits_), pairing_exposure, pairing_outcome): + predictions.append(self._generate_predictions_(sample_split[id], + a_model_v=a_models[ep], + y_model_v=y_models[op])) + # Generating vector of Y in correct order + y_obs = np.append(y_obs, np.asarray(sample_split[id][self.outcome])) + # Generating vector of A in correct order + a_obs = np.append(a_obs, np.asarray(sample_split[id][self.exposure])) + # Generating index for splits + split_index.extend([id]*sample_split[id].shape[0]) + + # Stacking Predicted Pr(A=1), Y(a=1), Y(a=0) + pred_a_array, pred_y1_array, pred_y0_array = np.array([]), np.array([]), np.array([]) + for preds in predictions: + pred_a_array = np.append(pred_a_array, preds[0]) + pred_y1_array = np.append(pred_y1_array, preds[1]) + pred_y0_array = np.append(pred_y0_array, preds[2]) + + # Applying bounds if requested + if self._gbounds: # Bounding g-model if requested + pred_a_array = probability_bounds(pred_a_array, bounds=self._gbounds) + + # Calculating point estimates + targeted_vals = targeting_step(y=y_obs, a=a_obs, + py_a=pred_y1_array, py_n=pred_y0_array, + pa1=pred_a_array, pa0=1-pred_a_array, + splits=np.asarray(split_index)) + + if self._continuous_outcome_: + difference, var_diff = tmle_calculator(y=y_obs, + ystar1=targeted_vals[0], ystar0=targeted_vals[1], + ystara=targeted_vals[2], + h1w=targeted_vals[3], h0w=targeted_vals[4], haw=targeted_vals[5], + splits=np.asarray(split_index), + measure='ate', + lower_bound=self._continuous_min, upper_bound=self._continuous_max) + return difference, var_diff + else: + difference, var_diff = tmle_calculator(y=y_obs, + ystar1=targeted_vals[0], ystar0=targeted_vals[1], + ystara=targeted_vals[2], + h1w=targeted_vals[3], h0w=targeted_vals[4], haw=targeted_vals[5], + splits=np.asarray(split_index), + measure='risk_difference') + rratio, var_rratio = tmle_calculator(y=y_obs, + ystar1=targeted_vals[0], ystar0=targeted_vals[1], + ystara=targeted_vals[2], + h1w=targeted_vals[3], h0w=targeted_vals[4], 
haw=targeted_vals[5],
+                                                 splits=np.asarray(split_index),
+                                                 measure='risk_ratio')
+            oratio, var_oratio = tmle_calculator(y=y_obs,
+                                                 ystar1=targeted_vals[0], ystar0=targeted_vals[1],
+                                                 ystara=targeted_vals[2],
+                                                 h1w=targeted_vals[3], h0w=targeted_vals[4], haw=targeted_vals[5],
+                                                 splits=np.asarray(split_index),
+                                                 measure='odds_ratio')
+            return difference, var_diff, rratio, var_rratio, oratio, var_oratio
+
+    def _generate_predictions_(self, sample, a_model_v, y_model_v):
+        """Generates predictions from the fitted nuisance functions (in the background of _single_crossfit_())
+        """
+        s = sample.copy()
+
+        # Predicting Pr(A=1|L)
+        xdata = np.asarray(patsy.dmatrix(self._a_covariates + ' - 1', s))
+        a_pred = _ml_predictor(xdata, fitted_algorithm=a_model_v)
+
+        # Predicting E(Y|A=1, L)
+        s[self.exposure] = 1
+        xdata = np.asarray(patsy.dmatrix(self._y_covariates + ' - 1', s))
+        y_treat = _ml_predictor(xdata, fitted_algorithm=y_model_v)
+
+        # Predicting E(Y|A=0, L)
+        s[self.exposure] = 0
+        xdata = np.asarray(patsy.dmatrix(self._y_covariates + ' - 1', s))
+        y_none = _ml_predictor(xdata, fitted_algorithm=y_model_v)
+
+        return a_pred, y_treat, y_none
+
+
+def calculate_joint_estimate(point_est, var_est, method):
+    """Function that combines the different estimates across partitions into a single point estimate and standard
+    error. This function can also be called directly with user-provided data. That option allows users to run the
+    cross-fit procedure in parallel, save the results, and then produce the final point estimate via this function.
+
+    Note
+    ----
+    This function is only intended to make the parallel cross-fit procedure easier for users to implement. It does
+    not run any part of the cross-fitting procedure itself; it only calculates a summary from the disparate
+    partitions
+
+    Parameters
+    ----------
+    point_est : container
+        Container of point estimates
+    var_est : container
+        Container of variance estimates
+    method : string
+        Method to combine results. Options are 'median' or 'mean'. The median is recommended since it is more
+        stable when the number of partitions is small
+    """
+    if len(point_est) != len(var_est):
+        raise ValueError("The length of point_est and var_est do not match")
+
+    # Converting to arrays so the arithmetic below also works with plain list inputs
+    point_est = np.asarray(point_est)
+    var_est = np.asarray(var_est)
+
+    # Using the Median Method
+    if method == 'median':
+        single_point = np.median(point_est)
+        single_point_var = np.median(var_est + (point_est - single_point)**2)
+
+    # Using the Mean Method
+    elif method == 'mean':
+        single_point = np.mean(point_est)
+        single_point_var = np.mean(var_est + (point_est - single_point)**2)
+
+    # Error if neither exists
+    else:
+        raise ValueError("Either 'mean' or 'median' must be selected for the pooling of repeated sample splits")
+
+    return single_point, single_point_var
+
+
+def targeting_step(y, a, py_a, py_n, pa1, pa0, splits):
+    """Estimates the TMLE fluctuation (targeting) parameters within each split, using a logistic model with the
+    clever covariates as the design matrix and the initial outcome predictions as an offset
+    """
+    f = sm.families.family.Binomial()
+    h1w = a / pa1
+    h0w = -(1 - a) / pa0
+    haw = h1w + h0w
+    py_o = a * py_a + (1 - a) * py_n
+
+    ystar1, ystar0, ystara = [], [], []
+    for s in set(splits):
+        ys = y[splits == s]
+        pa1s = pa1[splits == s]
+        pa0s = pa0[splits == s]
+        py_as = py_a[splits == s]
+        py_ns = py_n[splits == s]
+        py_os = py_o[splits == s]
+        h1ws = h1w[splits == s]
+        h0ws = h0w[splits == s]
+
+        # Targeting Step
+        log = sm.GLM(ys, np.column_stack((h1ws, h0ws)), offset=np.log(probability_to_odds(py_os)),
+                     family=f, missing='drop').fit()
+        epsilon = log.params
+
+        # Getting updated predictions from targeting step
+        ystar1 = np.append(ystar1, logistic.cdf(np.log(probability_to_odds(py_as)) + epsilon[0] / pa1s))
+        ystar0 = np.append(ystar0, logistic.cdf(np.log(probability_to_odds(py_ns)) - epsilon[1] / pa0s))
+        ystara = np.append(ystara, log.predict(np.column_stack((h1ws, h0ws)),
+                                               offset=np.log(probability_to_odds(py_os))))
+    return ystar1, ystar0, ystara, h1w, h0w, haw
+
+
+def tmle_calculator(y, ystar1, ystar0, ystara, h1w, h0w, haw, splits,
+                    measure='ate', lower_bound=None, upper_bound=None):
+    """Function to calculate TMLE estimates for SingleCrossfitTMLE and DoubleCrossfitTMLE
+    """
+    if measure in ["ate", "risk_difference"]:
+        # Unbounding if continuous outcome (ate)
+        if measure == "ate":
+            # Unbounding continuous outcomes
+            y = tmle_unit_unbound(y, mini=lower_bound, maxi=upper_bound)
+            ystar1 = tmle_unit_unbound(ystar1, mini=lower_bound, maxi=upper_bound)
+            ystar0 = tmle_unit_unbound(ystar0, mini=lower_bound, maxi=upper_bound)
+            ystara = tmle_unit_unbound(ystara, mini=lower_bound, maxi=upper_bound)
+
+        # Point Estimate
+        estimate = np.mean(ystar1 - ystar0)
+        # Variance estimate
+        variance = []
+        for s in set(splits):
+            ys = y[splits == s]
+            ystar1s = ystar1[splits == s]
+            ystar0s = ystar0[splits == s]
+            ystaras = ystara[splits == s]
+            haws = haw[splits == s]
+
+            ic = haws * (ys - ystaras) + (ystar1s - ystar0s) - estimate
+            variance.append(np.var(ic, ddof=1))
+
+        return estimate, (np.mean(variance) / y.shape[0])
+
+    elif measure == 'risk_ratio':
+        # Point Estimate
+        estimate = np.mean(ystar1) / np.mean(ystar0)
+        variance = []
+        for s in set(splits):
+            ys = y[splits == s]
+            ystar1s = ystar1[splits == s]
+            ystar0s = ystar0[splits == s]
+            ystaras = ystara[splits == s]
+            h1ws = h1w[splits == s]
+            h0ws = h0w[splits == s]
+
+            ic = (1/np.mean(ystar1s) * (h1ws * (ys - ystaras)) + ystar1s - np.mean(ystar1s) -
+                  (1/np.mean(ystar0s) * (-1 * h0ws * (ys - ystaras)) + ystar0s - np.mean(ystar0s)))
+            variance.append(np.var(ic, ddof=1))
+
+        return estimate, (np.mean(variance) / y.shape[0])
+
+    elif measure == 'odds_ratio':
+        # Point Estimate
+        estimate = (np.mean(ystar1) / (1-np.mean(ystar1))) / (np.mean(ystar0) / (1-np.mean(ystar0)))
+        variance = []
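+        # Variance below: as in the branches above, the influence-curve variance is computed within each
+        # split and then averaged across splits. A worked illustration of the point estimate (values assumed
+        # for illustration only): mean(ystar1)=0.3 and mean(ystar0)=0.2 give OR = (0.3/0.7) / (0.2/0.8) = 1.71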
+        for s in set(splits):
+            ys = y[splits == s]
+            ystar1s = ystar1[splits == s]
+            ystar0s = ystar0[splits == s]
+            ystaras = ystara[splits == s]
+            h1ws = h1w[splits == s]
+            h0ws = h0w[splits == s]
+
+            ic = ((1-np.mean(ystar1s))/np.mean(ystar1s)*(h1ws*(ys - ystaras) + ystar1s) -
+                  (1-np.mean(ystar0s))/np.mean(ystar0s)*(-1*h0ws*(ys - ystaras) + ystar0s))
+            variance.append(np.var(ic, ddof=1))
+
+        return estimate, (np.mean(variance) / y.shape[0])
+
+    else:
+        raise ValueError("Invalid measure requested within function: tmle_calculator. Input measure is " +
+                         str(measure) + " but only 'ate', 'risk_difference', 'risk_ratio', and "
+                                        "'odds_ratio' are accepted.")
+
+
+def _sample_split_(data, n_splits, random_state=None):
+    """Background function to split the data into n_splits non-overlapping pieces
+    """
+    # Break into approximately even splits
+    n = int(data.shape[0] / n_splits)
+
+    splits = []
+    data_to_sample = data.copy()
+    # Procedure is done n_splits - 1 times
+    for i in range(n_splits-1):  # Loops through the splits, taking a random sample from the remaining data
+        s = data_to_sample.sample(n=n, random_state=RandomState(random_state))
+        splits.append(s.copy())
+        data_to_sample = data_to_sample.loc[data_to_sample.index.difference(s.index)].copy()
+
+    # Remaining data becomes the last split
+    splits.append(data_to_sample)
+    return splits
+
+
+def _ml_predictor(xdata, fitted_algorithm):
+    """Background function to generate predictions from a fitted estimator
+    """
+    if hasattr(fitted_algorithm, 'predict_proba'):
+        return fitted_algorithm.predict_proba(xdata)[:, 1]
+    elif hasattr(fitted_algorithm, 'predict'):
+        return fitted_algorithm.predict(xdata)
+
+
+def _treatment_nuisance_(treatment, estimator, samples, covariates):
+    """Background function to fit the treatment nuisance model in each split
+    """
+    treatment_fit_splits = []
+    for s in samples:
+        # Using patsy to pull out the covariates
+        xdata = np.asarray(patsy.dmatrix(covariates + ' - 1', s))
+        ydata = np.asarray(s[treatment])
+
+        # Fitting machine learner / super learner to each split
+        est = copy.deepcopy(estimator)
+        try:
+            fm = est.fit(X=xdata, y=ydata)
+        except TypeError:
+            raise TypeError("Currently custom_model must have the 'fit' function with arguments 'X', 'y'. This "
+                            "covers both sklearn and supylearner")
+
+        # Adding model to the list of models
+        treatment_fit_splits.append(fm)
+
+    return treatment_fit_splits
+
+
+def _outcome_nuisance_(outcome, estimator, samples, covariates):
+    """Background function to fit the outcome nuisance model in each split
+    """
+    outcome_fit_splits = []
+    for s in samples:
+        # Using patsy to pull out the covariates
+        xdata = np.asarray(patsy.dmatrix(covariates + ' - 1', s))
+        ydata = np.asarray(s[outcome])
+
+        # Fitting machine learner / super learner to each split
+        est = copy.deepcopy(estimator)
+        try:
+            fm = est.fit(X=xdata, y=ydata)
+        except TypeError:
+            raise TypeError("Currently custom_model must have the 'fit' function with arguments 'X', 'y'. This "
+                            "covers both sklearn and supylearner")
+
+        # Adding model to the list of models
+        outcome_fit_splits.append(fm)
+
+    return outcome_fit_splits
+
+
+def _estimate_density_plot_(estimates, bw_method='scott', fill=True, color='gray', variance=False):
+    """Generates a density plot of the different estimates for each of the different sample splits. Helps to
+    visualize the variability between the different splits. If there is high variability, this indicates high
+    sensitivity to the particular chosen split. 
+ + Returns + ------- + + """ + if variance: + x = np.linspace(0, np.max(estimates)+0.005, 10000) + else: + x = np.linspace(np.min(estimates)-0.02, np.max(estimates)+0.02, 10000) + density_t = gaussian_kde(estimates, bw_method=bw_method) + + # Plot + ax = plt.gca() + if fill: + ax.fill_between(x, density_t(x), color=color, alpha=0.2, label=None) + ax.plot(x, density_t(x), color=color) + ax.set_yticks([]) + return ax + + +def _run_diagnostic_(diff, diff_var, rratio=None, rratio_var=None, oratio=None, oratio_var=None, color="gray"): + """Background function to run all diagnostics + + Returns + ------- + Plot to console + """ + # Continuous outcomes have less plots to generate + if rratio is None: + # Point estimates + plt.subplot(121) + _estimate_density_plot_(diff, bw_method='scott', fill=True, color=color) + plt.title("ACE") + # Variance estimates + plt.subplot(122) + _estimate_density_plot_(diff_var, bw_method='scott', fill=True, color=color) + plt.title("Var(ACE)") + + # Binary outcomes have plots for all measures + else: + if oratio is None: + # Risk Difference estimates + plt.subplot(221) + _estimate_density_plot_(diff, bw_method='scott', fill=True, color=color) + plt.title("Risk Difference") + # Var(RD) estimates + plt.subplot(223) + _estimate_density_plot_(diff_var, bw_method='scott', + fill=True, color=color, variance=True) + plt.title("Var(RD)") + + # Risk Ratio estimates + plt.subplot(222) + _estimate_density_plot_(rratio, bw_method='scott', fill=True, color=color) + plt.title("Risk Ratio") + # Var(RR) estimates + plt.subplot(224) + _estimate_density_plot_(rratio_var, bw_method='scott', + fill=True, color=color, variance=True) + plt.title("Var(ln(RR))") + else: + # Risk Difference estimates + plt.subplot(231) + _estimate_density_plot_(diff, bw_method='scott', fill=True, color=color) + plt.title("Risk Difference") + # Var(RD) estimates + plt.subplot(234) + _estimate_density_plot_(diff_var, bw_method='scott', + fill=True, color=color, variance=True) + plt.title("Var(RD)") + + # Risk Ratio estimates + plt.subplot(232) + _estimate_density_plot_(rratio, bw_method='scott', fill=True, color=color) + plt.title("Risk Ratio") + # Var(RR) estimates + plt.subplot(235) + _estimate_density_plot_(rratio_var, bw_method='scott', + fill=True, color=color, variance=True) + plt.title("Var(ln(RR))") + # Odds Ratio estimates + plt.subplot(233) + _estimate_density_plot_(oratio, bw_method='scott', fill=True, color=color) + plt.title("Odds Ratio") + # Var(OR) estimates + plt.subplot(236) + _estimate_density_plot_(oratio_var, bw_method='scott', + fill=True, color=color, variance=True) + plt.title("Var(ln(OR))") + + plt.tight_layout() + plt.show() diff --git a/zepid/causal/doublyrobust/utils.py b/zepid/causal/doublyrobust/utils.py new file mode 100644 index 0000000..a287103 --- /dev/null +++ b/zepid/causal/doublyrobust/utils.py @@ -0,0 +1,16 @@ +import numpy as np + +# Utilities only meant for the doubly-robust branch + + +def tmle_unit_bounds(y, mini, maxi, bound): + # bounding for continuous outcomes + v = (y - mini) / (maxi - mini) + v = np.where(np.less(v, bound), bound, v) + v = np.where(np.greater(v, 1-bound), 1-bound, v) + return v + + +def tmle_unit_unbound(ystar, mini, maxi): + # unbounding of bounded continuous outcomes + return ystar*(maxi - mini) + mini diff --git a/zepid/causal/generalize/estimators.py b/zepid/causal/generalize/estimators.py index d156b03..b8fdfaa 100644 --- a/zepid/causal/generalize/estimators.py +++ b/zepid/causal/generalize/estimators.py @@ -4,7 +4,8 @@ import 
statsmodels.api as sm import statsmodels.formula.api as smf -from zepid.causal.utils import propensity_score, iptw_calculator, _bounding_ +from zepid.calc.utils import probability_bounds +from zepid.causal.utils import propensity_score, iptw_calculator class IPSW: @@ -161,8 +162,8 @@ def sampling_model(self, model_denominator, model_numerator='1', bound=None, sta self.sample['__numer__'] = 1 if bound: - self.sample['__denom__'] = _bounding_(self.sample['__denom__'], bounds=bound) - self.sample['__numer__'] = _bounding_(self.sample['__numer__'], bounds=bound) + self.sample['__denom__'] = probability_bounds(self.sample['__denom__'], bounds=bound) + self.sample['__numer__'] = probability_bounds(self.sample['__numer__'], bounds=bound) # Calculate IPSW (generalizability) if self.generalize: @@ -416,7 +417,10 @@ def outcome_model(self, model, print_results=True): # Printing results of the model and if any observations were dropped if print_results: + print('==============================================================================') + print('Outcome Model') print(self._outcome_model.summary()) + print('==============================================================================') def fit(self): """Uses the g-transport formula to obtain the risk difference and risk ratio from the sample. @@ -731,7 +735,10 @@ def outcome_model(self, model, outcome_type='binary', print_results=True): # Printing results of the model and if any observations were dropped if print_results: + print('==============================================================================') + print('Outcome Model') print(self._outcome_model.summary()) + print('==============================================================================') dfa = self.df.copy() dfa[self.exposure] = 1 diff --git a/zepid/causal/gformula/TimeFixed.py b/zepid/causal/gformula/TimeFixed.py index 2df065a..d6e7d81 100644 --- a/zepid/causal/gformula/TimeFixed.py +++ b/zepid/causal/gformula/TimeFixed.py @@ -5,7 +5,7 @@ import statsmodels.api as sm import statsmodels.formula.api as smf -from zepid.causal.utils import outcome_accuracy, plot_kde_accuracy +from zepid.causal.utils import check_input_data, outcome_accuracy, plot_kde_accuracy class TimeFixedGFormula: @@ -149,16 +149,16 @@ class TimeFixedGFormula: """ def __init__(self, df, exposure, outcome, exposure_type='binary', outcome_type='binary', standardize='population', weights=None): - if df.dropna(subset=[d for d in df.columns if d != outcome]).shape[0] != df.shape[0]: - warnings.warn("There is missing data that is not the outcome in the data set. TimeFixedGFormula will drop " - "all missing data that is not missing outcome data. 
TimeFixedGFormula will fit " - + str(df.dropna(subset=[d for d in df.columns if d != outcome]).shape[0]) + - ' of ' + str(df.shape[0]) + ' observations', UserWarning) - self.gf = df.copy().dropna(subset=[d for d in df.columns if d != outcome]).reset_index() - else: - self.gf = df.copy().reset_index() self.exposure = exposure self.outcome = outcome + self._missing_indicator = '__missing_indicator__' + self.gf, self._miss_flag, self._continuous_outcome_ = check_input_data(data=df, + exposure=exposure, + outcome=outcome, + estimator="TimeFixedGFormula", + drop_censoring=False, + drop_missing=True, + binary_exposure_only=False) if (outcome_type == 'binary') or (outcome_type == 'normal') or (outcome_type == 'poisson'): self.outcome_type = outcome_type @@ -222,7 +222,10 @@ def outcome_model(self, model, print_results=True): # Printing results of the model and if any observations were dropped if print_results: + print('==============================================================================') + print('Outcome Model') print(self._outcome_model.summary()) + print('==============================================================================') def fit(self, treatment, predict_missing=True): """Fit the parametric g-formula as specified. Binary and multivariate treatments are available. This @@ -556,20 +559,21 @@ class SurvivalGFormula: doi:10.1097/EDE.0b013e3181c1ea43 """ def __init__(self, df, idvar, exposure, outcome, time, weights=None): - if df.dropna().shape[0] != df.shape[0]: - warnings.warn("There is missing data in the dataset. By default, SurvivalGFormula will drop all missing " - "data. SurvivalGFormula will fit " + str(df.dropna().shape[0]) + ' of ' + - str(df.shape[0]) + ' observations', UserWarning) - - self.gf = df.copy().dropna().sort_values(by=[idvar, time]).reset_index(drop=True) - - if not df[exposure].dropna().value_counts().index.isin([0, 1]).all(): - raise ValueError("Only binary exposures are supported") - self.exposure = exposure self.outcome = outcome self.t = time self.id = idvar + self._missing_indicator = '__missing_indicator__' + self.gf, self._miss_flag, self._continuous_outcome_ = check_input_data(data=df, + exposure=exposure, + outcome=outcome, + estimator="SurvivalGFormula", + drop_censoring=True, + drop_missing=True, + binary_exposure_only=True) + self.gf = self.gf.copy().sort_values(by=[idvar, time]).reset_index(drop=True) + if self._continuous_outcome_: + raise ValueError("SurvivalGFormula does not support continuous outcomes") self._weights = weights self._outcome_model = None @@ -603,7 +607,10 @@ def outcome_model(self, model, print_results=True): # Printing results of the model if print_results: + print('==============================================================================') + print('Outcome Model') print(self._outcome_model.summary()) + print('==============================================================================') def fit(self, treatment): """Fit the parametric g-formula for time-to-event data. 
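# For orientation, a minimal usage sketch of TimeFixedGFormula as refactored above (input checks now run
# through check_input_data). This is an illustration only; column names follow zEpid's bundled sample data
# returned by load_sample_data().
from zepid import load_sample_data
from zepid.causal.gformula import TimeFixedGFormula

df = load_sample_data(False)
g = TimeFixedGFormula(df, exposure='art', outcome='dead')
g.outcome_model(model='art + male + age0 + cd40 + dvl0', print_results=False)
g.fit(treatment='all')    # marginal outcome if everyone had been treated
r_all = g.marginal_outcome
g.fit(treatment='none')   # marginal outcome if no one had been treated
print('Risk Difference:', r_all - g.marginal_outcome)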
To obtain the confidence intervals, use a bootstrap diff --git a/zepid/causal/gformula/TimeVary.py b/zepid/causal/gformula/TimeVary.py index 88e0e1e..b91bcad 100644 --- a/zepid/causal/gformula/TimeVary.py +++ b/zepid/causal/gformula/TimeVary.py @@ -207,7 +207,10 @@ def exposure_model(self, model, restriction=None, print_results=True): family=linkdist).fit() if print_results: + print('==============================================================================') + print('Predict-Exposure Model') print(self.exp_model.summary()) + print('==============================================================================') self._exposure_model_fit = True def outcome_model(self, model, restriction=None, print_results=True): @@ -242,8 +245,10 @@ def outcome_model(self, model, restriction=None, print_results=True): self.out_model = smf.glm(self.outcome + ' ~ ' + model, g, freq_weights=g[self._weights], family=linkdist).fit() if print_results: + print('==============================================================================') + print('Outcome Model') print(self.out_model.summary()) - + print('==============================================================================') self._outcome_model_fit = True def censoring_model(self, model, restriction=None, print_results=True): @@ -273,8 +278,10 @@ def censoring_model(self, model, restriction=None, print_results=True): self.cens_model = smf.glm('__uncensored__ ~ ' + model, g, freq_weights=g[self._weights], family=linkdist).fit() if print_results: + print('==============================================================================') + print('Censoring Model') print(self.cens_model.summary()) - + print('==============================================================================') self._censor_model_fit = True def add_covariate_model(self, label, covariate, model, restriction=None, recode=None, var_type='binary', @@ -338,7 +345,10 @@ def add_covariate_model(self, label, covariate, model, restriction=None, recode= f = m.fit() if print_results: + print('==============================================================================') + print('Covariate (' + str(covariate) + ') Model') print(f.summary()) + print('==============================================================================') # Adding to lists, it is used to predict variables later on for the time-varying... 
self._covariate_models.append(f) @@ -488,9 +498,9 @@ def fit(self, treatment, lags=None, sample=10000, t_max=None, in_recode=None, ou except TypeError: # gets around pandas <0.22 error gs = pd.concat(mc_simulated_data, ignore_index=True) - self.predicted_outcomes = gs[ - ['uid_g_zepid', self.exposure, self.outcome, self.time_in, self.time_out] + self._covariate].sort_values( - by=['uid_g_zepid', self.time_in]).reset_index(drop=True) + cols_to_keep = ['uid_g_zepid', self.idvar, self.exposure, self.outcome, self.time_in, + self.time_out] + self._covariate + self.predicted_outcomes = gs[cols_to_keep].sort_values(by=['uid_g_zepid', self.time_in]).reset_index(drop=True) @staticmethod def _predict(df, model, variable): @@ -623,13 +633,12 @@ def __init__(self, df, exposures, outcomes): # Check same number of treatments and outcomes if len(exposures) != len(outcomes): raise ValueError("The number of exposures must equal the number of outcomes") - self.exposure = exposures + # Checking that outcome is binary for o in outcomes: if not df[o].dropna().value_counts().index.isin([0, 1]).all(): raise ValueError('Only binary outcomes are currently implemented') - self.outcome = outcomes # Checking for recurrent outcomes. Recurrent are not currently supported @@ -714,14 +723,14 @@ def fit(self, treatments): # 2.1) Fit the model to the observed data if self.outcome[::-1].index(d) == 0: fm = smf.glm(d + ' ~ ' + m, df, family=linkdist).fit() # GLM - if self._printseqregresults: - print(fm.summary()) else: df[d] = np.where(df[prior_predict].isna(), df[d], df[prior_predict]) - fm = smf.glm(d + ' ~ ' + m, df, family=linkdist).fit() # GLM - if self._printseqregresults: - print(fm.summary()) + if self._printseqregresults: + print('==============================================================================') + print('Sequential Outcome Model') + print(fm.summary()) + print('==============================================================================') # 2.2) Generating predictions tf = df.copy() diff --git a/zepid/causal/ipw/IPTW.py b/zepid/causal/ipw/IPTW.py index c299f93..16c0b27 100644 --- a/zepid/causal/ipw/IPTW.py +++ b/zepid/causal/ipw/IPTW.py @@ -7,8 +7,10 @@ from statsmodels.tools.sm_exceptions import DomainWarning import matplotlib.pyplot as plt -from zepid.causal.utils import (propensity_score, plot_boxplot, plot_kde, plot_love, stochastic_check_conditional, - standardized_mean_differences, positivity, _bounding_, iptw_calculator) +from zepid.calc.utils import probability_bounds +from zepid.causal.utils import (check_input_data, propensity_score, plot_boxplot, plot_kde, plot_love, + stochastic_check_conditional, standardized_mean_differences, positivity, + iptw_calculator) class IPTW: @@ -147,32 +149,16 @@ class IPTW: See http://chrp.org/love/JSM2004RoundTableHandout. pdf, 1364. """ def __init__(self, df, treatment, outcome, weights=None, standardize='population'): - if df.dropna(subset=[d for d in df.columns if d != outcome]).shape[0] != df.shape[0]: - warnings.warn("There is missing data that is not the outcome in the data set. IPTW will drop " - "all missing data that is not missing outcome data. 
IPTW will fit " - + str(df.dropna(subset=[d for d in df.columns if d != outcome]).shape[0]) + - ' of ' + str(df.shape[0]) + ' observations', UserWarning) - self.df = df.copy().dropna(subset=[d for d in df.columns if d != outcome]).reset_index() - else: - self.df = df.copy().reset_index() - - # Checking to see if missing outcome data occurs - self._missing_indicator = '__missing_indicator__' - if self.df.dropna(subset=[outcome]).shape[0] != self.df.shape[0]: - self._miss_flag = True - self.df[self._missing_indicator] = np.where(self.df[outcome].isna(), 0, 1) - else: - self._miss_flag = False - self.df[self._missing_indicator] = 1 - self.treatment = treatment self.outcome = outcome - - if df[outcome].dropna().value_counts().index.isin([0, 1]).all(): - self._continuous_outcome = False - else: - self._continuous_outcome = True - + self._missing_indicator = '__missing_indicator__' + self.df, self._miss_flag, self._continuous_outcome = check_input_data(data=df, + exposure=treatment, + outcome=outcome, + estimator="IPTW", + drop_censoring=False, + drop_missing=True, + binary_exposure_only=True) # TODO add detection of continuous treatments self.average_treatment_effect = None @@ -182,6 +168,11 @@ def __init__(self, df, treatment, outcome, weights=None, standardize='population if standardize in ['population', 'exposed', 'unexposed']: self.standardize = standardize + if standardize in ['exposed', 'unexposed']: + warnings.warn("For the ATT and the ATU, confidence intervals calculated using the robust-variance " + "approach (what is currently done in zEpid) may underestimate the variance. Therefore " + "when requesting the ATT or the ATU, it is recommended to use bootstrapped confidence " + "intervals instead.", UserWarning) else: raise ValueError('Please specify one of the currently supported weighting schemes: ' + 'population, exposed, unexposed') @@ -296,7 +287,7 @@ def missing_model(self, model_denominator, model_numerator=None, stabilized=True n = 1 if bound: # Bounding predicted probabilities if requested - d = _bounding_(fitmodel.predict(self.df), bounds=bound) + d = probability_bounds(fitmodel.predict(self.df), bounds=bound) else: d = fitmodel.predict(self.df) @@ -328,10 +319,8 @@ def fit(self, continuous_distribution='gaussian'): """ if self.__mdenom is None: raise ValueError('No model has been fit to generated predicted probabilities') - if self.ms_model is None: raise ValueError('No marginal structural model has been specified') - if self._miss_flag and not self._fit_missing_: warnings.warn("All missing outcome data is assumed to be missing completely at random. To relax this " "assumption to outcome data is missing at random please use the `missing_model()` " @@ -715,18 +704,18 @@ class StochasticIPTW: Biometrics, 68(2), 541-549. """ def __init__(self, df, treatment, outcome, weights=None): - if df.dropna().shape[0] != df.shape[0]: - warnings.warn("There is missing data in the dataset. StochasticIPTW will drop all missing data. 
" - "StochasticIPTW will fit " + str(df.dropna().shape[0]) + ' of ' + str(df.shape[0]) + - " observations", UserWarning) - self.df = df.copy().dropna().reset_index() - self.treatment = treatment self.outcome = outcome + self._missing_indicator = '__missing_indicator__' + self.df, self._miss_flag, self._continuous_outcome = check_input_data(data=df, + exposure=treatment, + outcome=outcome, + estimator="StochasticIPTW", + drop_censoring=True, + drop_missing=True, + binary_exposure_only=True) self.weights = weights - self.marginal_outcome = np.nan - self._pdenom_ = None def treatment_model(self, model, print_results=True): diff --git a/zepid/causal/snm/g_estimation.py b/zepid/causal/snm/g_estimation.py index 9cd63f8..e3b830a 100644 --- a/zepid/causal/snm/g_estimation.py +++ b/zepid/causal/snm/g_estimation.py @@ -4,7 +4,8 @@ import pandas as pd import scipy.optimize -from zepid.causal.utils import propensity_score, _bounding_ +from zepid.calc.utils import probability_bounds +from zepid.causal.utils import propensity_score, check_input_data class GEstimationSNM: @@ -140,31 +141,16 @@ class GEstimationSNM: nonresponse models. Journal of the American Statistical Association, 94(448), 1096-1120. """ def __init__(self, df, exposure, outcome, weights=None): - # Checking for missing data that is non-outcome - if df.dropna(subset=[d for d in df.columns if d != outcome]).shape[0] != df.shape[0]: - warnings.warn("There is missing data that is not the outcome in the data set. IPTW will drop " - "all missing data that is not missing outcome data. IPTW will fit " - + str(df.dropna(subset=[d for d in df.columns if d != outcome]).shape[0]) + - ' of ' + str(df.shape[0]) + ' observations', UserWarning) - self.df = df.copy().dropna(subset=[d for d in df.columns if d != outcome]).reset_index() - else: - self.df = df.copy().reset_index() - - # Checking to see if missing outcome data occurs - self._missing_indicator = '__missing_indicator__' - if self.df.dropna(subset=[outcome]).shape[0] != self.df.shape[0]: - self._miss_flag = True - self.df[self._missing_indicator] = np.where(self.df[outcome].isna(), 0, 1) - else: - self._miss_flag = False - self.df[self._missing_indicator] = 1 - - # Checking binary exposure only - if not self.df[exposure].value_counts().index.isin([0, 1]).all(): - raise ValueError("GEstimationSNM only supports binary exposures currently") - self.exposure = exposure self.outcome = outcome + self._missing_indicator = '__missing_indicator__' + self.df, self._miss_flag, continuous = check_input_data(data=df, + exposure=exposure, + outcome=outcome, + estimator="GEstimationSNM", + drop_censoring=False, + drop_missing=True, + binary_exposure_only=True) self.psi = None self.psi_labels = None @@ -287,7 +273,7 @@ def missing_model(self, model_denominator, model_numerator=None, stabilized=True n = 1 if bound: # Bounding predicted probabilities if requested - d = _bounding_(fitmodel.predict(self.df), bounds=bound) + d = probability_bounds(fitmodel.predict(self.df), bounds=bound) else: d = fitmodel.predict(self.df) diff --git a/zepid/causal/utils.py b/zepid/causal/utils.py index 6e9c47c..e934859 100644 --- a/zepid/causal/utils.py +++ b/zepid/causal/utils.py @@ -4,11 +4,83 @@ import pandas as pd import statsmodels.api as sm import statsmodels.formula.api as smf +from scipy.stats import logistic from scipy.stats.kde import gaussian_kde from statsmodels.stats.weightstats import DescrStatsW import matplotlib.pyplot as plt -from zepid.calc import probability_to_odds +from zepid.calc import probability_to_odds, 
probability_bounds
+
+
+def check_input_data(data, exposure, outcome, estimator, drop_censoring, drop_missing, binary_exposure_only):
+    """Background function used by the various estimators to check the input data for possible issues or
+    inconsistencies with the format expected by the estimator.
+
+    Parameters
+    ----------
+    data : DataFrame
+        Input dataframe to be processed
+    exposure : str
+        Exposure/treatment column name
+    outcome : str
+        Outcome column name
+    estimator : str
+        Name of the calling estimator. Used to echo the estimator name back in the warning messages
+    drop_censoring : bool
+        Whether to drop censored observations. Some estimators do not handle censoring (so this is set to True)
+    drop_missing : bool
+        Argument currently does nothing. Will be utilized in the future (when some estimators support missing data
+        aside from censoring)
+    binary_exposure_only : bool
+        Argument for whether only binary exposures / treatments are supported. This is currently True for most
+        estimators
+
+    Returns
+    -------
+    formatted DataFrame, boolean flag for missing data, boolean flag for a continuous outcome
+    """
+    # Handling missing data and censoring
+    if drop_censoring:  # Censoring is not always supported by all estimators
+        valid_obs = data.dropna(subset=[d for d in data.columns]).shape[0]
+        if valid_obs != data.shape[0]:
+            warnings.warn("There is missing data in the dataset. By default, " + str(estimator) +
+                          " will drop all missing data (including missing outcome data). " + str(estimator) +
+                          " will fit " + str(valid_obs) + ' of ' + str(data.shape[0]) + ' observations', UserWarning)
+            data = data.copy().dropna().reset_index()
+        else:
+            data = data.copy().reset_index()
+        miss_flag = False
+        data['__missing_indicator__'] = 1
+    else:
+        valid_obs = data.dropna(subset=[d for d in data.columns if d != outcome]).shape[0]
+        # Checking for other missing data
+        if valid_obs != data.shape[0]:
+            warnings.warn("There is missing data that is not the outcome in the data set. " + str(estimator) +
+                          " will drop all missing data that is not missing outcome data. 
" + str(estimator) + + " will fit " + str(valid_obs) + " of " + str(data.shape[0]) + " observations", UserWarning) + data = data.copy().dropna(subset=[d for d in data.columns if d != outcome]).reset_index() + else: + data = data.copy().reset_index() + # Checking for censored data + if valid_obs != data.dropna(subset=[outcome]).shape[0]: + miss_flag = True + data['__missing_indicator__'] = np.where(data[outcome].isna(), 0, 1) + else: + miss_flag = False + data['__missing_indicator__'] = 1 + + # Checking for only binary exposures + if binary_exposure_only: + if not data[exposure].value_counts().index.isin([0, 1]).all(): + raise ValueError(str(estimator) + " only supports binary exposures currently") + + # Check outcome data type + if data[outcome].value_counts().index.isin([0, 1]).all(): + continuous = False + else: + continuous = True + + return data, miss_flag, continuous def propensity_score(df, model, weights=None, print_results=True): @@ -44,10 +116,10 @@ def propensity_score(df, model, weights=None, print_results=True): log = smf.glm(model, df, freq_weights=df[weights], family=f).fit() if print_results: - print('\n----------------------------------------------------------------') - print('MODEL: ' + model) - print('-----------------------------------------------------------------') + print('==============================================================================') + print('Propensity Score Model') print(log.summary()) + print('==============================================================================') return log @@ -63,8 +135,11 @@ def exposure_machine_learner(xdata, ydata, ml_model, print_results=True): "covers both sklearn and supylearner. If there is a predictive model you would " "like to use, please open an issue at https://github.com/pzivich/zepid and I " "can work on adding support") - if print_results and hasattr(fm, 'summarize'): # SuPyLearner has a nice summarize function - fm.summarize() + if print_results and hasattr(fm, 'summary'): # SuPyLearner has a nice summarize function + print('==============================================================================') + print('Propensity Score Model') + fm.summary() + print('==============================================================================') # Generating predictions if hasattr(fm, 'predict_proba'): @@ -75,8 +150,7 @@ def exposure_machine_learner(xdata, ydata, ml_model, print_results=True): else: return g[:, 1] elif hasattr(fm, 'predict'): - g = fm.predict(xdata) - return g + return fm.predict(xdata) else: raise ValueError("Currently custom_model must have 'predict' or 'predict_proba' attribute") @@ -93,8 +167,11 @@ def outcome_machine_learner(xdata, ydata, all_a, none_a, ml_model, continuous, p "covers both sklearn and supylearner. 
If there is a predictive model you would " "like to use, please open an issue at https://github.com/pzivich/zepid and I " "can work on adding support") - if print_results and hasattr(fm, 'summarize'): # Nice summarize option from SuPyLearner - fm.summarize() + if print_results and hasattr(fm, 'summary'): # Nice summarize option from SuPyLearner + print('==============================================================================') + print('Outcome Model') + fm.summary() + print('==============================================================================') # Generating predictions if continuous: @@ -112,7 +189,7 @@ def outcome_machine_learner(xdata, ydata, all_a, none_a, ml_model, continuous, p if (qa1.ndim == 1) and (qa0.ndim == 1): return qa1, qa0 else: - return qa1[:,1], qa0[:,1] + return qa1[:, 1], qa0[:, 1] elif hasattr(fm, 'predict'): qa1 = fm.predict(all_a) qa0 = fm.predict(none_a) @@ -133,8 +210,11 @@ def stochastic_outcome_machine_learner(xdata, ydata, ml_model, continuous, print "covers both sklearn and supylearner. If there is a predictive model you would " "like to use, please open an issue at https://github.com/pzivich/zepid and I " "can work on adding support") - if print_results and hasattr(fm, 'summarize'): # Nice summarize option from SuPyLearner - fm.summarize() + if print_results and hasattr(fm, 'summary'): # Nice summarize option from SuPyLearner + print('==============================================================================') + print('Outcome Model') + fm.summary() + print('==============================================================================') # Generating predictions if continuous: @@ -195,8 +275,11 @@ def missing_machine_learner(xdata, mdata, all_a, none_a, ml_model, print_results "covers both sklearn and supylearner. 
If there is a predictive model you would " "like to use, please open an issue at https://github.com/pzivich/zepid and I " "can work on adding support") - if print_results and hasattr(fm, 'summarize'): # SuPyLearner has a nice summarize function - fm.summarize() + if print_results and hasattr(fm, 'summary'): # SuPyLearner has a nice summarize function + print('==============================================================================') + print('Censoring Model') + fm.summary() + print('==============================================================================') # Generating predictions if hasattr(fm, 'predict_proba'): @@ -214,41 +297,6 @@ def missing_machine_learner(xdata, mdata, all_a, none_a, ml_model, print_results raise ValueError("Currently custom_model must have 'predict' or 'predict_proba' attribute") -def _bounding_(v, bounds): - """Creates bounding for g-bounds in models - - Parameters - ---------- - v: - -Values to be bounded - bounds: - -Percentile thresholds for bounds - """ - if type(bounds) is float: # Symmetric bounding - if bounds < 0 or bounds > 1: - raise ValueError('Bound value must be between (0, 1)') - v = np.where(v < bounds, bounds, v) - v = np.where(v > 1-bounds, 1-bounds, v) - elif type(bounds) is str: # Catching string inputs - raise ValueError('Bounds must either be a float between (0, 1), or a collection of floats between (0, 1)') - elif type(bounds) is int: # Catching string inputs - raise ValueError('Bounds must either be a float between (0, 1), or a collection of floats between (0, 1)') - else: # Asymmetric bounds - if bounds[0] > bounds[1]: - raise ValueError('Bound thresholds must be listed in ascending order') - if len(bounds) > 2: - warnings.warn('It looks like your specified bounds is more than two floats. Only the first two ' - 'specified bounds are used by the bound statement. So only ' + - str(bounds[0:2]) + ' will be used', UserWarning) - if type(bounds[0]) is str or type(bounds[1]) is str: - raise ValueError('Bounds must be floats between (0, 1)') - if (bounds[0] < 0 or bounds[1] > 1) or (bounds[0] < 0 or bounds[1] > 1): - raise ValueError('Both bound values must be between (0, 1)') - v = np.where(v < bounds[0], bounds[0], v) - v = np.where(v > bounds[1], bounds[1], v) - return v - - def iptw_calculator(df, treatment, model_denom, model_numer, weight, stabilized, standardize, bound, print_results): """Background function to calculate inverse probability of treatment weights. Used by `IPTW`, `AIPTW`, `IPSW`, `AIPSW` @@ -269,8 +317,8 @@ def iptw_calculator(df, treatment, model_denom, model_numer, weight, stabilized, # Bounding predicted probabilities if requested if bound: - d = _bounding_(d, bounds=bound) - n = _bounding_(n, bounds=bound) + d = probability_bounds(d, bounds=bound) + n = probability_bounds(n, bounds=bound) # Calculating weights if stabilized: # Stabilized weights @@ -693,3 +741,57 @@ def stochastic_check_conditional(df, conditional): if np.any(a > 1): warnings.warn("It looks like your conditional categories are NOT exclusive. For appropriate estimation, " "the conditions that designate each category should be exclusive", UserWarning) + + +def aipw_calculator(y, a, py_a, py_n, pa1, pa0, difference=True, weights=None, splits=None, continuous=False): + """Function to calculate AIPW estimates. 
+def aipw_calculator(y, a, py_a, py_n, pa1, pa0, difference=True, weights=None, splits=None, continuous=False):
+    """Calculates AIPW point estimates and variance. Called by AIPTW, SingleCrossfitAIPTW, and DoubleCrossfitAIPTW.
+    """
+    # Point estimate calculation
+    y1 = np.where(a == 1, (y - py_a*(1 - pa1)) / pa1, py_a)
+    y0 = np.where(a == 0, (y - py_n*(1 - pa0)) / pa0, py_n)
+
+    # Warning system if values are out of range
+    if not continuous:
+        if np.mean(y1) > 1 or np.mean(y1) < 0:
+            warnings.warn("The estimated probability for all-exposed is out of the bounds (less than zero or greater "
+                          "than 1). This may indicate positivity issues resulting from extreme weights, too small of a "
+                          "sample size, or too flexible of models. Try setting the optional `bound` argument. If using "
+                          "DoubleCrossfitAIPTW, try SingleCrossfitAIPTW or the TMLE estimators instead.", UserWarning)
+        if np.mean(y0) > 1 or np.mean(y0) < 0:
+            warnings.warn("The estimated probability for none-exposed is out of the bounds (less than zero or greater "
+                          "than 1). This may indicate positivity issues resulting from extreme weights, too small of a "
+                          "sample size, or too flexible of models. Try setting the optional `bound` argument. If using "
+                          "DoubleCrossfitAIPTW, try SingleCrossfitAIPTW or the TMLE estimators instead.", UserWarning)
+
+    # Calculating ACE as a difference
+    if difference:
+        if weights is None:
+            estimate = np.nanmean(y1 - y0)
+            if splits is None:
+                var = np.nanvar((y1 - y0) - estimate, ddof=1) / y.shape[0]
+            else:
+                var_rd = []
+                for i in set(splits):
+                    y1s = y1[splits == i]
+                    y0s = y0[splits == i]
+                    var_rd.append(np.var((y1s - y0s) - estimate, ddof=1))
+                var = np.mean(var_rd) / y.shape[0]
+        else:
+            estimate = DescrStatsW(y1, weights=weights).mean - DescrStatsW(y0, weights=weights).mean
+            var = np.nan
+
+    # Calculating ACE as a ratio
+    else:
+        if weights is None:
+            estimate = np.nanmean(y1) / np.nanmean(y0)
+            if estimate < 0:
+                warnings.warn("The estimated risk ratio is less than zero; results are not valid", UserWarning)
+            py_o = a*py_a + (1-a)*py_n
+            ic = ((a*(y-py_o)) / (np.mean(py_a)*pa1) + (py_a - np.mean(py_a)) -
+                  ((1-a)*(y-py_o)) / (np.mean(py_n)*pa0) + (py_n - np.mean(py_n)))
+            var = np.nanvar(ic, ddof=1) / y.shape[0]
+        else:
+            estimate = DescrStatsW(y1, weights=weights).mean / DescrStatsW(y0, weights=weights).mean
+            var = np.nan
+
+    return estimate, var
diff --git a/zepid/datasets/__init__.py b/zepid/datasets/__init__.py
index d9cadce..c2711a6 100644
--- a/zepid/datasets/__init__.py
+++ b/zepid/datasets/__init__.py
@@ -413,3 +413,24 @@ def load_generalize_data(confounding):
     df = pd.read_csv(resource_filename('zepid', 'datasets/generalize_rct.dat'), index_col=False)
     df['id'] = df.index
     return df[['id', 'Y', 'A', 'S', 'L', 'W']]
+
+
+def load_zivich_breskin_data():
+    """Loads the single simulated data set from Zivich PN & Breskin 2021.
+ + Notes + ----------- + Variables within the dataset are + * Y - outcome (atherosclerotic cardiovascular disease) + * statin - treatment (1 is given statins; 0 is not) + * age - Age + * ldl_log - log-transformed LDL + * diabetes - diabetes indicator + * risk_score - calculated risk score between 0 and 1 + + Returns + ---------- + DataFrame + Returns pandas DataFrame + """ + return pd.read_csv(resource_filename('zepid', 'datasets/zivich_breskin_sim.csv'), index_col=False) diff --git a/zepid/datasets/zivich_breskin_sim.csv b/zepid/datasets/zivich_breskin_sim.csv new file mode 100644 index 0000000..61973ab --- /dev/null +++ b/zepid/datasets/zivich_breskin_sim.csv @@ -0,0 +1,3001 @@ +Y,statin,age,ldl_log,diabetes,risk_score,risk_score_cat +1,0,53,4.82278302607827,0,0.03553955523019,0 +0,1,52,5.24730917498221,0,0.039857334120348,0 +1,0,58,4.91430547794016,0,0.019541973169102,0 +0,0,55,4.88963159230146,0,0.027755782445915,0 +0,0,43,4.58752075188199,0,0.125006174068007,2 +1,0,52,4.83348710084667,0,0.040075846707862,0 +0,0,41,4.98522078293121,0,0.175811504708951,2 +0,0,65,4.88934469120726,0,0.009259430199799,0 +0,0,71,4.93530040618392,0,0.005053955089602,0 +0,0,53,4.80498397758462,0,0.035509752028313,0 +0,0,52,4.58492459728508,0,0.040260208212778,0 +1,1,43,5.18011641350298,1,0.840367557900646,3 +0,0,56,4.82728903698635,0,0.024932649557168,0 +0,0,41,4.95257446066519,0,0.174344217784739,2 +1,1,71,4.82504755657051,0,0.005243042579309,0 +0,0,53,4.96606346706009,0,0.035438722591407,0 +0,0,44,4.82147096396203,0,0.113176257037602,2 +1,0,68,5.07370403969808,0,0.006488838807658,0 +0,0,62,5.04114619951814,0,0.012278424457324,0 +0,0,54,4.6238254651798,0,0.031880730607348,0 +0,0,68,4.52894014374455,0,0.007615517411211,0 +0,0,45,4.93921508755218,0,0.100282446686095,2 +1,1,56,5.3310278617833,0,0.023710982691714,0 +0,0,46,4.67833434309946,0,0.085347611834177,2 +0,0,49,5.00722605958546,0,0.058921744946869,1 +0,0,51,4.76480098494876,0,0.045440886772539,0 +1,0,56,4.91489891217599,0,0.024660622326761,0 +0,0,71,4.63496951111893,0,0.005601962501048,0 +0,1,65,5.15068412585937,0,0.008658038554417,0 +0,0,46,4.77573856574391,0,0.086140482027306,2 +0,1,42,4.83546071083396,0,0.148776986520643,2 +0,1,44,4.55394133257723,0,0.109108500275602,2 +1,1,59,4.95305206334021,0,0.017416567686845,0 +1,0,67,4.85287510069907,0,0.007634349589334,0 +0,1,40,4.71143591252589,0,0.190342764179461,2 +0,1,59,4.72903880964284,0,0.018008786911539,0 +1,1,66,5.12124569200229,0,0.007818036661114,0 +0,0,44,4.86173323791821,0,0.113870522744987,2 +0,0,44,4.70318568852115,0,0.111503975888286,2 +1,0,54,4.94067362896005,0,0.031239841577366,0 +0,0,52,4.76695852282956,0,0.040260985268424,0 +1,0,55,5.18873171779523,0,0.02720167167731,0 +0,1,66,4.90925761625628,0,0.008282641232684,0 +1,1,51,4.95916458892716,1,0.611907574895622,3 +0,0,44,4.96940543253445,0,0.115445894881488,2 +0,0,57,4.83416272251138,0,0.022217545578917,0 +1,0,48,4.97772250340158,0,0.06715836910629,1 +0,1,57,4.9772062527648,0,0.021869124571003,0 +0,0,53,4.70699641397403,0,0.035761194367126,0 +0,0,56,4.70934903789323,0,0.025105540743797,0 +1,0,69,4.81047234416168,0,0.006370818645013,0 +1,1,61,4.99483323846925,1,0.317182797489809,3 +1,0,68,4.9336232172011,0,0.00676192686524,0 +0,0,42,4.81337370302236,0,0.148266101928211,2 +0,0,47,4.76819751565385,0,0.075515284874573,2 +0,0,48,5.02064679057277,0,0.067343656964246,1 +0,0,52,4.87947672317425,0,0.040109845765899,0 +0,0,49,4.64989474333408,0,0.05808077431906,1 +0,0,42,4.95211629917499,0,0.151811168389977,2 
+0,0,53,4.77303548146286,0,0.03557874943226,0 +1,0,63,4.95107495470257,0,0.011207758251076,0 +0,0,41,4.68176762657848,0,0.165527789476378,2 +0,0,61,4.72245467382704,0,0.014523881842609,0 +0,0,49,4.83915156840616,0,0.058632978362916,1 +1,0,66,4.96300362627215,0,0.008191757681007,0 +0,0,56,4.93011482974021,0,0.024582751550417,0 +0,1,43,4.82764212925294,0,0.129724740666573,2 +1,1,64,4.82050584292626,0,0.010412042184029,0 +0,0,42,4.96931650230332,0,0.15243652878906,2 +1,0,54,4.99255793509707,0,0.031256788015969,0 +0,1,40,4.82491092185816,0,0.1947996000392,2 +0,0,56,4.857400106207,0,0.024814087678051,0 +0,0,51,4.84681202046429,0,0.045457995298354,0 +1,0,60,4.98360007130223,0,0.015433678661951,0 +0,0,52,4.66210536959203,0,0.040340465872199,0 +0,1,43,4.86613326594188,0,0.130615488538577,2 +0,0,52,5.09480173369542,0,0.039923182758473,0 +1,1,57,4.99596189673085,1,0.423621975691745,3 +0,0,61,4.99239423639568,0,0.013824886281912,0 +0,1,56,4.95445177014621,0,0.024539155335036,0 +1,1,53,5.10337970134007,0,0.035209986735845,0 +0,1,69,5.0528486340156,0,0.005899290371803,0 +0,0,59,4.73543821123915,0,0.017971794119598,0 +0,0,46,4.74893392510478,0,0.086147802067385,2 +0,1,61,5.17951156091491,0,0.013330633292172,0 +1,0,44,5.1951045254084,0,0.119056683948159,2 +0,0,52,4.85652944385978,0,0.040082315904778,0 +1,0,65,4.8110043288809,0,0.009424488664538,0 +0,1,60,4.54341521855228,0,0.016583813782079,0 +1,0,59,4.88571887023848,0,0.017601073473991,0 +1,0,50,5.06939423057795,1,0.644848703622115,3 +0,1,61,5.12005844257641,0,0.013503446627331,0 +0,0,50,4.79703475086964,0,0.051479223036152,1 +0,0,55,4.93066382305758,0,0.027812112676289,0 +0,0,42,4.94138613697082,0,0.151735537346981,2 +1,0,64,5.11406865137832,0,0.009708677784657,0 +0,0,46,4.58926777710697,0,0.084634462186465,2 +0,0,42,4.90680577974267,0,0.150636242317158,2 +1,0,45,4.92735395062294,0,0.100231278126465,2 +0,1,72,4.78730104296666,0,0.004840595599567,0 +1,1,64,4.87654377444932,0,0.010246202023915,0 +0,0,40,4.73417756337826,0,0.191186511256982,2 +1,0,60,5.02864586810039,0,0.015360644687099,0 +1,1,66,5.28643689859497,0,0.00750988044944,0 +0,0,57,4.70316049405636,0,0.022588786589058,0 +0,0,41,4.92203266807358,0,0.173269701536941,2 +1,1,55,4.88474979919477,1,0.486342211740078,3 +0,0,61,4.94620915639758,0,0.013931991161907,0 +1,1,62,4.81020720231508,0,0.012839988671978,0 +0,0,50,4.70754025357889,0,0.051347958692315,1 +1,0,49,4.85481418488231,0,0.058562546723512,1 +0,0,60,4.74544943965039,0,0.01611508919457,0 +0,1,62,5.10377039776925,0,0.012149337309289,0 +1,0,55,4.94129689954596,0,0.027800910618233,0 +0,0,53,4.53833185245034,0,0.035932272902384,0 +0,0,47,5.03068941751933,0,0.077119515474655,2 +1,0,46,5.09813103116774,0,0.088988389260028,2 +1,0,58,4.9667511240357,0,0.019416446138914,0 +0,0,44,4.9830704958518,0,0.115752248455975,2 +1,1,45,4.90158516855721,0,0.099935208311411,2 +0,0,58,4.97562559575444,0,0.019456763652062,0 +1,0,42,4.78523665185814,0,0.147581264310824,2 +1,0,54,5.06007770491054,0,0.031145534356324,0 +1,0,50,4.86152322432886,0,0.051636260952962,1 +0,0,49,4.7802879489425,0,0.058398791691088,1 +0,0,46,4.75648302832323,0,0.085981978986113,2 +1,0,65,4.78789730509499,0,0.009491009166829,0 +0,0,41,4.68359834124041,0,0.165622683676807,2 +1,1,63,5.00402455650621,1,0.26990189231316,3 +0,0,41,4.88763186185425,0,0.172133775862249,2 +0,0,61,5.00760809941657,0,0.013728338711652,0 +1,0,48,5.15053543655288,0,0.067938284000228,1 +0,0,52,4.92575753484969,0,0.040009426443633,0 +1,0,52,4.79049858042139,0,0.040075661717439,0 
+1,1,65,5.01160636596969,1,0.230359217706024,3 +0,0,58,4.88480182772954,0,0.019640107936137,0 +0,1,44,4.64442207106286,0,0.110463570831373,2 +0,0,62,4.94504612328398,0,0.012494202160463,0 +0,1,61,5.32045837481551,0,0.013009273930199,0 +1,0,41,4.85865956240552,0,0.171147598060892,2 +0,0,54,4.7163328623194,0,0.031625073738439,0 +0,0,57,5.09442524543191,0,0.021527656777959,0 +0,1,58,4.41599538545682,1,0.414640417676497,3 +1,1,59,5.27173037518183,0,0.016594930142921,0 +1,0,60,4.88766223234412,0,0.015729319439183,0 +1,0,66,5.05437872145787,0,0.007967117842517,0 +0,0,53,5.02440005664268,0,0.035217799724739,0 +0,1,71,5.01139471930308,1,0.140733415984283,2 +1,1,57,5.07624878245618,1,0.421853921578151,3 +0,0,61,4.72020192916116,0,0.014464495193601,0 +0,0,58,4.88438570901309,0,0.019710267203023,0 +0,0,55,5.33995284238615,0,0.026908170661028,0 +1,0,70,4.92815594707505,0,0.00557383500688,0 +0,1,65,5.08043542116861,1,0.227648998362347,3 +1,0,61,4.89657214789518,0,0.014047003320524,0 +0,0,62,5.00367187726585,0,0.012386505084929,0 +0,0,57,4.79384251472167,0,0.022246740082277,0 +0,0,63,4.62932903763967,0,0.012021679109481,0 +0,0,41,5.24096756311111,0,0.183912663139412,2 +1,0,57,4.95223862905856,0,0.02186822023419,0 +1,1,63,4.78997933019947,1,0.280579137057539,3 +0,0,45,4.69750465297497,0,0.097533711932499,2 +0,0,51,4.63215455128467,0,0.045422471934694,0 +0,0,48,4.71545067758892,0,0.066175624636988,1 +0,0,47,4.88178022287787,0,0.076323557114959,2 +0,1,40,4.72515531826493,0,0.190875390518884,2 +0,0,53,4.85468075805022,0,0.035462509547721,0 +0,1,45,4.74162041662538,0,0.098150953572589,2 +0,0,55,4.93729532505868,0,0.027655849835527,0 +0,0,65,4.9131816291338,0,0.009194900801062,0 +0,0,49,4.45916434045305,0,0.057639662322852,1 +0,1,54,4.68296707860408,1,0.521065401639462,3 +0,0,63,4.7340568990462,0,0.011767971180105,0 +1,1,71,5.02018216261841,0,0.004909249451931,0 +1,1,58,4.89055055946605,0,0.019600842641032,0 +0,0,57,4.54914736417643,0,0.022830477522747,0 +1,0,57,5.14518325171122,0,0.021432583190711,0 +0,0,69,4.83968144133598,0,0.006298115432948,0 +0,0,54,4.63099094559146,0,0.031776765971133,0 +0,0,52,5.02153156507819,0,0.039939396458773,0 +0,0,43,4.77910172449321,0,0.128762887168649,2 +0,0,53,4.6390459478229,0,0.035756687968393,0 +0,1,60,4.73576808191045,1,0.351109768985892,3 +0,0,41,4.69303828040346,0,0.165900511215271,2 +0,0,44,4.89410001083759,0,0.114290587368568,2 +0,0,57,5.15037881943584,0,0.021388292445591,0 +1,0,59,4.80994450118645,0,0.017759330579722,0 +1,1,67,5.0456058047741,1,0.194692679911288,2 +0,0,57,4.66174036907446,0,0.02252919235246,0 +1,0,60,4.93316200846109,1,0.343638990350607,3 +1,0,56,4.93032947474825,0,0.024632741278842,0 +1,1,68,5.27060995132371,0,0.006081169436089,0 +1,0,48,5.15616736358269,0,0.068002600755661,1 +0,0,54,4.94611828776641,0,0.031216930609786,0 +0,0,51,5.10546131994986,0,0.045442349783328,0 +0,1,64,4.53656815222327,0,0.011118499760931,0 +1,1,61,5.1694673128301,0,0.013415950116494,0 +0,0,49,4.82693379771189,0,0.058782584464333,1 +0,0,61,4.65194808569079,0,0.014691722441031,0 +0,0,49,5.07875229965394,0,0.059184731982721,1 +0,1,68,4.78323635325959,1,0.19096684092199,2 +0,0,53,4.65646991894183,0,0.035692202014561,0 +0,1,74,4.74248210034681,0,0.004118985002409,0 +0,0,43,4.68250454345259,0,0.126797825946384,2 +0,0,59,4.60515818454081,0,0.018246445488035,0 +0,1,45,4.66365587148489,1,0.78123574616277,3 +1,0,63,5.15366299937895,0,0.010756115040967,0 +0,0,62,5.03408537350293,0,0.012252215653165,0 +0,0,52,4.56946355878086,0,0.040387620901484,0 
+1,1,73,4.93414814666924,0,0.004195000755511,0 +0,0,63,5.03280220788315,0,0.011045656803765,0 +0,0,49,4.77145884068081,0,0.058329255820236,1 +1,0,58,5.01851053125779,0,0.019294232768194,0 +0,0,45,4.90037336068315,0,0.099844536624967,2 +0,1,60,4.76490306331813,0,0.015966962050682,0 +0,0,59,4.8329891261749,0,0.017715506450232,0 +0,0,58,4.81189444001995,0,0.019765635978349,0 +0,0,48,4.52613938607356,0,0.065425436835828,1 +1,0,67,4.99267244306353,0,0.007327322137544,0 +0,0,40,4.89460149163282,0,0.197542375009822,2 +1,0,67,4.97407550622701,0,0.007352722558949,0 +0,0,58,4.95125243278217,0,0.019468475536662,0 +1,1,54,5.18452981772812,1,0.51379873699913,3 +1,0,48,4.46308685586407,0,0.065346648913883,1 +1,0,61,4.79504587357352,0,0.014287594514428,0 +1,0,53,4.87183350274577,0,0.035418337749177,0 +1,1,70,4.97095874660696,0,0.005496842209144,0 +0,1,49,5.13760088487642,0,0.059262248409687,1 +0,1,64,5.13926354986511,0,0.009645910720858,0 +0,0,45,4.70174157366968,0,0.097601248678985,2 +1,0,55,4.98061295920681,0,0.027632788629623,0 +0,0,51,4.76048704959983,0,0.04543455974604,0 +0,1,61,4.74594090550799,1,0.327710929306332,3 +1,1,68,5.25327643192313,1,0.170279696937884,2 +1,0,57,5.01567886828576,0,0.021747931631533,0 +1,0,66,5.18494161119272,0,0.0077221632331,0 +0,1,62,4.75956283195136,0,0.01293240711706,0 +0,1,43,4.96323782342811,0,0.132400377346251,2 +1,0,57,4.85261787344328,0,0.022106749198366,0 +1,0,68,4.9766736780609,0,0.006666418342991,0 +0,0,60,4.52383119347396,0,0.016735446982794,0 +1,0,46,5.06744821937544,0,0.088680275235477,2 +0,0,57,5.04375506324077,0,0.021591339578181,0 +0,0,59,4.82717680279619,0,0.017714652883793,0 +1,0,64,4.96007780483646,0,0.010054327818056,0 +1,0,40,4.82571246955108,0,0.194845515001025,2 +0,0,50,5.2776854002911,0,0.052008139347721,1 +0,0,44,4.70996987993989,0,0.111445874658114,2 +0,1,45,4.68044473768472,0,0.097325662347139,2 +1,0,57,4.91576909758774,0,0.022038353793762,0 +0,1,60,4.68452823532842,0,0.016287326682856,0 +0,0,46,4.63923766518309,0,0.08499116855268,2 +0,0,50,4.57812015687088,0,0.051216574619265,1 +0,0,54,4.70819148100196,0,0.031634053522523,0 +0,0,48,4.37243031911341,0,0.064795465870473,1 +1,0,49,5.09624445503086,0,0.059226320089829,1 +0,1,59,4.78439116987488,1,0.376254602139731,3 +1,1,69,5.01280737114712,1,0.166173120282389,2 +1,0,56,4.46835063328374,0,0.025694110842681,0 +1,0,56,5.08447990710014,0,0.024248924664697,0 +0,1,54,4.98842422382802,0,0.031257844030849,0 +1,1,64,5.37124094754207,0,0.009164797895302,0 +0,1,63,4.91212316640539,0,0.011291961352361,0 +0,0,49,4.97113601885389,0,0.05892024732928,1 +0,0,53,5.00097218516859,0,0.035243150944299,0 +0,1,72,5.02476634826182,0,0.004454559408907,0 +1,0,56,5.08399098485901,0,0.024298804247488,0 +0,0,49,4.48793792607324,0,0.057859569912008,1 +1,0,48,5.17409003483301,0,0.068066147556044,1 +0,0,43,4.94756113903913,0,0.132268134615573,2 +0,0,60,4.57498019987783,1,0.357933347730758,3 +0,1,58,4.92170406284503,0,0.019563266299461,0 +1,1,68,5.20425550393992,0,0.006224851305642,0 +0,1,40,5.02042896896259,0,0.202635676819611,3 +0,0,50,4.91476116822512,0,0.051656116036801,1 +0,0,48,4.59436692102735,0,0.065665227856113,1 +0,0,57,4.9237605402245,0,0.021896562334481,0 +0,0,44,4.67673717946963,0,0.110999016165803,2 +0,0,63,4.76635443480189,0,0.011653462031803,0 +1,0,66,5.09815366168129,0,0.00790720709978,0 +0,0,55,4.81757417604094,0,0.028061954744778,0 +0,0,48,4.98415697675364,0,0.06719652313397,1 +0,0,57,5.10660314088486,0,0.021505000138437,0 +1,1,54,4.93051871503031,1,0.516957401144584,3 
+1,0,63,5.11644073959214,0,0.010849111079146,0 +1,0,45,4.7910200538069,0,0.09854062783005,2 +0,0,43,4.50605358578002,0,0.123385640106716,2 +0,0,50,4.90979350222284,0,0.051659353619955,1 +1,1,56,5.08257005066517,1,0.452182353640575,3 +1,0,48,4.87650765098543,0,0.066760608841083,1 +0,0,51,4.85424389724923,0,0.045402228773225,0 +1,1,70,5.11907168568004,0,0.005232570788414,0 +0,0,60,4.93192494324098,0,0.0155677787968,0 +0,0,59,4.88496211427443,0,0.017539915342177,0 +1,0,55,4.85309619470635,0,0.027931895977782,0 +0,1,45,4.51140799413169,0,0.095330923119806,2 +1,1,57,4.90690727535798,0,0.022047754947033,0 +0,0,54,4.79172567169828,0,0.031531755213,0 +0,0,64,4.84866522357056,0,0.010314425944234,0 +1,0,69,5.22916879489262,1,0.157106794264031,2 +1,0,51,4.70987995627716,0,0.045397531705519,0 +0,0,42,4.52695643632456,0,0.14102897552038,2 +1,1,65,5.09984266656544,0,0.008777752840959,0 +1,1,67,4.95907444345603,1,0.198481217618223,2 +0,0,68,4.82690625683443,0,0.006959442032249,0 +1,0,66,4.85449866244239,0,0.008438685067219,0 +0,0,46,4.81881868600375,0,0.086555814066689,2 +1,1,64,4.84175892271545,1,0.257408646984121,3 +1,0,63,4.9607747516756,0,0.011160260917784,0 +0,1,48,4.94472369157807,0,0.067025870951716,1 +1,1,63,4.84212847748595,1,0.278246377219329,3 +0,0,41,4.36863296564321,0,0.155993111637119,2 +1,1,54,4.7600387219691,1,0.519951393615529,3 +0,0,42,4.82037136522543,0,0.148348340882625,2 +1,0,69,4.98817699605493,0,0.006022644262432,0 +1,1,64,5.02990111272019,1,0.249154648940943,3 +0,0,46,4.94263569999278,0,0.087723958832,2 +0,0,44,5.06864532935376,0,0.117128681881304,2 +0,1,61,4.53268898846812,1,0.335484738419903,3 +0,0,46,4.69795088905358,0,0.085481183527752,2 +0,0,44,4.88964562123721,0,0.114250672159165,2 +0,0,59,4.78708256544447,0,0.017825244715221,0 +1,1,52,4.99833340934748,0,0.040042073340697,0 +0,0,57,4.836089612633,0,0.022161709644707,0 +0,0,42,4.60561965987995,0,0.142951499911452,2 +0,0,45,4.66542089983245,0,0.097293781011521,2 +0,0,41,5.05817858629142,0,0.177792242106146,2 +1,0,65,4.97208087706076,0,0.009061366250184,0 +0,0,46,4.67407915514767,0,0.085305653195776,2 +0,0,62,4.99655731386326,0,0.012362911666899,0 +0,0,41,4.90428782499931,0,0.17292151491384,2 +0,0,53,4.9618285094191,0,0.035296456132694,0 +1,0,60,5.03128042664993,0,0.015296072515581,0 +0,0,62,4.96784223817205,0,0.012465177993418,0 +1,0,57,5.04894611636594,0,0.021633362154825,0 +1,0,64,4.78897715185103,0,0.010484600448078,0 +1,0,61,5.10837112707518,0,0.01349205383842,0 +1,0,62,4.92466417836681,0,0.012579367547024,0 +0,0,52,4.57904545366641,0,0.040266509172785,0 +0,0,44,4.64553661715948,0,0.11045445297107,2 +0,0,52,4.71953567433923,0,0.040180423634017,0 +0,0,46,4.71359239309946,0,0.085761342231552,2 +0,0,60,4.83875410353372,0,0.015853845111248,0 +0,0,42,4.71383359088535,0,0.145674268482816,2 +0,0,45,4.6750445264141,0,0.097243615056188,2 +1,0,42,4.94851750125042,0,0.151753214640336,2 +1,0,73,4.92671415877974,0,0.00420131593577,0 +0,0,48,4.80224430282036,0,0.066448754800401,1 +0,1,54,4.96978682297792,0,0.031198422850443,0 +0,0,49,4.63126379347781,0,0.058124833103133,1 +1,0,55,4.91120663776218,0,0.027718299925642,0 +0,0,47,4.68400204305605,0,0.075335722897464,2 +0,0,54,5.02321447178373,0,0.031124235541264,0 +0,0,55,4.71489286435108,0,0.028220562689907,0 +0,0,42,4.46910479509387,0,0.139737394125606,2 +0,0,43,5.0769807972301,0,0.134765910523865,2 +0,1,69,5.0178686630659,0,0.005967908803429,0 +0,0,49,4.66661398181204,0,0.058232101841318,1 +0,0,46,4.82065091293461,0,0.08689881597367,2 +1,1,67,5.16979392948546,1,0.189124741662331,2 
+0,0,62,4.78986713361326,0,0.012898526600301,0 +1,0,69,4.88914576146829,0,0.006206408402237,0 +0,1,47,5.02368885350827,0,0.077070287512484,2 +0,0,47,4.52087797974411,0,0.074106659779177,1 +0,0,52,4.74100336857635,0,0.040228030189649,0 +0,1,69,4.75382958932256,0,0.006481262684834,0 +1,0,66,5.07537560064574,0,0.007936949580298,0 +0,0,57,4.7566325169101,0,0.022331928499689,0 +1,1,46,4.98059314251218,1,0.76172959522185,3 +0,1,46,4.95640624310242,0,0.087696264471791,2 +0,0,58,4.88054749906185,0,0.019606622146175,0 +0,0,48,4.84888760993466,0,0.066635458365824,1 +0,0,44,4.83546223848588,0,0.113369583209852,2 +0,0,66,4.80148882183032,0,0.00855330519151,0 +0,0,48,4.71656418206737,0,0.066105781784786,1 +1,0,56,4.80137697107803,0,0.024942133245595,0 +0,0,45,4.72991168520289,0,0.098013880313898,2 +1,0,63,4.84811922534484,0,0.011463207084874,0 +1,0,62,5.12737669969377,0,0.012074069730002,0 +0,0,50,4.76852620535668,0,0.051459228318949,1 +1,0,61,4.99797305699453,0,0.013790472447734,0 +1,0,42,5.04457769376015,0,0.154250091154832,2 +1,0,46,4.81091126782989,0,0.086562996036069,2 +0,0,52,5.03015011318938,0,0.039999366417358,0 +1,0,44,5.20180929601601,0,0.119251155190868,2 +0,1,50,4.92128017431529,0,0.051584171346685,1 +0,1,43,4.88039377525148,0,0.130717455555072,2 +0,1,69,5.0047886177832,0,0.005996566335357,0 +1,0,42,4.84490356787753,0,0.149009487700307,2 +1,1,68,4.99516580579963,0,0.006634404048711,0 +0,0,48,4.91628724474185,0,0.066914844900012,1 +0,0,52,5.05380612984311,0,0.039928329796504,0 +0,0,41,4.54788475851233,0,0.161387663490822,2 +0,0,50,4.79424249901193,0,0.051627958191074,1 +1,0,54,4.84050819806744,0,0.031412537622761,0 +1,0,55,4.92059502224749,0,0.027809571673758,0 +1,1,69,4.80340057850527,0,0.00637648303474,0 +1,1,67,4.78193515554006,1,0.206514708071833,3 +1,0,47,5.0912081568634,0,0.07748546492712,2 +0,0,54,4.77589232459681,0,0.031629625959813,0 +0,0,59,5.07901668020854,0,0.017035989781141,0 +0,0,46,4.67003948928104,0,0.085491105521228,2 +0,0,66,4.62895178125313,0,0.00894409332148,0 +0,0,62,4.83708873287545,0,0.012774685798067,0 +0,1,57,4.78705996739854,1,0.430775227546726,3 +0,0,46,4.7588402335183,0,0.086028363043881,2 +0,0,51,4.66186568898341,0,0.04540444197294,0 +1,0,64,5.00666030054621,0,0.009940339859547,0 +0,1,61,4.6479655656169,0,0.014680757081888,0 +0,0,41,4.4456196208336,0,0.158278129571289,2 +1,1,67,5.21291525919685,1,0.186837666947823,2 +0,0,43,4.55751984566308,0,0.124456455450872,2 +1,0,56,4.81251193168793,0,0.024954303044592,0 +1,1,66,4.85296737145657,0,0.008413679521474,0 +0,0,42,5.18762513359568,0,0.158084372227691,2 +0,0,59,4.80111456448025,0,0.017745673359525,0 +0,0,62,4.63072941978929,0,0.01331879256207,0 +1,0,41,4.93201263490192,0,0.173581313267731,2 +1,1,72,5.02792151668236,0,0.004448909461497,0 +1,0,45,5.11469675281404,0,0.102495618521214,2 +0,0,55,4.54075641660791,0,0.028585637389189,0 +0,0,52,4.74730349378811,0,0.040114926457089,0 +0,0,54,5.0079560432518,0,0.031135517423155,0 +1,1,60,5.06550670355923,1,0.339348107358819,3 +1,0,57,5.05953170357366,0,0.021658715831617,0 +0,0,54,4.54433881330838,0,0.03209070319393,0 +0,0,56,4.98686695456298,0,0.024469217420396,0 +0,0,41,4.53465586822117,0,0.161012604161359,2 +1,0,53,5.14984895833362,0,0.03502377206082,0 +1,0,45,5.1297470210849,0,0.102721683254339,2 +1,1,64,5.30438071874691,1,0.237403171676794,3 +1,0,61,4.81291084713948,0,0.014291730543174,0 +0,0,46,4.91173539537164,0,0.087372508054043,2 +0,0,60,4.9035736617303,0,0.015670561413855,0 +0,0,51,4.83626352793227,0,0.045412947569669,0 +1,0,63,4.78475073990039,0,0.011632800888079,0 
+0,0,44,4.7285052355466,0,0.111750274621954,2 +0,0,62,4.72824430974987,0,0.013070183132037,0 +1,0,58,5.02835988942016,0,0.01934296854664,0 +1,1,51,4.81402094578865,0,0.045510957307813,0 +0,1,45,4.80864238071157,0,0.098813303842013,2 +0,0,48,4.65731340061529,0,0.065877097281345,1 +0,0,49,4.66834073310624,0,0.058117544906967,1 +1,1,72,4.94178355487948,0,0.004591348475629,0 +0,1,49,5.01858721282206,0,0.058935917569016,1 +1,1,56,4.86205996625264,1,0.456842353915635,3 +0,0,63,4.86047243015142,0,0.011446641885605,0 +0,1,53,5.21371961160972,0,0.034962254865807,0 +0,0,47,4.98765862460478,0,0.076984069299113,2 +0,0,52,4.91891796554138,0,0.040001459177863,0 +0,0,45,4.77466863493541,0,0.098473357087781,2 +0,1,54,5.15442663870199,0,0.030872459980786,0 +1,0,49,4.9346663342604,0,0.058806125617556,1 +0,1,60,4.89486863179206,0,0.015687237222673,0 +1,0,65,4.9459018681438,0,0.009102829803504,0 +0,0,61,4.72653794038878,0,0.014522671747849,0 +0,0,45,4.71699511788885,0,0.097799597151886,2 +0,1,62,4.9544834573291,1,0.295718315925005,3 +0,1,41,5.09601649429308,0,0.178965427699841,2 +1,0,52,4.97428491193049,0,0.04005990769046,0 +0,0,40,4.65476193686617,0,0.188066298241728,2 +0,0,61,4.68990433497371,0,0.014554727380821,0 +0,1,71,4.65713522098067,0,0.005546769494601,0 +0,0,54,4.73971239212172,0,0.031689491279958,0 +1,0,57,5.04169046138628,0,0.021674887323034,0 +1,1,60,4.94805701038895,0,0.015589599079252,0 +0,0,42,4.95839093850705,0,0.151961743390629,2 +0,0,50,4.81894499900182,0,0.05151520182158,1 +1,0,51,5.2103100352377,0,0.04559153980831,0 +0,0,41,4.7418587821183,0,0.167443092590582,2 +0,1,50,5.04797731975964,0,0.051788861928377,1 +0,0,51,4.77199271626666,0,0.045405503925507,0 +0,0,45,4.80316212607534,0,0.09869919943775,2 +0,0,50,4.65946366110238,0,0.051403155805039,1 +0,0,41,4.61267492830583,0,0.163369765355102,2 +0,0,42,4.64147597796136,0,0.143861225804995,2 +0,0,52,4.8206735177148,0,0.040136418239875,0 +1,1,46,4.86467436182739,1,0.75971286152869,3 +0,0,54,5.05831831593456,0,0.03115079469772,0 +0,0,40,4.54246174413433,0,0.183786237759035,2 +0,0,50,4.84769946229646,0,0.051614945414605,1 +1,1,64,5.19174556045042,0,0.009539472859267,0 +0,1,71,4.90322187988682,0,0.005102468320338,0 +0,1,42,5.14305945327876,0,0.156846361658866,2 +1,1,64,4.98409171534329,1,0.25075904900686,3 +0,0,66,4.80740781717149,0,0.008546132789186,0 +0,0,55,4.90175764606664,0,0.027773352723248,0 +0,1,55,5.07327279645881,0,0.027401151467632,0 +1,0,52,5.14812816418192,0,0.039855285875754,0 +0,0,41,4.63118232660846,0,0.163953277331037,2 +0,0,54,4.95184928543637,0,0.031264998843378,0 +0,0,42,4.33342564064999,0,0.136352315435674,2 +0,0,62,4.80177281368378,0,0.012867871750413,0 +0,1,57,4.91056883802868,1,0.426315369123636,3 +0,1,56,4.93075473384049,0,0.024681125050533,0 +0,0,45,4.67780398493933,0,0.097272802662038,2 +0,0,62,4.74032696340356,0,0.012992800326541,0 +0,0,51,4.54546486421868,0,0.045423050007611,0 +1,0,52,4.92634707876783,0,0.039990776040201,0 +0,1,52,4.88546581541693,0,0.040033152693439,0 +0,0,51,4.66519199978386,0,0.045444447595157,0 +0,0,49,4.6224007825,0,0.058183839572982,1 +1,1,59,5.09046180585596,0,0.017053626287306,0 +0,0,61,4.74079195009333,0,0.014508401449309,0 +0,0,43,4.68027833789743,0,0.126781315605585,2 +0,0,59,4.82523832970209,0,0.017732352041974,0 +1,0,67,4.8638552987618,0,0.007606369487726,0 +0,0,70,5.03378588460413,0,0.005382872561576,0 +0,1,58,4.88477085557826,0,0.019629057082032,0 +0,0,41,5.01602567725749,0,0.176372715251491,2 +0,0,43,4.80424985076787,0,0.12933046681736,2 +0,0,44,4.23112255695281,0,0.104345451767688,2 
+0,0,69,4.64797343283577,0,0.006682065017094,0 +0,0,47,4.60876343778862,0,0.074639011203897,1 +1,1,71,5.13171781051436,1,0.135963537127239,2 +0,0,58,4.49188115702662,0,0.020658868296727,0 +0,1,57,4.72380704186878,1,0.432859820766706,3 +0,0,67,4.93928533727587,0,0.007454155079148,0 +0,0,53,4.73889831301361,0,0.035681491720989,0 +0,0,46,4.84999044401217,0,0.086987284266067,2 +0,1,44,4.88244078879789,0,0.114133602553016,2 +0,0,49,4.89257388360282,0,0.058789646040458,1 +0,0,60,4.94985904811646,0,0.015551098723285,0 +0,0,46,4.77374643835642,0,0.086214794510435,2 +0,0,42,4.92250192826445,0,0.151009885632494,2 +1,0,54,4.83083086409459,0,0.031459344054718,0 +1,0,45,4.97145632953672,0,0.100767100447321,2 +0,0,63,4.95455388935747,0,0.011209431023182,0 +0,0,44,4.71346176789485,0,0.111494213151643,2 +0,0,48,4.62324136710553,0,0.065844679614884,1 +0,0,47,4.90758650157334,0,0.076441840795035,2 +0,1,46,4.89185787657882,0,0.087422067939299,2 +1,0,55,4.99935092358004,0,0.027657496866905,0 +1,1,50,4.81751861481088,0,0.051460721945612,1 +0,0,41,4.54505422714645,0,0.1612552496092,2 +0,0,45,4.65659470456608,0,0.09700316710815,2 +0,0,47,4.65365663722841,0,0.074887429076067,1 +0,0,46,4.76334732806744,0,0.086056162189452,2 +0,0,50,4.77721711674602,0,0.051430589326937,1 +0,0,44,4.75242573208938,0,0.112070521880123,2 +0,0,50,4.66419416920186,0,0.051432084879823,1 +0,1,49,4.8220734406446,0,0.058565558364471,1 +0,0,58,4.63418485294216,0,0.020279717160822,0 +1,1,54,4.97979944444914,0,0.031137287356567,0 +0,0,52,4.89172646907026,0,0.040064669769254,0 +0,0,59,5.01080808667864,0,0.017307901771525,0 +0,0,55,4.53203818644091,0,0.028579186018322,0 +1,1,69,4.94849716352698,1,0.168891002823771,2 +0,0,47,4.70927488703126,0,0.075267007286727,2 +0,1,47,4.92369419521853,0,0.076551944559413,2 +0,1,57,5.25911954372985,0,0.021132597238341,0 +0,0,47,4.67216124686139,0,0.075409329945412,2 +1,1,42,5.06648685455365,0,0.154869837849769,2 +0,0,51,4.69628284385175,0,0.045409270434819,0 +1,0,56,4.92570276139418,0,0.024636471583857,0 +1,0,51,5.39662321404303,0,0.045439352167256,0 +0,0,41,4.81553258480284,0,0.169784683243447,2 +0,0,53,5.08357969253774,0,0.035118784243149,0 +0,0,51,4.88293135929465,0,0.045407051384658,0 +0,0,52,4.83491076070874,0,0.040071809841352,0 +1,0,56,4.9554793578378,0,0.024593361823128,0 +0,0,57,4.71043219654023,0,0.02253563145364,0 +0,1,59,5.04254148602155,0,0.017133732364262,0 +1,1,60,4.82298312312856,1,0.348817104162198,3 +1,0,64,5.18320668439574,0,0.009557437184179,0 +1,0,55,5.2013104440287,0,0.027151563599071,0 +0,0,48,4.52387526328511,0,0.065358342578783,1 +0,1,59,5.24111401830215,0,0.016656943271243,0 +1,0,65,4.98529537241493,0,0.009006278928332,0 +0,1,42,4.6662504231801,0,0.144559288792083,2 +0,1,44,4.7306914790468,0,0.111846999500831,2 +0,0,55,4.77122981894728,0,0.028145258175324,0 +0,0,45,4.70709295365069,0,0.097621822335511,2 +0,0,43,4.77757862652786,0,0.128791783662388,2 +1,0,55,4.90225063521934,0,0.027815065095617,0 +0,0,51,4.72914731391673,0,0.045351237562489,0 +1,0,53,4.81179043689343,0,0.035632454181678,0 +0,0,59,4.76536957548006,0,0.017831357470133,0 +0,1,60,4.97271835194649,0,0.015481924784071,0 +0,1,62,4.80323464105098,1,0.302210471102323,3 +0,0,46,4.64674675785309,0,0.085270695657855,2 +0,0,50,4.70939430994832,0,0.051406568807496,1 +0,0,49,4.5505755659592,0,0.05791375709123,1 +0,0,55,5.01942973310189,0,0.027525619569941,0 +0,0,48,4.70138517410735,0,0.066147292943302,1 +0,1,62,5.00332993315964,0,0.012385129710226,0 +0,0,44,4.73728059082057,0,0.111837959492392,2 
+0,0,56,4.53143460664665,0,0.025523942158641,0 +1,0,55,4.8908563752631,0,0.027960563818629,0 +1,0,55,5.0188750874474,0,0.027492929368503,0 +1,0,64,4.99858457394178,0,0.009995367006839,0 +0,0,59,4.77433493131015,0,0.017887416342776,0 +0,0,42,4.77497768760999,0,0.147266433269052,2 +0,0,41,4.71677288092156,0,0.166699365461863,2 +0,1,61,5.12493734520083,0,0.013457669344579,0 +0,0,55,4.9177057211135,0,0.027775495080835,0 +1,1,65,4.81753756344795,0,0.009368492556148,0 +1,1,63,5.27320416465238,1,0.259795060745207,3 +0,1,67,4.94447709437651,0,0.007430673132645,0 +0,0,59,4.99853370676619,0,0.017232886199727,0 +0,0,60,4.76076827508499,0,0.016044205385352,0 +1,1,46,4.90802484419788,1,0.760247121484634,3 +1,0,63,4.96856896871623,0,0.011160527580114,0 +1,1,60,5.26120199783336,0,0.014753738593776,0 +0,0,61,5.04350860923587,0,0.013697404028629,0 +0,0,65,4.97645591006284,0,0.009058921108206,0 +0,0,46,4.96386285399693,0,0.087760623798742,2 +0,0,59,4.66983551397148,0,0.018122076585471,0 +0,0,41,4.43860888622475,0,0.158107619734846,2 +0,0,47,4.55551989176842,0,0.074250168231741,1 +1,0,56,4.8580188039233,0,0.024821198483926,0 +1,0,67,4.9922035623704,0,0.007342056507835,0 +0,0,50,4.72577192646329,0,0.051502965869416,1 +1,0,47,4.7237675296285,0,0.075233241702808,2 +0,0,51,5.00523920260332,0,0.045472355037147,0 +0,0,48,4.74142482404213,0,0.066287460772546,1 +0,0,48,5.10554253588274,0,0.067759621259695,1 +0,0,54,4.85979935329873,0,0.031434565316772,0 +0,0,47,4.95922694756617,0,0.076687357201898,2 +0,1,46,4.47595358631568,0,0.083636287358955,2 +0,1,41,5.16247078791587,0,0.18141118051758,2 +0,0,53,4.54093148245179,0,0.035965344187434,0 +1,1,49,4.93304756530452,1,0.674077351804753,3 +0,1,57,5.0436102958806,0,0.021611904176395,0 +0,0,54,4.99772600552175,0,0.031122984736534,0 +1,0,56,4.90102287820324,0,0.02476006821821,0 +0,0,43,4.63172403552756,0,0.125795906841461,2 +0,0,64,4.80189594087548,0,0.010441258182465,0 +1,0,61,5.03326287972362,0,0.013692387756732,0 +1,1,59,5.16493272303835,0,0.016787438597055,0 +1,0,51,4.82165076915076,0,0.045375797674513,0 +0,0,47,5.09730581991004,0,0.077510224002416,2 +1,1,72,5.19548222905644,0,0.004176613023972,0 +0,0,43,4.89882212453792,0,0.131146057291031,2 +1,1,62,5.12242470688555,1,0.288329613113012,3 +0,1,74,5.08965310485319,0,0.00360459664894,0 +1,1,40,5.19290392625108,0,0.209849633113529,3 +1,0,59,4.82667687932026,0,0.017726798752777,0 +1,1,70,5.09880359602013,1,0.149193110088778,2 +0,0,44,4.97737738135365,0,0.115567515354237,2 +0,0,66,4.71292237821988,0,0.008759405545849,0 +0,1,58,4.95812931426868,0,0.019454550657268,0 +1,0,53,4.79384918858025,0,0.035696061485491,0 +0,0,61,4.9054389560775,0,0.013991968502049,0 +0,0,42,4.8020029420531,0,0.147908339419593,2 +0,0,50,4.75917345696632,0,0.05137509132681,1 +0,0,46,4.86404389528813,0,0.087058819425093,2 +0,0,44,4.73887715953249,0,0.112013626187743,2 +0,0,53,4.85437920428324,0,0.035549863478138,0 +0,1,44,5.09194396341238,0,0.117395706810801,2 +1,0,64,4.94455839539616,0,0.010112968334436,0 +0,0,50,4.73642524336113,0,0.051419985487521,1 +0,0,44,5.14474988556517,0,0.118468806675398,2 +0,0,66,4.76981386461658,0,0.008609162549947,0 +1,0,48,5.29670491068942,0,0.068518020555451,1 +0,0,54,4.43639357255107,0,0.032200418371413,0 +1,0,59,5.06756815755696,0,0.017090589328267,0 +0,0,41,4.95447966386445,0,0.174322922213746,2 +0,1,53,4.46821624145015,0,0.036095628116911,0 +0,0,54,5.07981427480475,0,0.030972361530169,0 +0,0,47,5.01481695441317,0,0.076993229644131,2 +0,1,55,5.25489433806816,0,0.027090367384948,0 
+0,0,59,4.95189692865447,0,0.017320676496705,0 +0,0,42,4.65569889547717,0,0.144258737306817,2 +0,0,62,4.90202912246606,0,0.012585748091016,0 +0,1,68,4.63764233083684,0,0.007382733221385,0 +0,0,55,5.29923362406243,0,0.026993568621571,0 +1,1,72,5.30858992894615,1,0.118158064893169,2 +0,0,60,4.70815712015285,0,0.016159242200056,0 +0,0,55,4.97226358536348,0,0.027655999802989,0 +0,1,43,4.7905538134868,0,0.129018305467838,2 +1,0,48,5.01280650691256,0,0.067297639476493,1 +1,0,50,4.8704758970195,0,0.051528580222128,1 +1,0,70,4.89668691339252,0,0.005623025128305,0 +0,1,58,4.73536286771705,1,0.403490688150154,3 +0,0,45,4.98562978409256,0,0.100876032950464,2 +0,0,60,4.56634470834938,0,0.016567513036498,0 +0,0,54,5.11314714578969,0,0.031079098831812,0 +0,0,41,4.98046624403151,0,0.175274104519258,2 +0,0,46,4.83716475923915,0,0.086809381437558,2 +0,0,63,4.92527196834029,0,0.011285807744788,0 +0,0,53,4.5938327868396,0,0.035880339630802,0 +1,0,59,4.83198101181698,0,0.017761373387904,0 +0,0,44,4.70738059682621,0,0.111578359985049,2 +1,0,67,4.98355805252738,0,0.007363165470064,0 +0,1,53,5.17093215263875,0,0.035074461545477,0 +0,0,42,4.60124055843309,0,0.142815689798723,2 +1,0,46,4.97402941193543,0,0.087994400706905,2 +0,0,62,4.93590521884464,0,0.012544434342463,0 +0,0,64,4.89092753133331,0,0.010214498695748,0 +0,0,49,4.69275938727317,0,0.058220936073704,1 +1,1,53,5.1240859395975,1,0.54645397686631,3 +0,0,54,4.74397923571785,0,0.031643350621046,0 +1,1,66,5.12719697684689,0,0.00784172670796,0 +0,0,42,4.92642823202913,0,0.151107125736059,2 +0,0,40,4.89678469207462,0,0.197789465332803,2 +0,0,43,4.93982948896613,0,0.132152743403585,2 +0,0,49,4.95118314102528,0,0.058757043064288,1 +0,0,48,4.83945878308281,0,0.066601545994362,1 +0,0,53,4.92795870457233,0,0.035304095843608,0 +0,0,41,4.81034858900158,0,0.169646590959393,2 +0,0,60,4.96109094312861,0,0.015478652847061,0 +0,1,54,4.9252489411197,0,0.031270151636647,0 +0,1,53,4.76633549612052,0,0.035649990611499,0 +0,0,64,4.59559974545157,0,0.010967047762081,0 +1,1,43,5.03109172088879,0,0.133836664591181,2 +0,1,60,4.56953327485635,0,0.016508647330092,0 +0,0,49,4.91563080616159,0,0.058678719476701,1 +0,0,68,5.06343130214606,0,0.00648984967682,0 +0,1,65,4.93358081396665,0,0.009122887935859,0 +0,0,55,4.70588741032436,0,0.028298303090278,0 +0,0,61,5.03053532849416,0,0.013729418814276,0 +0,0,49,4.73676077108283,0,0.058279126851203,1 +0,0,68,4.53410173790346,0,0.007597175178785,0 +0,0,52,4.86467867944014,0,0.040105927362801,0 +1,0,60,5.11082055073316,0,0.015180044290367,0 +0,1,55,4.5707592485991,0,0.028493821886242,0 +1,0,40,4.94006241062178,0,0.199391531887538,2 +1,0,65,4.87866187809208,0,0.009258690007952,0 +1,1,59,5.30988003226292,1,0.357242040726391,3 +0,0,47,4.49337892184859,0,0.073874598672271,1 +1,0,52,4.84483611767213,0,0.04015787759241,0 +1,0,57,4.92884003017779,0,0.022016072316826,0 +0,0,41,4.96629193619018,0,0.174776343394814,2 +0,1,61,5.07951130545426,0,0.013581486741059,0 +0,1,59,4.78354628762029,0,0.017806189770557,0 +0,0,44,4.95270964386511,0,0.115397499174113,2 +1,1,67,5.18856046683244,0,0.006951834601322,0 +0,1,40,4.64914889313277,0,0.187861136952013,2 +1,1,60,4.94398147934799,0,0.015540426976503,0 +1,0,60,4.91922170530542,0,0.015666857558138,0 +0,0,41,5.0207294780063,0,0.17701458060913,2 +0,0,42,4.76206049293023,0,0.14684980433847,2 +1,0,45,5.05608394967106,0,0.101705858274716,2 +0,0,48,4.9193000555651,0,0.066934453283527,1 +0,0,42,4.69965141476145,0,0.145360525366373,2 +0,0,49,4.56354644736606,0,0.058031921730695,1 
+0,0,47,4.59068385860804,0,0.074482103056458,1 +0,0,44,4.59288744561265,0,0.109673261992027,2 +1,0,65,5.09390654834387,0,0.008776857958276,0 +0,0,58,4.83311476718534,0,0.019818988830967,0 +0,0,45,5.0121079227946,0,0.101173271671343,2 +0,1,69,4.9232709482159,0,0.006136461490646,0 +0,0,53,4.89771903785452,0,0.035422705896453,0 +0,0,54,4.81742148118717,0,0.031457453881627,0 +1,0,63,4.99660821344469,0,0.011126055345109,0 +0,1,61,4.76402536517114,0,0.01441660958858,0 +0,0,47,4.90126965810798,0,0.076313902441711,2 +0,0,62,4.96376447177411,0,0.012480764007082,0 +0,0,47,5.14564841152128,0,0.077820375638548,2 +1,1,67,4.71991534174325,0,0.007928401078079,0 +1,1,58,4.88278412805583,1,0.399583299115283,3 +0,0,54,4.8034602995239,0,0.031519846373654,0 +1,0,58,4.99507239456584,0,0.01946091162248,0 +0,0,56,4.58108643014454,0,0.025401393359929,0 +0,0,43,4.44821278821292,0,0.122252749600522,2 +0,0,49,4.83973449982636,0,0.058657678051294,1 +1,0,66,4.91656670548492,0,0.008285240807183,0 +0,0,54,4.98933975301352,0,0.031285384673823,0 +0,0,49,5.16601816464093,0,0.059309911507758,1 +0,0,62,5.12097477768045,0,0.012060243897912,0 +1,0,57,5.00938728497603,0,0.021713607768556,0 +0,0,54,4.80930119669925,0,0.031497277410081,0 +0,0,59,5.07085201859773,0,0.01701842939214,0 +0,0,63,4.78999510039161,0,0.01163952624858,0 +0,1,45,4.94691194434615,0,0.100551540289532,2 +1,0,50,4.85763673155211,0,0.051589683800293,1 +0,0,63,4.60496812749946,0,0.012051965432822,0 +0,1,43,4.93369294710055,0,0.132286090929423,2 +0,0,51,4.94056403525675,0,0.045381737046047,0 +1,0,46,4.84302869399178,0,0.086756319810879,2 +0,1,43,4.69333686373706,0,0.127023700690775,2 +0,0,46,5.07117758808368,0,0.088830906624714,2 +0,0,61,4.86511022939063,0,0.014178774652523,0 +0,1,72,4.82855942572941,0,0.004759917703057,0 +0,1,70,4.57358902723913,0,0.00625618762021,0 +0,1,54,5.06371247132423,1,0.514624909851006,3 +0,0,64,4.89566503070037,0,0.010228725951969,0 +0,0,45,4.81519891209179,0,0.098881140142934,2 +1,0,69,5.00172660045179,0,0.005986713794382,0 +0,0,47,4.75021293045878,0,0.075592022651136,2 +0,0,71,4.91926074934843,0,0.005077937862508,0 +0,0,58,5.18932776853396,0,0.018894245037485,0 +1,0,52,5.04992708089252,0,0.039921585393852,0 +0,0,54,5.13050830957555,0,0.030916575353702,0 +1,0,65,5.01096489747169,0,0.008968380159474,0 +0,0,40,4.74430669929806,0,0.191572560439344,2 +0,0,49,5.13128355374399,0,0.059358700721582,1 +0,0,53,4.95287617709353,0,0.035409103353499,0 +0,0,56,4.73988795661956,0,0.025038100603564,0 +0,0,50,4.86003680409844,0,0.051586846984643,1 +0,1,67,5.19570766471178,0,0.006915745778499,0 +0,0,44,4.92987489782663,0,0.114820257511329,2 +0,1,46,4.85710312781312,0,0.086906475509956,2 +0,0,44,4.92793468615212,0,0.114912051709939,2 +0,1,47,4.68084313102561,0,0.075035790563164,2 +0,1,66,4.78144735346804,1,0.222211654761286,3 +1,1,66,5.14019703594466,0,0.007806164144669,0 +0,1,44,4.67269700807662,0,0.110857015745761,2 +1,0,67,5.25971238383978,0,0.006815913924876,0 +0,1,40,4.66988748593467,0,0.188789989711846,2 +0,0,72,4.77048999903172,0,0.004869172356369,0 +0,0,58,4.52957480306342,0,0.020577465030728,0 +1,0,46,4.86389296367828,0,0.086972959714308,2 +1,0,53,5.15961717118876,0,0.035114076915081,0 +0,0,58,5.19725616198952,0,0.018947125841141,0 +1,1,69,4.85686008658856,1,0.173157481261366,2 +0,0,49,4.7108252107443,0,0.058276902738894,1 +0,0,61,4.71664967604891,0,0.014542300249272,0 +0,0,42,4.90091610603684,0,0.15044134549239,2 +0,1,48,4.97050794178684,1,0.704531281565036,3 +1,1,64,5.20785643848813,0,0.00950768247892,0 
+0,0,52,4.68198687196292,0,0.040304731582675,0 +0,0,48,4.93292594732626,0,0.067004998280501,1 +0,1,46,4.66395985164679,0,0.085318387087889,2 +0,0,44,4.59046619526949,0,0.109697705508024,2 +1,1,62,5.08815746102881,1,0.289643415401055,3 +0,0,48,4.76218534097703,0,0.066363065854552,1 +0,1,59,4.95726887169993,0,0.017391016238567,0 +0,1,42,4.74266629899827,0,0.146367880677511,2 +1,0,60,4.72127393670477,0,0.016182427837976,0 +1,0,48,5.29217171373909,0,0.068439124663764,1 +0,1,62,5.27669565198687,0,0.011717106457315,0 +0,1,67,5.15173131159646,0,0.007025502317622,0 +1,0,52,5.06838630922378,0,0.039913875719141,0 +0,1,62,4.77918136054418,1,0.302710672701883,3 +1,0,45,4.83840235264163,0,0.099171381016912,2 +1,0,55,4.95003826688954,0,0.02765999284918,0 +0,0,49,5.06579980339277,0,0.059039936480174,1 +0,0,46,4.80648880088826,0,0.086691251515068,2 +0,0,43,4.84449120206446,1,0.832290365512318,3 +0,0,58,4.79147885196086,0,0.019834097843787,0 +1,0,47,4.79457490987248,0,0.075707608570559,2 +1,0,57,4.93747575427548,0,0.021890671375707,0 +1,0,59,4.86873026451466,0,0.017616449335118,0 +0,0,48,4.88025030287254,0,0.066784389639441,1 +0,0,41,4.82498565504622,0,0.170183420323606,2 +0,0,60,4.74556031086328,0,0.016113709293602,0 +0,0,61,4.57945035740665,0,0.014914229336628,0 +1,0,56,5.09103957197282,0,0.024337971686048,0 +0,0,55,4.67780542614444,0,0.02832862752556,0 +1,0,54,4.95794041213348,0,0.031271833901219,0 +1,1,67,5.20411044166691,0,0.006917052961767,0 +1,0,59,4.89108912477207,0,0.017528918402457,0 +0,0,52,4.78491678003802,0,0.040164847812529,0 +0,1,42,4.89073082398463,1,0.854405839351363,3 +0,1,53,4.59326535690845,0,0.035882068674658,0 +0,0,46,4.834452050543,0,0.086936636538195,2 +0,0,46,4.81410442133057,0,0.086491393442503,2 +0,1,67,4.71671166953876,1,0.209029578861224,3 +0,1,42,5.08625432739447,0,0.155332087890378,2 +1,1,64,4.81444262343511,0,0.01040377271027,0 +1,0,62,4.99162839814237,0,0.012340438716736,0 +1,1,61,4.79741483588949,1,0.324578831459513,3 +1,0,59,4.92751994543168,0,0.017451083455498,0 +1,0,50,4.81738010677826,0,0.051752783597698,1 +0,0,63,5.05187933945652,0,0.010996627335162,0 +1,0,54,5.00063844065324,0,0.031115094819323,0 +1,0,56,4.68971895828976,0,0.025175547783095,0 +0,1,51,4.87606893490628,0,0.045365716397996,0 +0,0,50,5.10138581305078,0,0.051765087103809,1 +1,0,63,5.08843690026805,0,0.010908661814302,0 +0,0,55,4.8176969689617,0,0.028123069853117,0 +0,0,55,4.76468514424474,0,0.028076794768556,0 +0,0,66,5.01219257495782,0,0.008073091199024,0 +1,0,61,5.04001046896567,0,0.013703931136622,0 +0,0,51,4.88444731236694,0,0.045427387151252,0 +0,0,51,5.09935197913518,0,0.045413114081739,0 +1,1,68,4.86583271493745,0,0.006896650349857,0 +0,0,59,4.72401864270237,0,0.017951145091486,0 +0,0,47,4.68341506926859,0,0.075011673053342,2 +1,1,67,5.01629789625748,0,0.007249941520349,0 +1,0,43,4.93667167483018,0,0.131926476272234,2 +0,0,51,4.74861495138325,0,0.045353308475214,0 +0,0,45,4.86033961982426,0,0.099497059028119,2 +0,0,52,4.86504977286334,0,0.040052649538041,0 +0,0,60,4.75105578584763,0,0.016033583127812,0 +0,0,40,4.86157716364333,0,0.196228238829386,2 +1,1,70,4.91641230024468,0,0.005579621300221,0 +0,1,59,4.64950228189649,0,0.018197769627026,0 +0,0,61,4.96529568014027,0,0.01388795060147,0 +0,0,61,5.12836174193891,0,0.013450466152509,0 +0,0,55,4.70444435505515,0,0.028165857960557,0 +1,0,63,4.99613545372323,0,0.011094337329563,0 +0,0,52,4.78249951844082,0,0.04016331839665,0 +0,1,67,5.0085445258134,0,0.007304318353605,0 +0,0,58,5.0054480492011,0,0.01941961566338,0 
+0,0,60,4.99686869725474,0,0.015388241751676,0 +1,0,47,4.97760534920482,0,0.076805624040609,2 +0,1,69,4.93073803177426,0,0.006113870436864,0 +0,1,65,5.104151022308,1,0.226694390533634,3 +0,0,49,4.95876922533932,0,0.058778631037774,1 +1,1,48,4.9688399932193,1,0.704995750876752,3 +1,0,62,4.80819671566058,0,0.012807430076955,0 +0,0,50,4.90772896760579,0,0.051629921511359,1 +0,1,58,4.73520287083025,1,0.404608431761379,3 +0,1,52,5.30900290648997,0,0.039829758274236,0 +0,1,60,4.75696166972838,0,0.016054370616062,0 +0,0,48,4.813370721303,0,0.066538992318288,1 +0,1,44,4.84246008139508,0,0.113477476813942,2 +1,0,63,5.35507508524646,0,0.010293021594215,0 +1,0,66,5.02504645480388,0,0.008043729693876,0 +1,0,64,5.08652409527991,0,0.009774679921945,0 +1,1,68,5.15447526071722,1,0.174320937975565,2 +0,0,57,4.86700502817983,0,0.022145381496248,0 +0,1,57,5.02168683534339,0,0.021645641322569,0 +0,0,47,4.62784683809195,0,0.074719085738665,1 +1,1,63,4.87855595788307,1,0.276895883159496,3 +0,0,51,4.88914994978202,0,0.045356818620447,0 +0,0,52,4.69940840327148,0,0.040229631493423,0 +0,0,47,4.79430490382545,0,0.075691133141579,2 +1,1,58,5.09168088958722,0,0.019162975254749,0 +0,0,43,5.10767859595734,0,0.135433584983307,2 +0,1,64,4.6879030526444,1,0.26392691359096,3 +0,0,44,4.66930684998217,0,0.110822241512232,2 +0,0,42,4.81371617324137,0,0.148233399272658,2 +0,1,58,4.85072591712311,0,0.019748968988244,0 +0,1,52,5.04932905991213,0,0.04017419107527,0 +0,1,72,4.96560163988125,0,0.004549211538538,0 +1,0,42,4.95459575320127,0,0.151940166158666,2 +1,0,55,4.77184326236984,0,0.028095059262942,0 +1,0,60,5.32650144829448,0,0.014599385907683,0 +0,0,58,4.69386685792685,0,0.020099993463649,0 +0,0,51,4.89954826740613,0,0.045362653538666,0 +0,0,48,4.85696056422247,0,0.067031152488475,1 +1,0,51,4.89598955516865,0,0.045401993645805,0 +1,0,59,4.89295166825745,0,0.017480205174985,0 +0,0,63,4.87503616045483,0,0.011363105367377,0 +0,0,44,4.74898805218261,0,0.112082111230449,2 +0,0,43,4.88825939945755,0,0.131382039850075,2 +1,0,54,4.77141834390076,0,0.031626781179334,0 +0,0,45,4.89427971051196,0,0.099848482696862,2 +0,0,51,4.97567527070184,0,0.045478175340354,0 +0,0,46,5.06233285855953,0,0.088798746021372,2 +0,0,46,4.51732174323277,0,0.083982631398995,2 +0,1,59,5.35606611527129,0,0.016334200446325,0 +0,0,43,4.92261717809698,0,0.131694185514163,2 +0,0,50,4.47459869459357,0,0.051169180256655,1 +0,1,60,4.52819970673178,1,0.359019678581349,3 +1,0,56,4.88175616454726,0,0.024779550799126,0 +1,1,47,4.80005169794132,0,0.075698829078987,2 +0,0,62,4.82065511113234,0,0.012814022547909,0 +0,1,69,4.79999466224174,0,0.006387857332591,0 +0,0,65,4.68825679884311,0,0.009729095801873,0 +0,1,66,4.67648775398276,0,0.008826835224855,0 +0,0,54,4.52804678632799,0,0.031995953513825,0 +0,0,54,4.75860209737189,0,0.031738763137154,0 +1,0,49,4.8560376757467,0,0.058565313275739,1 +1,1,42,5.13973733499311,0,0.156730903788921,2 +1,0,41,4.88632903736035,0,0.172205125017559,2 +1,0,61,5.1869100521918,0,0.013320157649448,0 +0,0,44,4.71357703448758,0,0.111540927578806,2 +0,1,66,4.51446023940954,1,0.23502566002394,3 +0,0,52,4.76042898939571,0,0.040306605724119,0 +0,0,51,5.00403769250841,0,0.045501956572251,0 +0,1,62,4.97819726831083,1,0.294193464415434,3 +0,0,60,4.74692035751677,0,0.016120724251355,0 +0,0,55,4.77543022763294,0,0.028076749013478,0 +0,0,43,4.79360622220486,0,0.129241281900227,2 +1,1,70,5.32400149759515,0,0.00489637748959,0 +0,0,52,4.77707853100901,0,0.040128350149424,0 +0,1,47,4.68868594440863,0,0.075051368000922,2 
+0,1,66,5.20215190604921,0,0.007691669700568,0 +0,0,55,4.93869770019033,0,0.027729871738632,0 +0,0,41,4.71631508074803,0,0.166639398984554,2 +0,0,44,4.76898963629707,0,0.112330573085338,2 +0,0,51,5.03713602585753,0,0.045367051911792,0 +0,0,45,4.90371243787113,0,0.099880610657699,2 +1,0,58,4.90243988336542,0,0.019661737284635,0 +0,0,52,4.77116656477474,0,0.040118827344445,0 +0,1,45,4.79207437158404,0,0.098602023111812,2 +0,0,56,4.77464139682058,0,0.025038651038307,0 +0,0,71,4.8275491006409,0,0.005240702397048,0 +0,0,43,4.54713881283981,0,0.124220471850131,2 +1,0,58,4.83643653916766,0,0.019847302766922,0 +1,1,67,5.15748200326457,1,0.189305290319473,2 +0,0,64,4.54239979808095,0,0.011074012635964,0 +0,0,45,4.7191517694607,0,0.097709096706514,2 +1,0,53,4.88616244625611,0,0.035363547950072,0 +0,1,42,5.16442477527932,0,0.157668702602328,2 +0,0,64,4.83483979480805,0,0.010380973119237,0 +1,0,62,4.94968740005787,0,0.012504122263337,0 +0,0,56,4.72570583278753,0,0.025152013189279,0 +0,0,62,4.90853696480147,0,0.012578958302052,0 +0,0,45,4.8458028915883,0,0.09917185315388,2 +0,0,44,4.81498278360767,0,0.113351813043764,2 +0,0,50,4.55182416645858,0,0.051240690898333,1 +0,0,48,5.02573581755495,0,0.067341291286487,1 +1,0,50,4.95463936837148,0,0.051781470215889,1 +0,0,63,4.93496025928926,0,0.011281594411682,0 +0,0,53,5.02630827846816,0,0.03532157885804,0 +0,0,61,4.58162628253953,0,0.014923058616841,0 +1,0,56,5.09769208837961,0,0.024204504829092,0 +0,1,60,5.30467952580006,0,0.014628186538857,0 +0,0,44,4.7449585485834,0,0.112033389245951,2 +0,0,54,4.72874672263138,0,0.031686570855749,0 +0,0,53,4.77842356251209,0,0.035738878972451,0 +1,0,57,4.80382200952351,0,0.022277841392666,0 +1,1,49,4.97313537769284,1,0.674486661489531,3 +0,0,56,4.50109734893862,0,0.025699535572469,0 +0,0,44,4.92055271042689,0,0.114792704500218,2 +1,1,59,4.94288263221809,1,0.370095120927459,3 +1,0,53,4.98234777513941,0,0.035241480681051,0 +1,0,50,4.82751851598088,0,0.051504171084819,1 +0,0,68,4.75227301428049,0,0.007133127045258,0 +0,0,59,4.72309312947611,0,0.017974182224532,0 +0,1,47,4.98170952368447,0,0.076931305642658,2 +0,0,56,4.88375398467136,0,0.02470825997211,0 +1,0,62,4.99493096406134,0,0.01235552822353,0 +0,0,56,5.02272049431911,0,0.02439172905095,0 +0,0,41,5.2578015580724,0,0.184506301219364,2 +1,0,46,4.95651622069219,0,0.087710436035715,2 +1,0,65,5.06875356129618,0,0.008830848799875,0 +1,0,69,5.02987932150335,0,0.005940782354761,0 +0,0,63,4.73821984387995,0,0.011765845914414,0 +0,1,69,5.02015227207209,0,0.005958066664209,0 +0,0,42,4.79737240394908,0,0.147879809862172,2 +0,0,49,4.77934220631557,0,0.058427901427492,1 +0,1,50,4.61523755019186,1,0.641920401720562,3 +0,0,40,4.61608005137641,0,0.186572528108178,2 +1,1,67,4.91721971781036,1,0.199517250696124,2 +0,0,42,4.77483479017638,0,0.147227161857405,2 +0,0,55,4.66599492411505,0,0.028310820726423,0 +0,0,43,4.84717295466065,0,0.130196750216438,2 +1,0,58,4.84480793953143,0,0.019731422794832,0 +0,0,47,4.59240999121214,0,0.07445936322643,1 +0,1,55,5.18187844275484,0,0.027211106406798,0 +0,0,56,4.76695849159384,0,0.024967315273945,0 +0,1,52,4.83737615854549,0,0.040164905317593,0 +0,0,43,4.67505589246483,0,0.12666120160044,2 +0,1,44,4.57784328551486,0,0.10949515772762,2 +1,0,57,5.00041631526736,0,0.021776866381848,0 +1,0,52,5.01617503530454,0,0.040132814144451,0 +1,0,53,5.155801998522,0,0.035036327401894,0 +0,0,62,4.96887898635254,0,0.012444740602973,0 +0,0,40,4.69326164506891,0,0.189601211851022,2 +1,0,72,4.86090393907351,0,0.004706705769054,0 +0,0,41,5.01164145482307,0,0.1763900303574,2 
+0,0,62,4.94439658857322,0,0.012472774311793,0
+0,0,58,5.02677718803184,0,0.019284859761139,0
+0,0,63,4.64129694996389,0,0.011948824690257,0
+[remaining rows of the added data file omitted here: each added line is a single record of seven comma-separated numeric values in the same format as the rows above]
+0,0,56,4.51411224660777,0,0.025587368529913,0 +1,1,68,5.4060484615425,0,0.005875269358238,0 +0,0,54,4.53519404850892,0,0.031980293468693,0 +0,0,50,4.7821716138396,0,0.051480233473322,1 +0,0,40,4.53956445494142,0,0.183887148440216,2 +0,0,42,4.75703305776872,0,0.146845488628278,2 +0,0,47,4.43650184904213,0,0.073555544465864,1 +0,1,70,4.8918748284177,1,0.158195072754573,2 +1,0,62,5.0349808031494,0,0.012228074026667,0 +1,0,40,4.80124327077055,0,0.193844685010502,2 +0,0,56,4.76547944516227,0,0.025002844569683,0 +1,0,71,5.05726628105495,0,0.004845345483525,0 +0,0,68,5.21560208993605,0,0.006189658339829,0 +0,0,60,4.72960953746777,0,0.016141629871036,0 +1,1,43,5.04362155474341,1,0.837020088763985,3 +0,1,57,4.55159934513627,0,0.022870854029307,0 +0,0,67,4.54368988140561,0,0.008307214927315,0 +1,0,48,4.83194741927656,0,0.066610154573948,1 +0,0,41,4.93015756274715,0,0.173547013444643,2 +0,0,46,4.97108938678042,0,0.087859715371458,2 +1,1,59,5.27904305987384,0,0.016530749340175,0 +0,0,61,4.91425949978169,0,0.014009418097245,0 +0,0,56,4.73575191261129,0,0.025141763698817,0 +0,0,56,4.6465187908975,0,0.025373284300931,0 +0,1,62,4.82397681781611,0,0.012825643652536,0 +0,0,67,4.9574575845116,0,0.007412790467863,0 +0,0,50,4.81507931410859,0,0.05149828600177,1 +1,0,53,5.34149782307604,0,0.034820017169034,0 +1,0,68,5.12984581560602,0,0.006372541946476,0 +0,0,43,4.69646368082775,0,0.127255998209574,2 +0,0,62,4.88151940846621,0,0.0126193022175,0 +1,1,74,5.04694282300371,0,0.003662965920463,0 +0,0,43,5.2867467845239,0,0.139231717788396,2 +0,1,65,5.03857932872434,0,0.008852677090218,0 +0,0,45,5.00050394529566,0,0.101010292434711,2 +0,0,58,4.86983488419953,0,0.019674942921922,0 +1,0,56,4.8529701966373,0,0.024839559600191,0 +0,0,58,4.93261939835612,0,0.019505265996815,0 +0,0,41,4.60473861171962,0,0.163213226273775,2 +0,0,57,4.76010764112159,0,0.022375368820759,0 +0,0,66,5.0442630398555,0,0.008020159677528,0 +0,0,54,4.69298504114306,0,0.031799588327647,0 +0,0,53,4.84889247974572,0,0.035422543634963,0 +0,0,42,4.80998796953125,0,0.148086172833641,2 +0,0,59,4.97897913258308,0,0.017256961157188,0 +0,0,50,4.97314732937956,0,0.051656970447491,1 +0,0,48,4.89860889562753,0,0.067168341765514,1 +0,0,58,4.85722939291899,0,0.019681393224122,0 +0,1,44,5.05263902299533,1,0.814311766865897,3 +0,1,73,5.18099115457718,0,0.003826522824549,0 +1,1,69,4.80677647046674,0,0.006370183696293,0 +0,0,61,4.68737882627706,0,0.014613600961267,0 +0,0,46,4.97071757378861,0,0.087841788612648,2 +0,0,57,4.99021641593419,0,0.021781313390087,0 +1,0,58,5.17093080671348,0,0.018965386391315,0 +0,0,41,4.72030943649305,0,0.166916389180595,2 +1,0,48,4.98868265707395,0,0.06719657545591,1 +0,0,61,4.8722250449594,0,0.014134856198445,0 +1,0,60,4.94412705457213,0,0.015534057973239,0 +0,0,47,5.00727723936666,0,0.076980389070824,2 +0,0,46,4.89859758230933,0,0.08722350648719,2 +0,0,45,4.85249807435929,0,0.099638991372258,2 +0,0,48,4.94957353027906,0,0.067114028388999,1 +0,0,51,4.79996603108068,0,0.045373771029893,0 +0,1,46,5.0686031698206,0,0.088710763683521,2 +0,0,61,4.85434984046556,0,0.014178023378977,0 +0,0,52,5.09033444519417,0,0.039890240812568,0 +0,0,54,4.75569101030635,0,0.031587773329122,0 +0,1,51,4.89415782279347,0,0.045474251954955,0 +0,0,57,4.89126599802175,0,0.022018857825355,0 +0,0,42,4.51872273071767,0,0.140771375101615,2 +0,1,52,4.62840484747039,0,0.040228905941288,0 +0,1,46,4.7205429959397,0,0.085782894577145,2 +1,0,52,4.46397987813056,0,0.040403618569117,0 +0,1,54,4.88567317182932,0,0.031305580810625,0 +0,0,58,4.66467293379674,0,0.020241779134751,0 
+1,0,50,5.00705204128125,0,0.051934190668043,1 +1,0,70,5.01510777507235,0,0.005411865520369,0 +0,0,52,5.11007088694307,0,0.039999854128149,0 +1,0,41,5.01972574022955,0,0.17642301589561,2 +1,1,42,5.05300693964455,1,0.858749250603277,3 +0,0,53,4.94495214414145,0,0.035421577200196,0 +0,0,51,4.64730311444896,0,0.045415200798605,0 +1,0,58,5.03029148053057,0,0.019330460083606,0 +1,0,45,4.79361745406105,0,0.098657512038974,2 +0,0,56,4.6525116785682,0,0.025387771749577,0 +0,0,49,4.60753749161554,0,0.058002413997459,1 +0,0,50,4.73174079367141,0,0.051363766859453,1 +0,0,49,4.74730241999637,0,0.058382474824461,1 +0,1,69,4.75240326005017,0,0.006474141943849,0 +0,1,62,5.16019284128465,0,0.011967618643744,0 +0,0,56,4.69599363322996,0,0.025240479536735,0 +0,0,60,4.8474190155174,0,0.015813520616707,0 +0,0,41,5.2102404298134,0,0.182947258528134,2 +1,0,57,5.16418207893101,0,0.021395812250493,0 +1,1,66,4.98010845049665,1,0.214029275538549,3 +0,0,52,4.78413197145717,0,0.040323067535979,0 +0,0,46,4.65234723329058,0,0.085275393502705,2 +0,0,48,4.83217103642861,0,0.066773456694493,1 +0,0,57,4.93923411303933,0,0.021848186095178,0 +0,0,50,4.5509062272365,0,0.051170332533909,1 +0,0,66,4.87028425093525,0,0.008395738292117,0 +0,1,45,4.75357145225612,0,0.098249720752542,2 +0,0,45,4.56098065404551,0,0.096043920439392,2 +1,0,56,4.91694821408632,0,0.024741369851578,0 +0,0,61,4.85346808560797,0,0.014125547186486,0 +0,0,41,4.88264195090631,0,0.172031935526037,2 +0,0,45,4.77060040625351,0,0.09831472634174,2 +0,1,51,4.76846284231686,1,0.611996431013862,3 +1,0,69,4.94260299395041,0,0.006104746735209,0 +0,0,55,5.20941282717685,0,0.027140215748807,0 +1,0,50,4.82879837019877,0,0.051506803807762,1 +1,0,56,4.82397153441851,0,0.0248073257342,0 +0,1,43,4.8595895569684,0,0.130946002477097,2 +1,0,62,4.9723763829802,0,0.012401103905279,0 +0,1,61,5.093268183679,0,0.013557006630923,0 +0,0,50,4.84514199208504,0,0.051498427231044,1 +0,1,54,4.64605606938183,1,0.521625329887826,3 +1,0,66,4.86110672054608,0,0.008424301613423,0 +0,0,62,5.00237825426216,0,0.012391382558915,0 +0,0,56,4.49458839503077,0,0.025740971172263,0 +1,0,47,4.80965653565946,0,0.075827123724848,2 +0,0,57,4.78218878482788,0,0.02226616484407,0 +0,0,44,4.70461801464978,0,0.111492865033308,2 +0,0,59,4.97005037312538,0,0.017272748583954,0 +0,0,61,4.8687605602829,0,0.014104465486286,0 +1,0,53,4.97541686581802,0,0.035270493275995,0 +0,0,59,5.28544468870711,0,0.016545773370153,0 +0,0,49,4.74623938164364,0,0.058341943482357,1 +1,0,48,4.89932787174223,0,0.066971781866944,1 +1,1,69,5.00651998957101,0,0.005987996463543,0 +0,0,60,4.9756322731286,0,0.015435463549395,0 +0,0,50,4.71529122815008,0,0.051381033196619,1 +1,1,64,5.07816159957671,1,0.246921030534938,3 +1,0,45,4.81503759127044,0,0.098886004964878,2 +0,0,57,4.75785845735828,0,0.02236939109391,0 +0,0,63,5.05260697278678,0,0.010987592690561,0 +0,0,40,4.59873854176203,0,0.185957083257323,2 +0,0,57,4.85344605601489,0,0.022048046330331,0 +1,0,41,4.82153996983923,0,0.169991158254768,2 +0,0,41,4.97776967060116,0,0.175061309546526,2 +1,0,63,5.10427907966024,0,0.010820689595392,0 +0,0,42,4.68300476142108,0,0.144996666268013,2 +0,0,48,4.56193966244043,0,0.065543426060877,1 +0,0,51,5.04657612238582,0,0.045655209394878,0 +0,0,45,4.77417022952572,0,0.098434293561313,2 +1,0,45,4.50918526575517,0,0.095306224569775,2 +0,0,62,5.10541700159392,0,0.012103779388113,0 +1,0,47,4.95392689307154,0,0.076634089734238,2 +0,0,62,5.06996729865695,0,0.012212033512506,0 +1,1,72,5.0674251488697,0,0.004390305448716,0 +1,0,51,4.79820995904629,0,0.045421747259015,0 
+0,1,65,4.97270987176354,0,0.009065253780759,0 +1,1,64,4.49212113773813,1,0.273060907615431,3 +1,1,59,4.81800864239309,0,0.017697767475364,0 +0,0,44,4.86426472708303,0,0.113899667922881,2 +1,0,52,4.80218621156178,0,0.040214737656622,0 +0,0,48,4.91007392980198,0,0.066978932608583,1 +1,1,57,4.81468297830176,1,0.429054982267707,3 +0,0,50,4.86711139675477,0,0.051520097969468,1 +1,0,49,4.9323423412945,0,0.058953950706822,1 +0,0,51,4.63995121982026,0,0.045394721878134,0 +0,0,45,4.75823033624966,0,0.098345698266383,2 +0,0,53,4.84745831872541,0,0.035506935844217,0 +0,0,48,4.79115359006774,0,0.066437272886235,1 +0,0,46,4.89984885989383,0,0.087335058144884,2 +0,0,41,4.66735538822339,0,0.165064674570485,2 +0,0,51,4.72901973031516,0,0.045431044624992,0 +1,0,50,5.07404401168427,0,0.051713797294897,1 +0,0,42,4.73604970540782,0,0.146302730595492,2 +0,0,48,4.69933846633471,0,0.066047543112,1 +0,0,52,4.81272982998692,0,0.040165838638612,0 +1,0,57,4.97571177647937,0,0.021854401756329,0 +0,0,60,4.31979251522572,0,0.017278527048377,0 +0,1,62,4.73833518804668,0,0.013028131167851,0 +1,0,68,5.04497444728442,0,0.006517997666662,0 +0,0,53,5.03676953509147,0,0.035265130416499,0 +0,0,60,4.51215267690987,0,0.016744749352422,0 +0,0,46,4.80525039955043,0,0.086479591959702,2 +0,0,52,4.50065692262708,0,0.040387323978694,0 +0,0,63,5.00325993853215,0,0.011044046701949,0 +0,1,53,5.25777004219166,0,0.034877921482606,0 +0,0,55,5.33960762824592,0,0.026967614446903,0 +1,0,54,4.8225623685993,0,0.031538629311829,0 +1,1,62,4.97224014545908,1,0.294743698286855,3 +0,0,63,4.92891694181728,0,0.011302135960946,0 +0,0,60,4.92634861360991,0,0.015570422543171,0 +0,0,43,4.8459641486808,0,0.13003529447131,2 +0,0,50,4.89974246669833,0,0.051742705788506,1 +0,1,63,4.93947410083051,1,0.274020215777191,3 +0,0,48,4.97772999244783,0,0.067184010520467,1 +0,0,40,4.68873225796613,0,0.18951434261404,2 +0,0,51,5.02164584815898,0,0.045401940382537,0 +1,1,68,5.07539717724868,0,0.006474252477853,0 +0,0,50,4.75315505666789,0,0.051381982231518,1 +0,0,53,4.72621609670672,0,0.035716581088024,0 +1,1,52,4.84252771208901,1,0.580504720487629,3 +0,0,60,4.77324281603664,0,0.01600107510193,0 +0,0,47,4.92093030927777,0,0.076407362322584,2 +0,0,50,4.86654826960375,0,0.051553450379168,1 +1,0,59,4.87049087690938,0,0.017631148328147,0 +0,0,58,4.57112904018184,0,0.020515124119297,0 +0,0,63,4.83933458006031,0,0.011456451089377,0 +0,0,60,4.63646343243421,0,0.016359575829623,0 +0,0,52,4.5107868324717,0,0.040343477382223,0 +1,0,70,4.99432881685211,0,0.005451479975433,0 +1,0,61,4.80441362838161,0,0.014329671041014,0 +0,0,60,4.56459192923209,0,0.016512980600065,0 +0,0,47,4.67239720452866,0,0.074940227853018,1 +0,0,51,4.88647634557645,0,0.04546848690109,0 +0,1,59,5.12211152377086,0,0.017014363568236,0 +1,1,71,4.787652309155,1,0.15041401452868,2 +1,0,55,4.94624016365161,0,0.027754154454908,0 +0,1,70,5.53589848907678,0,0.004568999214292,0 +0,0,55,4.85149282825712,0,0.027909791942547,0 +0,0,42,4.66031499363225,0,0.144393593523312,2 +0,0,46,4.70134454907295,0,0.08560934054103,2 +0,0,58,5.1243006147191,0,0.019018575382223,0 +0,0,45,5.2805352460287,0,0.104553092322236,2 +1,0,64,4.84909983363003,0,0.010361597881676,0 +1,1,57,4.84677731782361,0,0.022091293442816,0 +0,0,41,5.04854610167295,0,0.177410025563909,2 +0,0,51,5.14944232575035,0,0.045392702090006,0 +0,0,58,5.12478262975553,0,0.018969998146554,0 +0,0,48,4.79973232778089,0,0.066531520319973,1 +1,1,59,5.24307641708729,1,0.359118955083621,3 +0,1,45,4.58507748473879,0,0.096180466993185,2 +0,0,57,4.83879413889982,0,0.022194900475467,0 
+0,0,49,4.75734026941993,0,0.058339083777875,1 +0,0,52,4.85369905905772,0,0.040037558384642,0 +0,1,54,5.14635298948338,0,0.031009063027991,0 +0,0,43,4.6586494758949,0,0.126422528658091,2 +1,0,67,5.36795641650565,0,0.00656454331222,0 +1,0,64,4.9317041107083,0,0.010096486645312,0 +0,0,41,4.60592043319416,0,0.163304841173702,2 +1,0,49,4.78785735477367,0,0.058444668618728,1 +0,1,55,4.78038787400933,0,0.028114607533202,0 +0,0,60,4.84406910815418,0,0.015821646280318,0 +0,1,66,4.72792484884357,0,0.008726279041327,0 +0,0,56,4.86709172283563,0,0.024780462103995,0 +0,0,47,4.7846337469031,0,0.075614687274302,2 +0,0,53,4.94695571453233,0,0.035307877301821,0 +0,0,46,4.64717329368536,0,0.085050218912035,2 +0,1,60,5.078616469897,0,0.015263630932196,0 +0,0,47,4.76215056653331,0,0.075469729078003,2 +0,1,71,5.23065958869197,0,0.004567234714466,0 +1,0,51,4.8572140011843,0,0.045493331706139,0 +0,1,49,4.79826317473799,0,0.05847325996301,1 +0,0,45,5.11792225611623,0,0.102445323826886,2 +1,0,66,5.00945180029183,0,0.008093438300506,0 +0,1,59,4.9715370730563,0,0.017343460622332,0 +1,0,55,4.79409519372862,0,0.028027135799078,0 +0,0,62,5.06582782455516,0,0.012227067370799,0 +1,0,44,4.95569270540189,0,0.115311810937146,2 +0,0,58,4.85571892637786,0,0.019642825598837,0 +0,1,44,4.73522529135665,0,0.111943428411567,2 +1,0,55,4.81541329580589,0,0.027972747903994,0 +0,0,54,4.91204291711137,0,0.031395009478212,0 +0,1,74,4.90573892387377,0,0.003864984596243,0 +0,0,56,4.81811085732476,0,0.024867884467013,0 +0,0,57,4.69443289556538,0,0.022487317991503,0 +0,0,42,4.78853915738447,0,0.147858604193278,2 +1,1,64,5.04122172131584,1,0.248825645124668,3 +1,0,59,4.91432835914503,0,0.017463199941841,0 +1,1,56,4.99006149419455,1,0.454718679291344,3 +0,1,56,4.79540520890743,1,0.459890497548097,3 +0,0,46,4.50524773581426,0,0.08399257709778,2 +0,0,61,4.94092155583513,0,0.013967295192981,0 +1,0,60,5.01677409596535,0,0.015373992456383,0 +0,0,63,4.97681398000846,0,0.011170538275034,0 +0,0,51,4.65525513860773,0,0.045347296334524,0 +0,0,51,4.86684043529268,0,0.045421374559036,0 +0,1,55,5.01690688087827,0,0.027579012069358,0 +0,0,59,4.74477759787654,0,0.017969367385449,0 +0,0,41,4.98204623450078,0,0.175239225433702,2 +0,1,67,4.88186401826795,0,0.007572118702391,0 +0,0,51,4.6310428487537,0,0.045479826756746,0 +1,0,49,4.98162252968741,0,0.058901617002364,1 +1,0,60,4.8862253610763,0,0.015663635925653,0 +1,0,50,5.03889404140814,0,0.051783622030796,1 +0,0,43,4.91571303306557,0,0.131532457370381,2 +0,1,43,4.84156802463779,0,0.130013834802331,2 +0,0,45,4.68103946573205,0,0.097311296037325,2 +0,1,68,4.67870923546258,1,0.195730876007375,2 +0,0,44,4.62601741324164,0,0.110185501451458,2 +1,1,61,5.1208177317294,1,0.312506251169415,3 +1,1,44,5.27008905563732,0,0.120306613255172,2 +0,1,69,5.05939292630635,0,0.005875264598761,0 +0,0,64,5.06102016200266,0,0.009858853377843,0 +0,0,44,4.72895783421389,0,0.111726766105621,2 +0,1,44,4.84402703455787,0,0.113523425026553,2 +1,0,64,4.90619401831534,0,0.010218349890113,0 +0,0,49,5.08471932572352,0,0.059075560557485,1 +0,0,48,5.08377252949431,0,0.06771577269317,1 +1,0,62,5.0261408261628,0,0.012292608707234,0 +0,0,57,4.68134300745745,0,0.022604889315411,0 +0,0,47,5.00156597850256,0,0.077196894911615,2 +1,0,61,4.91364625852151,0,0.014066779560206,0 +0,0,53,5.03612604003998,0,0.035307860865905,0 +0,0,46,4.52541114976876,0,0.08413859213813,2 +0,0,40,4.4961873124593,0,0.182065084921739,2 +0,0,53,4.87856986534863,0,0.035522957295304,0 +1,1,71,5.04504149595694,1,0.139429040439199,2 +0,0,61,4.79650988997593,0,0.014307665095763,0 
+0,0,59,4.81453034628892,0,0.017703164499367,0 +0,0,49,4.79517463437292,0,0.058438804338492,1 +1,1,66,4.93914918984985,1,0.21564084702961,3 +0,0,60,4.72843812135266,0,0.016109976884524,0 +0,0,58,4.89069273120715,0,0.019634704454553,0 +0,0,55,4.9204199816524,0,0.027716262861116,0 +1,1,53,4.82141384739245,1,0.550717146550525,3 +0,1,50,4.91133033140366,0,0.051611143240807,1 +0,1,49,5.03922432367594,1,0.675660202579738,3 +0,1,71,4.74177032888072,0,0.005396829133624,0 +0,0,67,5.0490622393471,0,0.007220726794615,0 +0,1,40,4.95716840917244,0,0.200216247803038,3 +0,0,71,4.80128132516958,0,0.00529135132289,0 +1,0,62,5.07282042175499,0,0.012181765314245,0 +0,0,46,4.94626866871994,0,0.087698051962094,2 +0,1,71,4.63700939468039,1,0.157188094117098,2 +1,0,59,4.93747441127593,0,0.017395097810111,0 +0,1,42,4.79402945606541,0,0.147888457738256,2 +0,0,45,4.64010967075264,0,0.09687843557533,2 +0,0,59,4.78627654234559,0,0.017816205501168,0 +1,1,68,5.10063585729635,0,0.006430266327367,0 +0,1,53,4.73529816939356,0,0.03564529599296,0 +0,0,51,5.00534279308894,0,0.0455081509086,0 +0,0,53,4.79413869620849,0,0.03554397095793,0 +1,0,54,4.84428754012226,0,0.031480700392303,0 +0,0,53,4.97971704448596,0,0.035274715491584,0 +0,1,46,4.79583329357889,0,0.086450339734504,2 +0,0,53,4.51383678637976,0,0.035910136978358,0 +1,0,56,5.05511122918392,0,0.024369768419969,0 +0,0,42,4.98943184608832,0,0.152754381051382,2 +1,0,55,4.8526232724921,0,0.027939623079997,0 +0,0,62,5.06466597528939,0,0.012209647462478,0 +0,0,50,4.93318416147983,0,0.051635592502702,1 +0,1,47,5.23441451038415,0,0.078341941069159,2 +0,0,60,4.76627087165586,0,0.016071134763278,0 +0,1,40,5.01899021381183,0,0.202601549484388,3 +0,0,53,4.92913691431299,0,0.035349815730983,0 +0,0,59,4.88294896322052,0,0.017556328379641,0 +1,1,69,4.85665759744551,1,0.173070864886797,2 +1,0,61,4.72780174494189,0,0.014514220338091,0 +0,0,59,4.85825293924014,0,0.017609146607016,0 +0,0,46,4.86655389803081,0,0.086967071037491,2 +0,1,69,5.21946956159294,0,0.005609807935242,0 +0,1,49,4.50893493403218,0,0.057757474127316,1 +0,1,43,5.31634559216265,0,0.140339676266809,2 +0,0,49,4.89776516661823,0,0.058895020963273,1 +0,0,59,4.90437912860527,0,0.017509190597767,0 +1,0,49,4.95802154769112,0,0.058807754771319,1 +0,0,52,4.98112254438763,0,0.040094351436242,0 +0,0,41,5.11705980077537,0,0.179731149065862,2 +0,0,46,5.2876172531008,0,0.09077818878671,2 +0,0,60,4.92208700308168,0,0.015670191070548,0 +1,0,61,4.86922586357512,0,0.014124526236501,0 +0,1,66,4.92941978283227,0,0.008251868137803,0 +0,0,44,4.91021271727144,0,0.114500073471835,2 +1,0,58,5.12308105827839,0,0.019048499957979,0 +1,1,61,4.92834685218046,0,0.013993975401202,0 +0,0,56,4.495206356941,0,0.025614968700305,0 +0,1,62,4.83085407565014,0,0.012793723422141,0 +0,1,65,4.69069701848752,0,0.009696038011275,0 +0,0,43,4.64464938762634,0,0.126139476346634,2 +0,0,58,4.53196731132036,0,0.020559896796987,0 +0,0,58,4.8060886642505,0,0.019912883081076,0 +1,0,49,4.81618649161194,0,0.058505406628556,1 +1,0,61,4.95318425978584,0,0.013951456470886,0 +0,0,48,4.81587838202204,0,0.066537225534359,1 +0,0,69,4.69948074846942,0,0.006590592884769,0 +0,1,60,4.75717098392023,0,0.016107549018943,0 +0,1,63,4.98421929091047,0,0.01111999454615,0 +0,0,55,5.06608653120434,0,0.027448487262219,0 +1,0,54,4.99276235804446,0,0.0311654081185,0 +0,0,50,4.958777624771,0,0.051636241975431,1 diff --git a/zepid/graphics/__init__.py b/zepid/graphics/__init__.py index 660b692..85a922a 100644 --- a/zepid/graphics/__init__.py +++ b/zepid/graphics/__init__.py @@ -4,4 +4,5 @@ 
                       spaghetti_plot,
                       roc,
                       dynamic_risk_plot,
-                      labbe_plot)
+                      labbe_plot,
+                      zipper_plot)
diff --git a/zepid/graphics/graphics.py b/zepid/graphics/graphics.py
index c85e4e5..29efddd 100644
--- a/zepid/graphics/graphics.py
+++ b/zepid/graphics/graphics.py
@@ -834,3 +834,64 @@ def labbe_plot(r1=None, r0=None, scale='both', additive_tuner=12, multiplicative
         raise ValueError("`scale` must be either 'additive', 'multplicative', or 'both'")
 
     return ax
+
+
+def zipper_plot(truth, lcl, ucl, colors=('blue', 'red')):
+    """Zipper plots are a way to present simulation data, particularly confidence intervals and their widths. They
+    are also useful for showing the confidence interval coverage of the true parameter.
+
+    Parameters
+    ----------
+    truth : float
+        The true value against which the confidence interval coverage is assessed
+    lcl : list, array, Series, container
+        Container of lower confidence limits
+    ucl : list, array, Series, container
+        Container of upper confidence limits
+    colors : set, list, container
+        List of colors for confidence intervals. The first color is used to designate confidence intervals that cover
+        the true value, and the second designates confidence intervals that do not
+
+    Returns
+    -------
+    matplotlib axes
+
+    Examples
+    --------
+    Setting up environment
+
+    >>> import matplotlib.pyplot as plt
+    >>> from zepid.graphics import zipper_plot
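+
+    As a sketch, the confidence limits might come from simulated estimates and standard errors (`estimates` and
+    `std_errs` here are illustrative NumPy arrays collected across simulation iterations, not objects from zepid)
+
+    >>> lower_limits = estimates - 1.96 * std_errs
+    >>> upper_limits = estimates + 1.96 * std_errs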
+
+    Creating the zipper plot
+
+    >>> zipper_plot(truth=0.1, lcl=lower_limits, ucl=upper_limits)
+    >>> plt.show()
+    """
+    # Formatting the input data
+    dat = pd.DataFrame()
+    dat['_lower_'] = lcl
+    dat['_upper_'] = ucl
+    if not np.all(dat['_lower_'] < dat['_upper_']):
+        raise ValueError("It looks like some LCL are bigger than the UCL")
+    dat['_cover_'] = np.where((dat['_lower_'] < truth) & (truth < dat['_upper_']), colors[0], colors[1])
+    coverage = np.where((dat['_lower_'] < truth) & (truth < dat['_upper_']), 1, 0)
+    dat = dat.sort_values(by=['_upper_', '_lower_']).reset_index(drop=True)
+
+    # Formatting so it looks like a "zipper"
+    spdat = np.split(dat, [int(dat.shape[0] / 2)], axis=0)
+    spdat[0] = spdat[0].sort_values(by=['_upper_', '_lower_'], ascending=False)
+    spdat[0]['_order_'] = list(range(0, dat.shape[0]-1, 2))
+    spdat[1]['_order_'] = list(range(1, dat.shape[0]+1, 2))
+    dat = pd.concat([spdat[0], spdat[1]]).sort_values(by='_order_')
+
+    # Creating plot
+    ax = plt.gca()
+    ax.hlines(dat['_order_'], dat['_lower_'], dat['_upper_'], colors=dat['_cover_'])
+    ax.vlines(truth, 0, dat.shape[0], colors='k')
+    ax.set_ylim([0, dat.shape[0]])
+    ax.set_yticks([])
+    ax.set_xlabel("Confidence Intervals")
+    ax.set_title("Estimated coverage: "+str(np.round(100*np.mean(coverage), 1))+"%")
+    return ax
diff --git a/zepid/superlearner/__init__.py b/zepid/superlearner/__init__.py
new file mode 100644
index 0000000..648493e
--- /dev/null
+++ b/zepid/superlearner/__init__.py
@@ -0,0 +1,2 @@
+from .estimators import EmpiricalMeanSL, GLMSL, StepwiseSL
+from .stackers import SuperLearner
diff --git a/zepid/superlearner/estimators.py b/zepid/superlearner/estimators.py
new file mode 100644
index 0000000..e7abfbd
--- /dev/null
+++ b/zepid/superlearner/estimators.py
@@ -0,0 +1,442 @@
+import warnings
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+import statsmodels.formula.api as smf
+
+from itertools import combinations
+from sklearn.base import BaseEstimator
+
+
+class EmpiricalMeanSL(BaseEstimator):
+    """Empirical mean estimator in the format of SciKit-Learn. This estimator is for use with the SuperLearner
+    functionality.
+
+    Note
+    ----
+    Generally, I do not recommend its use outside of SuperLearner. Essentially the empirical mean is a
+    baseline estimator with which to compare other estimators included in SuperLearner.
+
+    Parameters
+    ----------
+    None
+
+    Examples
+    --------
+    Setup the environment and data set
+
+    >>> import numpy as np
+    >>> from zepid import load_sample_data
+    >>> from zepid.superlearner import EmpiricalMeanSL
+    >>> df = load_sample_data(False).dropna()
+    >>> X = np.asarray(df[['art', 'male', 'age0']])
+    >>> y = np.asarray(df['dead'])
+
+    EmpiricalMean estimation
+
+    >>> emp_mean = EmpiricalMeanSL()
+    >>> emp_mean.fit(X=X, y=y)
+
+    EmpiricalMean prediction
+
+    >>> emp_mean.predict(X=X)
+    """
+
+    def __init__(self):
+        self.empirical_mean = np.nan
+
+    def fit(self, X, y):
+        """Estimate the empirical mean based on X and y. While X is an input, it has no effect on the estimated
+        empirical mean (since the empirical mean is the average of the full y).
+
+        Parameters
+        ----------
+        X : numpy.array
+            Training data
+        y : numpy.array
+            Target values
+
+        Returns
+        -------
+        self
+        """
+        # Error Checking
+        if X.shape[0] != y.shape[0]:
+            raise ValueError("X and y must have the same number of observations (rows).")
+        if np.any(np.isnan(X)) or np.any(np.isnan(y)):
+            raise ValueError("It looks like there are missing values in X or y. EmpiricalMeanSL does not support "
+                             "missing data.")
+
+        self.empirical_mean = np.mean(y)
+        return self
+
+    def predict(self, X):
+        """Predict the value of y given a set of X covariates. Because X has no effect on the empirical mean, the
+        mean from the data used in the fit() step is returned for all observations.
+
+        Parameters
+        ----------
+        X : numpy.array
+            NumPy array of covariates
+
+        Returns
+        -------
+        NumPy array of predicted empirical means of the dimension X.shape[0]
+        """
+        return np.array([self.empirical_mean] * X.shape[0])
+
+
+class GLMSL:
+    """Generalized Linear Model for use with SuperLearner. This class is a wrapper for the statsmodels `GLM` class,
+    since the GLM implementation in statsmodels is not natively compatible with the sklearn / SuperLearner API.
+    Compatible with all options available in the statsmodels families.
+
+    Parameters
+    ----------
+    family: statsmodels.families.family
+        Family to use for the model. All statsmodels supported families are also supported
+    verbose : bool, optional
+        Whether to print the fitted model summary to the console. Default is False
+
+    Examples
+    --------
+    Setup the environment and data set
+
+    >>> import numpy as np
+    >>> import statsmodels.api as sm
+    >>> from zepid import load_sample_data
+    >>> from zepid.superlearner import GLMSL
+    >>> df = load_sample_data(False).dropna()
+    >>> X = np.asarray(df[['art', 'male', 'age0']])
+    >>> y = np.asarray(df['dead'])
+
+    GLMSL example (logit model)
+
+    >>> f = sm.families.family.Binomial()
+    >>> glm = GLMSL(family=f)
+    >>> glm.fit(X, y)
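+
+    GLMSL prediction (predictions are on the scale of the family's mean, e.g. predicted probabilities for the
+    Binomial family)
+
+    >>> glm.predict(X)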
+    """
+    def __init__(self, family, verbose=False):
+        self._family_ = family
+        self._verbose_ = verbose
+
+        # Storage items
+        self.model = None
+
+    def fit(self, X, y):
+        """Estimate the GLM
+
+        Parameters
+        ----------
+        X : numpy.array
+            Training data
+        y : numpy.array
+            Target values
+
+        Returns
+        -------
+        self
+        """
+        # Error Checking
+        if X.shape[0] != y.shape[0]:
+            raise ValueError("X and y must have the same number of observations (rows).")
+        if np.any(np.isnan(X)) or np.any(np.isnan(y)):
+            raise ValueError("It looks like there are missing values in X or y. GLMSL does not support "
+                             "missing data.")
+
+        # Final results
+        self.model = sm.GLM(y, np.hstack([np.zeros([X.shape[0], 1]) + 1, X]),  # Adds intercept into model
+                            family=self._family_).fit()
+        if self._verbose_:
+            print(self.model.summary())
+
+        return self
+
+    def predict(self, X):
+        """Predict using the fitted GLM.
+
+        Parameters
+        ----------
+        X : numpy.array
+            Samples following the same pattern as the X array input into the fit() statement.
+
+        Returns
+        -------
+        Returns predicted values from the GLM
+        """
+        # Adding intercept
+        Xd = np.hstack([np.zeros([X.shape[0], 1]) + 1, X])
+
+        # Generating predictions to return
+        return self.model.predict(Xd)
+
+    def get_params(self, deep=True):
+        """For sklearn.base.clone() compatibility"""
+        return {"family": self._family_,
+                "verbose": self._verbose_}
+
+    def set_params(self, **parameters):
+        """For sklearn.base.clone() compatibility"""
+        for parameter, value in parameters.items():
+            setattr(self, parameter, value)
+        return self
+
+
+class StepwiseSL:
+    """Step-wise selection of Generalized Linear Models for use with SuperLearner. Briefly, at each step the
+    candidate models are compared by AIC, with the best one selected. The selection procedure continues until there
+    are no further improvements in the AIC. The optimal model is the one with the lowest AIC found during the
+    step-wise selection procedure.
+
+    Parameters
+    ----------
+    family: statsmodels.families.family
+        Family to use for the model. All statsmodels supported families are also supported
+    selection : str, optional
+        Method of step-wise selection to use. Options are `'forward'` and `'backward'`. Default is backward, which
+        starts from the full model inclusion and removes terms one at a time.
+    order_interaction : int, optional
+        Order of interactions to explore. For example, `order_interaction=0` explores only the main effects.
+    verbose : bool, optional
+        Whether to print the selection procedure and the optimal model summary to the console. Default is False
+
+    Examples
+    --------
+    Setup the environment and data set
+
+    >>> import numpy as np
+    >>> import statsmodels.api as sm
+    >>> from zepid import load_sample_data
+    >>> from zepid.superlearner import StepwiseSL
+    >>> df = load_sample_data(False).dropna()
+    >>> X = np.asarray(df[['art', 'male', 'age0']])
+    >>> y = np.asarray(df['dead'])
+
+    StepwiseSL estimation with no interactions
+
+    >>> f = sm.families.family.Binomial()
+    >>> step_sl = StepwiseSL(family=f, selection="backward", order_interaction=0)
+    >>> step_sl.fit(X, y)
+
+    StepwiseSL prediction
+
+    >>> step_sl.predict(X=X)
+
+    StepwiseSL with all first-order interactions
+
+    >>> step_sl = StepwiseSL(family=f, selection="backward", order_interaction=1)
+    >>> step_sl.fit(X, y)
+
+    StepwiseSL with forward selection and all second-order interactions
+
+    >>> step_sl = StepwiseSL(family=f, selection="forward", order_interaction=2)
+    >>> step_sl.fit(X, y)
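+
+    As a sketch of the expansion (column labels are illustrative), `order_interaction=1` with the three columns
+    above considers the main effects plus all pairwise products before selection
+
+    >>> # [art, male, age0] -> [art, male, age0, art*male, art*age0, male*age0]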
+    """
+    def __init__(self, family, selection="backward", order_interaction=0, verbose=False):
+        # Error Checking
+        if selection.lower() not in ["forward", "backward"]:
+            raise ValueError("`selection` must be one of the following: backward, forward")
+        if order_interaction < 0 or type(order_interaction) is not int:
+            raise ValueError("order_interaction must be a non-negative integer")
+
+        self._family_ = family
+        self._verbose_ = verbose
+        self._selection_ = selection
+        self._order_ = order_interaction
+
+        # Storage items
+        self.model_optim = None
+        self.cols_optim = None
+
+    def fit(self, X, y):
+        """Estimate the optimal GLM
+
+        Parameters
+        ----------
+        X : numpy.array
+            Training data
+        y : numpy.array
+            Target values
+
+        Returns
+        -------
+        self
+        """
+        # Error Checking
+        if X.shape[0] != y.shape[0]:
+            raise ValueError("X and y must have the same number of observations (rows).")
+        if np.any(np.isnan(X)) or np.any(np.isnan(y)):
+            raise ValueError("It looks like there are missing values in X or y. StepwiseSL does not support "
+                             "missing data.")
+        if X.shape[1] < self._order_:  # Checking X shape with order_interaction
+            warnings.warn("order_interaction is greater than the number of columns. This is not possible to assess, "
+                          "so order_interaction="+str(int(X.shape[1]))+" will be assessed instead.", UserWarning)
+            self._order_ = X.shape[1]
+
+        # Creating all x-order interaction terms for assessment
+        Xu = self._all_order_interactions_(X, self._order_)
+
+        # Determining method of selection
+        if self._selection_ == "backward":
+            # Estimating full model as starting point
+            full_model = sm.GLM(y, np.hstack([np.zeros([X.shape[0], 1]) + 1, Xu]),  # Adds intercept into model
+                                family=self._family_).fit()
+            if np.isnan(full_model.aic):
+                raise ValueError("Saturated model is having trouble converging. Reduce the number of covariates, the "
+                                 "order_interaction, or use selection=forward instead")
+            best_aic = full_model.aic
+            if self._verbose_:
+                print(full_model.summary())
+                print("Full-Model AIC:", best_aic)
+
+            # Determining best AIC via backwards step-wise selection
+            best_cols = list(range(Xu.shape[1]))
+            best_alt_aic = best_aic
+            best_alt_model = full_model
+            best_alt_cols = best_cols.copy()
+
+            while best_aic >= best_alt_aic and len(best_cols) - 1 >= -1:
+                best_aic = best_alt_aic
+                best_model = best_alt_model
+                best_cols = best_alt_cols
+                if self._verbose_:
+                    print("\nCurrent Optim:", best_cols)
+
+                if len(best_cols) - 1 == -1:  # necessary break for intercept-only to be output correctly
+                    break
+                if self._verbose_:
+                    print("\nValid Combinations...")
+                alt_models = list(combinations(best_cols, len(best_cols) - 1))
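+                # As a sketch: with best_cols of (0, 1, 2), the candidates assessed are (0, 1), (0, 2), and (1, 2)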
+                best_alt_model, best_alt_cols, best_alt_aic = None, None, np.inf
+                for alt in alt_models:
+                    alt_model = sm.GLM(y, np.hstack([np.zeros([X.shape[0], 1]) + 1, Xu[:, alt]]),  # Adds intercept
+                                       family=self._family_).fit()
+                    if self._verbose_:
+                        print("Columns:", alt)
+                        print("AIC: ", alt_model.aic)
+                    if alt_model.aic < best_alt_aic:
+                        best_alt_model = alt_model
+                        best_alt_aic = alt_model.aic
+                        best_alt_cols = alt
+
+        # Determining method of selection
+        if self._selection_ == "forward":
+            # Estimating null model as starting point
+            null_model = sm.GLM(y, np.zeros([X.shape[0], 1]) + 1,  # intercept-only model
+                                family=self._family_).fit()
+            best_aic = null_model.aic
+            if self._verbose_:
+                print(null_model.summary())
+                print("Null-Model AIC:", best_aic)
+
+            # Determining best AIC via forwards step-wise selection
+            best_cols = ()
+            best_alt_aic = best_aic
+            best_alt_model = null_model
+            best_alt_cols = ()
+            vars_to_select = list(range(Xu.shape[1]))
+            best_alt_var = None
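+            # Forward selection grows the model one column at a time, keeping the addition that most lowers the AIC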
+            while best_aic >= best_alt_aic and len(best_cols) <= Xu.shape[1]:
+                best_aic = best_alt_aic
+                best_model = best_alt_model
+                best_cols = best_alt_cols
+
+                # Removing the previous best variable from those to add
+                vars_to_select = list(vars_to_select)
+                try:
+                    vars_to_select.remove(best_alt_var)
+                except ValueError:
+                    pass
+                vars_to_select = tuple(vars_to_select)
+
+                if self._verbose_:
+                    print("\nCurrent Optim:", best_cols)
+
+                if len(best_cols) == Xu.shape[1]:  # necessary break for saturated to be output correctly
+                    break
+                if self._verbose_:
+                    print("\nValid Combinations...")
+                best_alt_model, best_alt_cols, best_alt_aic = None, None, np.inf
+                for var in vars_to_select:
+                    alt = best_cols + (var, )
+                    alt_model = sm.GLM(y, np.hstack([np.zeros([X.shape[0], 1]) + 1, Xu[:, alt]]),
+                                       family=self._family_).fit()
+                    if self._verbose_:
+                        print("Columns:", alt)
+                        print("AIC: ", alt_model.aic)
+                    if alt_model.aic < best_alt_aic and not np.isnan(alt_model.aic):
+                        best_alt_model = alt_model
+                        best_alt_aic = alt_model.aic
+                        best_alt_cols = alt
+                        best_alt_var = var
+
+        # Final results
+        self.model_optim = best_model
+        self.cols_optim = best_cols
+        if self._verbose_:
+            print(self.model_optim.summary())
+
+        return self
+
+    def predict(self, X):
+        """Predict using the optimal GLM, where optimal is defined as the lowest AIC for the step-wise selection
+        procedure used.
+
+        Parameters
+        ----------
+        X : numpy.array
+            Samples following the same pattern as the X array input into the fit() statement. All order_interaction
+            terms are created in this step for the input X (i.e. the user does not need to create any of the x-order
+            interaction terms)
+
+        Returns
+        -------
+        Returns predicted values from the optimal GLM
+        """
+        # Creating all x-order interaction terms for assessment
+        Xu = self._all_order_interactions_(X, self._order_)
+
+        # Adding intercept
+        Xd = np.hstack([np.zeros([X.shape[0], 1]) + 1, Xu[:, self.cols_optim]])
+
+        # Generating predictions to return
+        return self.model_optim.predict(Xd)
+
+    @staticmethod
+    def _all_order_interactions_(X, x_order):
+        """Background function that generates all interaction terms of X up to the specified order.
+
+        Parameters
+        ----------
+        X : numpy.array
+            Input data array
+        x_order : int
+            Order of interactions to generate. x_order=1 generates all first order interactions
+
+        Returns
+        -------
+        Array containing the original X and all corresponding x-order interactions
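+
+        Note
+        ----
+        As a sketch, for an X with columns [a, b, c], x_order=1 returns the columns [a, b, c, a*b, a*c, b*c]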
+        """
+        Xu = X.copy()
+        assessed_order = 1
+        while x_order >= assessed_order:
+            assessed_order += 1
+
+            # Generating unique combinations of columns
+            combos_order = list(combinations(range(X.shape[1]), assessed_order))
+
+            # Creating interaction term for all unique combinations
+            for co in combos_order:
+                interaction_term = np.prod(X[:, co], axis=1).reshape((X.shape[0], 1))
+                Xu = np.hstack([Xu, interaction_term])
+
+        return Xu
+
+    def get_params(self, deep=True):
+        """For sklearn.base.clone() compatibility"""
+        return {"family": self._family_,
+                "selection": self._selection_,
+                "order_interaction": self._order_,
+                "verbose": self._verbose_}
+
+    def set_params(self, **parameters):
+        """For sklearn.base.clone() compatibility"""
+        for parameter, value in parameters.items():
+            setattr(self, parameter, value)
+        return self
diff --git a/zepid/superlearner/stackers.py b/zepid/superlearner/stackers.py
new file mode 100644
index 0000000..ac378bc
--- /dev/null
+++ b/zepid/superlearner/stackers.py
@@ -0,0 +1,362 @@
+import copy
+import warnings
+import numpy as np
+import pandas as pd
+import sklearn.model_selection as ms
+
+from scipy.optimize import nnls
+from sklearn import clone
+
+from zepid.calc import logit, inverse_logit, probability_bounds
+
+
+class SuperLearnerError(Exception):
+    """Class for errors in the SuperLearner procedures. Nothing special besides directing the user to issues that
+    are specific to the SuperLearner side
+    """
+    pass
+
+
+class SuperLearner:
+    r"""`SuperLearner` is an implementation of the super learner algorithm, which is a generalized stacking
+    algorithm. Super learner is an approach to combine multiple predictive functions into a singular predictive
+    function that has performance at least as good as the best candidate estimator included (asymptotically).
+    Additionally, it should be noted that super learner converges at the rate with which the best candidate
+    estimator converges.
+
+    Briefly, super learner takes an input of candidate estimators for a function. Each of the estimators is run
+    through a train-test cross-validation algorithm. From the candidate estimators, either the best overall
+    performing candidate (discrete super learner) or a weighted combination of the algorithms is used as the updated
+    predictive function.
+
+    Note
+    ----
+    `SuperLearner` does not accept missing data. All missing data decisions have to occur prior to trying to use the
+    `SuperLearner` procedure.
+
+
+    `SuperLearner` accepts estimators that are of the SciKit-Learn format. Specifically, the candidate estimators
+    must follow the `estimator.fit(X, y)` and `estimator.predict(X)` format. Performance has currently been checked
+    for `sklearn`, `pygam`, and the estimators included in `zepid.superlearner`. Please consider opening an issue on
+    GitHub if you find Python libraries that are not supported (but follow the SciKit-Learn style).
+
+    Note
+    ----
+    `SuperLearner(discrete=True)` returns predictions from the candidate estimator with the greatest coefficient. In
+    the case of a tie, the first candidate estimator with the greatest coefficient is used (as per `numpy.argmax`
+    behavior).
+
+
+    To compare performances easily, `SuperLearner` provides both Cross-Validated Error and the Relative Efficiency.
+    The Cross-Validated Error calculation depends on the chosen loss function. For L2, the loss function is
+
+    .. math::
+
+        \frac{1}{n} \sum_i (Y_i - \widehat{Y}_i)^2
+
+    For the negative-log-likelihood loss function,
+
+    .. math::
+
+        -\frac{1}{n} \sum_i \left[ Y_i \ln(\widehat{Y}_i) + (1-Y_i) \ln(1 - \widehat{Y}_i) \right]
+
+    Relative efficiency is the Cross-Validated Error for the candidate estimator divided by the Cross-Validated
+    Error for the chosen super learner.
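+
+    As a sketch in NumPy (mirroring the internal error-term calculation; `y` and `y_pred` are illustrative arrays)
+
+    >>> # L2:      np.mean((y - y_pred)**2)
+    >>> # NLogLik: -np.mean(y*np.log(y_pred) + (1-y)*np.log(1-y_pred))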
+
+    Parameters
+    ----------
+    estimators : list, array
+        Candidate estimators. Must follow the sklearn style and not be fit yet
+    estimator_labels : list, array
+        Labels for the candidate estimators being included
+    folds : int, optional
+        Number of folds to use during the cross-validation procedure. It is recommended to be between 10-20. The
+        default is 10-fold cross-validation.
+    loss_function : str, optional
+        Loss function to use. Options include: L2, NLogLik. L2 should be used for continuous outcomes and NLogLik
+        for binary outcomes
+    solver : str, optional
+        Optimization algorithm to use to determine the super learner weights. Currently only Non-Negative Least
+        Squares is available.
+    bounds : float, collection, optional
+        Bounding to use for probabilities. The bounding prevents values of exactly 0 or 1, which would break the
+        loss function evaluation. Default is 1e-6.
+    discrete : bool, optional
+        Whether to use only the estimator with the greatest weight (discrete super learner). Default is False, which
+        uses the super learner including all estimators
+    verbose : bool, optional
+        Whether to print progress to the console as super learner is being fit. Default is False.
+
+    Examples
+    --------
+    Setup the environment and data set
+
+    >>> import numpy as np
+    >>> import statsmodels.api as sm
+    >>> from sklearn.linear_model import LinearRegression, LogisticRegression
+    >>> from zepid import load_sample_data
+    >>> from zepid.superlearner import EmpiricalMeanSL, StepwiseSL, SuperLearner
+
+    >>> fb = sm.families.family.Binomial()
+    >>> fc = sm.families.family.Gaussian()
+    >>> df = load_sample_data(False).dropna()
+    >>> X = np.asarray(df[['art', 'male', 'age0']])
+    >>> y = np.asarray(df['dead'])
+
+    SuperLearner for binary outcomes
+
+    >>> # Setting up estimators
+    >>> emp = EmpiricalMeanSL()
+    >>> log = LogisticRegression()
+    >>> step = StepwiseSL(family=fb, selection="backward", order_interaction=1)
+    >>> sl = SuperLearner(estimators=[emp, log, step], estimator_labels=["Mean", "Log", "Step"], loss_function='nloglik')
+    >>> fsl = sl.fit(X, y)
+    >>> fsl.summary()  # Summary of Cross-Validated Errors
+    >>> fsl.predict(X)  # Generating predicted values from super learner
+
+    SuperLearner for continuous outcomes
+
+    >>> emp = EmpiricalMeanSL()
+    >>> lin = LinearRegression()
+    >>> step = StepwiseSL(family=fc, selection="backward", order_interaction=1)
+    >>> sl = SuperLearner(estimators=[emp, lin, step], estimator_labels=["Mean", "Lin", "Step"], loss_function='L2')
+    >>> fsl = sl.fit(X, y)
+    >>> fsl.summary()  # Summary of Cross-Validated Errors
+    >>> fsl.predict(X)  # Generating predicted values from super learner
+
+    Discrete Super Learner
+
+    >>> sl = SuperLearner([emp, lin, step], ["Mean", "Lin", "Step"], loss_function='L2', discrete=True)
+    >>> sl.fit(X, y)
+
+    References
+    ----------
+    Van der Laan MJ, Polley EC, Hubbard AE. (2007). Super learner. Statistical Applications in Genetics and
+    Molecular Biology, 6(1).
+
+    Rose S. (2013). Mortality risk score prediction in an elderly population using machine learning. American
+    Journal of Epidemiology, 177(5), 443-452.
+    """
+    def __init__(self, estimators, estimator_labels, folds=10, loss_function="L2", solver="nnls",
+                 bounds=1e-6, discrete=False, verbose=False):
+        # TODO the R SuperLearner library supports alt-NNLS, LS, and nnloglik optimization routines. Find in Python
+        # Checking for errors
+        if len(estimators) != len(estimator_labels):
+            raise ValueError("estimators and estimator_labels must be of the same length")
+        if solver.lower() not in ["nnls"]:
+            raise ValueError("The solver " + str(solver) + " is not currently available. Please select one of the "
+                             "following: NNLS")
+        if loss_function.lower() not in ["l2", "nloglik"]:
+            raise ValueError("The loss function " + str(loss_function) + " is not currently available. Please select "
+                             "one of the following: L2, NLogLik")
+
+        # Parameters
+        self.estimators = estimators
+        self.labels = estimator_labels
+        self.k = folds
+        self.loss_function = loss_function.lower()
+        self.discrete = discrete
+        self.solver = solver.lower()
+        self._verbose_ = verbose
+        self._bounds_ = bounds
+
+        # Storage items for results
+        self.est_performance = pd.DataFrame()
+        self.est_performance['estimator'] = list(estimator_labels)
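+        # est_performance accumulates the cross-validated error and the weight (coefficient) for each candidate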
+        self.coefficients = None
+        self.fit_estimators = []
+        self._include_estimator_ = []
+
+    def fit(self, X, y):
+        """Fit SuperLearner given the variables `X` to predict `y`. These variables are directly passed to the
+        candidate estimators. If there is any pre-processing to do outside of the estimators, please do so before
+        passing to fit.
+
+        Parameters
+        ----------
+        X : numpy.array
+            Covariates to predict the target values
+        y : numpy.array
+            Target values to predict
+
+        Returns
+        -------
+        self
+        """
+        # Some checks
+        X = np.asarray(X)  # this line saves a lot of headaches
+        y = np.asarray(y)  # this line saves a lot of headaches
+        if X.shape[0] != y.shape[0]:
+            raise ValueError("X and y must have the same number of observations.")
+        if np.any(np.isnan(X)) or np.any(np.isnan(y)):
+            raise ValueError("It looks like there are missing values in X or y. SuperLearner does not support "
+                             "missing data.")
+        if np.all(np.in1d(y, np.array([0, 1]))) and (self.loss_function == "l2"):
+            # Allows for the algorithm to proceed (but throws warning to the user about the chosen loss function)
+            warnings.warn("It looks like your `y` is binary, but the `L2` loss function was selected. The `L2` loss "
+                          "function is intended for continuous outcomes", UserWarning)
+
+        # Step 1) input data and algorithms
+        n_obs = X.shape[0]  # number of observations
+        n_est = len(self.estimators)  # number of candidate estimators
+        cv_pred = np.full([n_obs, n_est], np.nan)  # NaN for blank results
+
+        # Step 2) Cross-Validation Splits
+        if self._verbose_:
+            print("Starting cross-validation procedure...")
+        current_fold = 0
+        for train, test in ms.KFold(self.k, shuffle=False).split(range(n_obs)):
+            current_fold += 1
+
+            # Step 3) Train-test each algorithm within each fold (each held-out test fold is 1/folds of the data)
+            X_train, X_test = X[train], X[test]
+            y_train, y_test = y[train], y[test]
+            for est_id in range(n_est):
+                if self._verbose_:
+                    print("...fitting "+str(self.labels[est_id])+" fold-"+str(current_fold))
+                cv_est = clone(self.estimators[est_id])
+                cv_est.fit(X_train, y_train)
+
+                # Step 4) Filling in cross-validated Y predictions
+                cv_pred[test, est_id] = self._predict_(estimator=cv_est, X=X_test)
+
+        # Step 5) Calculating Cross-Validated Error for each candidate
+        self.est_performance['cv_error'] = np.nan
+        for est_id in range(n_est):
+            self.est_performance.loc[est_id, "cv_error"] = self._error_term_(y, cv_pred[:, est_id])
+
+        # Step 6) Determine coefficients
+        if self.solver == "nnls":
+            coefs, _ = nnls(cv_pred, y)
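+            # NNLS regresses the observed y on the cross-validated predictions, constraining each weight to be
+            # non-negative; the weights are then normalized below so that they sum to 1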
+        else:
+            raise ValueError(str(self.solver) + " is not currently available")
+
+        coefs = np.array(coefs)
+        machine_limit = np.finfo(np.double).eps
+        coefs[coefs < np.sqrt(machine_limit)] = 0  # dropping coefs below certain precision
+        self.coefficients = coefs / np.sum(coefs)
+        self.est_performance['coefs'] = self.coefficients
+
+        # Step 7) Fit algorithms to full data
+        if self._verbose_:
+            print("Fitting candidate(s) to full data...")
+
+        if self.discrete:  # Step 7.a) discrete super learner
+            discrete_sl_id = np.argmax(self.coefficients)
+            if self._verbose_:
+                print("Discrete super learner is: " + str(self.labels[discrete_sl_id]))
+            for est_id in range(n_est):
+                est = clone(self.estimators[est_id])
+                # Estimating only the Discrete Super Learner to save computation time
+                if est_id == discrete_sl_id:
+                    if self._verbose_:
+                        print("...fitting " + str(self.labels[est_id]))
+                    est.fit(X, y)
+                    self.fit_estimators.append(est)
+                    self.coefficients[est_id] = 1
+                # Skipping over others but retaining the shape of input estimators
+                else:
+                    self.fit_estimators.append(est)
+                    self.coefficients[est_id] = 0
+
+        else:  # Step 7.b) super learner
+            # No prior selection (unlike discrete super learner)
+            for est_id in range(n_est):
+                est = clone(self.estimators[est_id])
+                # Estimating only candidate estimators with coefficient > 0
+                if self.coefficients[est_id] > 0:
+                    if self._verbose_:
+                        print("...fitting " + str(self.labels[est_id]))
+                    est.fit(X, y)
+                    self.fit_estimators.append(est)
+                # Skipping over candidates with coefficient == 0
+                else:
+                    if self._verbose_:
+                        print("...skipping " + str(self.labels[est_id]))
+                    self.fit_estimators.append(est)
+
+        return self
+
+    def predict(self, X):
+        """Generate predictions using the fit SuperLearner.
+
+        Parameters
+        ----------
+        X : numpy.array
+            Covariates to generate predictions of y. Note that X should be in the same format as the X used during
+            the fit() function
+
+        Returns
+        -------
+        numpy.array of predicted values using either discrete super learner or super learner
+        """
+        X = np.asarray(X)  # this line saves a lot of headaches
+        if self.coefficients is None:
+            raise ValueError("fit() must be called before predict()")
+        if np.any(np.isnan(X)):
+            raise ValueError("It looks like there are missing values in X. SuperLearner does not support missing "
+                             "data.")
+
+        n_obs = X.shape[0]
+        n_est = len(self.estimators)
+        cv_pred = np.full([n_obs, n_est], np.nan)  # NaN for blank results
+
+        # Step 8) Predictions from SuperLearner
+        for est_id in range(n_est):
+            if self.coefficients[est_id] > 0:
+                cv_pred[:, est_id] = self._predict_(self.fit_estimators[est_id], X)
+            else:
+                cv_pred[:, est_id] = 0
+
+        # Combining predictions
+        if self.loss_function == "l2":
+            y_pred = np.dot(cv_pred, self.coefficients)
+        if self.loss_function == 'nloglik':
+            cv_pred_bound = probability_bounds(cv_pred, bounds=self._bounds_)
+            logodds = logit(cv_pred_bound)
+            logodds_pred = np.dot(logodds, self.coefficients)
+            y_pred = inverse_logit(logodds_pred)
+
+        return y_pred
+
+    def summary(self):
+        """Prints the summary information for the fit SuperLearner to the console.
+
+        Returns
+        -------
+        None
+        """
+        if self.coefficients is None:
+            raise ValueError("fit() must be called before summary()")
+        print('======================================================================')
+        print("              Super Learner Candidate Estimator Performance          ")
+        print('======================================================================')
+        print(self.est_performance.set_index("estimator"))
+        print('======================================================================')
+
+    def _predict_(self, estimator, X):
+        """Background function to generate predictions based on the designated loss-function
+        """
+        if self.loss_function == 'l2':
+            pred = estimator.predict(X)
+
+        if self.loss_function == 'nloglik':
+            if hasattr(estimator, "predict_proba"):
+                try:  # Allows for use with PyGAM
+                    pred = estimator.predict_proba(X)[:, 1]
+                except IndexError:
+                    pred = estimator.predict_proba(X)
+                if pred.min() < 0 or pred.max() > 1:
+                    raise SuperLearnerError("Probability less than zero or greater than one")
+            else:
+                pred = estimator.predict(X)
+                if pred.min() < 0 or pred.max() > 1:
+                    raise SuperLearnerError("Probability less than zero or greater than one")
+
+        return pred
+
+    def _error_term_(self, y_obs, y_pred):
+        """Calculates the Error term based on the loss function
+        """
+        if self.loss_function == "l2":
+            error = np.sum((y_obs - y_pred) ** 2) / y_obs.shape[0]
+        if self.loss_function == 'nloglik':
+            y_pred_bound = probability_bounds(v=y_pred, bounds=self._bounds_)
+            error = - np.sum(y_obs*np.log(y_pred_bound) + (1-y_obs)*np.log(1-y_pred_bound)) / y_obs.shape[0]
+        return error
diff --git a/zepid/version.py b/zepid/version.py
index 4ca39e7..e4e49b3 100644
--- a/zepid/version.py
+++ b/zepid/version.py
@@ -1 +1 @@
-__version__ = '0.8.2'
+__version__ = '0.9.0'