references.bib

@book{allen1997,
  title = {Understanding regression analysis},
  author = {Allen, Michael Patrick},
  date = {1997},
  publisher = {Plenum Press},
  location = {New York},
  isbn = {978-0-306-45648-0},
  langid = {english},
  pagetotal = {216},
  keywords = {data science,exact sciences,multivariate methods,nested models,probability and statistics,univariate methods},
  file = {G:\Meu Drive\Zotero\files\Allen - 1997 - Understanding regression analysis.pdf}
}

@article{anderson1952,
  title = {Asymptotic theory of certain ``goodness of fit'' criteria based on stochastic processes},
  author = {Anderson, T. W. and Darling, D. A.},
  date = {1952},
  journaltitle = {The Annals of Mathematical Statistics},
  volume = {23},
  number = {2},
  eprint = {2236446},
  eprinttype = {jstor},
  pages = {193--212},
  publisher = {Institute of Mathematical Statistics},
  issn = {0003-4851},
  doi = {10.1214/aoms/1177729437},
  url = {https://www.jstor.org/stable/2236446},
  urldate = {2024-09-28},
  abstract = {The statistical problem treated is that of testing the hypothesis that n independent, identically distributed random variables have a specified continuous distribution function F(x). If Fn(x) is the empirical cumulative distribution function and ψ(t) is some nonnegative weight function (0 ≤ t ≤ 1), we consider $n^{\frac{1}{2}} \sup_{-\infty < x < \infty} \ldots$ [abstract truncated on import; see the JSTOR record for the remainder]},
  langid = {english},
  keywords = {anderson-darling test,exact sciences,normality tests,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Anderson and Darling - 1952 - Asymptotic theory of certain goodness of fit criteria based on stochastic processes.pdf}
}

@article{anderson1954,
  title = {A test of goodness of fit},
  author = {Anderson, T. W. and Darling, D. A.},
  date = {1954-12-01},
  journaltitle = {Journal of the American Statistical Association},
  volume = {49},
  number = {268},
  pages = {765--769},
  publisher = {Taylor \& Francis},
  issn = {0162-1459},
  doi = {10.1080/01621459.1954.10501232},
  url = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1954.10501232},
  urldate = {2024-09-28},
  abstract = {Some (large sample) significance points are tabulated for a distribution-free test of goodness of fit which was introduced earlier by the authors. The test, which uses the actual observations without grouping, is sensitive to discrepancies at the tails of the distribution rather than near the median. An illustration is given, using a numerical example used previously by Birnbaum in illustrating the Kolmogorov test.},
  langid = {english},
  keywords = {anderson-darling test,exact sciences,normality tests,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Anderson and Darling - 1954 - A test of goodness of fit.pdf}
}

@article{anderson1962,
  title = {On the distribution of the two-sample {{Cramer-von Mises}} criterion},
  author = {Anderson, T. W.},
  date = {1962-09},
  journaltitle = {The Annals of Mathematical Statistics},
  volume = {33},
  number = {3},
  pages = {1148--1159},
  publisher = {Institute of Mathematical Statistics},
  issn = {0003-4851, 2168-8990},
  doi = {10.1214/aoms/1177704477},
  url = {https://projecteuclid.org/journals/annals-of-mathematical-statistics/volume-33/issue-3/On-the-Distribution-of-the-Two-Sample-Cramer-von-Mises/10.1214/aoms/1177704477.full},
  urldate = {2024-09-28},
  abstract = {The Cramer-von Mises $\omega^2$ criterion for testing that a sample, $x_1, \cdots, x_N$, has been drawn from a specified continuous distribution $F(x)$ is \begin{equation*}\tag{1}\omega^2 = \int^\infty_{-\infty} \lbrack F_N(x) - F(x)\rbrack^2 dF(x),\end{equation*} where $F_N(x)$ is the empirical distribution function of the sample; that is, $F_N(x) = k/N$ if exactly $k$ observations are less than or equal to $x(k = 0, 1, \cdots, N)$. If there is a second sample, $y_1, \cdots, y_M$, a test of the hypothesis that the two samples come from the same (unspecified) continuous distribution can be based on the analogue of $N\omega^2$, namely \begin{equation*}\tag{2} T = \lbrack NM/(N + M)\rbrack \int^\infty_{-\infty} \lbrack F_N(x) - G_M(x)\rbrack^2 dH_{N+M}(x),\end{equation*} where $G_M(x)$ is the empirical distribution function of the second sample and $H_{N+M}(x)$ is the empirical distribution function of the two samples together [that is, $(N + M)H_{N+M}(x) = NF_N(x) + MG_M(x)\rbrack$.
The limiting distribution of $N\omega^2$ as $N \rightarrow \infty$ has been tabulated [2], and it has been shown ([3], [4a], and [7]) that $T$ has the same limiting distribution as $N \rightarrow \infty, M \rightarrow \infty$, and $N/M \rightarrow \lambda$, where $\lambda$ is any finite positive constant. In this note we consider the distribution of $T$ for small values of $N$ and $M$ and present tables to permit use of the criterion at some conventional significance levels for small values of $N$ and $M$. The limiting distribution seems a surprisingly good approximation to the exact distribution for moderate sample sizes (corresponding to the same feature for $N\omega^2$ [6]). The accuracy of approximation is better than in the case of the two-sample Kolmogorov-Smirnov statistic studied by Hodges [4].},
  langid = {english},
  keywords = {cramer-von mises test,exact sciences,normality tests,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Anderson - 1962 - On the distribution of the two-sample Cramer-von Mises criterion.pdf}
}

@article{arif2022,
  author       = {Arif, Suchinta and MacNeil, M. Aaron},
  title        = {Predictive models aren't for causal inference},
  date         = {2022-08},
  journaltitle = {Ecology Letters},
  shortjournal = {Ecology Letters},
  volume       = {25},
  number       = {8},
  pages        = {1741--1745},
  issn         = {1461-023X, 1461-0248},
  doi          = {10.1111/ele.14033},
  url          = {https://onlinelibrary.wiley.com/doi/10.1111/ele.14033},
  urldate      = {2023-07-27},
  langid       = {english},
  keywords     = {biological sciences,data science,ecology,exact sciences,modeling,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Arif - 2022 - Predictive models aren't for causal inference.pdf}
}

@book{belsley2004,
  title = {Regression diagnostics: identifying influential data and sources of collinearity},
  shorttitle = {Regression diagnostics},
  author = {Belsley, David A. and Kuh, Edwin and Welsch, Roy E.},
  date = {2004},
  series = {Wiley {{Series}} in {{Probability}} and {{Statistics}}},
  publisher = {John Wiley \& Sons},
  location = {Hoboken, NJ},
  doi = {10.1002/0471725153},
  abstract = {The Wiley-Interscience Paperback Series consists of selected books that have been made more accessible to consumers in an effort to increase global appeal and general circulation. With these new unabridged softcover volumes, Wiley hopes to extend the lives of these works by making them available to future generations of statisticians, mathematicians, and scientists. "The title of the book more or less sums up the contents. It appears to me to represent a real breakthrough in the art of dealing in ‘unconventional’ data. . . . I found the whole book both readable and enjoyable. It is suitable for data analysts, academic statisticians, and professional software writers." –Journal of the Royal Statistical Society "The book assumes a working knowledge of all of the principal results and techniques used in least squares multiple regression, as expressed in vector and matrix notation. Given this background, the book is clear and easy to use. . . . The techniques are illustrated in great detail with practical data sets from econometrics." –Short Book Reviews, International Statistical Institute Regression Diagnostics: Identifying Influential Data and Sources of Collinearity provides practicing statisticians and econometricians with new tools for assessing quality and reliability of regression estimates. Diagnostic techniques are developed that aid in the systematic location of data points that are unusual or inordinately influential; measure the presence and intensity of collinear relations among the regression data; and help to identify variables involved in each and pinpoint estimated coefficients potentially most adversely affected. The book emphasizes diagnostics and includes suggestions for remedial action.},
  isbn = {0-471-69117-8},
  langid = {english},
  pagetotal = {292},
  keywords = {Regression analysis},
  annotation = {Print ISBN: 9780471058564; Online ISBN: 9780471725152},
  file = {G:\Meu Drive\Zotero\files\Belsley et al. - 2004 - Regression diagnostics identifying influential data and sources of collinearity.pdf}
}

@article{bera1981,
  title = {Efficient tests for normality, homoscedasticity and serial independence of regression residuals: {{Monte Carlo}} evidence},
  shorttitle = {Efficient tests for normality, homoscedasticity and serial independence of regression residuals},
  author = {Bera, Anil K. and Jarque, Carlos M.},
  date = {1981-01-01},
  journaltitle = {Economics Letters},
  shortjournal = {Economics Letters},
  volume = {7},
  number = {4},
  pages = {313--318},
  issn = {0165-1765},
  doi = {10.1016/0165-1765(81)90035-5},
  url = {https://www.sciencedirect.com/science/article/pii/0165176581900355},
  urldate = {2024-09-28},
  abstract = {In this paper we study the performance of various tests for normality (N), homoscedasticity (H) and serial independence (I) of regression residuals (u) under one, two and three directional departures from H0: u ∼ NHI.},
  langid = {english},
  keywords = {exact sciences,jarque-bera test,normality tests,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Bera and Jarque - 1981 - Efficient tests for normality, homoscedasticity and serial independence of regression residuals Mon.pdf}
}

@article{bonett2002,
  title = {A test of normality with high uniform power},
  author = {Bonett, Douglas G. and Seier, Edith},
  date = {2002-09-28},
  journaltitle = {Computational Statistics \& Data Analysis},
  shortjournal = {Computational Statistics \& Data Analysis},
  volume = {40},
  number = {3},
  pages = {435--445},
  issn = {0167-9473},
  doi = {10.1016/S0167-9473(02)00074-9},
  url = {https://www.sciencedirect.com/science/article/pii/S0167947302000749},
  urldate = {2024-09-28},
  abstract = {Kurtosis can be measured in more than one way. A modification of Geary's measure of kurtosis is shown to be more sensitive to kurtosis in the center of the distribution while Pearson's measure of kurtosis is more sensitive to kurtosis in the tails of the distribution. The modified Geary measure and the Pearson measure are used to define a joint test of kurtosis that has high uniform power across a very wide range of symmetric nonnormal distributions.},
  langid = {english},
  keywords = {bonett-seier test,exact sciences,kurtosis,leptokurtosis,normality tests,probability and statistics,shapiro–wilk test},
  file = {G:\Meu Drive\Zotero\files\Bonett and Seier - 2002 - A test of normality with high uniform power.pdf}
}

@article{box1970,
  title = {Distribution of residual autocorrelations in autoregressive-integrated moving average time series models},
  author = {Box, G. E. P. and Pierce, David A.},
  date = {1970-12-01},
  journaltitle = {Journal of the American Statistical Association},
  volume = {65},
  number = {332},
  pages = {1509--1526},
  publisher = {Taylor \& Francis},
  issn = {0162-1459},
  doi = {10.1080/01621459.1970.10481180},
  url = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1970.10481180},
  urldate = {2024-09-28},
  abstract = {Many statistical models, and in particular autoregressive—moving average time series models, can be regarded as means of transforming the data to white noise, that is, to an uncorrelated sequence of errors. If the parameters are known exactly, this random sequence can be computed directly from the observations; when this calculation is made with estimates substituted for the true parameter values, the resulting sequence is referred to as the “residuals,” which can be regarded as estimates of the errors. If the appropriate model has been chosen, there will be zero autocorrelation in the errors. In checking adequacy of fit it is therefore logical to study the sample autocorrelation function of the residuals. For large samples the residuals from a correctly fitted model resemble very closely the true errors of the process; however, care is needed in interpreting the serial correlations of the residuals. It is shown here that the residual autocorrelations are to a close approximation representable as a singular linear transformation of the autocorrelations of the errors so that they possess a singular normal distribution. Failing to allow for this results in a tendency to overlook evidence of lack of fit. Tests of fit and diagnostic checks are devised which take these facts into account.},
  langid = {english},
  keywords = {autocorrelation,autocorrelation tests,box-pierce test,exact sciences,ljung-box test,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Box and Pierce - 1970 - Distribution of residual autocorrelations in autoregressive-integrated moving average time series mo.pdf}
}

@article{breusch1979,
  title = {A simple test for heteroscedasticity and random coefficient variation},
  author = {Breusch, T. S. and Pagan, A. R.},
  date = {1979},
  journaltitle = {Econometrica},
  volume = {47},
  number = {5},
  eprint = {1911963},
  eprinttype = {jstor},
  pages = {1287--1294},
  publisher = {Wiley and {Econometric Society}},
  issn = {0012-9682},
  doi = {10.2307/1911963},
  url = {https://www.jstor.org/stable/1911963},
  urldate = {2024-09-27},
  abstract = {A simple test for heteroscedastic disturbances in a linear regression model is developed using the framework of the Lagrangian multiplier test. For a wide range of heteroscedastic and random coefficient specifications, the criterion is given as a readily computed function of the OLS residuals. Some finite sample evidence is presented to supplement the general asymptotic properties of Lagrangian multiplier tests.},
  langid = {english},
  keywords = {breusch–pagan test,exact sciences,general linear models,heteroskedasticity,homoskedasticity,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Breusch and Pagan - 1979 - A simple test for heteroscedasticity and random coefficient variation.pdf}
}

@book{bussab1988,
  author     = {Bussab, Wilton de Oliveira},
  title      = {Análise de variância e de regressão: uma introdução},
  shorttitle = {Análise de variância e de regressão},
  date       = {1988},
  series     = {Métodos quantitativos},
  edition    = {2},
  publisher  = {Atlas},
  location   = {São Paulo},
  langid     = {brazilian},
  keywords   = {data science,exact sciences,multivariate methods,probability and statistics,univariate methods},
  file       = {G:\Meu Drive\Zotero\files\Bussab - 1988 - Analise de variancia e de regressao.pdf}
}

@book{casella2002,
  author    = {Casella, George and Berger, Roger L.},
  title     = {Statistical inference},
  date      = {2002},
  series    = {Duxbury advanced series},
  edition   = {2},
  publisher = {Duxbury},
  location  = {Pacific Grove, CA},
  isbn      = {0-534-24312-6},
  langid    = {english},
  pagetotal = {660},
  keywords  = {exact sciences,fundamentals of probability and statistics,probability and statistics,statistical inference,textbooks},
  file      = {G:\Meu Drive\Zotero\files\Casella - 2002 - Statistical inference.pdf}
}

@book{chatterjee2012,
  author    = {Chatterjee, Samprit and Hadi, Ali S.},
  title     = {Regression analysis by example},
  date      = {2012},
  series    = {Wiley {{Series}} in {{Probability}} and {{Statistics}}},
  edition   = {5},
  publisher = {Wiley},
  location  = {Hoboken, NJ},
  isbn      = {978-0-470-90584-5},
  abstract  = {This fifth edition has been expanded and thoroughly updated to reflect recent advances in the field. The emphasis continues to be on exploratory data analysis rather than statistical theory. The coverage offers in-depth treatment of regression diagnostics, transformation, multicollinearity, logistic regression, and robust regression. Methods of regression analysis are clearly demonstrated, and examples containing the types of irregularities commonly encountered in the real world are provided. Each example isolates one or two techniques and features detailed discussions of the techniques themselves, the required assumptions, and the evaluated success of each technique.},
  langid    = {english},
  pagetotal = {393},
  keywords  = {exact sciences,general linear models,influential observations,modeling,probability and statistics,regression analysis,regression diagnostics},
  file      = {G:\Meu Drive\Zotero\files\Chatterjee and Hadi - 2012 - Regression analysis by example.pdf}
}

@book{cohen1988,
  title = {Statistical power analysis for the behavioral sciences},
  author = {Cohen, Jacob},
  date = {1988},
  edition = {2},
  publisher = {Lawrence Erlbaum Associates},
  location = {Hillsdale, NJ},
  isbn = {978-0-8058-0283-2},
  langid = {english},
  pagetotal = {567},
  keywords = {exact sciences,power analysis,probability and statistics,sampling},
  file = {G:\Meu Drive\Zotero\files\Cohen - 1988 - Statistical power analysis for the behavioral sciences.pdf}
}

@book{cohen2002,
  title = {Applied multiple regression/correlation analysis for the behavioral sciences},
  author = {Cohen, Jacob and Cohen, Patricia and West, Stephen G. and Aiken, Leona S.},
  date = {2002-08},
  edition = {3},
  publisher = {Lawrence Erlbaum Associates},
  location = {Mahwah, NJ},
  abstract = {This classic text on multiple regression is noted for its nonmathematical, applied, and data-analytic approach. Readers profit from its verbal-conceptual exposition and frequent use of examples. The applied emphasis provides clear illustrations of the principles and provides worked examples of the types of applications that are possible. Researchers learn how to specify regression models that directly address their research questions. An overview of the fundamental ideas of multiple regression and a review of bivariate correlation and regression and other elementary statistical concepts provide a strong foundation for understanding the rest of the text. The third edition features an increased emphasis on graphics and the use of confidence intervals and effect size measures, and an accompanying website with data for most of the numerical examples along with the computer code for SPSS, SAS, and SYSTAT, at www.psypress.com/9780805822236. Applied Multiple Regression serves as both a textbook for graduate students and as a reference tool for researchers in psychology, education, health sciences, communications, business, sociology, political science, anthropology, and economics. An introductory knowledge of statistics is required. Self-standing chapters minimize the need for researchers to refer to previous chapters.},
  isbn = {0-8058-2223-2},
  langid = {english},
  pagetotal = {703},
  keywords = {correlation (statistics),exact sciences,general linear models,linear models,probability and statistics,regression analysis,social sciences,statistical methods,textbooks},
  annotation = {OCLC: ocm49903199},
  file = {G:\Meu Drive\Zotero\files\Cohen et al. - 2003 - Applied multiple regressioncorrelation analysis for the behavioral sciences.pdf}
}

@article{cook1977,
  title = {Detection of influential observation in linear regression},
  author = {Cook, R. Dennis},
  date = {1977-02},
  journaltitle = {Technometrics},
  volume = {19},
  number = {1},
  pages = {15--18},
  publisher = {Taylor \& Francis},
  issn = {0040-1706},
  doi = {10.1080/00401706.1977.10489493},
  url = {https://www.tandfonline.com/doi/abs/10.1080/00401706.1977.10489493},
  urldate = {2024-09-29},
  langid = {english},
  keywords = {confidence ellipsoids,cooks d,exact sciences,general linear models,influential observations,outliers,probability and statistics,variances of residuals},
  file = {G:\Meu Drive\Zotero\files\Cook - 1977 - Detection of influential observation in linear regression.pdf}
}

@article{cook1979,
  title = {Influential observations in linear regression},
  author = {Cook, R. Dennis},
  date = {1979-03-01},
  journaltitle = {Journal of the American Statistical Association},
  volume = {74},
  number = {365},
  pages = {169--174},
  publisher = {Taylor \& Francis},
  issn = {0162-1459},
  doi = {10.1080/01621459.1979.10481634},
  url = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1979.10481634},
  urldate = {2024-09-29},
  abstract = {Characteristics of observations which cause them to be influential in a least squares analysis are investigated and related to residual variances, residual correlations, and the convex hull of the observed values of the independent variables. It is shown how deleting an observation can substantially alter an analysis by changing the partial F-tests, the studentized residuals, the residual variances, the convex hull of the independent variables, and the estimated parameter vector. Outliers are discussed briefly, and an example is presented.},
  langid = {english},
  keywords = {cooks d,deleting observations,general linear models,outliers,partial f-tests,probability and statistics,residual correlations,studentized residuals},
  file = {G:\Meu Drive\Zotero\files\Cook - 1979 - Influential observations in linear regression 1.pdf}
}

@article{cramer1928,
  author       = {Cramér, Harald},
  title        = {On the composition of elementary errors: {{First}} paper: {{Mathematical}} deductions},
  shorttitle   = {On the composition of elementary errors},
  date         = {1928-01-01},
  journaltitle = {Scandinavian Actuarial Journal},
  volume       = {1928},
  number       = {1},
  pages        = {13--74},
  publisher    = {Taylor \& Francis},
  issn         = {0346-1238},
  doi          = {10.1080/03461238.1928.10416862},
  url          = {https://doi.org/10.1080/03461238.1928.10416862},
  urldate      = {2024-09-28},
  langid       = {english},
  keywords     = {cramer-von mises test,exact sciences,normality tests,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Cramér - 1928 - On the composition of elementary errors First paper Mathematical deductions.pdf}
}

@article{dagostino1971,
  title = {An omnibus test of normality for moderate and large size samples},
  author = {D'Agostino, Ralph B.},
  date = {1971-08-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume = {58},
  number = {2},
  pages = {341--348},
  issn = {0006-3444},
  doi = {10.1093/biomet/58.2.341},
  url = {https://doi.org/10.1093/biomet/58.2.341},
  urldate = {2024-09-28},
  abstract = {We present a test of normality based on a statistic D which is up to a constant the ratio of Downton's linear unbiased estimator of the population standard deviation to the sample standard deviation. For the usual levels of significance Monte Carlo simulations indicate that Cornish-Fisher expansions adequately approximate the null distribution of D if the sample size is 50 or more. The test is an omnibus test, being appropriate to detect deviations from normality due either to skewness or kurtosis. Simulation results of powers for various alternatives when the sample size is 50 indicate that the test compares favourably with the Shapiro-Wilk W test, √b1, b2 and the ratio of range to standard deviation.},
  langid = {english},
  keywords = {d'agostino test,exact sciences,normality tests,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\D'AGOSTINO - 1971 - An omnibus test of normality for moderate and large size samples.pdf}
}

@article{dagostino1973,
  title = {Tests for departure from normality. {{Empirical}} results for the distributions of {$b_2$} and {$\sqrt{b_1}$}},
  author = {D'Agostino, Ralph B. and Pearson, E. S.},
  date = {1973},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume = {60},
  number = {3},
  pages = {613--622},
  issn = {0006-3444, 1464-3510},
  doi = {10.1093/biomet/60.3.613},
  url = {https://academic.oup.com/biomet/article-lookup/doi/10.1093/biomet/60.3.613},
  urldate = {2023-11-14},
  langid = {english},
  keywords = {d'agostino test,exact sciences,normality tests,probability and statistics},
  file = {G:\Meu Drive\Zotero\files\D'Agostino - 1973 - Tests for departure from normality.pdf}
}

@article{dagostino1990,
  title = {A suggestion for using powerful and informative tests of normality},
  author = {D'Agostino, Ralph B. and Belanger, Albert},
  date = {1990},
  journaltitle = {The American Statistician},
  volume = {44},
  number = {4},
  eprint = {2684359},
  eprinttype = {jstor},
  pages = {316--321},
  publisher = {{American Statistical Association} and {Taylor \& Francis, Ltd.}},
  issn = {0003-1305},
  doi = {10.2307/2684359},
  url = {https://www.jstor.org/stable/2684359},
  urldate = {2023-11-15},
  abstract = {For testing that an underlying population is normally distributed the skewness and kurtosis statistics, $\sqrt{b_1}$ and $b_2$, and the D'Agostino-Pearson $K^2$ statistic that combines these two statistics have been shown to be powerful and informative tests. Their use, however, has not been as prevalent as their usefulness. We review these tests and show how readily available and popular statistical software can be used to implement them. Their relationship to deviations from linearity in normal probability plotting is also presented.},
  langid = {english},
  keywords = {d'agostino test,exact sciences,normality tests,probability and statistics},
  file = {G:\Meu Drive\Zotero\files\D'Agostino - 1990 - A suggestion for using powerful and informative tests of normality.pdf}
}

@article{dallal1986,
  title = {An analytic approximation to the distribution of {{Lilliefors}}'s test statistic for normality},
  author = {Dallal, Gerard E. and Wilkinson, Leland},
  date = {1986-11},
  journaltitle = {The American Statistician},
  shortjournal = {The American Statistician},
  volume = {40},
  number = {4},
  pages = {294--296},
  issn = {0003-1305, 1537-2731},
  doi = {10.1080/00031305.1986.10475419},
  url = {https://www.tandfonline.com/doi/abs/10.1080/00031305.1986.10475419},
  urldate = {2023-11-14},
  langid = {english},
  keywords = {exact sciences,kolmogorov-smirnov test,normality tests,probability and statistics},
  file = {G:\Meu Drive\Zotero\files\Dallal - 1986 - An analytic approximation to the distribution of Lilliefors's test statistic.pdf}
}

@book{dalpiaz,
  author   = {Dalpiaz, David},
  title    = {Applied statistics with {{R}}},
  url      = {https://book.stat420.org/},
  langid   = {english},
  keywords = {data science,exact sciences,probability and statistics,programming,r},
  file     = {G:\Meu Drive\Zotero\files\Dalpiaz - - Applied statistics with R.pdf}
}

@book{degroot2012,
  author     = {DeGroot, Morris H. and Schervish, Mark J.},
  title      = {Probability and statistics},
  date       = {2012},
  edition    = {4},
  publisher  = {Addison-Wesley},
  location   = {Boston},
  isbn       = {978-0-321-50046-5},
  langid     = {english},
  pagetotal  = {893},
  keywords   = {exact sciences,fundamentals of probability and statistics,probability and statistics,textbooks},
  annotation = {OCLC: ocn502674206},
  file       = {G\:\\Meu Drive\\Zotero\\files\\DeGroot - 2012 - Probability and statistics.pdf;G\:\\Meu Drive\\Zotero\\files\\DeGroot - 2012 - Probability and statistics.zip}
}

@book{dudek2020,
  author   = {Dudek, Bruce},
  title    = {Linear models with {{R}}: emphasis on 2-{{IV}} models: basics of multiple regression},
  date     = {2020-09-09},
  url      = {https://bcdudek.net/regression1/},
  langid   = {english},
  keywords = {data science,exact sciences,multivariate methods,probability and statistics,programming,r},
  file     = {G:\Meu Drive\Zotero\files\Dudek - 2020 - Linear models with R.pdf}
}

@article{durbin1950,
  title = {Testing for serial correlation in least squares regression. {{I}}},
  author = {Durbin, J. and Watson, G. S.},
  date = {1950-12-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume = {37},
  number = {3--4},
  pages = {409--428},
  issn = {0006-3444},
  doi = {10.1093/biomet/37.3-4.409},
  url = {https://doi.org/10.1093/biomet/37.3-4.409},
  urldate = {2024-09-27},
  langid = {english},
  keywords = {autocorrelation,durbin-watson test,exact sciences,general linear models,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Durbin and Watson - 1950 - Testing for serial correlation in least squares regression. I.pdf}
}

@article{durbin1951,
  title = {Testing for serial correlation in least squares regression. {{II}}},
  author = {Durbin, J. and Watson, G. S.},
  date = {1951-06-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume = {38},
  number = {1--2},
  pages = {159--178},
  issn = {0006-3444},
  doi = {10.1093/biomet/38.1-2.159},
  url = {https://doi.org/10.1093/biomet/38.1-2.159},
  urldate = {2024-09-27},
  langid = {english},
  keywords = {autocorrelation,durbin-watson test,exact sciences,general linear models,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Durbin and Watson - 1951 - Testing for serial correlation in least squares regression. II.pdf}
}

@article{durbin1971,
  title = {Testing for serial correlation in least squares regression. {{III}}},
  author = {Durbin, J. and Watson, G. S.},
  date = {1971-04-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume = {58},
  number = {1},
  pages = {1--19},
  issn = {0006-3444},
  doi = {10.1093/biomet/58.1.1},
  url = {https://doi.org/10.1093/biomet/58.1.1},
  urldate = {2024-09-27},
  abstract = {The paper considers a number of problems arising from the test of serial correlation based on the d statistic proposed earlier by the authors (Durbin \& Watson, 1950, 1951). Methods of computing the exact distribution of d are investigated and the exact distribution is compared with six approximations to it for four sets of published data. It is found that approximations suggested by Theil and Nagar and by Hannan are too inaccurate for practical use but that the beta approximation proposed in the 1950 and 1951 papers and a new approximation, called by us the a + bdu approximation and based, like the beta approximation, on the exact first two moments of d, both perform well. The power of the d test is compared with that of certain exact tests proposed by Theil, Durbin, Koerts and Abrahamse from the standpoint of invariance theory. It is shown that the d test is locally most powerful invariant but that the other tests are not. There are three appendices. The first gives an account of the exact distribution of d. The second derives the mean and variance to a second order of approximation of a modified maximum likelihood statistic closely related to d. The third sets out details of the computations required for the a + bdu approximation.},
  langid = {english},
  keywords = {autocorrelation,durbin-watson test,exact sciences,general linear models,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Durbin and Watson - 1971 - Testing for serial correlation in least squares regression. III.pdf}
}

@book{falk1992,
  author    = {Falk, R. Frank and Miller, Nancy B.},
  title     = {A primer for soft modeling},
  date      = {1992-12-01},
  publisher = {University of Akron Press},
  location  = {Akron, Ohio},
  abstract  = {A practical guide to "soft modeling" that relies on a computer application strategy, this book is intended for researchers and students interested in a structural equation modeling approach to path analysis that solves many measurement issues encountered in social science research.},
  isbn      = {978-0-9622628-4-5},
  langid    = {english},
  pagetotal = {103},
  keywords  = {exact sciences,general linear models,modeling,probability and statistics},
  file      = {G:\Meu Drive\Zotero\files\Falk and Miller - 1992 - A primer for soft modeling.pdf}
}

@book{fox2016,
  author    = {Fox, John},
  title     = {Applied regression analysis and generalized linear models},
  date      = {2016},
  edition   = {3},
  publisher = {Sage},
  location  = {Thousand Oaks, CA},
  isbn      = {978-1-4522-0566-3},
  langid    = {english},
  pagetotal = {791},
  keywords  = {data science,exact sciences,generalized linear models,multivariate methods,probability and statistics},
  file      = {G:\Meu Drive\Zotero\files\Fox - 2016 - Applied regression analysis and generalized linear models.pdf}
}

@article{gorman2014,
  title = {Ecological sexual dimorphism and environmental variability within a community of {{Antarctic}} penguins (genus {{Pygoscelis}})},
  author = {Gorman, Kristen B. and Williams, Tony D. and Fraser, William R.},
  date = {2014-03-05},
  journaltitle = {PLOS ONE},
  shortjournal = {PLOS ONE},
  volume = {9},
  number = {3},
  pages = {e90081},
  publisher = {Public Library of Science},
  issn = {1932-6203},
  doi = {10.1371/journal.pone.0090081},
  url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0090081},
  urldate = {2024-09-26},
  abstract = {Background Sexual segregation in vertebrate foraging niche is often associated with sexual size dimorphism (SSD), i.e., ecological sexual dimorphism. Although foraging behavior of male and female seabirds can vary markedly, differences in isotopic (carbon, δ13C and nitrogen, δ15N) foraging niche are generally more pronounced within sexually dimorphic species and during phases when competition for food is greater. We examined ecological sexual dimorphism among sympatric nesting Pygoscelis penguins asking whether environmental variability is associated with differences in male and female pre-breeding foraging niche. We predicted that all Pygoscelis species would forage sex-specifically, and that higher quality winter habitat, i.e., higher or lower sea ice coverage for a given species, would be associated with a more similar foraging niche among the sexes. Results P2/P8 primers reliably amplified DNA of all species. On average, male Pygoscelis penguins are structurally larger than female conspecifics. However, chinstrap penguins were more sexually dimorphic in culmen and flipper features than Adélie and gentoo penguins. Adélies and gentoos were more sexually dimorphic in body mass than chinstraps. Only male and female chinstraps and gentoos occupied separate δ15N foraging niches. Strong year effects in δ15N signatures were documented for all three species, however, only for Adélies, did yearly variation in δ15N signatures tightly correlate with winter sea ice conditions. There was no evidence that variation in sex-specific foraging niche interacted with yearly winter habitat quality. Conclusion Chinstraps were most sexually size dimorphic followed by gentoos and Adélies. Pre-breeding sex-specific foraging niche was associated with overall SSD indices across species; male chinstrap and gentoo penguins were enriched in δ15N relative to females. Our results highlight previously unknown trophic pathways that link Pygoscelis penguins with variation in Southern Ocean sea ice suggesting that each sex within a species should respond similarly in pre-breeding trophic foraging to changes in future winter habitat.},
  langid = {english},
  keywords = {animal sexual behavior,antarctica,biological sciences,biology,ecological niches,foraging,islands,isotopes,open data,open science,penguins,r,sea ice},
  file = {G:\Meu Drive\Zotero\files\Gorman et al. - 2014 - Ecological sexual dimorphism and environmental variability within a community of antarctic penguins -genus pygoscelis-.pdf}
}

@online{greener2020,
  author       = {Greener, Robert},
  title        = {Stop testing for normality},
  date         = {2020-08-04T12:53:26},
  url          = {https://towardsdatascience.com/stop-testing-for-normality-dba96bb73f90},
  urldate      = {2024-09-29},
  abstract     = {Normality tests are misleading and a waste of your time!},
  langid       = {english},
  organization = {Medium},
  keywords     = {exact sciences,general linear models,normality tests,tests},
  file         = {G:\Meu Drive\Zotero\files\Greener - 2020 - Stop testing for normality.pdf}
}

@book{hair2019,
  title = {Multivariate data analysis},
  author = {Hair, Joseph F. and Black, William C. and Babin, Barry J. and Anderson, Rolph E.},
  date = {2019},
  edition = {8},
  publisher = {Cengage},
  location = {Andover, Hampshire},
  isbn = {978-1-4737-5654-0},
  langid = {english},
  pagetotal = {813},
  keywords = {data science,exact sciences,multivariate methods,probability and statistics},
  file = {G:\Meu Drive\Zotero\files\Hair - 2019 - Multivariate data analysis.pdf}
}

@article{jarque1980,
  author       = {Jarque, Carlos M. and Bera, Anil K.},
  title        = {Efficient tests for normality, homoscedasticity and serial independence of regression residuals},
  date         = {1980-01-01},
  journaltitle = {Economics Letters},
  shortjournal = {Economics Letters},
  volume       = {6},
  number       = {3},
  pages        = {255--259},
  issn         = {0165-1765},
  doi          = {10.1016/0165-1765(80)90024-5},
  url          = {https://www.sciencedirect.com/science/article/pii/0165176580900245},
  urldate      = {2024-09-28},
  abstract     = {We use the Lagrange multiplier procedure to derive efficient joint tests for residual normality, homoscedasticity and serial independence. The tests are simple to compute and asymptotically distributed as χ2.},
  langid       = {english},
  keywords     = {jarque-bera test,normality tests,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Jarque and Bera - 1980 - Efficient tests for normality, homoscedasticity and serial independence of regression residuals.pdf}
}

@article{jarque1987,
  title = {A test for normality of observations and regression residuals},
  author = {Jarque, Carlos M. and Bera, Anil K.},
  date = {1987},
  journaltitle = {International Statistical Review},
  volume = {55},
  number = {2},
  eprint = {1403192},
  eprinttype = {jstor},
  pages = {163--172},
  publisher = {Wiley and International Statistical Institute (ISI)},
  issn = {0306-7734},
  doi = {10.2307/1403192},
  url = {https://www.jstor.org/stable/1403192},
  urldate = {2024-09-28},
  abstract = {Using the Lagrange multiplier procedure or score test on the Pearson family of distributions we obtain tests for normality of observations and regression disturbances. The tests suggested have optimum asymptotic power properties and good finite sample performance. Due to their simplicity they should prove to be useful tools in statistical analysis.},
  langid = {english},
  keywords = {exact sciences,jarque-bera test,normality tests,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Jarque and Bera - 1987 - A test for normality of observations and regression residuals.pdf}
}

@book{johnson2013,
  title = {Applied multivariate statistical analysis: {{Pearson}} new international edition},
  shorttitle = {Applied multivariate statistical analysis},
  author = {Johnson, Richard and Wichern, Dean},
  date = {2013},
  edition = {6},
  publisher = {Pearson},
  location = {Harlow, UK},
  abstract = {For courses in Multivariate Statistics, Marketing Research, Intermediate Business Statistics, Statistics in Education, and graduate-level courses in Experimental Design and Statistics. Appropriate for experimental scientists in a variety of disciplines, this market-leading text offers a readable introduction to the statistical analysis of multivariate observations. Its primary goal is to impart the knowledge necessary to make proper interpretations and select appropriate techniques for analysing multivariate data. Ideal for a junior/senior or graduate level course that explores the statistical methods for describing and analysing multivariate data, the text assumes two or more statistics courses as a prerequisite. The full text downloaded to your computer With eBooks you can: search for key concepts, words and phrases make highlights and notes as you study share your notes with friends eBooks are downloaded to your computer and accessible either offline through the Bookshelf (available as a free download), available online and also via the iPad and Android apps. Upon purchase, you will receive via email the code and instructions on how to access this product. Time limit The eBooks products do not have an expiry date. You will continue to access your digital ebook products whilst you have your Bookshelf installed.},
  isbn = {978-1-292-03757-8},
  langid = {english},
  keywords = {data science,exact sciences,multivariate methods,probability and statistics},
  annotation = {OCLC: 1277290670. Pearson New International Edition.},
  file = {G:\Meu Drive\Zotero\files\Johnson - 2013 - Applied multivariate statistical analysis.pdf}
}

@article{koenker1981,
  author       = {Koenker, Roger},
  title        = {A note on studentizing a test for heteroscedasticity},
  date         = {1981-09-01},
  journaltitle = {Journal of Econometrics},
  shortjournal = {Journal of Econometrics},
  volume       = {17},
  number       = {1},
  pages        = {107--112},
  issn         = {0304-4076},
  doi          = {10.1016/0304-4076(81)90062-2},
  url          = {https://www.sciencedirect.com/science/article/pii/0304407681900622},
  urldate      = {2024-09-28},
  abstract     = {Breusch and Pagan (1979) have recently proposed a convenient test for heteroscedasticity in general linear models. This note derives the asymptotic distribution of their test under sequences of contiguous alternatives to the null hypothesis of homoscedasticity. The test is shown to possess asymptotically incorrect size (nominal significance level) except in the case of strictly Gaussian disturbances. A slight modification of the test is proposed which corrects this defect.},
  langid       = {english},
  keywords     = {breusch–pagan test,exact sciences,heteroskedasticity,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Koenker - 1981 - A note on studentizing a test for heteroscedasticity.pdf}
}

@article{kolmogorov1933,
  title = {Sulla determinazione empirica di una legge di distribuzione},
  author = {Kolmogorov, A.},
  date = {1933},
  journaltitle = {Giornale dell'Istituto Italiano degli Attuari},
  volume = {4},
  pages = {83--91},
  langid = {italian},
  keywords = {exact sciences,kolmogorov-smirnov test,normality tests,probability and statistics},
  file = {G:\Meu Drive\Zotero\files\Kolmogorov - 1933 - Sulla determinazione empirica di una legge di distribuzione.pdf}
}

@article{kozak2018,
  author       = {Kozak, M. and Piepho, H.-P.},
  title        = {What's normal anyway? {{Residual}} plots are more telling than significance tests when checking {{ANOVA}} assumptions},
  shorttitle   = {What's normal anyway?},
  date         = {2018},
  journaltitle = {Journal of Agronomy and Crop Science},
  volume       = {204},
  number       = {1},
  pages        = {86--98},
  issn         = {1439-037X},
  doi          = {10.1111/jac.12220},
  url          = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jac.12220},
  urldate      = {2024-09-29},
  abstract     = {We consider two questions important for applying analysis of variance (ANOVA): Should normality be checked on the raw data or on the residuals (or is it immaterial which of the two approaches we take)? Should normality and homogeneity of variance be checked using significance tests or diagnostic plots (or both)? Based on two examples, we show that residuals should be used for model checking and that residual plots are better for checking ANOVA assumptions than statistical tests. We also discuss why one should be very cautious when using statistical tests to check the assumptions.},
  langid       = {english},
  keywords     = {anova,assumption checks,diagnostic plots,exact sciences,general linear models,linear models,probability and statistics,statistical assumptions},
  file         = {G:\Meu Drive\Zotero\files\Kozak and Piepho - 2018 - What's normal anyway Residual plots are more telling than significance tests when checking ANOVA as.pdf}
}

@book{kuhn2022,
  title = {Tidy modeling with {{R}}: a framework for modeling in the tidyverse},
  shorttitle = {Tidy modeling with {{R}}},
  author = {Kuhn, Max and Silge, Julia},
  date = {2022},
  publisher = {O'Reilly Media},
  location = {Sebastopol, CA},
  url = {https://www.tmwr.org/},
  abstract = {Get going with tidymodels, a collection of R packages for modeling and machine learning. Whether you're just starting out or have years of experience with modeling, this practical introduction shows data analysts, business analysts, and data scientists how the tidymodels framework offers a consistent, flexible approach for your work. RStudio engineers Max Kuhn and Julia Silge demonstrate ways to create models by focusing on an R dialect called the tidyverse. Software that adopts tidyverse principles shares both a high-level design philosophy and low-level grammar and data structures, so learning one piece of the ecosystem makes it easier to learn the next. You'll understand why the tidymodels framework has been built to be used by a broad range of people.},
  isbn = {978-1-4920-9648-1},
  langid = {english},
  pagetotal = {363},
  keywords = {artificial intelligence,exact sciences,machine learning,modeling,probability and statistics,programming,r,tidyverse},
  annotation = {OCLC: on1338675673},
  file = {G\:\\Meu Drive\\Zotero\\files\\Kuhn - 2022 - Tidy modeling with R.epub;G\:\\Meu Drive\\Zotero\\files\\Kuhn - 2022 - Tidy modeling with R.pdf}
}

@article{lilliefors1967,
  author       = {Lilliefors, Hubert W.},
  title        = {On the {{Kolmogorov-Smirnov}} test for normality with mean and variance unknown},
  date         = {1967-06},
  journaltitle = {Journal of the American Statistical Association},
  shortjournal = {Journal of the American Statistical Association},
  volume       = {62},
  number       = {318},
  pages        = {399--402},
  issn         = {0162-1459, 1537-274X},
  doi          = {10.1080/01621459.1967.10482916},
  url          = {http://www.tandfonline.com/doi/abs/10.1080/01621459.1967.10482916},
  urldate      = {2023-11-14},
  langid       = {english},
  keywords     = {exact sciences,kolmogorov-smirnov test,normality tests,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Lilliefors - 1967 - On the Kolmogorov-Smirnov test for normality with mean and variance unknown.pdf}
}

@article{ljung1978,
  author       = {Ljung, G. M. and Box, G. E. P.},
  title        = {On a measure of lack of fit in time series models},
  date         = {1978-08-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {65},
  number       = {2},
  pages        = {297--303},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/65.2.297},
  url          = {https://doi.org/10.1093/biomet/65.2.297},
  urldate      = {2024-09-28},
  abstract     = {The overall test for lack of fit in autoregressive-moving average models proposed by Box \& Pierce (1970) is considered. It is shown that a substantially improved approximation results from a simple modification of this test. Some consideration is given to the power of such tests and their robustness when the innovations are nonnormal. Similar modifications in the overall tests used for transfer function-noise models are proposed},
  langid       = {english},
  keywords     = {autocorrelation,autocorrelation tests,exact sciences,ljung-box test,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Ljung and Box - 1978 - On a measure of lack of fit in time series models.pdf}
}

@article{massey1951,
  author       = {Massey, Frank J.},
  title        = {The {{Kolmogorov-Smirnov}} test for goodness of fit},
  date         = {1951-03},
  journaltitle = {Journal of the American Statistical Association},
  shortjournal = {Journal of the American Statistical Association},
  volume       = {46},
  number       = {253},
  pages        = {68--78},
  issn         = {0162-1459, 1537-274X},
  doi          = {10.1080/01621459.1951.10500769},
  url          = {http://www.tandfonline.com/doi/abs/10.1080/01621459.1951.10500769},
  urldate      = {2023-11-14},
  langid       = {english},
  keywords     = {exact sciences,kolmogorov-smirnov test,normality tests,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Massey - 1951 - The Kolmogorov-Smirnov test for goodness of fit.pdf}
}

@book{nahhas2024,
  author   = {Nahhas, Ramzi W.},
  title    = {Introduction to regression methods for public health using {{R}}},
  date     = {2024-10-13},
  url      = {https://www.bookdown.org/rwnahhas/RMPH/},
  urldate  = {2024-10-14},
  abstract = {This text was written to be used in a second biostatistics course for Master of Public Health students; however, students in any field will find it useful. Students in many disciplines take an introductory statistics course, providing foundational competencies but perhaps not enough to use more advanced methods without additional training. There are a plethora of textbooks covering topics such as linear regression, logistic regression, and survival analysis aimed at those with a background in mathematical statistics and/or without a focus specifically on public health and/or without a focus on using R statistical software. The goal of this text is to provide a gentle introduction to regression methods, using R, that covers all the basics and a bit more, with examples drawn from public health data.},
  langid   = {english},
  keywords = {exact sciences,general linear models,probability and statistics,regression analysis,regression diagnostics}
}

@article{newey1987,
  title = {A simple, positive semi-definite, heteroskedasticity and autocorrelation consistent covariance matrix},
  author = {Newey, Whitney K. and West, Kenneth D.},
  date = {1987},
  journaltitle = {Econometrica},
  volume = {55},
  number = {3},
  eprint = {1913610},
  eprinttype = {jstor},
  pages = {703--708},
  publisher = {Wiley and Econometric Society},
  issn = {0012-9682},
  doi = {10.2307/1913610},
  url = {https://www.jstor.org/stable/1913610},
  urldate = {2024-09-28},
  langid = {english},
  keywords = {autocorrelation,autocorrelation tests,exact sciences,heteroskedasticity,newey-west estimator,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Newey and West - 1987 - A simple, positive semi-definite, heteroskedasticity and autocorrelation consistent covariance matrix.pdf}
}

@article{newey1994,
  title = {Automatic lag selection in covariance matrix estimation},
  author = {Newey, Whitney K. and West, Kenneth D.},
  date = {1994-10-01},
  journaltitle = {The Review of Economic Studies},
  shortjournal = {The Review of Economic Studies},
  volume = {61},
  number = {4},
  pages = {631--653},
  issn = {0034-6527},
  doi = {10.2307/2297912},
  url = {https://doi.org/10.2307/2297912},
  urldate = {2024-09-28},
  abstract = {We propose a nonparametric method for automatically selecting the number of autocovariances to use in computing a heteroskedasticity and autocorrelation consistent covariance matrix. For a given kernel for weighting the autocovariances, we prove that our procedure is asymptotically equivalent to one that is optimal under a mean-squared error loss function. Monte Carlo simulations suggest that our procedure performs tolerably well, although it does result in size distortions.},
  langid = {english},
  keywords = {autocorrelation,exact sciences,heteroskedasticity,newey-west estimator,probability and statistics,tests},
  file = {G:\Meu Drive\Zotero\files\Newey and West - 1994 - Automatic lag selection in covariance matrix estimation.pdf}
}

@article{neyman1928,
  title = {On the use and interpretation of certain test criteria for purposes of statistical inference: part {{I}}},
  shorttitle = {On the use and interpretation of certain test criteria for purposes of statistical inference},
  author = {Neyman, J. and Pearson, E. S.},
  date = {1928},
  journaltitle = {Biometrika},
  volume = {20A},
  number = {1/2},
  eprint = {2331945},
  eprinttype = {jstor},
  pages = {175--240},
  publisher = {Oxford University Press and Biometrika Trust},
  issn = {0006-3444},
  doi = {10.2307/2331945},
  url = {https://www.jstor.org/stable/2331945},
  urldate = {2024-09-30},
  langid = {english},
  keywords = {exact sciences,extraordinary publications,hypothesis tests,hypothetico–deductive method,probability and statistics,statistical inference},
  file = {G:\Meu Drive\Zotero\files\Neyman and Pearson - 1928 - On the use and interpretation of certain test criteria for purposes of statistical inference part I.pdf}
}

@article{neyman1928a,
  title = {On the use and interpretation of certain test criteria for purposes of statistical inference: part {{II}}},
  shorttitle = {On the use and interpretation of certain test criteria for purposes of statistical inference},
  author = {Neyman, J. and Pearson, E. S.},
  date = {1928},
  journaltitle = {Biometrika},
  volume = {20A},
  number = {3/4},
  eprint = {2332112},
  eprinttype = {jstor},
  pages = {263--294},
  publisher = {Oxford University Press and Biometrika Trust},
  issn = {0006-3444},
  doi = {10.2307/2332112},
  url = {https://www.jstor.org/stable/2332112},
  urldate = {2024-09-30},
  langid = {english},
  keywords = {exact sciences,extraordinary publications,hypothesis tests,hypothetico–deductive method,probability and statistics,statistical inference},
  file = {G:\Meu Drive\Zotero\files\Neyman and Pearson - 1928 - On the use and interpretation of certain test criteria for purposes of statistical inference part I 1.pdf}
}

@article{pearson1900,
  author       = {Pearson, Karl},
  title        = {X. {{On}} the criterion that a given system of deviations from the probable in the case of a correlated system of variables is such that it can be reasonably supposed to have arisen from random sampling},
  date         = {1900-07},
  journaltitle = {The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science},
  volume       = {50},
  number       = {302},
  pages        = {157--175},
  publisher    = {Taylor \& Francis},
  issn         = {1941-5982},
  doi          = {10.1080/14786440009463897},
  url          = {https://www.tandfonline.com/doi/abs/10.1080/14786440009463897},
  urldate      = {2024-09-28},
  langid       = {english},
  keywords     = {exact sciences,normality tests,pearson chi-squared test,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Pearson - 1900 - X. On the criterion that a given system of deviations from the probable in the case of a correlated.pdf}
}

@article{peek2003,
  author       = {Peek, Michael S. and Leffler, A. Joshua and Flint, Stephan D. and Ryel, Ronald J.},
  title        = {How much variance is explained by ecologists? {{Additional}} perspectives},
  shorttitle   = {How much variance is explained by ecologists?},
  date         = {2003},
  journaltitle = {Oecologia},
  volume       = {137},
  number       = {2},
  eprint       = {4223745},
  eprinttype   = {jstor},
  pages        = {161--170},
  publisher    = {Springer},
  issn         = {0029-8549},
  url          = {https://www.jstor.org/stable/4223745},
  urldate      = {2024-09-29},
  abstract     = {A recent meta-analysis of meta-analyses by Møller and Jennions (2002, Oecologia 132: 492-500) suggested that ecologists using statistical models are explaining between 2.5\% and 5.42\% of the variability in ecological studies. Although we agree that there is considerable variability in ecological systems that is not explained, we disagree with the approach and general conclusions of Møller and Jennions. As an alternate perspective, we explored the question: "How much ecological variation in relationships is not explained?" We did this by examining published studies in five different journals representative of the numerous sub-disciplines of ecology. We quantified the proportion of variance not explained in statistical models as the residual or random error compared to the total variation in the data set. Our results indicate that statistical models explain roughly half of the variation in variables of interest, vastly different from the 2.5\%-5.42\% reported by Møller and Jennions. This difference resulted largely from a different level of analysis: we considered the original study to be the appropriate level for quantifying variability while Møller and Jennions combined studies at different temporal and spatial scales and attempted to find universal single-factor relationships between ecological variables across study organisms or locations. Therefore, we believe that Møller and Jennions actually measured the universality of single factor effects across multiple ecological systems, not the amount of variability in ecological studies explained by ecologists. This study, combined with Møller and Jennions', illustrates importance of applying statistical models appropriately to assess ecological relationships.},
  langid       = {english},
  keywords     = {biological sciences,ecology,effect size,r squared},
  file         = {G:\Meu Drive\Zotero\files\Peek et al. - 2003 -How much variance is explained by ecologists Additional perspectives.pdf}
}

@article{perezgonzalez2015,
  author       = {Perezgonzalez, Jose D.},
  title        = {Fisher, {{Neyman-Pearson}} or {{NHST}}? {{A}} tutorial for teaching data testing},
  shorttitle   = {Fisher, {{Neyman-Pearson}} or {{NHST}}?},
  date         = {2015-03-02},
  journaltitle = {Frontiers in Psychology},
  shortjournal = {Front. Psychol.},
  volume       = {6},
  publisher    = {Frontiers},
  issn         = {1664-1078},
  doi          = {10.3389/fpsyg.2015.00223},
  url          = {https://www.frontiersin.org/journals/psychology/articles/10.3389/fpsyg.2015.00223/full},
  urldate      = {2024-09-30},
  abstract     = {Despite frequent calls for the overhaul of null hypothesis significance testing (NHST), this controversial procedure remains ubiquitous in behavioral, social and biomedical teaching and research. Little change seems possible once the procedure becomes well ingrained in the minds and current practice of researchers; thus, the optimal opportunity for such change is at the time the procedure is taught, be this at undergraduate or at postgraduate levels. This paper presents a tutorial for the teaching of data testing procedures, often referred to as hypothesis testing theories. The first procedure introduced is Fisher's approach to data testing—tests of significance; the second is Neyman-Pearson's approach—tests of acceptance; the final procedure is the incongruent combination of the previous two theories into the current approach—NSHT. For those researchers sticking with the latter, two compromise solutions on how to improve NHST conclude the tutorial.},
  langid       = {english},
  keywords     = {exact sciences,fisher,history,hypothesis tests,hypothetico–deductive method,neyman-pearson,null hypothesis significance testing,science,statistical education,teaching statistics,test of significance,test of statistical hypothesis},
  file         = {G:\Meu Drive\Zotero\files\Perezgonzalez - 2015 - Fisher, Neyman-Pearson or NHST A tutorial for teaching data testing.pdf}
}

@book{popper1979,
  author     = {Popper, Karl R.},
  title      = {Objective knowledge: an evolutionary approach},
  shorttitle = {Objective knowledge},
  date       = {1979},
  publisher  = {Oxford University Press},
  location   = {Oxford, UK},
  isbn       = {978-0-19-824370-0},
  langid     = {english},
  pagetotal  = {395},
  keywords   = {epistemology,ontology,philosophy,science,scientific methodology},
  annotation = {Publicado originalmente em 1972.},
  file       = {G:\Meu Drive\Zotero\files\Popper - 1979 - Objective knowledge.pdf}
}

@article{ramsey1969,
  title = {Tests for specification errors in classical linear least-squares regression analysis},
  author = {Ramsey, J. B.},
  date = {1969},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume = {31},
  number = {2},
  pages = {350--371},
  publisher = {Royal Statistical Society and Oxford University Press},
  issn = {0035-9246},
  doi = {10.1111/j.2517-6161.1969.tb00796.x},
  url = {https://academic.oup.com/jrsssb/article-abstract/31/2/350/7027014},
  urldate = {2024-09-29},
  abstract = {The effects on the distribution of least-squares residuals of a series of model mis-specifications are considered. It is shown that for a variety of specification errors the distributions of the least-squares residuals are normal, but with non-zero means. An alternative predictor of the disturbance vector is used in developing four procedures for testing for the presence of specification error. The specification errors considered are omitted variables, incorrect functional form, simultaneous equation problems and heteroskedasticity.},
  langid = {english},
  keywords = {exact sciences,general linear models,linearity,probability and statistics,ramsey reset test,tests},
  file = {G:\Meu Drive\Zotero\files\Ramsey - 1969 - Tests for specification errors in classical linear least-squares regression analysis.pdf}
}

@article{schucany2006,
  author       = {Schucany, William R. and Ng, H. K. Tony},
  title        = {Preliminary goodness-of-fit tests for normality do not validate the one-sample {{Student}} t},
  date         = {2006-12-01},
  journaltitle = {Communications in Statistics - Theory and Methods},
  volume       = {35},
  number       = {12},
  pages        = {2275--2286},
  publisher    = {Taylor \& Francis Group},
  doi          = {10.1080/03610920600853308},
  url          = {https://www.tandfonline.com/doi/abs/10.1080/03610920600853308},
  urldate      = {2024-09-29},
  abstract     = {One of the most basic topics in many introductory statistical methods texts is inference for a population mean, μ. The primary tool for confidence intervals and tests is the Student t sampling dist...},
  langid       = {english},
  keywords     = {assumption checks,exact sciences,general linear models,normality tests,probability and statistics,statistical assumptions,tests}
}

@article{shapiro1965,
  title = {An analysis of variance test for normality (complete samples)},
  author = {Shapiro, S. S. and Wilk, M. B.},
  date = {1965-12-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume = {52},
  number = {3-4},
  pages = {591--611},
  issn = {0006-3444},
  doi = {10.1093/biomet/52.3-4.591},
  url = {https://doi.org/10.1093/biomet/52.3-4.591},
  urldate = {2024-09-28},
  langid = {english},
  keywords = {exact sciences,normality tests,probability and statistics,shapiro–wilk test,tests}
}

@article{shapiro1972,
  author       = {Shapiro, S. S. and Francia, R. S.},
  title        = {An approximate analysis of variance test for normality},
  date         = {1972-03-01},
  journaltitle = {Journal of the American Statistical Association},
  volume       = {67},
  number       = {337},
  pages        = {215--216},
  publisher    = {ASA Website},
  issn         = {0162-1459},
  doi          = {10.1080/01621459.1972.10481232},
  url          = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1972.10481232},
  urldate      = {2024-09-28},
  abstract     = {This article presents a modification of the Shapiro-Wilk W statistic for testing normality which can be used with large samples. Shapiro and Wilk gave coefficients and percentage points for sample sizes up to 50. These coefficients required obtaining an approximation to the covariance matrix of the normal order statistics. The proposed test uses coefficients which depend only on the expected values of the normal order statistics which are generally available. Results of an empirical sampling study to compare the sensitivity of the test statistic to the W test statistic are briefly discussed.},
  langid       = {english},
  keywords     = {exact sciences,normality tests,probability and statistics,shapiro-francia test,tests},
  file         = {G:\Meu Drive\Zotero\files\Shapiro and Francia - 1972 - An approximate analysis of variance test for normality.pdf}
}

% Shatz (2024), Behavior Research Methods 56(2), pp. 826-845; tagged under
% assumption checks / statistical diagnostics.
% Located by DOI only: a url of the form https://doi.org/<doi> adds nothing
% and is printed a second time by styles that output both fields.
@article{shatz2024,
  title = {Assumption-checking rather than (just) testing: the importance of visualization and effect size in statistical diagnostics},
  shorttitle = {Assumption-checking rather than (just) testing},
  author = {Shatz, Itamar},
  date = {2024-02-01},
  journaltitle = {Behavior Research Methods},
  shortjournal = {Behav Res},
  volume = {56},
  number = {2},
  pages = {826--845},
  issn = {1554-3528},
  doi = {10.3758/s13428-023-02072-x},
  abstract = {Statistical methods generally have assumptions (e.g., normality in linear regression models). Violations of these assumptions can cause various issues, like statistical errors and biased estimates, whose impact can range from inconsequential to critical. Accordingly, it is important to check these assumptions, but this is often done in a flawed way. Here, I first present a prevalent but problematic approach to diagnostics—testing assumptions using null hypothesis significance tests (e.g., the Shapiro–Wilk test of normality). Then, I consolidate and illustrate the issues with this approach, primarily using simulations. These issues include statistical errors (i.e., false positives, especially with large samples, and false negatives, especially with small samples), false binarity, limited descriptiveness, misinterpretation (e.g., of p-value as an~effect size), and potential testing failure due to unmet test assumptions. Finally, I synthesize the implications of these issues for statistical diagnostics, and provide practical recommendations for improving such diagnostics. Key recommendations include maintaining awareness of the issues with assumption tests (while recognizing they can be useful), using appropriate combinations of diagnostic methods (including visualization and effect sizes) while recognizing their limitations, and distinguishing between testing and checking assumptions. Additional recommendations include judging assumption violations as a complex spectrum (rather than a simplistic binary), using programmatic tools that increase replicability and decrease researcher degrees of freedom, and sharing the material and rationale involved in the diagnostics.},
  langid = {english},
  keywords = {assumption checks,exact sciences,general linear models,graphical methods,null hypothesis significance testing,probability and statistics,statistical assumptions,statistical diagnostics,visualization},
  file = {G:\Meu Drive\Zotero\files\Shatz - 2024 - Assumption-checking rather than (just) testing- the importance of visualization and effect size in s.pdf}
}

% Smirnov (1948), vol. 19, pp. 279-281; tagged under the kolmogorov-smirnov
% test.
% journaltitle and issn use the same form as the anderson1952 entry of this
% file, which cites the same journal.
% NOTE(review): issue number and DOI were added from the published record;
% confirm against the publisher landing page.
@article{smirnov1948,
  title = {Table for estimating the goodness of fit of empirical distributions},
  author = {Smirnov, N.},
  date = {1948},
  journaltitle = {The Annals of Mathematical Statistics},
  volume = {19},
  number = {2},
  pages = {279--281},
  issn = {0003-4851},
  doi = {10.1214/aoms/1177730256},
  langid = {english},
  keywords = {exact sciences,kolmogorov-smirnov test,normality tests,probability and statistics},
  file = {G:\Meu Drive\Zotero\files\Smirnov - 1948 - Table for estimating the goodness of fit of empirical distributions.pdf}
}

% Struck (2024): online book (University of Wisconsin-Madison), cited by url.
% The title is stored in sentence case like the other titles in this file;
% only the language name R is brace-protected against case changes.
@book{struck2024,
  title = {Regression diagnostics with {{R}}},
  author = {Struck, Jason},
  date = {2024-06},
  publisher = {University of Wisconsin-Madison},
  location = {Madison, WI},
  url = {https://sscc.wisc.edu/sscc/pubs/RegDiag-R/},
  urldate = {2024-09-29},
  abstract = {This book uses R. A Stata version of this book is available at Regression Diagnostics with Stata. Regression diagnostics are a critical step in the modeling process. Diagnostics for regression models are tools that assess a model’s compliance to its assumptions and investigate if there is a single observation or group of observations that are not well represented by the model. These tools allow researchers to evaluate if a model appropriately represents the data of their study. In this book we separate diagnostics from the other parts of model selection to provide a focus on this important topic. This separation is not meant to imply that these tools are used separately from other regression modeling tools.},
  langid = {english},
  keywords = {exact sciences,general linear models,probability and statistics,regression diagnostics}
}

% Thode (2002), Testing for normality: book, number 164 in its series
% (Marcel Dekker, New York), 479 pages in total.
% Fields are grouped as: who/what/when, series, imprint, identifiers, metadata.
@book{thode2002,
  author    = {Thode, Henry C.},
  title     = {Testing for normality},
  date      = {2002},
  series    = {Statistics, textbooks and monographs},
  number    = {164},
  publisher = {Marcel Dekker},
  location  = {New York},
  pagetotal = {479},
  isbn      = {978-0-8247-9613-6},
  langid    = {english},
  keywords  = {exact sciences,normality tests,probability and statistics},
  file      = {G:\Meu Drive\Zotero\files\Thode - 2002 - Testing for normality.pdf}
}

% Welsch and Kuh (1977): Working Paper 0173, National Bureau of Economic
% Research; tagged under regression diagnostics (dfbetas, dffits).
% The length of the report is recorded in pagetotal, as in the book entries
% of this file; a single number in pages would be printed as a page
% reference ("p. 44") instead of a page count.
@report{welsch1977,
  type = {Working Paper},
  title = {Linear regression diagnostics},
  author = {Welsch, Roy and Kuh, Edwin},
  date = {1977-03},
  number = {0173},
  pagetotal = {44},
  institution = {National Bureau of Economic Research},
  location = {Cambridge, MA},
  doi = {10.3386/w0173},
  url = {http://www.nber.org/papers/w0173.pdf},
  urldate = {2024-09-30},
  langid = {english},
  keywords = {dfbetas,dffits,diagnostic plots,difference in betas,difference in fits,exact sciences,general linear models,probability and statistics,regression diagnostics,residuals},
  file = {G:\Meu Drive\Zotero\files\Welsch and Kuh - 1977 - Linear regression diagnostics.pdf}
}

% White (1980), Econometrica 48(4), pp. 817-838; tagged under
% heteroskedasticity / white test.
% publisher is a two-item list: items are separated by the keyword "and",
% without the square brackets used by the catalogue record.
@article{white1980,
  title = {A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity},
  author = {White, Halbert},
  date = {1980},
  journaltitle = {Econometrica},
  volume = {48},
  number = {4},
  eprint = {1912934},
  eprinttype = {jstor},
  pages = {817--838},
  publisher = {Wiley and Econometric Society},
  issn = {0012-9682},
  doi = {10.2307/1912934},
  url = {https://www.jstor.org/stable/1912934},
  urldate = {2024-09-28},
  abstract = {This paper presents a parameter covariance matrix estimator which is consistent even when the disturbances of a linear regression model are heteroskedastic. This estimator does not depend on a formal model of the structure of the heteroskedasticity. By comparing the elements of the new estimator to those of the usual covariance estimator, one obtains a direct test for heteroskedasticity, since in the absence of heteroskedasticity, the two estimators will be approximately equal, but will generally diverge otherwise. The test has an appealing least squares interpretation.},
  langid = {english},
  keywords = {exact sciences,heteroskedasticity,probability and statistics,tests,white test},
  file = {G:\Meu Drive\Zotero\files\White - 1980 - A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity.pdf}
}

% Zeileis (2004), Journal of Statistical Software 11(10), pp. 1-17; tagged
% under hc / hac covariance matrix estimators.
% Located by DOI only: a url of the form https://doi.org/<doi> adds nothing
% and is printed a second time by styles that output both fields.
% The acronyms HC and HAC are brace-protected against case changes.
@article{zeileis2004,
  title = {Econometric computing with {{HC}} and {{HAC}} covariance matrix estimators},
  author = {Zeileis, Achim},
  date = {2004-11-29},
  journaltitle = {Journal of Statistical Software},
  volume = {11},
  number = {10},
  pages = {1--17},
  issn = {1548-7660},
  doi = {10.18637/jss.v011.i10},
  abstract = {Data described by econometric models typically contains autocorrelation and/or heteroskedasticity of unknown form and for inference in such models it is essential to use covariance matrix estimators that can consistently estimate the covariance of the model parameters. Hence, suitable heteroskedasticity consistent (HC) and heteroskedasticity and autocorrelation consistent (HAC) estimators have been receiving attention in the econometric literature over the last 20 years. To apply these estimators in practice, an implementation is needed that preferably translates the conceptual properties of the underlying theoretical frameworks into computational tools. In this paper, such an implementation in the package sandwich in the R system for statistical computing is described and it is shown how the suggested functions provide reusable components that build on readily existing functionality and how they can be integrated easily into new inferential procedures or applications. The toolbox contained in sandwich is extremely flexible and comprehensive, including specific functions for the most important HC and HAC estimators from the econometric literature. Several real-world data sets are used to illustrate how the functionality can be integrated into applications.},
  langid = {english},
  keywords = {autocorrelation,covariance matrix,exact sciences,hac,hc,heteroscedasticity and autocorrelation consistent,probability and statistics},
  file = {G:\Meu Drive\Zotero\files\Zeileis - 2004 - Econometric computing with HC and HAC covariance matrix estimators.pdf}
}