-
Notifications
You must be signed in to change notification settings - Fork 0
/
references.bib
1084 lines (1022 loc) · 70.7 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% Allen (1997): book on regression analysis, Plenum Press, New York.
@book{allen1997,
  author    = {Allen, Michael Patrick},
  title     = {Understanding regression analysis},
  date      = {1997},
  publisher = {Plenum Press},
  location  = {New York},
  isbn      = {978-0-306-45648-0},
  pagetotal = {216},
  keywords  = {data science,exact sciences,multivariate methods,nested models,probability and statistics,univariate methods},
  file      = {G:\Meu Drive\Zotero\files\Allen - 1997 - Understanding regression analysis.pdf}
}
% Anderson and Darling (1952): Annals of Mathematical Statistics 23(2), pp. 193-212.
% Abstract math is stored as real LaTeX so it typesets instead of printing escape sequences.
@article{anderson1952,
  author       = {Anderson, T. W. and Darling, D. A.},
  title        = {Asymptotic theory of certain ``goodness of fit'' criteria based on stochastic processes},
  date         = {1952},
  journaltitle = {The Annals of Mathematical Statistics},
  volume       = {23},
  number       = {2},
  eprint       = {2236446},
  eprinttype   = {jstor},
  pages        = {193--212},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {0003-4851},
  doi          = {10.1214/aoms/1177729437},
  url          = {https://www.jstor.org/stable/2236446},
  urldate      = {2024-09-28},
  abstract     = {The statistical problem treated is that of testing the hypothesis that $n$ independent, identically distributed random variables have a specified continuous distribution function $F(x)$. If $F_n(x)$ is the empirical cumulative distribution function and $\psi(t)$ is some nonnegative weight function ($0 \leq t \leq 1$), we consider $n^{\frac{1}{2}} \sup_{-\infty < x < \infty} \{|F(x) - F_n(x)| \, \psi^{\frac{1}{2}}[F(x)]\}$ and $n \int_{-\infty}^{\infty} [F(x) - F_n(x)]^2 \, \psi[F(x)] \, dF(x)$. A general method for calculating the limiting distributions of these criteria is developed by reducing them to corresponding problems in stochastic processes, which in turn lead to more or less classical eigenvalue and boundary value problems for special classes of differential equations. For certain weight functions including $\psi = 1$ and $\psi = 1/[t(1 - t)]$ we give explicit limiting distributions. A table of the asymptotic distribution of the von Mises $\omega^2$ criterion is given.},
  langid       = {english},
  keywords     = {anderson-darling test,exact sciences,normality tests,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Anderson and Darling - 1952 - Asymptotic theory of certain goodness of fit criteria based on stochastic processes.pdf}
}
% Anderson and Darling (1954): Journal of the American Statistical Association 49(268), pp. 765-769.
% Publisher follows the naming already used for other tandfonline.com records in this file.
@article{anderson1954,
  author       = {Anderson, T. W. and Darling, D. A.},
  title        = {A test of goodness of fit},
  date         = {1954-12-01},
  journaltitle = {Journal of the American Statistical Association},
  volume       = {49},
  number       = {268},
  pages        = {765--769},
  publisher    = {Taylor \& Francis},
  issn         = {0162-1459},
  doi          = {10.1080/01621459.1954.10501232},
  url          = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1954.10501232},
  urldate      = {2024-09-28},
  abstract     = {Some (large sample) significance points are tabulated for a distribution-free test of goodness of fit which was introduced earlier by the authors. The test, which uses the actual observations without grouping, is sensitive to discrepancies at the tails of the distribution rather than near the median. An illustration is given, using a numerical example used previously by Birnbaum in illustrating the Kolmogorov test.},
  langid       = {english},
  keywords     = {anderson-darling test,exact sciences,normality tests,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Anderson and Darling - 1954 - A test of goodness of fit.pdf}
}
% Anderson (1962): Annals of Mathematical Statistics 33(3), pp. 1148-1159.
% Abstract math is stored as real inline LaTeX; the two displayed equations are kept inline with their numbers (1) and (2).
@article{anderson1962,
  author       = {Anderson, T. W.},
  title        = {On the distribution of the two-sample {{Cramer-von Mises}} criterion},
  date         = {1962-09},
  journaltitle = {The Annals of Mathematical Statistics},
  volume       = {33},
  number       = {3},
  pages        = {1148--1159},
  publisher    = {Institute of Mathematical Statistics},
  issn         = {0003-4851, 2168-8990},
  doi          = {10.1214/aoms/1177704477},
  url          = {https://projecteuclid.org/journals/annals-of-mathematical-statistics/volume-33/issue-3/On-the-Distribution-of-the-Two-Sample-Cramer-von-Mises/10.1214/aoms/1177704477.full},
  urldate      = {2024-09-28},
  abstract     = {The Cramer-von Mises $\omega^2$ criterion for testing that a sample, $x_1, \cdots, x_N$, has been drawn from a specified continuous distribution $F(x)$ is $\omega^2 = \int_{-\infty}^{\infty} [F_N(x) - F(x)]^2 \, dF(x)$ (1), where $F_N(x)$ is the empirical distribution function of the sample; that is, $F_N(x) = k/N$ if exactly $k$ observations are less than or equal to $x$ ($k = 0, 1, \cdots, N$). If there is a second sample, $y_1, \cdots, y_M$, a test of the hypothesis that the two samples come from the same (unspecified) continuous distribution can be based on the analogue of $N\omega^2$, namely $T = [NM/(N + M)] \int_{-\infty}^{\infty} [F_N(x) - G_M(x)]^2 \, dH_{N+M}(x)$ (2), where $G_M(x)$ is the empirical distribution function of the second sample and $H_{N+M}(x)$ is the empirical distribution function of the two samples together [that is, $(N + M)H_{N+M}(x) = NF_N(x) + MG_M(x)$].
The limiting distribution of $N\omega^2$ as $N \rightarrow \infty$ has been tabulated [2], and it has been shown ([3], [4a], and [7]) that $T$ has the same limiting distribution as $N \rightarrow \infty, M \rightarrow \infty$, and $N/M \rightarrow \lambda$, where $\lambda$ is any finite positive constant. In this note we consider the distribution of $T$ for small values of $N$ and $M$ and present tables to permit use of the criterion at some conventional significance levels for small values of $N$ and $M$. The limiting distribution seems a surprisingly good approximation to the exact distribution for moderate sample sizes (corresponding to the same feature for $N\omega^2$ [6]). The accuracy of approximation is better than in the case of the two-sample Kolmogorov-Smirnov statistic studied by Hodges [4].},
  langid       = {english},
  keywords     = {cramer-von mises test,exact sciences,normality tests,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Anderson - 1962 - On the distribution of the two-sample Cramer-von Mises criterion.pdf}
}
% Arif and MacNeil (2022): Ecology Letters 25(8), pp. 1741-1745.
@article{arif2022,
  author       = {Arif, Suchinta and MacNeil, M. Aaron},
  title        = {Predictive models aren't for causal inference},
  date         = {2022-08},
  journaltitle = {Ecology Letters},
  shortjournal = {Ecology Letters},
  volume       = {25},
  number       = {8},
  pages        = {1741--1745},
  issn         = {1461-023X, 1461-0248},
  doi          = {10.1111/ele.14033},
  url          = {https://onlinelibrary.wiley.com/doi/10.1111/ele.14033},
  urldate      = {2023-07-27},
  langid       = {english},
  keywords     = {biological sciences,data science,ecology,exact sciences,modeling,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Arif - 2022 - Predictive models aren't for causal inference.pdf}
}
% Belsley, Kuh and Welsch (2004): book in the Wiley Series in Probability and Statistics.
% The annotation is kept on one line; layout is left to the bibliography style.
@book{belsley2004,
  author     = {Belsley, David A. and Kuh, Edwin and Welsch, Roy E.},
  title      = {Regression diagnostics: identifying influential data and sources of collinearity},
  shorttitle = {Regression diagnostics},
  date       = {2004},
  series     = {Wiley {{Series}} in {{Probability}} and {{Statistics}}},
  publisher  = {John Wiley \& Sons},
  location   = {Hoboken, NJ},
  doi        = {10.1002/0471725153},
  abstract   = {The Wiley-Interscience Paperback Series consists of selected books that have been made more accessible to consumers in an effort to increase global appeal and general circulation. With these new unabridged softcover volumes, Wiley hopes to extend the lives of these works by making them available to future generations of statisticians, mathematicians, and scientists. "The title of the book more or less sums up the contents. It appears to me to represent a real breakthrough in the art of dealing in ‘unconventional’ data. . . . I found the whole book both readable and enjoyable. It is suitable for data analysts, academic statisticians, and professional software writers." –Journal of the Royal Statistical Society "The book assumes a working knowledge of all of the principal results and techniques used in least squares multiple regression, as expressed in vector and matrix notation. Given this background, the book is clear and easy to use. . . . The techniques are illustrated in great detail with practical data sets from econometrics." –Short Book Reviews, International Statistical Institute Regression Diagnostics: Identifying Influential Data and Sources of Collinearity provides practicing statisticians and econometricians with new tools for assessing quality and reliability of regression estimates. Diagnostic techniques are developed that aid in the systematic location of data points that are unusual or inordinately influential; measure the presence and intensity of collinear relations among the regression data; and help to identify variables involved in each and pinpoint estimated coefficients potentially most adversely affected. The book emphasizes diagnostics and includes suggestions for remedial action.},
  isbn       = {0-471-69117-8},
  langid     = {english},
  pagetotal  = {292},
  keywords   = {Regression analysis},
  annotation = {Print ISBN: 9780471058564; Online ISBN: 9780471725152},
  file       = {G:\Meu Drive\Zotero\files\Belsley et al. - 2004 - Regression diagnostics identifying influential data and sources of collinearity.pdf}
}
% Bera and Jarque (1981): Economics Letters 7(4), pp. 313-318.
% Only the proper noun "Monte Carlo" is case-protected in the title.
@article{bera1981,
  author       = {Bera, Anil K. and Jarque, Carlos M.},
  title        = {Efficient tests for normality, homoscedasticity and serial independence of regression residuals: {{Monte Carlo}} evidence},
  shorttitle   = {Efficient tests for normality, homoscedasticity and serial independence of regression residuals},
  date         = {1981-01-01},
  journaltitle = {Economics Letters},
  shortjournal = {Economics Letters},
  volume       = {7},
  number       = {4},
  pages        = {313--318},
  issn         = {0165-1765},
  doi          = {10.1016/0165-1765(81)90035-5},
  url          = {https://www.sciencedirect.com/science/article/pii/0165176581900355},
  urldate      = {2024-09-28},
  abstract     = {In this paper we study the performance of various tests for normality (N), homoscedasticity (H) and serial independence (I) of regression residuals (u) under one, two and three directional departures from H0: u ∼ NHI.},
  langid       = {english},
  keywords     = {exact sciences,jarque-bera test,normality tests,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Bera and Jarque - 1981 - Efficient tests for normality, homoscedasticity and serial independence of regression residuals Mon.pdf}
}
% Bonett and Seier (2002): Computational Statistics and Data Analysis 40(3), pp. 435-445.
% Author initials carry their period, as in the other entries of this file.
@article{bonett2002,
  author       = {Bonett, Douglas G. and Seier, Edith},
  title        = {A test of normality with high uniform power},
  date         = {2002-09-28},
  journaltitle = {Computational Statistics \& Data Analysis},
  shortjournal = {Computational Statistics \& Data Analysis},
  volume       = {40},
  number       = {3},
  pages        = {435--445},
  issn         = {0167-9473},
  doi          = {10.1016/S0167-9473(02)00074-9},
  url          = {https://www.sciencedirect.com/science/article/pii/S0167947302000749},
  urldate      = {2024-09-28},
  abstract     = {Kurtosis can be measured in more than one way. A modification of Geary's measure of kurtosis is shown to be more sensitive to kurtosis in the center of the distribution while Pearson's measure of kurtosis is more sensitive to kurtosis in the tails of the distribution. The modified Geary measure and the Pearson measure are used to define a joint test of kurtosis that has high uniform power across a very wide range of symmetric nonnormal distributions.},
  langid       = {english},
  keywords     = {bonett-seier test,exact sciences,kurtosis,leptokurtosis,normality tests,probability and statistics,shapiro–wilk test},
  file         = {G:\Meu Drive\Zotero\files\Bonett and Seier - 2002 - A test of normality with high uniform power.pdf}
}
% Box and Pierce (1970): Journal of the American Statistical Association 65(332), pp. 1509-1526.
% Publisher follows the naming already used for other tandfonline.com records in this file.
@article{box1970,
  author       = {Box, G. E. P. and Pierce, David A.},
  title        = {Distribution of residual autocorrelations in autoregressive-integrated moving average time series models},
  date         = {1970-12-01},
  journaltitle = {Journal of the American Statistical Association},
  volume       = {65},
  number       = {332},
  pages        = {1509--1526},
  publisher    = {Taylor \& Francis},
  issn         = {0162-1459},
  doi          = {10.1080/01621459.1970.10481180},
  url          = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1970.10481180},
  urldate      = {2024-09-28},
  abstract     = {Many statistical models, and in particular autoregressive—moving average time series models, can be regarded as means of transforming the data to white noise, that is, to an uncorrelated sequence of errors. If the parameters are known exactly, this random sequence can be computed directly from the observations; when this calculation is made with estimates substituted for the true parameter values, the resulting sequence is referred to as the “residuals,” which can be regarded as estimates of the errors. If the appropriate model has been chosen, there will be zero autocorrelation in the errors. In checking adequacy of fit it is therefore logical to study the sample autocorrelation function of the residuals. For large samples the residuals from a correctly fitted model resemble very closely the true errors of the process; however, care is needed in interpreting the serial correlations of the residuals. It is shown here that the residual autocorrelations are to a close approximation representable as a singular linear transformation of the autocorrelations of the errors so that they possess a singular normal distribution. Failing to allow for this results in a tendency to overlook evidence of lack of fit. Tests of fit and diagnostic checks are devised which take these facts into account.},
  langid       = {english},
  keywords     = {autocorrelation,autocorrelation tests,box-pierce test,exact sciences,ljung-box test,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Box and Pierce - 1970 - Distribution of residual autocorrelations in autoregressive-integrated moving average time series mo.pdf}
}
% Breusch and Pagan (1979): Econometrica 47(5), pp. 1287-1294.
% The publisher field is a biblatex list: the two publishers are joined with "and".
@article{breusch1979,
  author       = {Breusch, T. S. and Pagan, A. R.},
  title        = {A simple test for heteroscedasticity and random coefficient variation},
  date         = {1979},
  journaltitle = {Econometrica},
  volume       = {47},
  number       = {5},
  eprint       = {1911963},
  eprinttype   = {jstor},
  pages        = {1287--1294},
  publisher    = {Wiley and Econometric Society},
  issn         = {0012-9682},
  doi          = {10.2307/1911963},
  url          = {https://www.jstor.org/stable/1911963},
  urldate      = {2024-09-27},
  abstract     = {A simple test for heteroscedastic disturbances in a linear regression model is developed using the framework of the Lagrangian multiplier test. For a wide range of heteroscedastic and random coefficient specifications, the criterion is given as a readily computed function of the OLS residuals. Some finite sample evidence is presented to supplement the general asymptotic properties of Lagrangian multiplier tests.},
  langid       = {english},
  keywords     = {breusch–pagan test,exact sciences,general linear models,heteroskedasticity,homoskedasticity,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Breusch and Pagan - 1979 - A simple test for heteroscedasticity and random coefficient variation.pdf}
}
% Bussab (1988): book in Brazilian Portuguese, 2nd edition, Atlas, São Paulo.
@book{bussab1988,
  author     = {Bussab, Wilton de Oliveira},
  title      = {Análise de variância e de regressão: uma introdução},
  shorttitle = {Análise de variância e de regressão},
  date       = {1988},
  series     = {Métodos quantitativos},
  edition    = {2},
  publisher  = {Atlas},
  location   = {São Paulo},
  langid     = {brazilian},
  keywords   = {data science,exact sciences,multivariate methods,probability and statistics,univariate methods},
  file       = {G:\Meu Drive\Zotero\files\Bussab - 1988 - Analise de variancia e de regressao.pdf}
}
% Casella and Berger (2002): book, 2nd edition, Duxbury advanced series.
@book{casella2002,
  author    = {Casella, George and Berger, Roger L.},
  title     = {Statistical inference},
  date      = {2002},
  series    = {Duxbury advanced series},
  edition   = {2},
  publisher = {Duxbury},
  location  = {Pacific Grove, CA},
  isbn      = {0-534-24312-6},
  langid    = {english},
  pagetotal = {660},
  keywords  = {exact sciences,fundamentals of probability and statistics,probability and statistics,statistical inference,textbooks},
  file      = {G:\Meu Drive\Zotero\files\Casella - 2002 - Statistical inference.pdf}
}
% Chatterjee and Hadi (2012): book, 5th edition, Wiley Series in Probability and Statistics.
@book{chatterjee2012,
  author    = {Chatterjee, Samprit and Hadi, Ali S.},
  title     = {Regression analysis by example},
  date      = {2012},
  series    = {Wiley {{Series}} in {{Probability}} and {{Statistics}}},
  edition   = {5},
  publisher = {Wiley},
  location  = {Hoboken, NJ},
  abstract  = {This fifth edition has been expanded and thoroughly updated to reflect recent advances in the field. The emphasis continues to be on exploratory data analysis rather than statistical theory. The coverage offers in-depth treatment of regression diagnostics, transformation, multicollinearity, logistic regression, and robust regression. Methods of regression analysis are clearly demonstrated, and examples containing the types of irregularities commonly encountered in the real world are provided. Each example isolates one or two techniques and features detailed discussions of the techniques themselves, the required assumptions, and the evaluated success of each technique.},
  isbn      = {978-0-470-90584-5},
  langid    = {english},
  pagetotal = {393},
  keywords  = {exact sciences,general linear models,influential observations,modeling,probability and statistics,regression analysis,regression diagnostics},
  file      = {G:\Meu Drive\Zotero\files\Chatterjee and Hadi - 2012 - Regression analysis by example.pdf}
}
% Cohen (1988): book, 2nd edition, Lawrence Erlbaum Associates.
% The state abbreviation uses the same "City, ST" form as the other locations in this file.
@book{cohen1988,
  author    = {Cohen, Jacob},
  title     = {Statistical power analysis for the behavioral sciences},
  date      = {1988},
  edition   = {2},
  publisher = {Lawrence Erlbaum Associates},
  location  = {Hillsdale, NJ},
  isbn      = {978-0-8058-0283-2},
  pagetotal = {567},
  keywords  = {exact sciences,power analysis,probability and statistics,sampling},
  file      = {G:\Meu Drive\Zotero\files\Cohen - 1988 - Statistical power analysis for the behavioral sciences.pdf}
}
% Cohen, Cohen, West and Aiken (2002): book, 3rd edition, Lawrence Erlbaum Associates.
@book{cohen2002,
  author     = {Cohen, Jacob and Cohen, Patricia and West, Stephen G. and Aiken, Leona S.},
  title      = {Applied multiple regression/correlation analysis for the behavioral sciences},
  date       = {2002-08},
  edition    = {3},
  publisher  = {Lawrence Erlbaum Associates},
  location   = {Mahwah, NJ},
  abstract   = {This classic text on multiple regression is noted for its nonmathematical, applied, and data-analytic approach. Readers profit from its verbal-conceptual exposition and frequent use of examples. The applied emphasis provides clear illustrations of the principles and provides worked examples of the types of applications that are possible. Researchers learn how to specify regression models that directly address their research questions. An overview of the fundamental ideas of multiple regression and a review of bivariate correlation and regression and other elementary statistical concepts provide a strong foundation for understanding the rest of the text. The third edition features an increased emphasis on graphics and the use of confidence intervals and effect size measures, and an accompanying website with data for most of the numerical examples along with the computer code for SPSS, SAS, and SYSTAT, at www.psypress.com/9780805822236 . Applied Multiple Regression serves as both a textbook for graduate students and as a reference tool for researchers in psychology, education, health sciences, communications, business, sociology, political science, anthropology, and economics. An introductory knowledge of statistics is required. Self-standing chapters minimize the need for researchers to refer to previous chapters.},
  isbn       = {0-8058-2223-2},
  langid     = {english},
  pagetotal  = {703},
  keywords   = {correlation (statistics),exact sciences,general linear models,linear models,probability and statistics,regression analysis,social sciences,statistical methods,textbooks},
  annotation = {OCLC: ocm49903199},
  file       = {G:\Meu Drive\Zotero\files\Cohen et al. - 2003 - Applied multiple regressioncorrelation analysis for the behavioral sciences.pdf}
}
% Cook (1977): Technometrics 19(1), pp. 15-18.
% Publisher follows the naming already used for other tandfonline.com records in this file.
@article{cook1977,
  author       = {Cook, R. Dennis},
  title        = {Detection of influential observation in linear regression},
  date         = {1977-02},
  journaltitle = {Technometrics},
  volume       = {19},
  number       = {1},
  pages        = {15--18},
  publisher    = {Taylor \& Francis},
  issn         = {0040-1706},
  doi          = {10.1080/00401706.1977.10489493},
  url          = {https://www.tandfonline.com/doi/abs/10.1080/00401706.1977.10489493},
  urldate      = {2024-09-29},
  langid       = {english},
  keywords     = {confidence ellipsoids,cooks d,exact sciences,general linear models,influential observations,outliers,probability and statistics,variances of residuals},
  file         = {G:\Meu Drive\Zotero\files\Cook - 1977 - Detection of influential observation in linear regression.pdf}
}
% Cook (1979): Journal of the American Statistical Association 74(365), pp. 169-174.
% Publisher follows the naming already used for other tandfonline.com records in this file.
@article{cook1979,
  author       = {Cook, R. Dennis},
  title        = {Influential observations in linear regression},
  date         = {1979-03-01},
  journaltitle = {Journal of the American Statistical Association},
  volume       = {74},
  number       = {365},
  pages        = {169--174},
  publisher    = {Taylor \& Francis},
  issn         = {0162-1459},
  doi          = {10.1080/01621459.1979.10481634},
  url          = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1979.10481634},
  urldate      = {2024-09-29},
  abstract     = {Characteristics of observations which cause them to be influential in a least squares analysis are investigated and related to residual variances, residual correlations, and the convex hull of the observed values of the independent variables. It is shown how deleting an observation can substantially alter an analysis by changing the partial F-tests, the studentized residuals, the residual variances, the convex hull of the independent variables, and the estimated parameter vector. Outliers are discussed briefly, and an example is presented.},
  langid       = {english},
  keywords     = {cooks d,deleting observations,general linear models,outliers,partial f-tests,probability and statistics,residual correlations,studentized residuals},
  file         = {G:\Meu Drive\Zotero\files\Cook - 1979 - Influential observations in linear regression 1.pdf}
}
% Cramér (1928): Scandinavian Actuarial Journal 1928(1), pp. 13-74.
@article{cramer1928,
  author       = {Cramér, Harald},
  title        = {On the composition of elementary errors: {{First}} paper: {{Mathematical}} deductions},
  shorttitle   = {On the composition of elementary errors},
  date         = {1928-01-01},
  journaltitle = {Scandinavian Actuarial Journal},
  volume       = {1928},
  number       = {1},
  pages        = {13--74},
  publisher    = {Taylor \& Francis},
  issn         = {0346-1238},
  doi          = {10.1080/03461238.1928.10416862},
  url          = {https://doi.org/10.1080/03461238.1928.10416862},
  urldate      = {2024-09-28},
  langid       = {english},
  keywords     = {cramer-von mises test,exact sciences,normality tests,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Cramér - 1928 - On the composition of elementary errors First paper Mathematical deductions.pdf}
}
% D'Agostino (1971): Biometrika 58(2), pp. 341-348.
% The skewness and kurtosis statistics in the abstract are written as inline math.
@article{dagostino1971,
  author       = {D'Agostino, Ralph B.},
  title        = {An omnibus test of normality for moderate and large size samples},
  date         = {1971-08-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {58},
  number       = {2},
  pages        = {341--348},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/58.2.341},
  url          = {https://doi.org/10.1093/biomet/58.2.341},
  urldate      = {2024-09-28},
  abstract     = {We present a test of normality based on a statistic D which is up to a constant the ratio of Downton's linear unbiased estimator of the population standard deviation to the sample standard deviation. For the usual levels of significance Monte Carlo simulations indicate that Cornish-Fisher expansions adequately approximate the null distribution of D if the sample size is 50 or more. The test is an omnibus test, being appropriate to detect deviations from normality due either to skewness or kurtosis. Simulation results of powers for various alternatives when the sample size is 50 indicate that the test compares favourably with the Shapiro-Wilk W test, $\sqrt{b_1}$, $b_2$ and the ratio of range to standard deviation.},
  langid       = {english},
  keywords     = {d'agostino test,exact sciences,normality tests,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\D'AGOSTINO - 1971 - An omnibus test of normality for moderate and large size samples.pdf}
}
% D'Agostino and Pearson (1973): Biometrika 60(3), pp. 613-622.
% The statistics in the title are braced inline math so styles neither recase nor mis-typeset them.
@article{dagostino1973,
  author       = {D'Agostino, Ralph B. and Pearson, E. S.},
  title        = {Tests for departure from normality. {{Empirical}} results for the distributions of {$b_2$} and {$\sqrt{b_1}$}},
  date         = {1973},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {60},
  number       = {3},
  pages        = {613--622},
  issn         = {0006-3444, 1464-3510},
  doi          = {10.1093/biomet/60.3.613},
  url          = {https://academic.oup.com/biomet/article-lookup/doi/10.1093/biomet/60.3.613},
  urldate      = {2023-11-14},
  langid       = {english},
  keywords     = {d'agostino test,exact sciences,normality tests,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\D'Agostino - 1973 - Tests for departure from normality.pdf}
}
% D'Agostino, Belanger and D'Agostino Jr. (1990): The American Statistician 44(4), pp. 316-321.
% The junior author uses the "Last, Jr., First" name form; the publisher field is a biblatex list joined with "and".
@article{dagostino1990,
  author       = {D'Agostino, Ralph B. and Belanger, Albert and D'Agostino, Jr., Ralph B.},
  title        = {A suggestion for using powerful and informative tests of normality},
  date         = {1990},
  journaltitle = {The American Statistician},
  volume       = {44},
  number       = {4},
  eprint       = {2684359},
  eprinttype   = {jstor},
  pages        = {316--321},
  publisher    = {American Statistical Association and Taylor \& Francis},
  issn         = {0003-1305},
  doi          = {10.2307/2684359},
  url          = {https://www.jstor.org/stable/2684359},
  urldate      = {2023-11-15},
  abstract     = {For testing that an underlying population is normally distributed the skewness and kurtosis statistics, $\sqrt{b_1}$ and $b_2$, and the D'Agostino-Pearson $K^2$ statistic that combines these two statistics have been shown to be powerful and informative tests. Their use, however, has not been as prevalent as their usefulness. We review these tests and show how readily available and popular statistical software can be used to implement them. Their relationship to deviations from linearity in normal probability plotting is also presented.},
  langid       = {english},
  keywords     = {d'agostino test,exact sciences,normality tests,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\D'Agostino - 1990 - A suggestion for using powerful and informative tests of normality.pdf}
}
% Dallal and Wilkinson (1986): The American Statistician 40(4), pp. 294-296.
@article{dallal1986,
  author       = {Dallal, Gerard E. and Wilkinson, Leland},
  title        = {An analytic approximation to the distribution of {{Lilliefors}}'s test statistic for normality},
  date         = {1986-11},
  journaltitle = {The American Statistician},
  shortjournal = {The American Statistician},
  volume       = {40},
  number       = {4},
  pages        = {294--296},
  issn         = {0003-1305, 1537-2731},
  doi          = {10.1080/00031305.1986.10475419},
  url          = {http://www.tandfonline.com/doi/abs/10.1080/00031305.1986.10475419},
  urldate      = {2023-11-14},
  langid       = {english},
  keywords     = {exact sciences,kolmogorov-smirnov test,normality tests,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Dallal - 1986 - An analytic approximation to the distribution of Lilliefors's test statistic.pdf}
}
% Dalpiaz: online book at book.stat420.org; the record carries no date.
@book{dalpiaz,
  author   = {Dalpiaz, David},
  title    = {Applied statistics with {{R}}},
  url      = {https://book.stat420.org/},
  langid   = {english},
  keywords = {data science,exact sciences,probability and statistics,programming,r},
  file     = {G:\Meu Drive\Zotero\files\Dalpiaz - - Applied statistics with R.pdf}
}
% DeGroot and Schervish (2012): book, 4th edition, Addison-Wesley; two attached files, separated by a semicolon.
@book{degroot2012,
  author     = {DeGroot, Morris H. and Schervish, Mark J.},
  title      = {Probability and statistics},
  date       = {2012},
  edition    = {4},
  publisher  = {Addison-Wesley},
  location   = {Boston},
  isbn       = {978-0-321-50046-5},
  langid     = {english},
  pagetotal  = {893},
  keywords   = {exact sciences,fundamentals of probability and statistics,probability and statistics,textbooks},
  annotation = {OCLC: ocn502674206},
  file       = {G\:\\Meu Drive\\Zotero\\files\\DeGroot - 2012 - Probability and statistics.pdf;G\:\\Meu Drive\\Zotero\\files\\DeGroot - 2012 - Probability and statistics.zip}
}
% Dudek (2020): online book at bcdudek.net/regression1.
@book{dudek2020,
  author   = {Dudek, Bruce},
  title    = {Linear models with {{R}}: emphasis on 2-{{IV}} models: basics of multiple regression},
  date     = {2020-09-09},
  url      = {https://bcdudek.net/regression1/},
  langid   = {english},
  keywords = {data science,exact sciences,multivariate methods,probability and statistics,programming,r},
  file     = {G:\Meu Drive\Zotero\files\Dudek - 2020 - Linear models with R.pdf}
}
% Durbin and Watson (1950): part I of the series, Biometrika 37(3-4), pp. 409-428.
@article{durbin1950,
  author       = {Durbin, J. and Watson, G. S.},
  title        = {Testing for serial correlation in least squares regression. {{I}}},
  date         = {1950-12-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {37},
  number       = {3-4},
  pages        = {409--428},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/37.3-4.409},
  url          = {https://doi.org/10.1093/biomet/37.3-4.409},
  urldate      = {2024-09-27},
  langid       = {english},
  keywords     = {autocorrelation,durbin-watson test,exact sciences,general linear models,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Durbin and Watson - 1950 - Testing for serial correlation in least squares regression. I.pdf}
}
@article{durbin1951,
  title        = {Testing for serial correlation in least squares regression. {{II}}},
  author       = {Durbin, J. and Watson, G. S.},
  date         = {1951-06-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {38},
  number       = {1-2},
  pages        = {159--178},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/38.1-2.159},
  url          = {https://doi.org/10.1093/biomet/38.1-2.159},
  urldate      = {2024-09-27},
  langid       = {english},
  keywords     = {autocorrelation,durbin-watson test,exact sciences,general linear models,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Durbin and Watson - 1951 - Testing for serial correlation in least squares regression. II.pdf},
}
@article{durbin1971,
  title        = {Testing for serial correlation in least squares regression. {{III}}},
  author       = {Durbin, J. and Watson, G. S.},
  date         = {1971-04-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {58},
  number       = {1},
  pages        = {1--19},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/58.1.1},
  url          = {https://doi.org/10.1093/biomet/58.1.1},
  urldate      = {2024-09-27},
  abstract     = {The paper considers a number of problems arising from the test of serial correlation based on the d statistic proposed earlier by the authors (Durbin \& Watson, 1950, 1951). Methods of computing the exact distribution of d are investigated and the exact distribution is compared with six approximations to it for four sets of published data. It is found that approximations suggested by Theil and Nagar and by Hannan are too inaccurate for practical use but that the beta approximation proposed in the 1950 and 1951 papers and a new approximation, called by us the a + bdu approximation and based, like the beta approximation, on the exact first two moments of d, both perform well. The power of the d test is compared with that of certain exact tests proposed by Theil, Durbin, Koerts and Abrahamse from the standpoint of invariance theory. It is shown that the d test is locally most powerful invariant but that the other tests are not. There are three appendices. The first gives an account of the exact distribution of d. The second derives the mean and variance to a second order of approximation of a modified maximum likelihood statistic closely related to d. The third sets out details of the computations required for the a + bdu approximation.},
  langid       = {english},
  keywords     = {autocorrelation,durbin-watson test,exact sciences,general linear models,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Durbin and Watson - 1971 - Testing for serial correlation in least squares regression. III.pdf},
}
@book{falk1992,
  title        = {A primer for soft modeling},
  author       = {Falk, R. Frank and Miller, Nancy B.},
  date         = {1992-12-01},
  publisher    = {University of Akron Press},
  location     = {Akron, Ohio},
  abstract     = {A practical guide to "soft modeling" that relies on a computer application strategy, this book is intended for researchers and students interested in a structural equation modeling approach to path analysis that solves many measurement issues encountered in social science research.},
  isbn         = {978-0-9622628-4-5},
  langid       = {english},
  pagetotal    = {103},
  keywords     = {exact sciences,general linear models,modeling,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Falk and Miller - 1992 - A primer for soft modeling.pdf},
}
@book{fox2016,
  title        = {Applied regression analysis and generalized linear models},
  author       = {Fox, John},
  date         = {2016},
  edition      = {3},
  publisher    = {Sage},
  location     = {Thousand Oaks, CA},
  isbn         = {978-1-4522-0566-3},
  langid       = {english},
  pagetotal    = {791},
  keywords     = {data science,exact sciences,generalized linear models,multivariate methods,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Fox - 2016 - Applied regression analysis and generalized linear models.pdf},
}
@article{gorman2014,
  title        = {Ecological sexual dimorphism and environmental variability within a community of {{Antarctic}} penguins (genus {{Pygoscelis}})},
  author       = {Gorman, Kristen B. and Williams, Tony D. and Fraser, William R.},
  date         = {2014-03-05},
  journaltitle = {PLOS ONE},
  shortjournal = {PLOS ONE},
  volume       = {9},
  number       = {3},
  pages        = {e90081},
  publisher    = {Public Library of Science},
  issn         = {1932-6203},
  doi          = {10.1371/journal.pone.0090081},
  url          = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0090081},
  urldate      = {2024-09-26},
  abstract     = {Background Sexual segregation in vertebrate foraging niche is often associated with sexual size dimorphism (SSD), i.e., ecological sexual dimorphism. Although foraging behavior of male and female seabirds can vary markedly, differences in isotopic (carbon, δ13C and nitrogen, δ15N) foraging niche are generally more pronounced within sexually dimorphic species and during phases when competition for food is greater. We examined ecological sexual dimorphism among sympatric nesting Pygoscelis penguins asking whether environmental variability is associated with differences in male and female pre-breeding foraging niche. We predicted that all Pygoscelis species would forage sex-specifically, and that higher quality winter habitat, i.e., higher or lower sea ice coverage for a given species, would be associated with a more similar foraging niche among the sexes. Results P2/P8 primers reliably amplified DNA of all species. On average, male Pygoscelis penguins are structurally larger than female conspecifics. However, chinstrap penguins were more sexually dimorphic in culmen and flipper features than Adélie and gentoo penguins. Adélies and gentoos were more sexually dimorphic in body mass than chinstraps. Only male and female chinstraps and gentoos occupied separate δ15N foraging niches. Strong year effects in δ15N signatures were documented for all three species, however, only for Adélies, did yearly variation in δ15N signatures tightly correlate with winter sea ice conditions. There was no evidence that variation in sex-specific foraging niche interacted with yearly winter habitat quality. Conclusion Chinstraps were most sexually size dimorphic followed by gentoos and Adélies. Pre-breeding sex-specific foraging niche was associated with overall SSD indices across species; male chinstrap and gentoo penguins were enriched in δ15N relative to females. Our results highlight previously unknown trophic pathways that link Pygoscelis penguins with variation in Southern Ocean sea ice suggesting that each sex within a species should respond similarly in pre-breeding trophic foraging to changes in future winter habitat.},
  langid       = {english},
  keywords     = {animal sexual behavior,antarctica,biological sciences,biology,ecological niches,foraging,islands,isotopes,open data,open science,penguins,r,sea ice},
  file         = {G:\Meu Drive\Zotero\files\Gorman et al. - 2014 - Ecological sexual dimorphism and environmental variability within a community of antarctic penguins -genus pygoscelis-.pdf},
}
@online{greener2020,
  title        = {Stop testing for normality},
  author       = {Greener, Robert},
  date         = {2020-08-04T12:53:26},
  url          = {https://towardsdatascience.com/stop-testing-for-normality-dba96bb73f90},
  urldate      = {2024-09-29},
  abstract     = {Normality tests are misleading and a waste of your time!},
  langid       = {english},
  organization = {Medium},
  keywords     = {exact sciences,general linear models,normality tests,tests},
  file         = {G:\Meu Drive\Zotero\files\Greener - 2020 - Stop testing for normality.pdf},
}
@book{hair2019,
  title        = {Multivariate data analysis},
  author       = {Hair, Joseph F. and Black, William C. and Babin, Barry J. and Anderson, Rolph E.},
  date         = {2019},
  edition      = {8},
  publisher    = {Cengage},
  location     = {Andover, Hampshire},
  isbn         = {978-1-4737-5654-0},
  langid       = {english},
  pagetotal    = {813},
  keywords     = {data science,exact sciences,multivariate methods,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Hair - 2019 - Multivariate data analysis.pdf},
}
@article{jarque1980,
  title        = {Efficient tests for normality, homoscedasticity and serial independence of regression residuals},
  author       = {Jarque, Carlos M. and Bera, Anil K.},
  date         = {1980-01-01},
  journaltitle = {Economics Letters},
  shortjournal = {Economics Letters},
  volume       = {6},
  number       = {3},
  pages        = {255--259},
  issn         = {0165-1765},
  doi          = {10.1016/0165-1765(80)90024-5},
  url          = {https://www.sciencedirect.com/science/article/pii/0165176580900245},
  urldate      = {2024-09-28},
  abstract     = {We use the Lagrange multiplier procedure to derive efficient joint tests for residual normality, homoscedasticity and serial independence. The tests are simple to compute and asymptotically distributed as χ2.},
  langid       = {english},
  keywords     = {jarque-bera test,normality tests,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Jarque and Bera - 1980 - Efficient tests for normality, homoscedasticity and serial independence of regression residuals.pdf},
}
@article{jarque1987,
  title        = {A test for normality of observations and regression residuals},
  author       = {Jarque, Carlos M. and Bera, Anil K.},
  date         = {1987},
  journaltitle = {International Statistical Review},
  volume       = {55},
  number       = {2},
  eprint       = {1403192},
  eprinttype   = {jstor},
  pages        = {163--172},
  publisher    = {[Wiley, International Statistical Institute (ISI)]},
  issn         = {0306-7734},
  doi          = {10.2307/1403192},
  url          = {https://www.jstor.org/stable/1403192},
  urldate      = {2024-09-28},
  abstract     = {Using the Lagrange multiplier procedure or score test on the Pearson family of distributions we obtain tests for normality of observations and regression disturbances. The tests suggested have optimum asymptotic power properties and good finite sample performance. Due to their simplicity they should prove to be useful tools in statistical analysis.},
  langid       = {english},
  keywords     = {exact sciences,jarque-bera test,normality tests,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Jarque and Bera - 1987 - A test for normality of observations and regression residuals.pdf},
}
@book{johnson2013,
  title        = {Applied multivariate statistical analysis: {{Pearson}} new international edition},
  shorttitle   = {Applied multivariate statistical analysis},
  author       = {Johnson, Richard A. and Wichern, Dean W.},
  date         = {2013},
  edition      = {6},
  publisher    = {Pearson},
  location     = {Harlow, UK},
  abstract     = {For courses in Multivariate Statistics, Marketing Research, Intermediate Business Statistics, Statistics in Education, and graduate-level courses in Experimental Design and Statistics. Appropriate for experimental scientists in a variety of disciplines, this market-leading text offers a readable introduction to the statistical analysis of multivariate observations. Its primary goal is to impart the knowledge necessary to make proper interpretations and select appropriate techniques for analysing multivariate data. Ideal for a junior/senior or graduate level course that explores the statistical methods for describing and analysing multivariate data, the text assumes two or more statistics courses as a prerequisite. The full text downloaded to your computer With eBooks you can: search for key concepts, words and phrases make highlights and notes as you study share your notes with friends eBooks are downloaded to your computer and accessible either offline through the Bookshelf (available as a free download), available online and also via the iPad and Android apps. Upon purchase, you will receive via email the code and instructions on how to access this product. Time limit The eBooks products do not have an expiry date. You will continue to access your digital ebook products whilst you have your Bookshelf installed.},
  isbn         = {978-1-292-03757-8},
  langid       = {english},
  keywords     = {data science,exact sciences,multivariate methods,probability and statistics},
  annotation   = {OCLC: 1277290670. Pearson New International Edition.},
  file         = {G:\Meu Drive\Zotero\files\Johnson - 2013 - Applied multivariate statistical analysis.pdf},
}
@article{koenker1981,
  title        = {A note on studentizing a test for heteroscedasticity},
  author       = {Koenker, Roger},
  date         = {1981-09-01},
  journaltitle = {Journal of Econometrics},
  shortjournal = {Journal of Econometrics},
  volume       = {17},
  number       = {1},
  pages        = {107--112},
  issn         = {0304-4076},
  doi          = {10.1016/0304-4076(81)90062-2},
  url          = {https://www.sciencedirect.com/science/article/pii/0304407681900622},
  urldate      = {2024-09-28},
  abstract     = {Breusch and Pagan (1979) have recently proposed a convenient test for heteroscedasticity in general linear models. This note derives the asymptotic distribution of their test under sequences of contiguous alternatives to the null hypothesis of homoscedasticity. The test is shown to possess asymptotically incorrect size (nominal significance level) except in the case of strictly Gaussian disturbances. A slight modification of the test is proposed which corrects this defect.},
  langid       = {english},
  keywords     = {breusch–pagan test,exact sciences,heteroskedasticity,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Koenker - 1981 - A note on studentizing a test for heteroscedasticity.pdf},
}
@article{kolmogorov1933,
  title        = {Sulla determinazione empirica di una legge di distribuzione},
  author       = {Kolmogorov, A.},
  date         = {1933},
  journaltitle = {Giornale dell'Istituto Italiano degli Attuari},
  volume       = {4},
  pages        = {83--91},
  langid       = {italian},
  keywords     = {exact sciences,kolmogorov-smirnov test,normality tests,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Kolmogorov - 1933 - Sulla determinazione empirica di una legge di distribuzione.pdf},
}
@article{kozak2018,
  title        = {What's normal anyway? {{Residual}} plots are more telling than significance tests when checking {{ANOVA}} assumptions},
  shorttitle   = {What's normal anyway?},
  author       = {Kozak, M. and Piepho, H.-P.},
  date         = {2018},
  journaltitle = {Journal of Agronomy and Crop Science},
  volume       = {204},
  number       = {1},
  pages        = {86--98},
  issn         = {1439-037X},
  doi          = {10.1111/jac.12220},
  url          = {https://onlinelibrary.wiley.com/doi/abs/10.1111/jac.12220},
  urldate      = {2024-09-29},
  abstract     = {We consider two questions important for applying analysis of variance (ANOVA): Should normality be checked on the raw data or on the residuals (or is it immaterial which of the two approaches we take)? Should normality and homogeneity of variance be checked using significance tests or diagnostic plots (or both)? Based on two examples, we show that residuals should be used for model checking and that residual plots are better for checking ANOVA assumptions than statistical tests. We also discuss why one should be very cautious when using statistical tests to check the assumptions.},
  langid       = {english},
  keywords     = {anova,assumption checks,diagnostic plots,exact sciences,general linear models,linear models,probability and statistics,statistical assumptions},
  file         = {G:\Meu Drive\Zotero\files\Kozak and Piepho - 2018 - What's normal anyway Residual plots are more telling than significance tests when checking ANOVA as.pdf},
}
@book{kuhn2022,
  title        = {Tidy modeling with {{R}}: a framework for modeling in the tidyverse},
  shorttitle   = {Tidy modeling with {{R}}},
  author       = {Kuhn, Max and Silge, Julia},
  date         = {2022},
  publisher    = {O'Reilly Media},
  location     = {Sebastopol, CA},
  url          = {https://www.tmwr.org/},
  abstract     = {Get going with tidymodels, a collection of R packages for modeling and machine learning. Whether you're just starting out or have years of experience with modeling, this practical introduction shows data analysts, business analysts, and data scientists how the tidymodels framework offers a consistent, flexible approach for your work. RStudio engineers Max Kuhn and Julia Silge demonstrate ways to create models by focusing on an R dialect called the tidyverse. Software that adopts tidyverse principles shares both a high-level design philosophy and low-level grammar and data structures, so learning one piece of the ecosystem makes it easier to learn the next. You'll understand why the tidymodels framework has been built to be used by a broad range of people.},
  isbn         = {978-1-4920-9648-1},
  langid       = {english},
  pagetotal    = {363},
  keywords     = {artificial intelligence,exact sciences,machine learning,modeling,probability and statistics,programming,r,tidyverse},
  annotation   = {OCLC: on1338675673},
  file         = {G\:\\Meu Drive\\Zotero\\files\\Kuhn - 2022 - Tidy modeling with R.epub;G\:\\Meu Drive\\Zotero\\files\\Kuhn - 2022 - Tidy modeling with R.pdf},
}
@article{lilliefors1967,
  title        = {On the {{Kolmogorov-Smirnov}} test for normality with mean and variance unknown},
  author       = {Lilliefors, Hubert W.},
  date         = {1967-06},
  journaltitle = {Journal of the American Statistical Association},
  shortjournal = {Journal of the American Statistical Association},
  volume       = {62},
  number       = {318},
  pages        = {399--402},
  issn         = {0162-1459, 1537-274X},
  doi          = {10.1080/01621459.1967.10482916},
  url          = {http://www.tandfonline.com/doi/abs/10.1080/01621459.1967.10482916},
  urldate      = {2023-11-14},
  langid       = {english},
  keywords     = {exact sciences,kolmogorov-smirnov test,normality tests,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Lilliefors - 1967 - On the Kolmogorov-Smirnov test for normality with mean and variance unknown.pdf},
}
@article{ljung1978,
  title        = {On a measure of lack of fit in time series models},
  author       = {Ljung, G. M. and Box, G. E. P.},
  date         = {1978-08-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume       = {65},
  number       = {2},
  pages        = {297--303},
  issn         = {0006-3444},
  doi          = {10.1093/biomet/65.2.297},
  url          = {https://doi.org/10.1093/biomet/65.2.297},
  urldate      = {2024-09-28},
  abstract     = {The overall test for lack of fit in autoregressive-moving average models proposed by Box \& Pierce (1970) is considered. It is shown that a substantially improved approximation results from a simple modification of this test. Some consideration is given to the power of such tests and their robustness when the innovations are nonnormal. Similar modifications in the overall tests used for transfer function-noise models are proposed},
  langid       = {english},
  keywords     = {autocorrelation,autocorrelation tests,exact sciences,ljung-box test,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Ljung and Box - 1978 - On a measure of lack of fit in time series models.pdf},
}
@article{massey1951,
  title        = {The {{Kolmogorov-Smirnov}} test for goodness of fit},
  author       = {Massey, Frank J.},
  date         = {1951-03},
  journaltitle = {Journal of the American Statistical Association},
  shortjournal = {Journal of the American Statistical Association},
  volume       = {46},
  number       = {253},
  pages        = {68--78},
  issn         = {0162-1459, 1537-274X},
  doi          = {10.1080/01621459.1951.10500769},
  url          = {http://www.tandfonline.com/doi/abs/10.1080/01621459.1951.10500769},
  urldate      = {2023-11-14},
  langid       = {english},
  keywords     = {exact sciences,kolmogorov-smirnov test,normality tests,probability and statistics},
  file         = {G:\Meu Drive\Zotero\files\Massey - 1951 - The Kolmogorov-Smirnov test for goodness of fit.pdf},
}
@book{nahhas2024,
  title        = {Introduction to regression methods for public health using {{R}}},
  author       = {Nahhas, Ramzi W.},
  date         = {2024-10-13},
  url          = {https://www.bookdown.org/rwnahhas/RMPH/},
  urldate      = {2024-10-14},
  abstract     = {This text was written to be used in a second biostatistics course for Master of Public Health students; however, students in any field will find it useful. Students in many disciplines take an introductory statistics course, providing foundational competencies but perhaps not enough to use more advanced methods without additional training. There are a plethora of textbooks covering topics such as linear regression, logistic regression, and survival analysis aimed at those with a background in mathematical statistics and/or without a focus specifically on public health and/or without a focus on using R statistical software. The goal of this text is to provide a gentle introduction to regression methods, using R, that covers all the basics and a bit more, with examples drawn from public health data.},
  langid       = {english},
  keywords     = {exact sciences,general linear models,probability and statistics,regression analysis,regression diagnostics},
}
@article{newey1987,
  title        = {A simple, positive semi-definite, heteroskedasticity and autocorrelation consistent covariance matrix},
  author       = {Newey, Whitney K. and West, Kenneth D.},
  date         = {1987},
  journaltitle = {Econometrica},
  volume       = {55},
  number       = {3},
  eprint       = {1913610},
  eprinttype   = {jstor},
  pages        = {703--708},
  publisher    = {[Wiley, Econometric Society]},
  issn         = {0012-9682},
  doi          = {10.2307/1913610},
  url          = {https://www.jstor.org/stable/1913610},
  urldate      = {2024-09-28},
  langid       = {english},
  keywords     = {autocorrelation,autocorrelation tests,exact sciences,heteroskedasticity,newey-west estimator,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Newey and West - 1987 - A simple, positive semi-definite, heteroskedasticity and autocorrelation consistent covariance matrix.pdf},
}
@article{newey1994,
  title        = {Automatic lag selection in covariance matrix estimation},
  author       = {Newey, Whitney K. and West, Kenneth D.},
  date         = {1994-10-01},
  journaltitle = {The Review of Economic Studies},
  shortjournal = {The Review of Economic Studies},
  volume       = {61},
  number       = {4},
  pages        = {631--653},
  issn         = {0034-6527},
  doi          = {10.2307/2297912},
  url          = {https://doi.org/10.2307/2297912},
  urldate      = {2024-09-28},
  abstract     = {We propose a nonparametric method for automatically selecting the number of autocovariances to use in computing a heteroskedasticity and autocorrelation consistent covariance matrix. For a given kernel for weighting the autocovariances, we prove that our procedure is asymptotically equivalent to one that is optimal under a mean-squared error loss function. Monte Carlo simulations suggest that our procedure performs tolerably well, although it does result in size distortions.},
  langid       = {english},
  keywords     = {autocorrelation,exact sciences,heteroskedasticity,newey-west estimator,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Newey and West - 1994 - Automatic lag selection in covariance matrix estimation.pdf},
}
@article{neyman1928,
  title        = {On the use and interpretation of certain test criteria for purposes of statistical inference: part {{I}}},
  shorttitle   = {On the use and interpretation of certain test criteria for purposes of statistical inference},
  author       = {Neyman, J. and Pearson, E. S.},
  date         = {1928},
  journaltitle = {Biometrika},
  volume       = {20A},
  number       = {1/2},
  eprint       = {2331945},
  eprinttype   = {jstor},
  pages        = {175--240},
  publisher    = {[Oxford University Press, Biometrika Trust]},
  issn         = {0006-3444},
  doi          = {10.2307/2331945},
  url          = {https://www.jstor.org/stable/2331945},
  urldate      = {2024-09-30},
  langid       = {english},
  keywords     = {exact sciences,extraordinary publications,hypothesis tests,hypothetico–deductive method,probability and statistics,statistical inference},
  file         = {G:\Meu Drive\Zotero\files\Neyman and Pearson - 1928 - On the use and interpretation of certain test criteria for purposes of statistical inference part I.pdf},
}
@article{neyman1928a,
  title        = {On the use and interpretation of certain test criteria for purposes of statistical inference: part {{II}}},
  shorttitle   = {On the use and interpretation of certain test criteria for purposes of statistical inference},
  author       = {Neyman, J. and Pearson, E. S.},
  date         = {1928},
  journaltitle = {Biometrika},
  volume       = {20A},
  number       = {3/4},
  eprint       = {2332112},
  eprinttype   = {jstor},
  pages        = {263--294},
  publisher    = {[Oxford University Press, Biometrika Trust]},
  issn         = {0006-3444},
  doi          = {10.2307/2332112},
  url          = {https://www.jstor.org/stable/2332112},
  urldate      = {2024-09-30},
  langid       = {english},
  keywords     = {exact sciences,extraordinary publications,hypothesis tests,hypothetico–deductive method,probability and statistics,statistical inference},
  file         = {G:\Meu Drive\Zotero\files\Neyman and Pearson - 1928 - On the use and interpretation of certain test criteria for purposes of statistical inference part I 1.pdf},
}
@article{pearson1900,
  title        = {X. {{On}} the criterion that a given system of deviations from the probable in the case of a correlated system of variables is such that it can be reasonably supposed to have arisen from random sampling},
  author       = {Pearson, Karl},
  date         = {1900-07},
  journaltitle = {The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science},
  volume       = {50},
  number       = {302},
  pages        = {157--175},
  publisher    = {Taylor \& Francis},
  issn         = {1941-5982},
  doi          = {10.1080/14786440009463897},
  url          = {https://www.tandfonline.com/doi/abs/10.1080/14786440009463897},
  urldate      = {2024-09-28},
  langid       = {english},
  keywords     = {exact sciences,normality tests,pearson chi-squared test,probability and statistics,tests},
  file         = {G:\Meu Drive\Zotero\files\Pearson - 1900 - X. On the criterion that a given system of deviations from the probable in the case of a correlated.pdf},
}
@article{peek2003,
  title        = {How much variance is explained by ecologists? {{Additional}} perspectives},
  shorttitle   = {How much variance is explained by ecologists?},
  author       = {Peek, Michael S. and Leffler, A. Joshua and Flint, Stephan D. and Ryel, Ronald J.},
  date         = {2003},
  journaltitle = {Oecologia},
  volume       = {137},
  number       = {2},
  eprint       = {4223745},
  eprinttype   = {jstor},
  pages        = {161--170},
  publisher    = {Springer},
  issn         = {0029-8549},
  url          = {https://www.jstor.org/stable/4223745},
  urldate      = {2024-09-29},
  abstract     = {A recent meta-analysis of meta-analyses by Møller and Jennions (2002, Oecologia 132: 492-500) suggested that ecologists using statistical models are explaining between 2.5\% and 5.42\% of the variability in ecological studies. Although we agree that there is considerable variability in ecological systems that is not explained, we disagree with the approach and general conclusions of Møller and Jennions. As an alternate perspective, we explored the question: "How much ecological variation in relationships is not explained?" We did this by examining published studies in five different journals representative of the numerous sub-disciplines of ecology. We quantified the proportion of variance not explained in statistical models as the residual or random error compared to the total variation in the data set. Our results indicate that statistical models explain roughly half of the variation in variables of interest, vastly different from the 2.5\%-5.42\% reported by Møller and Jennions. This difference resulted largely from a different level of analysis: we considered the original study to be the appropriate level for quantifying variability while Møller and Jennions combined studies at different temporal and spatial scales and attempted to find universal single-factor relationships between ecological variables across study organisms or locations. Therefore, we believe that Møller and Jennions actually measured the universality of single factor effects across multiple ecological systems, not the amount of variability in ecological studies explained by ecologists. This study, combined with Møller and Jennions', illustrates importance of applying statistical models appropriately to assess ecological relationships.},
  langid       = {english},
  keywords     = {biological sciences,ecology,effect size,r squared},
  file         = {G:\Meu Drive\Zotero\files\Peek et al. - 2003 -How much variance is explained by ecologists Additional perspectives.pdf},
}
@article{perezgonzalez2015,
  title        = {Fisher, {{Neyman-Pearson}} or {{NHST}}? {{A}} tutorial for teaching data testing},
  shorttitle   = {Fisher, {{Neyman-Pearson}} or {{NHST}}?},
  author       = {Perezgonzalez, Jose D.},
  date         = {2015-03-02},
  journaltitle = {Frontiers in Psychology},
  shortjournal = {Front. Psychol.},
  volume       = {6},
  pages        = {223},
  publisher    = {Frontiers},
  issn         = {1664-1078},
  doi          = {10.3389/fpsyg.2015.00223},
  url          = {https://www.frontiersin.org/journals/psychology/articles/10.3389/fpsyg.2015.00223/full},
  urldate      = {2024-09-30},
  abstract     = {Despite frequent calls for the overhaul of null hypothesis significance testing (NHST), this controversial procedure remains ubiquitous in behavioral, social and biomedical teaching and research. Little change seems possible once the procedure becomes well ingrained in the minds and current practice of researchers; thus, the optimal opportunity for such change is at the time the procedure is taught, be this at undergraduate or at postgraduate levels. This paper presents a tutorial for the teaching of data testing procedures, often referred to as hypothesis testing theories. The first procedure introduced is Fisher's approach to data testing—tests of significance; the second is Neyman-Pearson's approach—tests of acceptance; the final procedure is the incongruent combination of the previous two theories into the current approach—NHST. For those researchers sticking with the latter, two compromise solutions on how to improve NHST conclude the tutorial.},
  langid       = {english},
  keywords     = {exact sciences,fisher,history,hypothesis tests,hypothetico–deductive method,neyman-pearson,null hypothesis significance testing,science,statistical education,teaching statistics,test of significance,test of statistical hypothesis},
  file         = {G:\Meu Drive\Zotero\files\Perezgonzalez - 2015 - Fisher, Neyman-Pearson or NHST A tutorial for teaching data testing.pdf},
}
@book{popper1979,
  title        = {Objective knowledge: an evolutionary approach},
  shorttitle   = {Objective knowledge},
  author       = {Popper, Karl R.},
  date         = {1979},
  origdate     = {1972},
  publisher    = {Oxford University Press},
  location     = {Oxford, UK},
  isbn         = {978-0-19-824370-0},
  langid       = {english},
  pagetotal    = {395},
  keywords     = {epistemology,ontology,philosophy,science,scientific methodology},
  annotation   = {Originally published in 1972.},
  file         = {G:\Meu Drive\Zotero\files\Popper - 1979 - Objective knowledge.pdf},
}
@article{ramsey1969,
  title        = {Tests for specification errors in classical linear least-squares regression analysis},
  author       = {Ramsey, J. B.},
  date         = {1969},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  volume       = {31},
  number       = {2},
  pages        = {350--371},
  publisher    = {[Royal Statistical Society, Oxford University Press]},
  issn         = {0035-9246},
  doi          = {10.1111/j.2517-6161.1969.tb00796.x},
  url          = {https://academic.oup.com/jrsssb/article-abstract/31/2/350/7027014},
  urldate      = {2024-09-29},
  abstract     = {The effects on the distribution of least-squares residuals of a series of model mis-specifications are considered. It is shown that for a variety of specification errors the distributions of the least-squares residuals are normal, but with non-zero means. An alternative predictor of the disturbance vector is used in developing four procedures for testing for the presence of specification error. The specification errors considered are omitted variables, incorrect functional form, simultaneous equation problems and heteroskedasticity.},
  langid       = {english},
  keywords     = {exact sciences,general linear models,linearity,probability and statistics,ramsey reset test,tests},
  file         = {G:\Meu Drive\Zotero\files\Ramsey - 1969 - Tests for specification errors in classical linear least-squares regression analysis.pdf},
}
@article{schucany2006,
  title        = {Preliminary goodness-of-fit tests for normality do not validate the one-sample {{Student}} t},
  author       = {Schucany, William R. and Ng, H. K. Tony},
  date         = {2006-12-01},
  journaltitle = {Communications in Statistics - Theory and Methods},
  volume       = {35},
  number       = {12},
  pages        = {2275--2286},
  publisher    = {Taylor \& Francis Group},
  issn         = {0361-0926},
  doi          = {10.1080/03610920600853308},
  url          = {https://www.tandfonline.com/doi/abs/10.1080/03610920600853308},
  urldate      = {2024-09-29},
  abstract     = {One of the most basic topics in many introductory statistical methods texts is inference for a population mean, μ. The primary tool for confidence intervals and tests is the Student t sampling dist...},
  langid       = {english},
  keywords     = {assumption checks,exact sciences,general linear models,normality tests,probability and statistics,statistical assumptions,tests},
}
% Shapiro and Wilk (1965): analysis-of-variance test for normality (tagged "shapiro–wilk test" in keywords).
@article{shapiro1965,
  title = {An analysis of variance test for normality (complete samples)},
  author = {Shapiro, S. S. and Wilk, M. B.},
  date = {1965-12-01},
  journaltitle = {Biometrika},
  shortjournal = {Biometrika},
  volume = {52},
  number = {3-4},
  pages = {591--611},
  issn = {0006-3444},
  doi = {10.1093/biomet/52.3-4.591},
  url = {https://doi.org/10.1093/biomet/52.3-4.591},
  urldate = {2024-09-28},
  langid = {english},
  keywords = {exact sciences,normality tests,probability and statistics,shapiro–wilk test,tests}
}
% Shapiro and Francia (1972): large-sample modification of the Shapiro-Wilk W statistic (see abstract).
% NOTE(review): publisher normalised from the web-export placeholder "ASA Website"; confirm the preferred publisher name for this journal.
@article{shapiro1972,
  title = {An approximate analysis of variance test for normality},
  author = {Shapiro, S. S. and Francia, R. S.},
  date = {1972-03-01},
  journaltitle = {Journal of the American Statistical Association},
  volume = {67},
  number = {337},
  pages = {215--216},
  publisher = {American Statistical Association},
  issn = {0162-1459},
  doi = {10.1080/01621459.1972.10481232},
  url = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1972.10481232},
  urldate = {2024-09-28},
  abstract = {This article presents a modification of the Shapiro-Wilk W statistic for testing normality which can be used with large samples. Shapiro and Wilk gave coefficients and percentage points for sample sizes up to 50. These coefficients required obtaining an approximation to the covariance matrix of the normal order statistics. The proposed test uses coefficients which depend only on the expected values of the normal order statistics which are generally available. Results of an empirical sampling study to compare the sensitivity of the test statistic to the W test statistic are briefly discussed.},
  langid = {english},
  keywords = {exact sciences,normality tests,probability and statistics,shapiro-francia test,tests},
  file = {G:\Meu Drive\Zotero\files\Shapiro and Francia - 1972 - Approximate analysis of variance test for normality.pdf}
}
% Shatz (2024): argues for checking assumptions with visualization and effect sizes rather than significance tests alone.
@article{shatz2024,
  author       = {Shatz, Itamar},
  title        = {Assumption-checking rather than (just) testing: the importance of visualization and effect size in statistical diagnostics},
  shorttitle   = {Assumption-checking rather than (just) testing},
  journaltitle = {Behavior Research Methods},
  shortjournal = {Behav Res},
  date         = {2024-02-01},
  volume       = {56},
  number       = {2},
  pages        = {826--845},
  issn         = {1554-3528},
  doi          = {10.3758/s13428-023-02072-x},
  url          = {https://doi.org/10.3758/s13428-023-02072-x},
  urldate      = {2024-09-29},
  langid       = {english},
  keywords     = {assumption checks,exact sciences,general linear models,graphical methods,null hypothesis significance testing,probability and statistics,statistical assumptions,statistical diagnostics,visualization},
  abstract     = {Statistical methods generally have assumptions (e.g., normality in linear regression models). Violations of these assumptions can cause various issues, like statistical errors and biased estimates, whose impact can range from inconsequential to critical. Accordingly, it is important to check these assumptions, but this is often done in a flawed way. Here, I first present a prevalent but problematic approach to diagnostics—testing assumptions using null hypothesis significance tests (e.g., the Shapiro–Wilk test of normality). Then, I consolidate and illustrate the issues with this approach, primarily using simulations. These issues include statistical errors (i.e., false positives, especially with large samples, and false negatives, especially with small samples), false binarity, limited descriptiveness, misinterpretation (e.g., of p-value as an~effect size), and potential testing failure due to unmet test assumptions. Finally, I synthesize the implications of these issues for statistical diagnostics, and provide practical recommendations for improving such diagnostics. Key recommendations include maintaining awareness of the issues with assumption tests (while recognizing they can be useful), using appropriate combinations of diagnostic methods (including visualization and effect sizes) while recognizing their limitations, and distinguishing between testing and checking assumptions. Additional recommendations include judging assumption violations as a complex spectrum (rather than a simplistic binary), using programmatic tools that increase replicability and decrease researcher degrees of freedom, and sharing the material and rationale involved in the diagnostics.},
  file         = {G:\Meu Drive\Zotero\files\Shatz - 2024 - Assumption-checking rather than (just) testing- the importance of visualization and effect size in s.pdf}
}
% Smirnov (1948): goodness-of-fit table for empirical distributions (tagged "kolmogorov-smirnov test" in keywords).
% NOTE(review): number, doi and issn were filled in from the journal's public record; verify against the publisher page.
@article{smirnov1948,
  title = {Table for estimating the goodness of fit of empirical distributions},
  author = {Smirnov, N.},
  date = {1948},
  journaltitle = {Annals of Mathematical Statistics},
  volume = {19},
  number = {2},
  pages = {279--281},
  issn = {0003-4851},
  doi = {10.1214/aoms/1177730256},
  langid = {english},
  keywords = {exact sciences,kolmogorov-smirnov test,normality tests,probability and statistics},
  file = {G:\Meu Drive\Zotero\files\Smirnov - 1948 - Table for estimating the goodness of fit of empirical distributions.pdf}
}