-
Notifications
You must be signed in to change notification settings - Fork 1
/
datagressor-draft.do
143 lines (90 loc) · 4.37 KB
/
datagressor-draft.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
* ---------------------------------------------------------------------------
* Data preparation.
* Loads the in-sample CSV, multiplies the economic series by incumbent, adds
* squared midterm terms plus state and year dummies, and declares the panel.
* NOTE: the x_all global defined further down selects variables by range
* (first-last), so the order in which variables are created here must be kept.
* ---------------------------------------------------------------------------

* The clear option allows the script to be re-run in the same session; without
* it import delimited stops with an error when unsaved data are in memory.
import delimited "\\wsl.localhost\bastianin\home\tosho\bastianin-2022\datasets-clean\xxx-2012-insample-final-dataset.csv", clear

* satias is built from the raw z_4, i.e. before z_4 is multiplied by incumbent
* in the loop below.
gen satias = (2*z_4 + house_midterm -1)*incumbent/2
*gen satias = (2*z_mt_2 + house_midterm -1)*incumbent/2

* fair: multiply the fair_p_1 and fair_g_1 series by incumbent
replace fair_p_1 = fair_p_1*incumbent
replace fair_g_1 = fair_g_1*incumbent

* z and inc same

* Series indexed 1 to 4 (original note: yeet2year, presumably year-to-year).
* Only z_j is rescaled; the other replacements are deliberately disabled.
forvalues j = 1/4 {
    *replace def_`j'= def_`j'*incumbent
    *replace gdp_`j' = gdp_`j'*incumbent
    replace z_`j'= z_`j'*incumbent
    *replace avg_inc_`j'= avg_inc_`j'*incumbent
    *replace unemp_`j' = unemp_`j'*incumbent
}

* _mt_ series indexed 1 to 2 (original note: meed2mid, presumably
* midterm-to-midterm). Each series is multiplied by incumbent first, so the
* squared terms are squares of the already rescaled series.
forvalues j = 1/2 {
    replace def_mt_`j'= def_mt_`j'*incumbent
    replace gdp_mt_`j' = gdp_mt_`j'*incumbent
    replace z_mt_`j'= z_mt_`j'*incumbent
    replace avg_inc_mt_`j'= avg_inc_mt_`j'*incumbent
    *replace unemp_mt_`j' = unemp_mt_`j'*incumbent
    gen gdp_mt_`j'_pw2 = gdp_mt_`j'^2
    gen avg_inc_mt_`j'_pw2 = avg_inc_mt_`j'^2
    gen def_mt_`j'_pw2 = def_mt_`j'^2
}

replace unemp_rn = unemp_rn*incumbent

* fair_z_1 sums the four (already rescaled) z_j series.
gen fair_z_1 = z_1 + z_2 + z_3 + z_4
gen was_a_vice = sudden_vice + lag_vice

* One indicator per state (S1, S2, ...) and per year (Y1, Y2, ...).
tab state, gen(S)
tab year, gen(Y)

* Replace the string state variable with a numeric, labelled one so that it
* can be used as the panel identifier.
encode state, gen(tmp)
drop state
rename tmp state

* Panel: state by year, with observations four years apart.
xtset state year, delta(4)
***** Benchmark: fixed-effects model with the same variables used by Fair.
***** State effects enter as i.state dummies; standard errors are robust.
* Panel-estimator variant, kept for reference:
* xtreg y_votes_percent fair_g_1 fair_p_1 fair_z_1 incumbent-former_party_morethan_2, fe vce(cluster state)
regress y_votes_percent                      ///
    fair_g_1 fair_p_1 fair_z_1               ///
    incumbent-former_party_morethan_2        ///
    i.state,                                 ///
    vce(robust)
*<<<< TODO: discuss coefficient estimates and inferences.

**** Joint significance of the economic variables (every coefficient matching fair_*).
testparm fair_*

**** Poolability check: joint significance of the state-level fixed effects.
testparm i.state

* Fitted values from the regression above (linear prediction, the default statistic).
* They are meant to be used to assign each State to the candidate with the majority of votes.
predict y_hat_fr, xb
*<<<<<<<< TODO: translate popular vote into electoral vote using the information here and compare the forecasts with the results for 2012 and 2020. (https://www.archives.gov/electoral-college/allocation).
* LASSO stage. Apply the LASSO to the same regression, adding further terms that
* may help forecast presidential elections. Real per capita GDP must always stay
* in the regression.

* Candidate controls, given as variable ranges (these depend on variable order).
global x_all fair_p_1-def_mt_1 gdp_mt_1 z_mt_2-former_party_morethan_2 house_midterm-was_a_vice

* State and year dummies. S28 is not listed (presumably the omitted reference state).
global x_fe S1-S27 S29-S50 Y1-Y9

* Fixed-effects regression on the full candidate set, clustered by state.
xtreg y_votes_percent fair_g_1 gdp_mt_2 ${x_all}, ///
    fe                                            ///
    vce(cluster state)
************************************************************************
* Y-LASSO: outcome equation. The dummies in x_fe are placed in parentheses so
* that they are always included; the remaining regressors are penalised.
lasso linear y_votes_percent (${x_fe}) ${x_all} gdp_mt_2, ///
    selection(plugin, heteroskedastic)                    ///
    nolog
lassocoef
/*
house_midterm | x
def_mt_2_pw2 | x
*/
* Variables recorded from the Y-LASSO output above.
global yyy house_midterm def_mt_2_pw2

* X-LASSO: same specification with fair_g_1 as the dependent variable.
lasso linear fair_g_1 (${x_fe}) ${x_all} gdp_mt_2, ///
    selection(plugin, heteroskedastic)             ///
    nolog
lassocoef
/*
**************************
satias | x
gdp_mt_2_pw2 | x
gdp_mt_2 | x
**************************
z_mt_2 | x
avg_inc_mt_2 | x
fair_g_1 | x
**************************
*/
* Variables recorded from the first group of the X-LASSO output above.
global xxx satias gdp_mt_2 gdp_mt_2_pw2
* YX REG: post-selection regressions of the vote share on fair_g_1 plus the
* variables stored in the yyy and xxx globals.
* First with the explicit state and year dummies in x_fe, then as a state
* fixed-effects panel regression clustered by state.
reg y_votes_percent fair_g_1 $yyy $xxx $x_fe, vce(robust)
xtreg y_votes_percent fair_g_1 $yyy $xxx, fe vce(cluster state)

* After xtreg, fe the default predict statistic is xb, which leaves out the
* estimated state effect u_i. The xbu statistic adds it, so y_hat_ds is a fitted
* value on the same footing as y_hat_fr, which comes from a regression that
* contains i.state dummies.
predict y_hat_ds, xbu
* <<<<<<<<< TODO: compare the performance of LASSO with the performance of the previous model.

* The replace option lets the script be re-run once the output file exists.
export delimited using "datasets-clean\xxx-fitted-values.csv", replace
* <<<<<<<<< TODO: provide valid inferences for the variables selected by the LASSO and comment in relation to the OLS estimates.
* <<<<<< TODO: explain why a double-post-model selection approach is needed and why a naive post-model selection approach that simply excludes GDP from the LASSO penalty is not appropriate.
********************************END******************************
****************************USELESS DSREG***********************
* Scratch section: double-selection regressions via dsregress, marked as
* not useful by the author.
* selection(cv) splits the sample into random folds, so rseed() is set to make
* the selected variables and estimates reproducible across runs. The seed
* value itself is arbitrary.

* Variable of interest: gdp_mt_2. The x_fe dummies are always-included controls.
dsregress y_votes_percent gdp_mt_2, controls(($x_fe) $x_all fair_g_1) lasso(*, selection(cv, alllambdas)) rseed(12345)
lassocoef (.,for(y_votes_percent)) (.,for(gdp_mt_2))
***************************** ******************
* Variable of interest: fair_g_1, with gdp_mt_2 moved into the controls.
dsregress y_votes_percent fair_g_1, controls(($x_fe) $x_all gdp_mt_2) lasso(*, selection(cv, alllambdas)) rseed(12345)
lassocoef (.,for(y_votes_percent)) (.,for(fair_g_1))
****************************************************************