From e56725f1d2e036b0f65bef16c0ee72a06c534a20 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 22 Dec 2024 13:06:38 +0100 Subject: [PATCH] method for step halfing --- pyfixest/estimation/feglm.py | 346 +++++++++++++++++------------------ 1 file changed, 172 insertions(+), 174 deletions(-) diff --git a/pyfixest/estimation/feglm.py b/pyfixest/estimation/feglm.py index 6627a934..d4cf887e 100644 --- a/pyfixest/estimation/feglm.py +++ b/pyfixest/estimation/feglm.py @@ -1,15 +1,14 @@ -from abc import ABC, abstractmethod -from typing import Optional, Union - import numpy as np import pandas as pd - +from typing import Optional, Union +from pyfixest.estimation.fepois_ import Fepois, _fepois_input_checks from pyfixest.estimation.demean_ import demean -from pyfixest.estimation.fepois_ import Fepois +from abc import ABC, abstractmethod from pyfixest.estimation.FormulaParser import FixestFormula - +from pyfixest.utils.dev_utils import DataFrameType, _to_integer class Feglm(Fepois, ABC): + def __init__( self, FixestFormula: FixestFormula, @@ -33,30 +32,32 @@ def __init__( separation_check: Optional[list[str]] = None, ): super().__init__( - FixestFormula=FixestFormula, - data=data, - ssc_dict=ssc_dict, - drop_singletons=drop_singletons, - drop_intercept=drop_intercept, - weights=weights, - weights_type=weights_type, - tol=tol, - maxiter=maxiter, - collin_tol=collin_tol, - fixef_tol=fixef_tol, - lookup_demeaned_data=lookup_demeaned_data, - solver=solver, - store_data=store_data, - copy_data=copy_data, - lean=lean, - sample_split_var=sample_split_var, - sample_split_value=sample_split_value, + FixestFormula = FixestFormula, + data = data, + ssc_dict = ssc_dict, + drop_singletons = drop_singletons, + drop_intercept = drop_intercept, + weights = weights, + weights_type = weights_type, + tol = tol, + maxiter = maxiter, + collin_tol = collin_tol, + fixef_tol = fixef_tol, + lookup_demeaned_data = lookup_demeaned_data, + solver = solver, + store_data = store_data, + copy_data = copy_data, + lean = lean, + sample_split_var = sample_split_var, + sample_split_value = sample_split_value, ) self._method = "feglm" def get_fit(self): + "Fit the GLM model via iterated weighted least squares." + _Y = self._Y _X = self._X _fe = self._fe @@ -69,199 +70,164 @@ def get_fit(self): # initialize - # import pdb; pdb.set_trace() - # beta = np.linalg.lstsq(_X, _Y, rcond=None)[0].flatten() # Solve OLS - # eta = (_X @ beta).flatten() # Initialize eta using OLS - # mu = self.get_mu(theta=eta) # Compute mu using the link function - beta = np.zeros(_X.shape[1]) eta = np.zeros(_N) - mu = self.get_mu(theta=eta) + mu = self.get_mu(theta = eta) deviance_old = self.compute_deviance(_Y.flatten(), mu) for r in range(_maxiter): + # Step 1: compute weights w_tilde(r-1) and v(r-1) (eq. 2.5) - detadmu = self.get_detadmu(mu=mu) - v = self.get_v(y=_Y.flatten(), mu=mu, detadmu=detadmu) - W = self.get_W(mu=mu) + detadmu = self.get_detadmu(mu = mu) + v = self.get_v(y = _Y.flatten(), mu = mu, detadmu = detadmu) + W = self.get_W(mu = mu) # Step 2: compute v_tilde(r-1) and X_tilde(r-1) (eq. 3.2) - W_tilde = self.get_W_tilde(W=W) - X_tilde = self.get_X_tilde(W_tilde=W_tilde, X=_X) - v_tilde = self.get_v_tilde( - y=_Y.flatten(), mu=mu, W_tilde=W_tilde, detadmu=detadmu - ) + W_tilde = self.get_W_tilde(W = W) + X_tilde = self.get_X_tilde(W_tilde = W_tilde, X = _X) + v_tilde = self.get_v_tilde(y = _Y.flatten(), mu = mu, W_tilde = W_tilde.flatten(), detadmu = detadmu) # Step 3 compute v_dotdot(r-1) and X_dotdot(r-1) - demeaning - v_dotdot, X_dotdot = self.residualize( - v=v_tilde, - X=X_tilde, - flist=_fe, - weights=np.diag(W_tilde), - tol=_fixef_tol, - ) + v_dotdot, X_dotdot = self.residualize(v = v_tilde, X = X_tilde, flist = _fe, weights = W_tilde.flatten(), tol = _fixef_tol) # Step 4: compute (beta(r) - beta(r-1)) and check for convergence, update beta(r-1) (eq. 3.5) - beta_update_diff = self.get_beta_diff(X_dotdot=X_dotdot, v_dotdot=v_dotdot) - beta += beta_update_diff - - # Step 5: update eta(r) (eq. 4.5)t - eta = self.get_eta_update( - W_tilde=W_tilde, - v_tilde=v_tilde, - v_dotdot=v_dotdot, - X_dotdot=X_dotdot, - beta_diff=beta_update_diff, - eta=eta, + beta_update_diff = self.get_beta_diff(X_dotdot = X_dotdot, v_dotdot = v_dotdot) + + # Step 5: update using step halfing (if required) + beta, eta, mu, deviance, crit, step_accepted = self.update_eta_step_halfing( + Y = _Y, + beta = beta, + eta = eta, + mu = mu, + deviance = deviance_old, + beta_update_diff = beta_update_diff, + W_tilde = W_tilde.flatten(), + v_tilde = v_tilde, + v_dotdot = v_dotdot, + X_dotdot = X_dotdot, + deviance_old = deviance_old, + step_halfing_tolerance = 1e-12, ) - mu = self.get_mu(theta=eta) - - deviance = self.compute_deviance(_Y.flatten(), mu) - crit = self.compute_diff(deviance, deviance_old) - print(crit) deviance_old = deviance.copy() - converged = self.stop_iterating(crit=crit, tol=_tol, r=r, maxiter=_maxiter) + converged = self.stop_iterating(crit = crit, tol = _tol, r = r, maxiter = _maxiter) if converged: break - import pdb - - pdb.set_trace() - - if False: - # TBD: update all of the below, which is just directly copied from the Fepois class - self._beta_hat = delta_new.flatten() - self._Y_hat_response = mu.flatten() - self._Y_hat_link = eta.flatten() - # (Y - self._Y_hat) - # needed for the calculation of the vcov - - # update for inference - self._weights = mu_old - self._irls_weights = mu - # if only one dim - if self._weights.ndim == 1: - self._weights = self._weights.reshape((self._N, 1)) - - self._u_hat = (WZ - WX @ delta_new).flatten() - self._u_hat_working = resid - self._u_hat_response = self._Y - np.exp(eta) - - self._Y = WZ - self._X = WX - self._Z = self._X - self.deviance = deviance - - self._tZX = np.transpose(self._Z) @ self._X - self._tZXinv = np.linalg.inv(self._tZX) - self._Xbeta = eta - - self._scores = self._u_hat[:, None] * self._X - self._hessian = XWX - - if _convergence: - self._convergence = True - @abstractmethod def compute_deviance(self, y, mu): + "Compute the deviance for the GLM family." pass @abstractmethod def get_dispersion_phi(self, theta): + "Get the dispersion parameter phi for the GLM family." pass @abstractmethod def get_b(self, theta): + "Get the cumulant function b(theta) for the GLM family." pass - # @abstractmethod - # def get_c(self, y, phi): + #@abstractmethod + #def get_c(self, y, phi): # # "Get the function c(y, phi) for the GLM family." # pass @abstractmethod def get_mu(self, theta): + "Get the mean mu(theta) for the GLM family." pass @abstractmethod def get_link(self, mu): + "Get the link function theta(mu) for the GLM family." pass @abstractmethod def get_detadmu(self, mu): + "Get the derivative of mu(theta) with respect to theta for the GLM family." pass @abstractmethod def get_theta(self, mu): + "Get the mechanical link theta(mu) for the GLM family." pass @abstractmethod def get_V(self, mu): + "Get the variance function V(mu) for the GLM family." pass def get_v(self, y, mu, detadmu): + "Get vr for the GLM family." return (y - mu) * detadmu def get_W(self, mu): + "Get Wr for the GLM family." - return np.diag(1 / (self.get_detadmu(mu=mu) ** 2 * self.get_V(mu=mu))) + #return np.diag(1 / (self.get_detadmu(mu = mu) ** 2 * self.get_V(mu = mu)) ) + return (mu * (1 - mu)).reshape(-1, 1) def get_W_tilde(self, W): + "Get W_tilde (formula 3.2)." return np.sqrt(W) def get_v_tilde(self, y, mu, W_tilde, detadmu): + "Get v_tilde (formula 3.2)." - return W_tilde @ ((y - mu) * detadmu) + return W_tilde * ((y - mu) * detadmu) def get_X_tilde(self, W_tilde, X): + "Get X_tilde (formula 3.2)." - return W_tilde @ X + #import pdb; pdb.set_trace() + return W_tilde * X def get_beta_diff(self, X_dotdot, v_dotdot): + "Get the beta update difference (formula 3.5)." - beta_diff = np.linalg.lstsq(X_dotdot, v_dotdot.reshape(-1, 1), rcond=None)[ - 0 - ].flatten() + #beta_diff = np.linalg.lstsq(X_dotdot, v_dotdot.reshape(-1, 1), rcond = None)[0].flatten() + beta_diff = np.linalg.solve(X_dotdot.T @ X_dotdot, X_dotdot.T @ v_dotdot) return beta_diff def get_eta_update(self, W_tilde, v_tilde, v_dotdot, X_dotdot, beta_diff, eta): "Get the eta update (formula 4.5)." - return ( - np.linalg.inv(W_tilde) @ (v_tilde - v_dotdot - X_dotdot @ beta_diff) + eta - ) + #import pdb; pdb.set_trace() + return (v_tilde - X_dotdot @ beta_diff) / W_tilde + eta + #return (v_tilde - v_dotdot - X_dotdot @ beta_diff) / W_tilde + eta - def get_gradient(self, Z, W, v): + def get_gradient(self,Z, W, v): return Z.T @ W @ v def compute_diff(self, deviance, last): return np.abs(deviance - last) / (0.1 + np.abs(last)) def residualize(self, v, X, flist, weights, tol): + "Residualize v and X by flist using weights." if flist is None: return v, X else: - vX_resid, success = demean( - x=np.c_[v, X], flist=flist, weights=weights, tol=tol - ) + vX_resid, success = demean(x = np.c_[v, X], flist = flist, weights = weights, tol = tol) if success is False: raise ValueError("Demeaning failed after 100_000 iterations.") else: return vX_resid[:, 0], vX_resid[:, 1:] def stop_iterating(self, crit, tol, r, maxiter): + stop_iterating = crit < tol converged = stop_iterating if r == maxiter: @@ -274,8 +240,47 @@ def stop_iterating(self, crit, tol, r, maxiter): return converged + def update_eta_step_halfing(self, Y, beta, eta, mu, deviance, beta_update_diff, W_tilde, v_tilde, v_dotdot, X_dotdot, deviance_old, step_halfing_tolerance): + + "Update parameters, potentially using step halfing." + + + alpha = 1.0 + step_accepted = False + + while alpha > step_halfing_tolerance: + beta_try = beta + alpha * beta_update_diff + eta_try = self.get_eta_update( + W_tilde=W_tilde.flatten(), + v_tilde=v_tilde, + v_dotdot=v_dotdot, + X_dotdot=X_dotdot, + beta_diff=alpha * beta_update_diff, + eta=eta + ) + mu_try = self.get_mu(theta=eta_try) + deviance_try = self.compute_deviance(Y.flatten(), mu_try) + if deviance_try < deviance_old: + beta = beta_try + eta = eta_try + mu = mu_try + deviance = deviance_try + step_accepted = True + crit = self.compute_diff(deviance, deviance_old) + break + else: + alpha /= 2.0 + + if not step_accepted: + raise RuntimeError("Step-halving failed to find improvement.") + + return beta, eta, mu, deviance, crit, step_accepted + + + class Felogit(Feglm): + def __init__( self, FixestFormula: FixestFormula, @@ -298,58 +303,57 @@ def __init__( sample_split_value: Optional[Union[str, int]] = None, separation_check: Optional[list[str]] = None, ): + super().__init__( - FixestFormula=FixestFormula, - data=data, - ssc_dict=ssc_dict, - drop_singletons=drop_singletons, - drop_intercept=drop_intercept, - weights=weights, - weights_type=weights_type, - collin_tol=collin_tol, - fixef_tol=fixef_tol, - lookup_demeaned_data=lookup_demeaned_data, - tol=tol, - maxiter=maxiter, - solver=solver, - store_data=store_data, - copy_data=copy_data, - lean=lean, - sample_split_var=sample_split_var, - sample_split_value=sample_split_value, - separation_check=separation_check, + FixestFormula = FixestFormula, + data = data, + ssc_dict = ssc_dict, + drop_singletons = drop_singletons, + drop_intercept = drop_intercept, + weights = weights, + weights_type = weights_type, + collin_tol = collin_tol, + fixef_tol = fixef_tol, + lookup_demeaned_data = lookup_demeaned_data, + tol = tol, + maxiter = maxiter, + solver = solver, + store_data = store_data, + copy_data = copy_data, + lean = lean, + sample_split_var = sample_split_var, + sample_split_value = sample_split_value, + separation_check = separation_check, ) self._method = "feglm-logit" def compute_deviance(self, y, mu): - return -2 * np.sum(y * np.log(mu) + (1 - y) * np.log(1 - mu)) + eps = 1e-15 + mu_clipped = np.clip(mu, eps, 1 - eps) + return -2 * np.sum(y * np.log(mu_clipped) + (1 - y) * np.log(1 - mu_clipped)) + def get_dispersion_phi(self, theta): return 1 - def get_b(self, theta): return np.log(1 + np.exp(theta)) - - # def get_c(self, y, phi): + #def get_c(self, y, phi): # return 0 def get_mu(self, theta): - return np.exp(theta) / (1 + np.exp(theta)) - + return np.exp(theta)/(1 + np.exp(theta)) def get_link(self, mu): - return np.log(mu / (1 - mu)) - + return np.log(mu/(1 - mu)) def get_detadmu(self, mu): return 1 / (mu * (1 - mu)) - def get_theta(self, mu): - return np.log(mu / (1 - mu)) - + return np.log(mu/(1 - mu)) def get_V(self, mu): - return mu * (1 - mu) + return mu*(1 - mu) class Fepois2(Feglm): + def __init__( self, FixestFormula: FixestFormula, @@ -372,50 +376,44 @@ def __init__( sample_split_value: Optional[Union[str, int]] = None, separation_check: Optional[list[str]] = None, ): + super().__init__( - FixestFormula=FixestFormula, - data=data, - ssc_dict=ssc_dict, - drop_singletons=drop_singletons, - drop_intercept=drop_intercept, - weights=weights, - weights_type=weights_type, - collin_tol=collin_tol, - fixef_tol=fixef_tol, - lookup_demeaned_data=lookup_demeaned_data, - tol=tol, - maxiter=maxiter, - solver=solver, - store_data=store_data, - copy_data=copy_data, - lean=lean, - sample_split_var=sample_split_var, - sample_split_value=sample_split_value, - separation_check=separation_check, + FixestFormula = FixestFormula, + data = data, + ssc_dict = ssc_dict, + drop_singletons = drop_singletons, + drop_intercept = drop_intercept, + weights = weights, + weights_type = weights_type, + collin_tol = collin_tol, + fixef_tol = fixef_tol, + lookup_demeaned_data = lookup_demeaned_data, + tol = tol, + maxiter = maxiter, + solver = solver, + store_data = store_data, + copy_data = copy_data, + lean = lean, + sample_split_var = sample_split_var, + sample_split_value = sample_split_value, + separation_check = separation_check, ) def compute_deviance(self, y, mu): - return -2 * np.sum(y * np.log(mu) - mu) - + return -2 * np.sum(y * np.log(mu) - mu) def get_dispersion_phi(self, theta): return 1 - def get_b(self, theta): return np.exp(theta) - - # def get_c(self, y, phi): + #def get_c(self, y, phi): # return np.log(y) # TODO: error! def get_mu(self, theta): return np.exp(theta) - def get_link(self, mu): return np.log(mu) - def get_detadmu(self, mu): return 1 / mu - def get_theta(self, mu): return np.log(mu) - def get_V(self, mu): return mu