Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ameliorations variables erfs fpr #225

Merged
merged 13 commits into from
Jun 27, 2023
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

# 1.1.0 [#225](https://github.com/openfisca/openfisca-france-data/pull/225)


* Technical changes
- Ajoute des variables concernant le calcul des aides au logement, des non salariés et du handicap dans le builder de openfisca-france-data



# 1.0.0 [#224](https://github.com/openfisca/openfisca-france-data/pull/224)

* Breaking changes
Expand Down
11 changes: 0 additions & 11 deletions openfisca_france_data/erfs_fpr/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,6 @@ class ErfsFprtoInputComparator(AbstractComparator):
"statut_occupation_logement",
]

from openfisca_france_data.erfs_fpr.get_survey_scenario import menage_projected_variables

target_menage_projected_variables = [
f"{menage_projected_variable}_menage"
for menage_projected_variable
in menage_projected_variables
]

default_target_variables += target_menage_projected_variables


def compute_test_dataframes(self):
erfs_fpr_survey_collection = SurveyCollection.load(collection = "erfs_fpr")
# infer names of the survey and data tables
Expand Down
7 changes: 2 additions & 5 deletions openfisca_france_data/erfs_fpr/get_survey_scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@


menage_projected_variables = [
# "rev_financier_prelev_lib_imputes",
"revenu_categoriel_foncier",
"revenus_capitaux_prelevement_forfaitaire_unique_ir",
]

]


class erfs_fpr_plugin(Reform):
Expand Down Expand Up @@ -144,7 +142,6 @@ def get_survey_scenario(
# S'il n'y a pas de données, on sait où les trouver.
if data is None:
input_data_table_by_entity = dict(
foyer_fiscal = f"foyer_fiscal_{year}",
individu = f"individu_{year}",
menage = f"menage_{year}",
)
Expand Down
14 changes: 5 additions & 9 deletions openfisca_france_data/erfs_fpr/input_data_builder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
step_02_menage as menage,
step_03_variables_individuelles as variables_individuelles,
step_04_famille as famille,
step_05_foyer as foyer,
step_06_final as final,
)

Expand Down Expand Up @@ -50,7 +49,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None:
# - On merge les tables individus / menages
#
# Note : c'est ici où on objectivise les hypothèses, step 1
log.info('\n [[[ Year {} - Step 1 / 6 ]]] \n'.format(year))
log.info('\n [[[ Year {} - Step 1 / 5 ]]] \n'.format(year))
preprocessing.build_merged_dataframes(year = year)

# Step 02 : Si on veut calculer les allocations logement, il faut faire le matching avec une autre enquête (ENL)
Expand All @@ -59,11 +58,11 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None:
# stata_directory = openfisca_survey_collection.config.get('data', 'stata_directory')
# stata_file = os.path.join(stata_directory, 'log_men_ERFS.dta')
# imputation_loyer.merge_imputation_loyer(stata_file = stata_file, year = year)
log.info('\n [[[ Year {} - Step 2 / 6 SKIPPED ]]] \n'.format(year))
log.info('\n [[[ Year {} - Step 2 / 5 SKIPPED ]]] \n'.format(year))
menage.build_variables_menage(year = year)

# Step 03 : on commence par les variables individuelles
log.info('\n [[[ Year {} - Step 3 / 6 ]]] \n'.format(year))
log.info('\n [[[ Year {} - Step 3 / 5 ]]] \n'.format(year))
variables_individuelles.build_variables_individuelles(year = year)

# Step 04 : ici on va constituer foyer et famille à partir d'individu et ménage
Expand All @@ -72,18 +71,15 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None:
# - On va faire des suppositions pour faire les familles
# - On va faire les foyers fiscaux à partir des familles
# - On va faire des suppositions pour faire les foyers fiscaux
log.info('\n [[[ Year {} - Step 4 / 6 ]]] \n'.format(year))
log.info('\n [[[ Year {} - Step 4 / 5 ]]] \n'.format(year))
famille.build_famille(year = year)

log.info('\n [[[ Year {} - Step 5 / 6 ]]] \n'.format(year))
foyer.build_variables_foyers_fiscal(year = year)

# Affreux ! On injectait tout dans un même DataFrame !!!
# C'est très moche !
#
# On crée une df par entité par période.
# Elles sont stockées dans un fichier h5
log.info('\n [[[ Year {} - Step 6 / 6 ]]] \n'.format(year))
log.info('\n [[[ Year {} - Step 5 / 5 ]]] \n'.format(year))
final.create_input_data_frame(year = year, export_flattened_df_filepath = export_flattened_df_filepath)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,25 @@

@temporary_store_decorator(file_name = 'erfs_fpr')
def build_variables_menage(temporary_store = None, year = None):
menages = temporary_store[f'menages_{year}']

if "loyer" in menages.columns:
menages['loyer'] = menages['loyer'] * 12

if year == 2018:
menages = temporary_store[f'menages_{year}']
menages['statut_occupation_logement'] = menages['so'].copy()

if year == 2019: # SO ne fonctionne pas en 2019 bcp de 0 = non-renseigné
menages = temporary_store[f'menages_{year}']
menages['statut_occupation_logement'] = menages['logt'].copy()

if year >= 2018:
menages['zone_apl'] = 2
menages['zone_apl'] = 3
menages.loc[menages['tau2010'] == 10,'zone_apl'] = 1
menages.loc[menages['tau2010'] == 9,'zone_apl'] = 2
#menages.loc[menages['tau2010'] == 8,'zone_apl'] = 2
# pour l'instant : zone 3 par défaut, zone 1 si tau2010 == 10, zone 2 si tau2010 == 9 ; à améliorer, peut être en fonction de la taille de l'aire urbaine ?
menages.loc[(menages.statut_occupation_logement == 7), 'statut_occupation_logement'] = 2
menages['logement_conventionne'] = False
menages.loc[menages['statut_occupation_logement'] == 3 ,'logement_conventionne'] = True
temporary_store['menages_{}'.format(year)] = menages

Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def create_variables_individuelles(individus, year, survey_year = None, revenu_t
create_activite(individus)
create_contrat_de_travail(individus, period = period, salaire_type = revenu_type)
create_categorie_salarie(individus, period = period, survey_year = survey_year)

create_categorie_non_salarie(individus)
# inversion des revenus pour retrouver le brut
# pour les revenus de remplacement on a la csg et la crds dans l'erfs-fpr donc on peut avoir le brut directement
create_revenus_remplacement_bruts(individus)
Expand Down Expand Up @@ -489,6 +489,7 @@ def create_categorie_non_salarie(individus):
commercant = individus.cstot.isin([22])
chef_entreprise = individus.cstot.isin([23])
profession_liberale = individus.cstot.isin([31])
individus['categorie_non_salarie'] = 0
individus.loc[
agriculteur | artisan,
'categorie_non_salarie'
Expand All @@ -501,6 +502,12 @@ def create_categorie_non_salarie(individus):
profession_liberale,
'categorie_non_salarie'
] = 3
#fix un peu crade : cstot ne semble pas recouvrir tout le champ des personnes qui ont du rpns
# on met par défaut ces gens là en chef d'entreprise
individus.loc[
((individus['rpns_imposables'] != 0) & (individus['categorie_non_salarie'] == 0)),
'categorie_non_salarie'
] = 2


def create_contrat_de_travail(individus, period, salaire_type = 'imposable'):
Expand Down
57 changes: 0 additions & 57 deletions openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py

This file was deleted.

42 changes: 5 additions & 37 deletions openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene

individus = temporary_store['individus_{}'.format(year)]
menages = temporary_store['menages_{}'.format(year)]
foyers_fiscaux = temporary_store['foyers_fiscaux_{}'.format(year)]

# ici : variables à garder
var_individus = [
'activite',
'age',
'categorie_salarie',
'categorie_non_salarie',
'chomage_brut',
'contrat_de_travail',
'date_naissance',
Expand All @@ -47,12 +47,6 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene
"primes_fonction_publique",
"traitement_indiciaire_brut",
]
var_foyers_fiscaux = [
'idfoy',
'rev_financier_prelev_lib_imputes',
'revenu_categoriel_foncier',
'revenus_capitaux_prelevement_forfaitaire_unique_ir',
]

var_menages = [
'idmen',
Expand All @@ -61,6 +55,8 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene
'taxe_habitation',
'wprm',
'zone_apl',
'logement_conventionne',
'prest_precarite_hand' # on récupère la variable de montant de aah / caah pour pouvoir faire une imputation du handicap
]

individus = create_ids_and_roles(individus)
Expand All @@ -80,14 +76,7 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene
idmens = individus.idmen.unique()
menages = menages.loc[
menages.idmen.isin(idmens),
[
'idmen',
'loyer',
'statut_occupation_logement',
'taxe_habitation',
'wprm',
'zone_apl',
]
var_menages
].copy()
survey_name = 'openfisca_erfs_fpr_' + str(year)

Expand All @@ -101,16 +90,6 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene
how = 'inner',
on = 'idmen_original')

foyers_fiscaux = foyers_fiscaux.rename(columns = {'idfoy':'idfoy_original'})
unique_idfoy = individus[['idfoy','idfoy_original']].drop_duplicates()
assert len(unique_idmen) == len(menages), "Number of idfoy should be the same individus and foyers tables."

foyers_fiscaux = foyers_fiscaux.merge(unique_idfoy,
how = 'inner',
on = 'idfoy_original')

foyers_fiscaux = foyers_fiscaux[var_foyers_fiscaux]

if export_flattened_df_filepath:
supermerge = individus.merge(
menages,
Expand All @@ -131,17 +110,6 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene
survey_name = survey_name,
)

foyers_fiscaux = foyers_fiscaux.sort_values(by = ['idfoy'])
log.debug(f"Saving entity 'foyers fiscaux' in collection 'openfisca_erfs_fpr' and survey name '{survey_name}' with set_table_in_survey")
set_table_in_survey(
foyers_fiscaux,
entity = "foyer_fiscal",
period = year,
collection = "openfisca_erfs_fpr",
survey_name = survey_name,
)
log.debug("End of create_input_data_frame")

menages = menages.sort_values(by = ['idmen'])
log.debug(f"Saving entity 'menage' in collection 'openfisca_erfs_fpr' and survey name '{survey_name}' with set_table_in_survey")
set_table_in_survey(
Expand Down Expand Up @@ -255,7 +223,7 @@ def extract_menages_variables_from_store(temporary_store = None, year = None):

def extract_menages_variables(menages):
variables = ['ident', 'wprm', 'taxe_habitation']
external_variables = ['loyer', 'zone_apl', 'statut_occupation_logement']
external_variables = ['loyer', 'zone_apl', 'statut_occupation_logement','logement_conventionne', 'prest_precarite_hand']
for external_variable in external_variables:
if external_variable in menages.columns:
log.debug("Found {} in menages table: we keep it".format(external_variable))
Expand Down
5 changes: 2 additions & 3 deletions openfisca_france_data/erfs_fpr/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario):
"activite",
"autonomie_financiere",
"categorie_salarie",
"categorie_non_salarie",
"chomage_brut",
"chomage_imposable",
"contrat_de_travail",
Expand All @@ -19,16 +20,14 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario):
"effectif_entreprise",
"f4ba",
"heures_remunerees_volume",
"logement_conventionne",
"loyer",
"pensions_alimentaires_percues",
"pensions_invalidite",
"primes_fonction_publique",
"rag",
"retraite_brute",
"retraite_imposable",
# "rev_financier_prelev_lib_imputes",
"revenu_categoriel_foncier",
"revenus_capitaux_prelevement_forfaitaire_unique_ir",
"ric",
"rnc",
"rpns_imposables",
Expand Down
3 changes: 0 additions & 3 deletions openfisca_france_data/surveys.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,6 @@ def custom_initialize(self, simulation):


def custom_input_data_frame(self, input_data_frame, **kwargs):
if "loyer" in input_data_frame:
input_data_frame["loyer"] = 12 * input_data_frame.loyer

for variable in ["quifam", "quifoy", "quimen"]:
if variable in input_data_frame:
log.debug(input_data_frame[variable].value_counts(dropna = False))
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name = "OpenFisca-France-Data",
version = "1.0",
version = "1.1.0",
description = "OpenFisca-France-Data module to work with French survey data",
long_description = long_description,
long_description_content_type="text/markdown",
Expand Down