diff --git a/CHANGELOG.md b/CHANGELOG.md index a5652549..7d9e1f29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +# 1.1.0 [#225](https://github.com/openfisca/openfisca-france-data/pull/225) + + +* Technical changes +- Ajoute des variables concernant le calcul des aides au logement, des non salariés et du handicap dans le builder de openfisca-france-data + + + # 1.0.0 [#224](https://github.com/openfisca/openfisca-france-data/pull/224) * Breaking changes diff --git a/openfisca_france_data/erfs_fpr/comparison.py b/openfisca_france_data/erfs_fpr/comparison.py index d9630d20..b542e47c 100644 --- a/openfisca_france_data/erfs_fpr/comparison.py +++ b/openfisca_france_data/erfs_fpr/comparison.py @@ -45,17 +45,6 @@ class ErfsFprtoInputComparator(AbstractComparator): "statut_occupation_logement", ] - from openfisca_france_data.erfs_fpr.get_survey_scenario import menage_projected_variables - - target_menage_projected_variables = [ - f"{menage_projected_variable}_menage" - for menage_projected_variable - in menage_projected_variables - ] - - default_target_variables += target_menage_projected_variables - - def compute_test_dataframes(self): erfs_fpr_survey_collection = SurveyCollection.load(collection = "erfs_fpr") # infer names of the survey and data tables diff --git a/openfisca_france_data/erfs_fpr/get_survey_scenario.py b/openfisca_france_data/erfs_fpr/get_survey_scenario.py index 4f8e065c..448467fc 100644 --- a/openfisca_france_data/erfs_fpr/get_survey_scenario.py +++ b/openfisca_france_data/erfs_fpr/get_survey_scenario.py @@ -28,10 +28,8 @@ menage_projected_variables = [ - # "rev_financier_prelev_lib_imputes", - "revenu_categoriel_foncier", - "revenus_capitaux_prelevement_forfaitaire_unique_ir", - ] + +] class erfs_fpr_plugin(Reform): @@ -144,7 +142,6 @@ def get_survey_scenario( # S'il n'y a pas de données, on sait où les trouver. if data is None: input_data_table_by_entity = dict( - foyer_fiscal = f"foyer_fiscal_{year}", individu = f"individu_{year}", menage = f"menage_{year}", ) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 5dc8df4b..6fa75695 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -19,7 +19,6 @@ step_02_menage as menage, step_03_variables_individuelles as variables_individuelles, step_04_famille as famille, - step_05_foyer as foyer, step_06_final as final, ) @@ -50,7 +49,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # - On merge les tables individus / menages # # Note : c'est ici où on objectivise les hypothèses, step 1 - log.info('\n [[[ Year {} - Step 1 / 6 ]]] \n'.format(year)) + log.info('\n [[[ Year {} - Step 1 / 5 ]]] \n'.format(year)) preprocessing.build_merged_dataframes(year = year) # Step 02 : Si on veut calculer les allocations logement, il faut faire le matching avec une autre enquête (ENL) @@ -59,11 +58,11 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # stata_directory = openfisca_survey_collection.config.get('data', 'stata_directory') # stata_file = os.path.join(stata_directory, 'log_men_ERFS.dta') # imputation_loyer.merge_imputation_loyer(stata_file = stata_file, year = year) - log.info('\n [[[ Year {} - Step 2 / 6 SKIPPED ]]] \n'.format(year)) + log.info('\n [[[ Year {} - Step 2 / 5 SKIPPED ]]] \n'.format(year)) menage.build_variables_menage(year = year) # Step 03 : on commence par les variables indivuelles - log.info('\n [[[ Year {} - Step 3 / 6 ]]] \n'.format(year)) + log.info('\n [[[ Year {} - Step 3 / 5 ]]] \n'.format(year)) variables_individuelles.build_variables_individuelles(year = year) # Step 04 : ici on va constituer foyer et famille à partir d'invididu et ménage @@ -72,18 +71,15 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # - On va faire des suppositions pour faire les familles # - On va faire les foyers fiscaux à partir des familles # - On va faire de suppositions pour faire les foyers fiscaux - log.info('\n [[[ Year {} - Step 4 / 6 ]]] \n'.format(year)) + log.info('\n [[[ Year {} - Step 4 / 5 ]]] \n'.format(year)) famille.build_famille(year = year) - log.info('\n [[[ Year {} - Step 5 / 6 ]]] \n'.format(year)) - foyer.build_variables_foyers_fiscal(year = year) - # Affreux ! On injectait tout dans un même DataFrame !!! # C'est très moche ! # # On crée une df par entité par période. # Elles sont stockées dans un fichier h5 - log.info('\n [[[ Year {} - Step 6 / 6 ]]] \n'.format(year)) + log.info('\n [[[ Year {} - Step 5 / 5 ]]] \n'.format(year)) final.create_input_data_frame(year = year, export_flattened_df_filepath = export_flattened_df_filepath) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_02_menage.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_02_menage.py index 9bb038a6..9b7a2f0e 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_02_menage.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_02_menage.py @@ -12,16 +12,25 @@ @temporary_store_decorator(file_name = 'erfs_fpr') def build_variables_menage(temporary_store = None, year = None): + menages = temporary_store[f'menages_{year}'] + + if "loyer" in menages.columns: + menages['loyer'] = menages['loyer'] * 12 + if year == 2018: - menages = temporary_store[f'menages_{year}'] menages['statut_occupation_logement'] = menages['so'].copy() if year == 2019: # SO ne fonctionne pas en 2019 bcp de 0 = non-renseigné - menages = temporary_store[f'menages_{year}'] menages['statut_occupation_logement'] = menages['logt'].copy() if year >= 2018: - menages['zone_apl'] = 2 + menages['zone_apl'] = 3 + menages.loc[menages['tau2010'] == 10,'zone_apl'] = 1 + menages.loc[menages['tau2010'] == 9,'zone_apl'] = 2 + #menages.loc[menages['tau2010'] == 8,'zone_apl'] = 2 # pour l'instant on met tout le monde à 2 mais à améliorer, peut être en fonction de la taille de l'aire urbaine ? menages.loc[(menages.statut_occupation_logement == 7), 'statut_occupation_logement'] = 2 + menages['logement_conventionne'] = False + menages.loc[menages['statut_occupation_logement'] == 3 ,'logement_conventionne'] = True temporary_store['menages_{}'.format(year)] = menages + diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py index 659997c8..ff20b639 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py @@ -76,7 +76,7 @@ def create_variables_individuelles(individus, year, survey_year = None, revenu_t create_activite(individus) create_contrat_de_travail(individus, period = period, salaire_type = revenu_type) create_categorie_salarie(individus, period = period, survey_year = survey_year) - + create_categorie_non_salarie(individus) # inversion des revenus pour retrouver le brut # pour les revenus de remplacement on a la csg et la crds dans l'erfs-fpr donc on peut avoir le brut directement create_revenus_remplacement_bruts(individus) @@ -489,6 +489,7 @@ def create_categorie_non_salarie(individus): commercant = individus.cstot.isin([22]) chef_entreprise = individus.cstot.isin([23]) profession_liberale = individus.cstot.isin([31]) + individus['categorie_non_salarie'] = 0 individus.loc[ agriculteur | artisan, 'categorie_non_salarie' @@ -501,6 +502,12 @@ def create_categorie_non_salarie(individus): profession_liberale, 'categorie_non_salarie' ] = 3 + #fix un peu crade : cstot ne semble pas recouvrir tout le champ des personnes qui ont du rpns + # on met par défaut ces gens là en chef d'entreprise + individus.loc[ + ((individus['rpns_imposables'] != 0) & (individus['categorie_non_salarie'] == 0)), + 'categorie_non_salarie' + ] = 2 def create_contrat_de_travail(individus, period, salaire_type = 'imposable'): diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py deleted file mode 100644 index 21683d06..00000000 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_foyer.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging -import pandas as pd - - -from openfisca_survey_manager.temporary import temporary_store_decorator # type: ignore - - -log = logging.getLogger(__name__) - - -@temporary_store_decorator(file_name = 'erfs_fpr') -def build_variables_foyers_fiscal(temporary_store = None, year = None): - - assert temporary_store is not None - assert year is not None - - individus = temporary_store['individus_{}'.format(year)] - menages = temporary_store['menages_{}'.format(year)] - - individus['idfoy'] = individus['idfam'].copy() - individus['quifoy'] = individus['quifam'].copy() - - foyers_fiscaux = individus[['idfoy','ident',]].drop_duplicates() - foyers_fiscaux = pd.merge( - menages[[ - 'ident', - 'rev_financier_prelev_lib_imputes', - 'rev_fonciers_bruts', - 'rev_valeurs_mobilieres_bruts', - 'wprm', - ]], - foyers_fiscaux, - how = 'inner', - on = 'ident' - ) - # première version pour splitter les revenus du capital du ménage dans les foyers fiscaux - # on attribue l'ensemble des revenus du capital du ménage au foyer avec la personne ayant les plus hauts revenus - # procédure à améliorer - idfoy = (individus - .sort_values( - [ - 'ident', - 'salaire_de_base', - 'traitement_indiciaire_brut', - 'retraite_brute' - ], - ascending = False - ) - .groupby('ident') - .first() - .idfoy - ) - foyers_fiscaux['revenu_categoriel_foncier'] = foyers_fiscaux['rev_fonciers_bruts'] * foyers_fiscaux.idfoy.isin(idfoy) - foyers_fiscaux['revenus_capitaux_prelevement_forfaitaire_unique_ir'] = foyers_fiscaux['rev_valeurs_mobilieres_bruts'] * foyers_fiscaux.idfoy.isin(idfoy) - foyers_fiscaux['rev_financier_prelev_lib_imputes'] = foyers_fiscaux['rev_financier_prelev_lib_imputes'] * foyers_fiscaux.idfoy.isin(idfoy) - - temporary_store[f"foyers_fiscaux_{year}"] = foyers_fiscaux diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py index db72b2a0..eaa28055 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_06_final.py @@ -16,13 +16,13 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene individus = temporary_store['individus_{}'.format(year)] menages = temporary_store['menages_{}'.format(year)] - foyers_fiscaux = temporary_store['foyers_fiscaux_{}'.format(year)] # ici : variables à garder var_individus = [ 'activite', 'age', 'categorie_salarie', + 'categorie_non_salarie', 'chomage_brut', 'contrat_de_travail', 'date_naissance', @@ -47,12 +47,6 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene "primes_fonction_publique", "traitement_indiciaire_brut", ] - var_foyers_fiscaux = [ - 'idfoy', - 'rev_financier_prelev_lib_imputes', - 'revenu_categoriel_foncier', - 'revenus_capitaux_prelevement_forfaitaire_unique_ir', - ] var_menages = [ 'idmen', @@ -61,6 +55,8 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene 'taxe_habitation', 'wprm', 'zone_apl', + 'logement_conventionne', + 'prest_precarite_hand' # on récupère la variable de montant de aah / caah pour pouvoir faire une imputation du handicap ] individus = create_ids_and_roles(individus) @@ -80,14 +76,7 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene idmens = individus.idmen.unique() menages = menages.loc[ menages.idmen.isin(idmens), - [ - 'idmen', - 'loyer', - 'statut_occupation_logement', - 'taxe_habitation', - 'wprm', - 'zone_apl', - ] + var_menages ].copy() survey_name = 'openfisca_erfs_fpr_' + str(year) @@ -101,16 +90,6 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene how = 'inner', on = 'idmen_original') - foyers_fiscaux = foyers_fiscaux.rename(columns = {'idfoy':'idfoy_original'}) - unique_idfoy = individus[['idfoy','idfoy_original']].drop_duplicates() - assert len(unique_idmen) == len(menages), "Number of idfoy should be the same individus and foyers tables." - - foyers_fiscaux = foyers_fiscaux.merge(unique_idfoy, - how = 'inner', - on = 'idfoy_original') - - foyers_fiscaux = foyers_fiscaux[var_foyers_fiscaux] - if export_flattened_df_filepath: supermerge = individus.merge( menages, @@ -131,17 +110,6 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene survey_name = survey_name, ) - foyers_fiscaux = foyers_fiscaux.sort_values(by = ['idfoy']) - log.debug(f"Saving entity 'foyers fiscaux' in collection 'openfisca_erfs_fpr' and survey name '{survey_name}' with set_table_in_survey") - set_table_in_survey( - foyers_fiscaux, - entity = "foyer_fiscal", - period = year, - collection = "openfisca_erfs_fpr", - survey_name = survey_name, - ) - log.debug("End of create_input_data_frame") - menages = menages.sort_values(by = ['idmen']) log.debug(f"Saving entity 'menage' in collection 'openfisca_erfs_fpr' and survey name '{survey_name}' with set_table_in_survey") set_table_in_survey( @@ -255,7 +223,7 @@ def extract_menages_variables_from_store(temporary_store = None, year = None): def extract_menages_variables(menages): variables = ['ident', 'wprm', 'taxe_habitation'] - external_variables = ['loyer', 'zone_apl', 'statut_occupation_logement'] + external_variables = ['loyer', 'zone_apl', 'statut_occupation_logement','logement_conventionne', 'prest_precarite_hand'] for external_variable in external_variables: if external_variable in menages.columns: log.debug("Found {} in menages table: we keep it".format(external_variable)) diff --git a/openfisca_france_data/erfs_fpr/scenario.py b/openfisca_france_data/erfs_fpr/scenario.py index 80c8d852..7e9711d3 100644 --- a/openfisca_france_data/erfs_fpr/scenario.py +++ b/openfisca_france_data/erfs_fpr/scenario.py @@ -11,6 +11,7 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario): "activite", "autonomie_financiere", "categorie_salarie", + "categorie_non_salarie", "chomage_brut", "chomage_imposable", "contrat_de_travail", @@ -19,6 +20,7 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario): "effectif_entreprise", "f4ba", "heures_remunerees_volume", + "logement_conventionne", "loyer", "pensions_alimentaires_percues", "pensions_invalidite", @@ -26,9 +28,6 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario): "rag", "retraite_brute", "retraite_imposable", - # "rev_financier_prelev_lib_imputes", - "revenu_categoriel_foncier", - "revenus_capitaux_prelevement_forfaitaire_unique_ir", "ric", "rnc", "rpns_imposables", diff --git a/openfisca_france_data/surveys.py b/openfisca_france_data/surveys.py index 0ccc5c58..a595570d 100644 --- a/openfisca_france_data/surveys.py +++ b/openfisca_france_data/surveys.py @@ -193,9 +193,6 @@ def custom_initialize(self, simulation): def custom_input_data_frame(self, input_data_frame, **kwargs): - if "loyer" in input_data_frame: - input_data_frame["loyer"] = 12 * input_data_frame.loyer - for variable in ["quifam", "quifoy", "quimen"]: if variable in input_data_frame: log.debug(input_data_frame[variable].value_counts(dropna = False)) diff --git a/setup.py b/setup.py index 84ee5e0a..02e93474 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name = "OpenFisca-France-Data", - version = "1.0", + version = "1.1.0", description = "OpenFisca-France-Data module to work with French survey data", long_description = long_description, long_description_content_type="text/markdown",