From 9724efc2b6e4faeb45d117cf85771d43670e1f11 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Mon, 2 May 2022 20:13:22 +0200 Subject: [PATCH 01/38] Limit data sources to 2016 only to begin with --- runner/openfisca_survey_manager_raw_data.ini | 46 ++++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/runner/openfisca_survey_manager_raw_data.ini b/runner/openfisca_survey_manager_raw_data.ini index fe12f122..ee4b0e65 100644 --- a/runner/openfisca_survey_manager_raw_data.ini +++ b/runner/openfisca_survey_manager_raw_data.ini @@ -10,26 +10,26 @@ # Get the list : # find /mnt/data-in/erfs-fpr | grep -i fpr_ir | grep -v Doc | cut -d'/' -f1-6 | grep -v contents # Then https://sortmylist.com/ -1996 = /mnt/data-in/erfs-fpr/1996/Stata -1997 = /mnt/data-in/erfs-fpr/1997/Stata -1998 = /mnt/data-in/erfs-fpr/1998/Stata -1999 = /mnt/data-in/erfs-fpr/1999/Stata -2000 = /mnt/data-in/erfs-fpr/2000/Stata -2001 = /mnt/data-in/erfs-fpr/2001/Stata -2002 = /mnt/data-in/erfs-fpr/2002/Stata -2003 = /mnt/data-in/erfs-fpr/2003/Stata -2004 = /mnt/data-in/erfs-fpr/2004/Stata -2005 = /mnt/data-in/erfs-fpr/2005/Stata -2006 = /mnt/data-in/erfs-fpr/2006/Stata -2007 = /mnt/data-in/erfs-fpr/2007/Stata -2008 = /mnt/data-in/erfs-fpr/2008/Stata -2009 = /mnt/data-in/erfs-fpr/2009/Stata -2010 = /mnt/data-in/erfs-fpr/2010/Stata -2011 = /mnt/data-in/erfs-fpr/2011/Stata -2012 = /mnt/data-in/erfs-fpr/2012/stata -2013 = /mnt/data-in/erfs-fpr/2013/stata -2014 = /mnt/data-in/erfs-fpr/2014/sas -2015 = /mnt/data-in/erfs-fpr/2015/csv -2016 = /mnt/data-in/erfs-fpr/2016/sas -2017 = /mnt/data-in/erfs-fpr/2017/sas -2018 = /mnt/data-in/erfs-fpr/2018/sas +; 1996 = /mnt/data-in/erfs-fpr/1996/Stata +; 1997 = /mnt/data-in/erfs-fpr/1997/Stata +; 1998 = /mnt/data-in/erfs-fpr/1998/Stata +; 1999 = /mnt/data-in/erfs-fpr/1999/Stata +; 2000 = /mnt/data-in/erfs-fpr/2000/Stata +; 2001 = /mnt/data-in/erfs-fpr/2001/Stata +; 2002 = /mnt/data-in/erfs-fpr/2002/Stata +; 2003 = 
/mnt/data-in/erfs-fpr/2003/Stata +; 2004 = /mnt/data-in/erfs-fpr/2004/Stata +; 2005 = /mnt/data-in/erfs-fpr/2005/Stata +; 2006 = /mnt/data-in/erfs-fpr/2006/Stata +; 2007 = /mnt/data-in/erfs-fpr/2007/Stata +; 2008 = /mnt/data-in/erfs-fpr/2008/Stata +; 2009 = /mnt/data-in/erfs-fpr/2009/Stata +; 2010 = /mnt/data-in/erfs-fpr/2010/Stata +; 2011 = /mnt/data-in/erfs-fpr/2011/Stata +; 2012 = /mnt/data-in/erfs-fpr/2012/stata +; 2013 = /mnt/data-in/erfs-fpr/2013/stata +; 2014 = /mnt/data-in/erfs-fpr/2014/sas +; 2015 = /mnt/data-in/erfs-fpr/2015/csv +2016 = ~/lukas/Data/ERFS/ERFS2016/Stata +; 2017 = /mnt/data-in/erfs-fpr/2017/sas +; 2018 = /mnt/data-in/erfs-fpr/2018/sas From 94daa0af4347736993115d2e197e8a06e86b7762 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Tue, 24 May 2022 13:40:58 +0200 Subject: [PATCH 02/38] Change access rights for some files --- .circleci/has-functional-changes.sh | 0 .circleci/is-version-number-acceptable.sh | 0 .circleci/publish-git-tag.sh | 0 .circleci/publish-python-package.sh | 0 docker/erfs-fpr.sh | 0 docker/simulate_CI.sh | 0 openfisca_france_data/erfs/input_data_builder/run_all.py | 0 openfisca_france_data/erfs/input_data_builder/step_04_famille.py | 0 openfisca_france_data/erfs/input_data_builder/step_05_foyer.py | 0 openfisca_france_data/erfs/input_data_builder/step_06_rebuild.py | 0 .../erfs/input_data_builder/step_07_invalides.py | 0 openfisca_france_data/erfs/input_data_builder/step_08_final.py | 0 openfisca_france_data/scripts/build_input_data.py | 0 setup.py | 0 tests/erfs_fpr/integration/test_aggregates.py | 1 + 15 files changed, 1 insertion(+) mode change 100755 => 100644 .circleci/has-functional-changes.sh mode change 100755 => 100644 .circleci/is-version-number-acceptable.sh mode change 100755 => 100644 .circleci/publish-git-tag.sh mode change 100755 => 100644 .circleci/publish-python-package.sh mode change 100755 => 100644 docker/erfs-fpr.sh mode change 100755 => 100644 docker/simulate_CI.sh mode change 100755 => 100644 
openfisca_france_data/erfs/input_data_builder/run_all.py mode change 100755 => 100644 openfisca_france_data/erfs/input_data_builder/step_04_famille.py mode change 100755 => 100644 openfisca_france_data/erfs/input_data_builder/step_05_foyer.py mode change 100755 => 100644 openfisca_france_data/erfs/input_data_builder/step_06_rebuild.py mode change 100755 => 100644 openfisca_france_data/erfs/input_data_builder/step_07_invalides.py mode change 100755 => 100644 openfisca_france_data/erfs/input_data_builder/step_08_final.py mode change 100755 => 100644 openfisca_france_data/scripts/build_input_data.py mode change 100755 => 100644 setup.py mode change 100755 => 100644 tests/erfs_fpr/integration/test_aggregates.py diff --git a/.circleci/has-functional-changes.sh b/.circleci/has-functional-changes.sh old mode 100755 new mode 100644 diff --git a/.circleci/is-version-number-acceptable.sh b/.circleci/is-version-number-acceptable.sh old mode 100755 new mode 100644 diff --git a/.circleci/publish-git-tag.sh b/.circleci/publish-git-tag.sh old mode 100755 new mode 100644 diff --git a/.circleci/publish-python-package.sh b/.circleci/publish-python-package.sh old mode 100755 new mode 100644 diff --git a/docker/erfs-fpr.sh b/docker/erfs-fpr.sh old mode 100755 new mode 100644 diff --git a/docker/simulate_CI.sh b/docker/simulate_CI.sh old mode 100755 new mode 100644 diff --git a/openfisca_france_data/erfs/input_data_builder/run_all.py b/openfisca_france_data/erfs/input_data_builder/run_all.py old mode 100755 new mode 100644 diff --git a/openfisca_france_data/erfs/input_data_builder/step_04_famille.py b/openfisca_france_data/erfs/input_data_builder/step_04_famille.py old mode 100755 new mode 100644 diff --git a/openfisca_france_data/erfs/input_data_builder/step_05_foyer.py b/openfisca_france_data/erfs/input_data_builder/step_05_foyer.py old mode 100755 new mode 100644 diff --git a/openfisca_france_data/erfs/input_data_builder/step_06_rebuild.py 
b/openfisca_france_data/erfs/input_data_builder/step_06_rebuild.py old mode 100755 new mode 100644 diff --git a/openfisca_france_data/erfs/input_data_builder/step_07_invalides.py b/openfisca_france_data/erfs/input_data_builder/step_07_invalides.py old mode 100755 new mode 100644 diff --git a/openfisca_france_data/erfs/input_data_builder/step_08_final.py b/openfisca_france_data/erfs/input_data_builder/step_08_final.py old mode 100755 new mode 100644 diff --git a/openfisca_france_data/scripts/build_input_data.py b/openfisca_france_data/scripts/build_input_data.py old mode 100755 new mode 100644 diff --git a/setup.py b/setup.py old mode 100755 new mode 100644 diff --git a/tests/erfs_fpr/integration/test_aggregates.py b/tests/erfs_fpr/integration/test_aggregates.py old mode 100755 new mode 100644 index 8dc8cd0e..08db9f78 --- a/tests/erfs_fpr/integration/test_aggregates.py +++ b/tests/erfs_fpr/integration/test_aggregates.py @@ -81,6 +81,7 @@ def main(year, configfile = None, verbose = False): log.warning(f"File {configfile} not found, switchin to default {years}") else: years = [year] + for year in years: survey_scenario, aggregates = test_erfs_fpr_survey_simulation_aggregates( year = year, From 970aed95ed9a3d38b926cae18a44572f3312ca2e Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Tue, 24 May 2022 14:07:44 +0200 Subject: [PATCH 03/38] Add comments, restructure return value of create_familles() --- .../erfs_fpr/get_survey_scenario.py | 1 + .../erfs_fpr/input_data_builder/step_04_famille.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/openfisca_france_data/erfs_fpr/get_survey_scenario.py b/openfisca_france_data/erfs_fpr/get_survey_scenario.py index 78bdcb42..d1515210 100644 --- a/openfisca_france_data/erfs_fpr/get_survey_scenario.py +++ b/openfisca_france_data/erfs_fpr/get_survey_scenario.py @@ -51,6 +51,7 @@ def get_survey_scenario( baseline_tax_benefit_system = baseline_tax_benefit_system, year = year, ) + # taux marginaux !! 
survey_scenario.variation_factor = variation_factor survey_scenario.varying_variable = varying_variable diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py index 011b651e..8df0fe55 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py @@ -111,7 +111,9 @@ def create_familles(indivi = None, year = None, kind = 'erfs_fpr', enfants_a_nai assert indivi is not None assert year is not None assert (enfants_a_naitre is not None) or skip_enfants_a_naitre + complete_indivi(indivi, year) + base = famille_1( indivi = indivi, kind = kind, @@ -119,22 +121,26 @@ def create_familles(indivi = None, year = None, kind = 'erfs_fpr', enfants_a_nai skip_enfants_a_naitre = True, year = year, ) + base, famille, personne_de_reference = famille_2( base = base, year = year, ) + base, famille = famille_3( base = base, famille = famille, kind = kind, year = year, ) + base, famille = famille_5( base = base, famille = famille, kind = kind, year = year, ) + base, famille = famille_6( base = base, famille = famille, @@ -142,7 +148,11 @@ def create_familles(indivi = None, year = None, kind = 'erfs_fpr', enfants_a_nai kind = kind, year = year, ) - return famille_7( + + # here 17000919 is removed bc. "no chef de famille" - but famille df is not returned + # so these obs are still in the famille data. return famille df as well? 
check implications + # or simply remove manually afterwards/workaround + individus = famille_7( base = base, famille = famille, indivi = indivi, @@ -150,6 +160,8 @@ def create_familles(indivi = None, year = None, kind = 'erfs_fpr', enfants_a_nai year = year, ) + return individus + def complete_indivi(indivi, year): indivi['year'] = year From be4e169471a3462d77ba3068e329810f6ddf83f9 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Tue, 24 May 2022 14:08:57 +0200 Subject: [PATCH 04/38] Add log output for divergences of households in final data set --- .../erfs_fpr/input_data_builder/step_05_final.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index f2091e9a..9e098f89 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -20,6 +20,7 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene individus = temporary_store['individus_{}'.format(year)] menages = temporary_store['menages_{}'.format(year)] + # ici : variables à garder variables = [ 'activite', 'age', @@ -142,6 +143,20 @@ def create_collectives_foyer_variables(individus, menages): .drop_duplicates(['idmen', 'idfoy']) .reset_index(drop = True) ) + + set_multi = set(menages_multi_foyers.idmen.tolist()) + set_single = set(menages_simple_foyer.idmen.tolist()) + set_joint = set_multi.union(set_single) + set_target = set(idmens) + + log.info('Simple foyer menages contain {} unique observations.'.format(len(set_multi))) + log.info('Multi-foyer menages contain {} unique observations.'.format(len(set_single))) + log.info('Multi- and single-foyer menages jointly contain {} unique observations.'.format(len(set_joint))) + log.info('According to variable idmens, there should be {} observations.'.format(len(set_target))) + + if 
len(set_joint) != len(set_target): + log.info('Problematic Menage IDs: {}'.format(set_target.symmetric_difference(set_joint))) + assert set(menages_multi_foyers.idmen.tolist() + menages_simple_foyer.idmen.tolist()) == set(idmens) menages_foyers_correspondance = pandas.concat([menages_multi_foyers, menages_simple_foyer], ignore_index = True) del menages_multi_foyers, menages_simple_foyer From 7dbc221cd746c01449430dafad99b8bb62a58ab8 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Tue, 24 May 2022 14:09:52 +0200 Subject: [PATCH 05/38] Change default value of year to 2017 Necessary bc. unable to set command line parameters in VSCode debugger --- openfisca_france_data/erfs_fpr/input_data_builder/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 72440813..36006701 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -57,13 +57,13 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: @click.command() -@click.option('-y', '--year', default = 2013, help = "ERFS-FPR year", show_default = True, +@click.option('-y', '--year', default = 2017, help = "ERFS-FPR year", show_default = True, type = int, required = True) @click.option('-f', '--file', 'export_flattened_df_filepath', default = None, help = 'flattened dataframe filepath', show_default = True) @click.option('-c', '--configfile', default = None, help = 'raw_data.ini path to read years to process.', show_default = True) -def main(year = 2014, export_flattened_df_filepath = None, configfile = None): +def main(year = 2017, export_flattened_df_filepath = None, configfile = None): # Pourquoi year = 2014 alors que default = 2013 pour click ? 
import time start = time.time() From a12b004fed852bc9f6c7ee5360972cc313aff7e1 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Tue, 24 May 2022 14:11:00 +0200 Subject: [PATCH 06/38] Add .csv and .html outputs to .gitignore file --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 63541b64..bf286f0a 100644 --- a/.gitignore +++ b/.gitignore @@ -80,4 +80,6 @@ setup.cfg *.h5 # Generated files erfs_fpr.json -openfisca_erfs_fpr.json \ No newline at end of file +openfisca_erfs_fpr.json +*.csv +*.html \ No newline at end of file From 75dc3f534b1276b15fbba0f120008712040972f0 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Tue, 24 May 2022 18:56:45 +0200 Subject: [PATCH 07/38] Begin to overhaul log structure --- .../erfs_fpr/input_data_builder/__init__.py | 35 +++++++++++++----- .../input_data_builder/step_05_final.py | 36 ++++++++++++------- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 36006701..47a47683 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -2,8 +2,10 @@ import logging import configparser import sys, getopt +import warnings #from multipledispatch import dispatch # type: ignore +warnings.filterwarnings("ignore", ".*is an invalid version and will not be supported in a future release.*") from openfisca_france_data.erfs_fpr.input_data_builder import ( step_01_preprocessing as preprocessing, @@ -63,37 +65,52 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: help = 'flattened dataframe filepath', show_default = True) @click.option('-c', '--configfile', default = None, help = 'raw_data.ini path to read years to process.', show_default = True) -def main(year = 2017, export_flattened_df_filepath = None, configfile = None): - # Pourquoi 
year = 2014 alors que default = 2013 pour click ? +@click.option('-l', '--log', 'lg', default = "info", + help = 'level of detail for log output.', show_default = True) +def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg = "info"): import time start = time.time() - logging.basicConfig(level = logging.INFO, stream = sys.stdout, + # get level of logging + if lg == "info": + lgi = logging.INFO + elif lg == "warn": + lgi = logging.WARNING + elif lg == "debug": + lgi = logging.DEBUG + + logging.basicConfig(level = lgi, stream = sys.stdout, format='%(asctime)s - %(name)-12s: %(levelname)s %(module)s - %(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) - log.info("Starting build-erfs-fpr...") + log.info("Starting build-erfs-fpr [log: {}]".format(lg)) + + # determine which years are to be analyzed, from file if available, else parameter if configfile is not None: years = [] + try: config = configparser.ConfigParser() config.read(configfile) for key in config['erfs_fpr']: if key.isnumeric(): years.append(int(key)) - log.info(f"Adding year {int(key)}") + # log.info(f"Adding year {int(key)}") except KeyError: years = [year] log.warning(f"File {configfile} not found, switchin to default {years}") + + log.info('Configured multiple years: [{}]'.format(';'.join(years))) + for year in years: - file = f"./erfs_flat_{year}.h5" - log.info(f'Will output to {file}') + log.info('Starting with year {}'.format(year)) build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) + else: - if export_flattened_df_filepath is None: - export_flattened_df_filepath = f"./erfs_flat_{year}.h5" + log.info('Configured single year: [{}]'.format(year)) build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) + # TODO: create_enfants_a_naitre(year = year) log.info("Script finished after {}".format(time.time() - start)) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py 
b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 9e098f89..71a06450 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -144,18 +144,30 @@ def create_collectives_foyer_variables(individus, menages): .reset_index(drop = True) ) - set_multi = set(menages_multi_foyers.idmen.tolist()) - set_single = set(menages_simple_foyer.idmen.tolist()) - set_joint = set_multi.union(set_single) - set_target = set(idmens) - - log.info('Simple foyer menages contain {} unique observations.'.format(len(set_multi))) - log.info('Multi-foyer menages contain {} unique observations.'.format(len(set_single))) - log.info('Multi- and single-foyer menages jointly contain {} unique observations.'.format(len(set_joint))) - log.info('According to variable idmens, there should be {} observations.'.format(len(set_target))) - - if len(set_joint) != len(set_target): - log.info('Problematic Menage IDs: {}'.format(set_target.symmetric_difference(set_joint))) + # update idmens, as some households may have dropped out, causing errors + # this is kind of a dirty fix, better solution would be to fix the dropping out itself + # for instance: 2017, 17000919 drops out bc. no "chef de famille" which is weird, need to investigate + idmens_old = idmens.copy() + idmens = set(menages_multi_foyers.idmen.to_list()).union(set(menages_simple_foyer.idmen.to_list())) + + dropouts = set(idmens).symmetric_difference(set(idmens_old)) + if len(dropouts) == 0: + log.info('No households have been dropped. All clear.') + else: + log.info('WARNING: Some households [{}] have dropped out. You should investigate why this has happened. 
[{}]'.format(len(dropouts), ','.join(str(e) for e in dropouts))) + + # set_multi = set(menages_multi_foyers.idmen.tolist()) + # set_single = set(menages_simple_foyer.idmen.tolist()) + # set_joint = set_multi.union(set_single) + # set_target = set(idmens) + + # log.info('Simple foyer menages contain {} unique observations.'.format(len(set_multi))) + # log.info('Multi-foyer menages contain {} unique observations.'.format(len(set_single))) + # log.info('Multi- and single-foyer menages jointly contain {} unique observations.'.format(len(set_joint))) + # log.info('According to variable idmens, there should be {} observations.'.format(len(set_target))) + + # if len(set_joint) != len(set_target): + # log.info('Problematic Menage IDs: {}'.format(set_target.symmetric_difference(set_joint))) assert set(menages_multi_foyers.idmen.tolist() + menages_simple_foyer.idmen.tolist()) == set(idmens) menages_foyers_correspondance = pandas.concat([menages_multi_foyers, menages_simple_foyer], ignore_index = True) From c11beec4ac684517b40c32a084c4fcff16e5dd2c Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Wed, 25 May 2022 15:31:42 +0200 Subject: [PATCH 08/38] Tidy up logs --- .../erfs_fpr/input_data_builder/__init__.py | 10 +- .../step_01_preprocessing.py | 206 +++++++++++++----- .../step_03_variables_individuelles.py | 25 +-- .../input_data_builder/step_04_famille.py | 159 +++++++------- .../input_data_builder/step_05_final.py | 27 +-- 5 files changed, 247 insertions(+), 180 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 47a47683..3eb5268e 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -30,6 +30,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # - On merge les tables individus / menages # # Note : c'est ici où on objectivise les hypothèses, step 1 
+ log.info('Year {} - Step 1 / 5'.format(year)) preprocessing.build_merged_dataframes(year = year) # Step 02 : Si on veut calculer les allocations logement, il faut faire le matching avec une autre enquête (ENL) @@ -38,8 +39,10 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # stata_directory = openfisca_survey_collection.config.get('data', 'stata_directory') # stata_file = os.path.join(stata_directory, 'log_men_ERFS.dta') # imputation_loyer.merge_imputation_loyer(stata_file = stata_file, year = year) + log.info('Year {} - Step 2 / 5 SKIPPED'.format(year)) # Step 03 : on commence par les variables indivuelles + log.info('Year {} - Step 3 / 5'.format(year)) variables_individuelles.build_variables_individuelles(year = year) # Step 04 : ici on va constituer foyer et famille à partir d'invididu et ménage @@ -48,6 +51,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # - On va faire des suppositions pour faire les familles # - On va faire les foyers fiscaux à partir des familles # - On va faire de suppositions pour faire les foyers fiscaux + log.info('Year {} - Step 4 / 5'.format(year)) famille.build_famille(year = year) # Affreux ! On injectait tout dans un même DataFrame !!! @@ -55,6 +59,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # # On crée une df par entité par période. 
# Elles sont stockées dans un fichier h5 + log.info('Year {} - Step 5 / 5'.format(year)) final.create_input_data_frame(year = year, export_flattened_df_filepath = export_flattened_df_filepath) @@ -79,10 +84,9 @@ def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg elif lg == "debug": lgi = logging.DEBUG - logging.basicConfig(level = lgi, stream = sys.stdout, + logging.basicConfig(level = lgi, stream = sys.stdout, filename = 'build_erfs_fpr.log', format='%(asctime)s - %(name)-12s: %(levelname)s %(module)s - %(funcName)s: %(message)s', - datefmt='%Y-%m-%d %H:%M:%S' - ) + datefmt='%Y-%m-%d %H:%M:%S') log.info("Starting build-erfs-fpr [log: {}]".format(lg)) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py index 7d5197b5..df5a9b1a 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py @@ -17,51 +17,76 @@ def build_merged_dataframes(temporary_store = None, year = None): assert temporary_store is not None assert year is not None - log.debug("Chargement des tables des enquêtes") erfs_fpr_survey_collection = SurveyCollection.load(collection = "erfs_fpr") yr = str(year)[-2:] # 12 for 2012 + + # where available, use harmoized data add_suffix_retropole_years = [2012] - survey = erfs_fpr_survey_collection.get_survey(f"erfs_fpr_{year}") + # infer names of the survey and data tables + names = { + "survey": f"erfs_fpr_{year}", + "eec_individu": f"fpr_irf{yr}e{yr}t4", + "eec_menage": f"fpr_mrf{yr}e{yr}t4", + "fpr_individu": f"fpr_indiv_{year}_retropole" if year in add_suffix_retropole_years else f"fpr_indiv_{year}", + "fpr_menage": f"fpr_menage_{year}_retropole" if year in add_suffix_retropole_years else f"fpr_menage_{year}" + } + + log.debug("Loading tables for year {} [{}]".format(year, names)) - eec_menage = 
survey.get_values(table = f"fpr_mrf{yr}e{yr}t4", ignorecase=True) - eec_individu = survey.get_values(table = f"fpr_irf{yr}e{yr}t4", ignorecase= True) + # load survey and tables + survey = erfs_fpr_survey_collection.get_survey(names['survey']) - if year in add_suffix_retropole_years: - fpr_individu = survey.get_values(table = f"fpr_indiv_{year}_retropole") - fpr_menage = survey.get_values(table = f"fpr_menage_{year}_retropole") + eec_individu = survey.get_values(table = names['eec_individu'], ignorecase= True) + eec_menage = survey.get_values(table = names['eec_menage'], ignorecase=True) - else: - fpr_individu = survey.get_values(table = f"fpr_indiv_{year}", ignorecase = True) - fpr_menage = survey.get_values(table = f"fpr_menage_{year}", ignorecase = True) + fpr_individu = survey.get_values(table = names['fpr_individu'], ignorecase = True) + fpr_menage = survey.get_values(table = names['fpr_menage'], ignorecase = True) + # transform to lowercase for table in (fpr_menage, eec_menage, eec_individu, fpr_individu): table.columns = [k.lower() for k in table.columns] + + # merge EEC and FPR tables individus, menages = merge_tables(fpr_menage, eec_menage, eec_individu, fpr_individu, year) + + # store household table temporary_store[f"menages_{year}"] = menages del eec_menage, fpr_menage, menages gc.collect() + # store individual-level table temporary_store[f"individus_{year}_post_01"] = individus del eec_individu, fpr_individu + gc.collect() def merge_tables(fpr_menage = None, eec_menage = None, eec_individu = None, fpr_individu = None, year = None, skip_menage = False): + + # Step 1: Individual-Level Data + assert (eec_individu is not None) and (fpr_individu is not None) - log.debug(""" -Il y a {} individus dans fpr_individu -Il y a {} individus dans eec_individu -""".format( - len(fpr_individu.noindiv.unique()), - len(eec_individu.noindiv.unique()), - )) # Fusion enquête emploi et source fiscale + nobs = {} + nobs['fpr_ind'] = len(fpr_individu.noindiv.unique()) + 
nobs['eec_ind'] = len(eec_individu.noindiv.unique()) + log.debug('There are {} obs. in the FPR individual-level data [unique(noindiv)]'.format(nobs['fpr_ind'])) + log.debug('There are {} obs. in the EEC individual-level data [unique(noindiv)]'.format(nobs['eec_ind'])) + + # merge tables individus = eec_individu.merge(fpr_individu, on = ['noindiv', 'ident', 'noi'], how = "inner") + + nobs['fpr_eec_ind'] = len(individus.noindiv.unique()) + log.debug('There are {} obs. in the FPR-EEC individual-level data [unique(noindiv)]'.format(nobs['fpr_eec_ind'])) + + # check TBD check_naia_naim(individus, year) + + # establish list of variables, taking into account differences over time agepr = 'agepr' if year < 2013 else "ageprm" cohab = 'cohab' if year < 2013 else "coured" lien = 'lien' if year < 2013 else 'lienprm' # TODO attention pas les mêmes modalités @@ -96,6 +121,7 @@ def merge_tables(fpr_menage = None, eec_menage = None, eec_individu = None, fpr_ + ([cstot] if cstot in individus.columns else [])) #existe 1996 - 2003, remplace retrai + # fill NAs and type conversion for var in var_list: individus[var]=individus[var].fillna(0) individus[var]=individus[var].astype(np.int64) @@ -104,59 +130,65 @@ def merge_tables(fpr_menage = None, eec_menage = None, eec_individu = None, fpr_ var, individus[var].dtype ) + # TBD if year >= 2013: individus['lpr'] = individus.lprm + # Step 2: Household-Level Data if not skip_menage: - log.debug(""" -Il y a {} ménages dans fpr_menage -Il y a {} ménages dans eec_menage -""".format( - len(fpr_menage.ident.unique()), - len(eec_menage.ident.unique()), - )) - common_variables = set(fpr_menage.columns).intersection(eec_menage.columns) - log.debug(""" -Les variables suivantes sont communes aux deux tables ménages: - {} -""".format(common_variables)) - if 'th' in common_variables: + + assert (eec_menage is not None) and (fpr_menage is not None) + + nobs['fpr_men'] = len(fpr_menage.ident.unique()) + nobs['eec_men'] = len(eec_menage.ident.unique()) + + 
log.debug('There are {} obs. in the FPR household-level data [unique(ident)]'.format(nobs['fpr_men'])) + log.debug('There are {} obs. in the EEC household-level data [unique(ident)]'.format(nobs['eec_men'])) + + common_variables_pre = set(fpr_menage.columns).intersection(eec_menage.columns) + + if 'th' in common_variables_pre: fpr_menage.rename(columns = dict(th = 'taxe_habitation'), inplace = True) - log.debug("La variable th de la table fpr_menage est renommée taxe_habitation") + log.debug("Household-level tables: Renamed variable 'th' to 'taxe_habitation'") - if 'tur5' in common_variables: + if 'tur5' in common_variables_pre: fpr_menage.drop('tur5', axis = 1, inplace = True) - log.debug("La variable tur5 redondante est retirée de la table fpr_menage") + log.debug("Household-level tables: Variable 'tur5' has been removed from household-level data (FPR)") common_variables = set(fpr_menage.columns).intersection(eec_menage.columns) - log.debug(""" -Après renommage seules les variables suivantes sont communes aux deux tables ménages: - {} -""".format(common_variables)) + dropped_vars = common_variables_pre.symmetric_difference(common_variables) + + log.debug('Common variables in the household-level tables: [{}]'.format(','.join(common_variables))) + + if len(dropped_vars) > 0: + log.debug('These household-level variables have been dropped: [{}]'.format(','.join(dropped_vars))) + else: + log.debug('No household-level variables have been dropped.') + + # merge FPR and EEC household-level data menages = fpr_menage.merge(eec_menage, on = 'ident', how = 'inner') + + nobs['fpr_eec_men'] = len(menages.ident.unique()) + log.debug('There are {} obs. 
in the FPR-EEC household-level data [unique(noindiv)]'.format(nobs['fpr_eec_men'])) + create_variable_locataire(menages) + lprm = "lpr" if year < 2013 else "lprm" - print(year, lprm) + try: menages = menages.merge( - individus.loc[individus[lprm] == 1, ["ident", "ddipl"]].copy() # lpr (ou lprm) == 1 ==> C'est la Personne - # de référence + individus.loc[individus[lprm] == 1, ["ident", "ddipl"]].copy() + # lpr (ou lprm) == 1 ==> C'est la personne de référence ) except Exception: print(individus.dtypes) raise - log.debug(""" -Il y a {} ménages dans la base ménage fusionnée -""".format(len(menages.ident.unique()))) - # - # - log.debug(""" -Il y a {} ménages dans la base individus fusionnée -Il y a {} individus dans la base individus fusionnée -""".format( - len(individus.ident.unique()), - len(individus.noindiv.unique()), - )) + + nobs['merge_men'] = len(menages.ident.unique()) + nobs['merge_ind'] = len(menages.noindiv.unique()) + + log.debug('There are {} individuals [before: {}] and {} households [before: {}] in the merged data table.'.format(nobs['merge_ind'], nobs['fpr_eec_ind'], nobs['merge_men'], nobs['fpr_eec_men'])) + # Infos sur les non appariés if not skip_menage: @@ -195,9 +227,9 @@ def non_apparies(eec_individu, eec_menage, fpr_individu, fpr_menage): individus_non_apparies = individus_non_apparies.drop_duplicates(subset = 'ident', keep = 'last') difference = set(individus_non_apparies.ident).symmetric_difference(menages_non_apparies.ident) intersection = set(individus_non_apparies.ident) & set(menages_non_apparies.ident) - log.debug( - "Il y a {} differences et {} intersections entre les ménages non appariés et les individus non appariés".format( - len(difference), len(intersection))) + + log.debug("There are {} differences and {} intersections between the unmerged households and individuals.".format(len(difference), len(intersection))) + del individus_non_apparies, menages_non_apparies, difference, intersection gc.collect() @@ -205,9 +237,8 @@ def 
non_apparies(eec_individu, eec_menage, fpr_individu, fpr_menage): def check_naia_naim(individus, year): valid_naim = individus.naim.isin(range(1, 13)) if not valid_naim.all(): - log.debug("There are wrong naim values:\n{}".format( - individus.naim.value_counts(dropna = False)) - ) + log.debug('There are incorrect month or birth values (naim). They will be reset to 1.') + individus.loc[ individus.naim == 99, 'naim' @@ -228,6 +259,63 @@ def check_naia_naim(individus, year): for id in bad_noindiv: individus.loc[individus.noindiv == id,'naia'] = year - individus.loc[individus.noindiv == id, 'ageq'] + good = ((year >= individus.naia) & (individus.naia > 1890)) + assertion = good.all() + bad_years = individus.loc[~good, "naia"].unique() + bad_idents = individus.loc[~good, "ident"].unique() + + log.debug('There are incorrect years of birth [naia; {}] for individuals with ident [{}].'.format(','.join(bad_years), ','.join(bad_idents))) + + try: + lpr = "lpr" if year < 2013 else "lprm" + lien = "lien" if year < 2013 else "lienprm" # TODO attention pas les mêmes modalités + prosa = "prosa" if year < 2013 else "qprcent" # TODO attention pas les mêmes modalités + retrai = "retrai" if year < 2013 else "ret" # TODO attention pas les mêmes modalités + assert assertion, "Error: \n {}".format( + individus.loc[ + individus.ident.isin(bad_idents), # WTF is this table supposed to be? 
I changed the 'lien' in lien + # and so on for other variables + [ + 'ag', + 'ident', + lien, + 'naia', + 'naim', + 'noi', + 'noicon', + 'noimer', + prosa, + retrai, + 'rstg', + 'statut', + 'sexe', + lpr, + 'chomage_i', + 'pens_alim_recue_i', + 'rag_i', + 'retraites_i', + 'ric_i', + 'rnc_i', + 'salaires_i', + ] + + (["noiper"] if "noiper" in individus.columns else []) + ] + ) + except AssertionError: + if year == 2012: + log.debug('Fixing erroneous years of birth [naia] manually for 2012.') + individus.loc[ + (individus.ident == 12023304) & (individus.noi == 2), + 'naia' + ] = 1954 + individus.loc[ + (individus.ident == 12041815) & (individus.noi == 1), + 'naia' + ] = 2012 - 40 + # + else: + AssertionError('There have been issues with the year and month of birth (naia, naim) that need to be checked manually.') + if __name__ == '__main__': import sys @@ -238,5 +326,5 @@ def check_naia_naim(individus, year): year = 2014 build_merged_dataframes(year = year) # TODO: create_enfants_a_naitre(year = year) - log.info("Script finished after {}".format(time.time() - start)) + log.info("Step 1 finished after {}".format(time.time() - start)) print(time.time() - start) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py index d4d0260c..5747030f 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd - from openfisca_core import periods from openfisca_france_data import select_to_match_target from openfisca_france_data.common import ( @@ -28,8 +27,6 @@ def build_variables_individuelles(temporary_store = None, year = None): assert temporary_store is not None assert year is not None - log.info('step_03_variables_individuelles: Création des variables individuelles') 
- individus = temporary_store['individus_{}_post_01'.format(year)] openfisca_by_erfs_variable = { @@ -52,15 +49,14 @@ def build_variables_individuelles(temporary_store = None, year = None): create_variables_individuelles(individus, year) assert 'salaire_de_base' in individus.columns , 'salaire de base not in individus' temporary_store['individus_{}'.format(year)] = individus - log.debug("step_03_variables_individuelles terminée") return individus # helpers def create_variables_individuelles(individus, year, survey_year = None): - """Création des variables individuelles - """ + """Création des variables individuelles""" + create_ages(individus, year) create_date_naissance(individus, age_variable = None, annee_naissance_variable = 'naia', mois_naissance = 'naim', year = year) @@ -321,7 +317,7 @@ def create_categorie_salarie(individus, period, survey_year = None): survey_year = period.start.year if survey_year >= 2013: - log.debug(f"Using qprcent to infer prosa for year {survey_year}") + log.debug(f"Using qprcent to infer chpub for year {survey_year}") chpub_replacement = { 0: 0, 3: 1, @@ -333,6 +329,7 @@ def create_categorie_salarie(individus, period, survey_year = None): 6: 1, } individus['chpub'] = individus.chpub.map(chpub_replacement) + log.debug('Using qprc to infer prosa for year {}'.format(survey_year)) qprc_to_prosa = { 0: 0, @@ -1105,18 +1102,14 @@ def compute_taux_csg_remplacement(rfr, nbptr): individus['taux_csg_remplacement_n_1'] = compute_taux_csg_remplacement(rfr_n_1, nbptr) distribution = individus.groupby(['taux_csg_remplacement', 'taux_csg_remplacement_n_1'])['ponderation'].sum() / 1000 - log.debug( - "Distribution of taux_csg_remplacement (in thousands):\n", - distribution) + log.debug("Distribution of taux_csg_remplacement (in thousands):\n", distribution) assert individus['taux_csg_remplacement_n_1'].isin(range(4)).all() assert individus['taux_csg_remplacement'].isin(range(4)).all() def calibrate_categorie_salarie(individus, year = None, 
mass_by_categorie_salarie = None): assert mass_by_categorie_salarie is not None - log.info( - mass_by_categorie_salarie - ) + log.debug('Mass of salaries: ', mass_by_categorie_salarie) weight_individus = individus['ponderation'].values for rebalanced_categorie, target_mass in mass_by_categorie_salarie.items(): @@ -1141,7 +1134,7 @@ def calibrate_categorie_salarie(individus, year = None, mass_by_categorie_salari take = take, seed = 9779972 ) - log.info(""" + log.debug(""" final selected population: {} error: {} % """.format( @@ -1149,7 +1142,7 @@ def calibrate_categorie_salarie(individus, year = None, mass_by_categorie_salari ((eligible * selected * weight_individus).sum() - target_mass) / target_mass * 100, )) individus.loc[selected, 'categorie_salarie'] = rebalanced_categorie - log.info(individus.groupby('categorie_salarie')['ponderation'].sum()) + log.debug(individus.groupby('categorie_salarie')['ponderation'].sum()) seuil_salaire_imposable_mensuel = 2 * 3000 individus.loc[ ( @@ -1171,7 +1164,7 @@ def calibrate_categorie_salarie(individus, year = None, mass_by_categorie_salari def todo_create(individus): txtppb = "txtppb" if "txtppb" in individus.columns else "txtppred" - log.debug(" 6.3 : variable txtppb") + log.debug("6.3 : variable txtppb") individus.loc[individus.txtppb.isnull(), txtppb] = 0 individus.loc[individus[txtppb] == 9, txtppb] = 0 assert individus.txtppb.notnull().all() diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py index 8df0fe55..17033439 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py @@ -23,18 +23,17 @@ def build_famille(temporary_store = None, year = None): assert temporary_store is not None assert year is not None - log.info('step_04_famille: construction de la table famille') - skip_enfants_a_naitre = True kind = 'erfs_fpr' - 
log.info('Etape 1 : préparation de base') - log.info(' 1.1 : récupération de indivi') + log.info(' [1] Preparing the data base') + log.info(' [1.1] Getting individuals') + indivi = temporary_store['individus_{}'.format(year)] # Si on pense qu'on ne peut pas récupérer les enfants à naître if skip_enfants_a_naitre: - log.info(" 1.2 : On ne récupère pas d'enfants à naître") + log.info(" [1.2] Skipping enfants à naître") enfants_a_naitre = None else: if year < 2013: @@ -42,7 +41,7 @@ def build_famille(temporary_store = None, year = None): else: lpr = "lprm" lien = 'lien' if year < 2013 else 'lienprm' # TODO: attention pas les mêmes modalités - log.info(" 1.2 : récupération des enfants à naître") + log.info(" [1.2] Récupération des enfants à naître") cohab = 'cohab' if year < 2013 else "coured" agepr = 'agepr' if year < 2013 else "ageprm" retrai = 'retrai' if year < 2013 else "ret" @@ -81,16 +80,13 @@ def build_famille(temporary_store = None, year = None): enfants_a_naitre = temporary_store['enfants_a_naitre_{}'.format(year)][individual_variables].copy() enfants_a_naitre.drop_duplicates('noindiv', inplace = True) - log.info("""" - Il y a {} enfants à naitre avant de retirer ceux qui ne sont pas enfants - de la personne de référence - """.format(len(enfants_a_naitre.index))) + + ean_avant = len(enfants_a_naitre.index) enfants_a_naitre = enfants_a_naitre.loc[enfants_a_naitre[lpr] == 3].copy() enfants_a_naitre = enfants_a_naitre.loc[~(enfants_a_naitre.noindiv.isin(indivi.noindiv.values))].copy() - log.info("""" - Il y a {} enfants à naitre après avoir retiré ceux qui ne sont pas enfants - de la personne de référence - """.format(len(enfants_a_naitre.index))) + ean_apres = len(enfants_a_naitre.index) + + log.debug("Il y a {} ({}) enfants à naitre avant (après) avoir retiré ceux qui ne sont pas enfants de la personne de référence".format(ean_avant, ean_apres)) individus = create_familles(indivi = indivi, year = year, kind = kind, enfants_a_naitre = enfants_a_naitre, 
skip_enfants_a_naitre = skip_enfants_a_naitre) @@ -202,13 +198,16 @@ def famille_1(indivi = None, kind = 'erfs_fpr', enfants_a_naitre = None, skip_en smic = smic_horaire_brut[year] # PB with vars "agepf" "noidec" "year" NOTE: quels problèmes ? JS - log.info(" 1.3 : création de la base complète") + + log.info(" [1.3] création de la base complète") + if skip_enfants_a_naitre: base = indivi.copy() else: base = pd.concat([indivi, enfants_a_naitre]) - log.info("base contient {} lignes ".format(len(base.index))) + log.debug("base contient {} lignes ".format(len(base.index))) + base['noindiv'] = (100 * base.ident + base['noi']).astype(int) base['moins_de_15_ans_inclus'] = base.agepf < 16 base['jeune_non_eligible_rsa'] = (base.agepf >= 16) & (base.agepf < AGE_RSA) @@ -223,6 +222,7 @@ def famille_1(indivi = None, kind = 'erfs_fpr', enfants_a_naitre = None, skip_en base['famille'] = 0 base['kid'] = False + for series_name in ['kid', 'moins_de_15_ans_inclus', 'jeune_non_eligible_rsa', 'jeune_eligible_rsa', 'smic55']: assert_dtype(base[series_name], "bool") try: @@ -236,7 +236,7 @@ def famille_1(indivi = None, kind = 'erfs_fpr', enfants_a_naitre = None, skip_en def famille_2(base, year = None): assert year is not None - log.info("Etape 2 : On cherche les enfants ayant père et/ou mère comme personne de référence et conjoint") + log.info(" [2] On cherche les enfants ayant père et/ou mère comme personne de référence et conjoint") if year < 2013: lpr = "lpr" @@ -246,13 +246,15 @@ def famille_2(base, year = None): personne_de_reference = base.loc[base[lpr] == 1, ['ident', 'noi']].copy() personne_de_reference['noifam'] = (100 * personne_de_reference.ident + personne_de_reference['noi']).astype(int) personne_de_reference = personne_de_reference[['ident', 'noifam']].copy() - log.info("length personne_de_reference : {}".format(len(personne_de_reference.index))) + + log.debug("length personne_de_reference : {}".format(len(personne_de_reference.index))) + nof01 = base.loc[ 
base[lpr].isin([1, 2]) | ((base[lpr] == 3) & (base.moins_de_15_ans_inclus)) | ((base[lpr] == 3) & base.jeune_non_eligible_rsa & (~base.smic55)) ].copy() - log.info('longueur de nof01 avant merge: {}'.format(len(nof01.index))) + log.debug('longueur de nof01 avant merge: {}'.format(len(nof01.index))) nof01 = nof01.merge(personne_de_reference, on = 'ident', how = 'outer') nof01['famille'] = 10 nof01['kid'] = ( @@ -270,7 +272,8 @@ def famille_2(base, year = None): del nof01 control_04(famille, base) - log.info(" 2.1 : Identification des couples non personne de référence ou conjoint de celle-ci") + log.info(" [2.1] Identification des couples non personne de référence ou conjoint de celle-ci") + # On adopte une approche non genrée couple = subset_base(base, famille) cohab = 'cohab' if year < 2013 else "coured" @@ -300,16 +303,9 @@ def famille_2(base, year = None): except Exception: assert_dtype(couple[series_name], "int64") - log.info("""Nombre de personnes vivant en couple sans être personne de référence ou conjoint: - {} dont {} sans conjoints dans la base -""".format( - len(couple), - len(couple.loc[ - ~( - (100 * couple.ident + couple.noicon).astype(int).isin(base.noindiv) - ) - ]) - )) + log.debug("Nombre de personnes vivant en couple sans être personne de référence ou conjoint: {} dont {} sans conjoints dans la base".format( + len(couple), len(couple.loc[~((100 * couple.ident + couple.noicon).astype(int).isin(base.noindiv))]))) + famille = pd.concat([famille, couple]) control_04(famille, base) return base, famille, personne_de_reference @@ -321,20 +317,23 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): lpr = "lpr" else: lpr = "lprm" - + assert base is not None assert famille is not None assert year is not None - log.info("Etape 3: Récupération des personnes seules") - log.info(" 3.1 : personnes seules de catégorie 1") + + log.info(" [3] Récupération des personnes seules") + log.info(" [3.1] personnes seules de catégorie 1") + seul1 = 
base[~(base.noindiv.isin(famille.noindiv.values))].copy() cohab = 'cohab' if year < 2013 else "coured" seul1 = seul1[ (seul1[lpr].isin([3, 4,5,6])) & ((seul1.jeune_non_eligible_rsa & seul1.smic55) | seul1.jeune_eligible_rsa) & (seul1[cohab] == 1) & (seul1.sexe == 2)].copy() - log.info("Il y a {} personnes seules de catégorie 1".format( - len(seul1.index))) + + log.debug("Il y a {} personnes seules de catégorie 1".format(len(seul1.index))) + if len(seul1.index) > 0: seul1['noifam'] = (100 * seul1.ident + seul1.noi).astype(int) seul1['famille'] = 31 @@ -343,10 +342,10 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): famille = pd.concat([famille, seul1]) control_04(famille, base) - log.info(" 3.1 personnes seules de catégorie 2") + log.info(" [3.1] personnes seules de catégorie 2") seul2 = base[~(base.noindiv.isin(famille.noindiv.values))].copy() seul2 = seul2[(seul2[lpr].isin([3, 4,5,6])) & seul2.jeune_non_eligible_rsa & seul2.smic55 & (seul2[cohab] != 1)].copy() - log.info("Il y a {} personnes seules de catégorie 2".format( + log.debug("Il y a {} personnes seules de catégorie 2".format( len(seul2.index))) if len(seul2.index) > 0: seul2['noifam'] = (100 * seul2.ident + seul2.noi).astype(int) @@ -360,11 +359,11 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): famille = pd.concat([famille, seul2]) control_04(famille, base) - log.info(" 3.3 personnes seules de catégorie 3") + log.info(" [3.3] personnes seules de catégorie 3") seul3 = subset_base(base, famille) seul3 = seul3[(seul3[lpr].isin([3, 4,5,6])) & seul3.jeune_eligible_rsa & (seul3[cohab] != 1)].copy() # TODO: CHECK erreur dans le guide méthodologique ERF 2002 lpr 3,4 au lieu de 3 seulement - log.info("Il y a {} personnes seules de catégorie 3".format( + log.debug("Il y a {} personnes seules de catégorie 3".format( len(seul3.index))) if len(seul3.index) > 0: seul3['noifam'] = (100 * seul3.ident + seul3.noi).astype(int) @@ -377,7 +376,7 @@ def 
famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): famille = pd.concat([famille, seul3]) control_04(famille, base) - log.info(" 3.4 : personnes seules de catégorie 4") + log.info(" [3.4] personnes seules de catégorie 4") seul4 = subset_base(base, famille) assert seul4.noimer.notnull().all() if kind == 'erfs_fpr': @@ -397,7 +396,7 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): (seul4.noimer == 0) & (seul4.persfip == 'vous') ].copy() - log.info("Il y a {} personnes seules de catégorie 4".format( + log.debug("Il y a {} personnes seules de catégorie 4".format( len(seul4.index))) if len(seul4.index) > 0: @@ -412,16 +411,16 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): control_04(famille, base) - log.info("Etape 4 : traitement des enfants") - log.info(" 4.1 : enfant avec mère") + log.info(" [4] traitement des enfants") + log.info(" [4.1] enfant avec mère") + avec_mere = subset_base(base, famille) avec_mere = avec_mere[ (avec_mere[lpr].isin([4,5,6])) & (avec_mere.jeune_non_eligible_rsa | avec_mere.moins_de_15_ans_inclus) & (avec_mere.noimer > 0) ].copy() - log.info("Il y a {} enfants rattachés à leur mère".format( - len(avec_mere.index))) + log.debug("Il y a {} enfants rattachés à leur mère".format(len(avec_mere.index))) avec_mere['noifam'] = (100 * avec_mere.ident + avec_mere.noimer).astype(int) avec_mere['famille'] = 41 @@ -440,7 +439,7 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): .drop_duplicates() .merge(base) ) - log.info("qui sont au nombre de {}".format(len(mere))) + log.debug("qui sont au nombre de {}".format(len(mere))) mere['noifam'] = (100 * mere.ident + mere.noi).astype(int) mere['famille'] = 42 @@ -449,7 +448,7 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): assert_dtype(mere[series_name], "int") except Exception: assert_dtype(mere[series_name], "int64") - + avec_mere = 
avec_mere[avec_mere.noifam.isin(mere.noindiv)].copy() famille = famille[~(famille.noindiv.isin(mere.noindiv.values))].copy() # Avoid duplication in famille @@ -457,8 +456,7 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): assert mere.noicon.notnull().all() conjoint_mere_id = mere.loc[mere.noicon > 0, ['ident', 'noicon', 'noifam']].copy() conjoint_mere_id['noindiv'] = (100 * conjoint_mere_id.ident + conjoint_mere_id.noicon).astype(int) - log.info("et dont les conjoints sont au nombre de {}".format( - len(conjoint_mere_id))) + log.debug("et dont les conjoints sont au nombre de {}".format(len(conjoint_mere_id))) conjoint_mere = (conjoint_mere_id[['noindiv', 'noifam']].copy() .merge(base) ) @@ -474,7 +472,7 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): del avec_mere, mere, conjoint_mere, conjoint_mere_id control_04(famille, base) - log.info(" 4.2 : enfants avec père") + log.info(" [4.2] enfants avec père") avec_pere = subset_base(base, famille) if "noiper" in base.columns: assert avec_pere.noiper.notnull().all() @@ -483,11 +481,11 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): (avec_pere.jeune_non_eligible_rsa | avec_pere.moins_de_15_ans_inclus) & (avec_pere.noiper > 0) ].copy() - log.info("Il y a {} enfants rattachés à leur père".format( + log.debug("Il y a {} enfants rattachés à leur père".format( len(avec_pere.index))) avec_pere['noifam'] = (100 * avec_pere.ident + avec_pere.noiper).astype(int) #Check if father is in the database - avec_pere = avec_pere[(avec_pere.noifam.isin(base.noindiv))].copy() + avec_pere = avec_pere[(avec_pere.noifam.isin(base.noindiv))].copy() avec_pere['famille'] = 44 avec_pere['kid'] = True assert avec_pere['noifam'].notnull().all(), 'presence of NaN in avec_pere' @@ -512,7 +510,7 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): # On récupère les conjoints des pères assert pere.noicon.notnull().all() conjoint_pere_id 
= pere.loc[pere.noicon > 0, ['ident', 'noicon', 'noifam']].copy() - log.info("et dont les conjoints sont au nombre de {}".format( + log.debug("et dont les conjoints sont au nombre de {}".format( len(conjoint_pere_id))) if len(conjoint_pere_id.index) > 0: @@ -536,10 +534,10 @@ def famille_3(base = None, famille = None, kind = 'erfs_fpr', year = None): del avec_pere, pere, conjoint_pere, conjoint_pere_id if kind == 'erfs_fpr': - log.info(" 4.3 : enfants avec déclarant (ignorée dans erfs_fpr)") + log.info(" [4.3] enfants avec déclarant (ignorée dans erfs_fpr)") pass else: - log.info(" 4.3 : enfants avec déclarant") + log.info(" [4.3] enfants avec déclarant") avec_dec = subset_base(base, famille) avec_dec = avec_dec[ (avec_dec.persfip == "pac") & @@ -588,11 +586,11 @@ def famille_5(base = None, famille = None, kind = 'erfs_fpr', year = None): agepr = 'agepr' if year < 2013 else "ageprm" retrai = 'retrai' if year < 2013 else "ret" if kind == 'erfs_fpr': - log.info("Etape 5 : Récupération des enfants fip (ignorée dans erfs_fpr)") + log.info(" [5] Récupération des enfants fip (ignorée dans erfs_fpr)") pass else: - log.info("Etape 5 : Récupération des enfants fip") - log.info(" 5.1 : Création de la df fip") + log.info(" [5] Récupération des enfants fip") + log.info(" [5.1] Création de la df fip") individual_variables_fip = [ 'acteu', 'actrec', @@ -643,7 +641,7 @@ def famille_5(base = None, famille = None, kind = 'erfs_fpr', year = None): except Exception: assert_dtype(fip[series_name], "int64") - log.info(" 5.2 : extension de base avec les fip") + log.info(" [5.2] extension de base avec les fip") base_ = pd.concat([base, fip]) enfant_fip = subset_base(base_, famille) enfant_fip = enfant_fip[ @@ -672,7 +670,7 @@ def famille_5(base = None, famille = None, kind = 'erfs_fpr', year = None): parent_fip['noifam'] = parent_fip['noindiv'].values.copy() parent_fip['famille'] = 51 parent_fip['kid'] = False - log.info("Contrôle de parent_fip") + log.debug("Contrôle de parent_fip") 
control_04(parent_fip, base) control_04(famille, base) famille = famille.merge(parent_fip, how='outer') @@ -700,8 +698,8 @@ def famille_6(base = None, famille = None, personne_de_reference = None, kind = assert famille is not None assert personne_de_reference is not None assert year is not None - log.info("Etape 6 : gestion des non attribués") - log.info(" 6.1 : non attribués type 1") + log.info(" [6] gestion des non attribués") + log.info(" [6.1] non attribués type 1") non_attribue1 = subset_base(base, famille) lien = 'lien' if year < 2013 else 'lienprm' # TODO: attention pas les mêmes modalités subsetlienfamilial = list(range(1,5)) if year<2013 else list(range(6)) @@ -729,7 +727,7 @@ def famille_6(base = None, famille = None, personne_de_reference = None, kind = ) ].copy() # On rattache les moins de 15 ans avec la PR (on a déjà éliminé les enfants en nourrice) - log.info("Il y a {} enfants non attribués de type 1".format( + log.debug("Il y a {} enfants non attribués de type 1".format( len(non_attribue1.index))) if len(non_attribue1.index) > 0: @@ -745,13 +743,13 @@ def famille_6(base = None, famille = None, personne_de_reference = None, kind = control_04(famille, base) del personne_de_reference, non_attribue1 - log.info(" 6.2 : non attribué type 2") + log.info(" [6.2] non attribué type 2") if kind == 'erfs_fpr': non_attribue2 = base[~(base.noindiv.isin(famille.noindiv.values))].copy() else: non_attribue2 = base[(~(base.noindiv.isin(famille.noindiv.values)) & (base.quelfic != "FIP"))].copy() - log.info("Il y a {} enfants non attribués de type 2".format( + log.debug("Il y a {} enfants non attribués de type 2".format( len(non_attribue2.index))) if len(non_attribue2.index) > 0: non_attribue2['noifam'] = (100 * non_attribue2.ident + non_attribue2.noi).astype(int) @@ -773,19 +771,19 @@ def famille_6(base = None, famille = None, personne_de_reference = None, kind = # helpers def control_04(dataframe, base): - log.info("longueur de la dataframe après opération : 
{}".format(len(dataframe))) + log.debug("longueur de la dataframe après opération : {}".format(len(dataframe))) if any(dataframe.duplicated(subset = 'noindiv')): - log.info("contrôle des doublons : il y a {} individus en double".format( + log.debug("contrôle des doublons : il y a {} individus en double".format( dataframe.duplicated(subset = 'noindiv').sum())) # dataframe[dataframe.noindiv.duplicated()].to_csv("laoslasiecoule.csv") # lllt = dataframe[dataframe.noindiv.duplicated()]["noindiv"] # dataframe[dataframe.noindiv.isin(lllt)].to_csv("bronskibeat.csv") # log.info("contrôle des colonnes : il y a {} colonnes".format(len(dataframe.columns))) - log.info("Il y a {} identifiants de familles différentes".format(len(dataframe.noifam.unique()))) + log.debug("Il y a {} identifiants de familles différentes".format(len(dataframe.noifam.unique()))) assert not dataframe.noifam.isnull().any(), "{} noifam are NaN".format(dataframe.noifam.isnull().sum()) - log.info("{} lignes dans dataframe vs {} lignes dans base".format(len(dataframe.index), len(base.index))) + log.debug("{} lignes dans dataframe vs {} lignes dans base".format(len(dataframe.index), len(base.index))) assert len(dataframe.index) <= len(base.index), "dataframe has too many rows compared to base" assert set(dataframe.noifam.unique()).issubset(set(base.noindiv)), \ "The following {} are not in the dataframe: \n {}".format( @@ -795,7 +793,7 @@ def control_04(dataframe, base): if 'quifam' in dataframe: famille_population = dataframe.query('quifam == 0').groupby('famille')['wprm'].sum() - log.info("famille :\n{}".format( + log.debug("famille :\n{}".format( famille_population / famille_population.sum() )) @@ -806,16 +804,16 @@ def famille_7(base = None, famille = None, indivi = None, kind = 'erfs_fpr', assert famille is not None assert indivi is not None assert year is not None - log.info("Etape 7 : Sauvegarde de la table famille") + log.info(" [7] Sauvegarde de la table famille") - log.info(" 7.1 : Mise en forme 
finale") + log.info(" [7.1] Mise en forme finale") famille['chef'] = (famille.noifam == (100 * famille.ident + famille.noi)) assert_dtype(famille.chef, "bool") famille.reset_index(inplace = True) control_04(famille, base) - log.info(" 7.2 : création de la colonne rang") + log.info(" [7.2] création de la colonne rang") famille['rang'] = famille.kid.astype(int) while any(famille.loc[famille.rang != 0].duplicated(subset = ['rang', 'noifam'])): famille.loc[famille.rang != 0, 'rang'] += ( @@ -825,9 +823,9 @@ def famille_7(base = None, famille = None, indivi = None, kind = 'erfs_fpr', .duplicated(subset = ["rang", 'noifam']).values ) - log.info(" 7.3 : création de la colonne quifam et troncature") - log.info("value_counts chef : \n {}".format(famille['chef'].value_counts())) - log.info("value_counts kid :' \n {}".format(famille['kid'].value_counts())) + log.info(" [7.3] création de la colonne quifam et troncature") + log.debug("value_counts chef : \n {}".format(famille['chef'].value_counts())) + log.debug("value_counts kid :' \n {}".format(famille['kid'].value_counts())) famille['quifam'] = -1 famille['quifam'] = famille['quifam'].where(famille['chef'].values, 0) @@ -840,13 +838,13 @@ def famille_7(base = None, famille = None, indivi = None, kind = 'erfs_fpr', assert (famille.groupby('noifam')['chef'].sum() <= 1).all(), "Il y a plusieurs chefs par famille" if not (famille.groupby('noifam')['chef'].sum() == 1).all(): - log.info("Il y a {} familles qui n'ont pas de chef de famille".format( + log.debug("Il y a {} familles qui n'ont pas de chef de famille".format( (famille.groupby('noifam')['chef'].sum() == 0).sum() )) absence_chef = (famille.groupby('noifam')['chef'].sum() == 0) noifam_absence_chef = absence_chef.loc[absence_chef].index idents = famille.loc[famille.noifam.isin(noifam_absence_chef), 'ident'].unique() - log.info(u'Il y a {} ménages contenant des familles sans chefs. on les retire'.format( + log.debug(u'Il y a {} ménages contenant des familles sans chefs. 
on les retire'.format( len(idents))) famille = famille.loc[~famille.ident.isin(idents)].copy() indivi = indivi.loc[~indivi.ident.isin(idents)].copy() @@ -866,12 +864,12 @@ def famille_7(base = None, famille = None, indivi = None, kind = 'erfs_fpr', famille = famille[['noindiv', 'quifam', 'noifam']].copy() famille.rename(columns = {'noifam': 'idfam'}, inplace = True) - gc.collect() + log.debug("Vérifications sur famille") log.info(u"Vérifications sur famille") duplicated_famillle = famille.duplicated(subset = ['idfam', 'quifam'], keep = False) if duplicated_famillle.sum() > 0: - log.info(u"There are {} duplicates of quifam inside famille".format( + log.debug("There are {} duplicates of quifam inside famille".format( duplicated_famillle.sum())) raise @@ -898,4 +896,3 @@ def subset_base(base, famille): # logging.basicConfig(level = logging.INFO, filename = 'step_04.log', filemode = 'w') year = 2014 build_famille(year = year) - log.info("étape 04 famille terminée") diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 71a06450..035e123a 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -2,9 +2,7 @@ import logging import pandas -from openfisca_france_data.utils import ( - id_formatter, print_id, normalizes_roles_in_entity, - ) +from openfisca_france_data.utils import (id_formatter, print_id, normalizes_roles_in_entity) from openfisca_survey_manager.temporary import temporary_store_decorator # type: ignore from openfisca_survey_manager.input_dataframe_generator import set_table_in_survey # type: ignore @@ -16,7 +14,6 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene assert temporary_store is not None assert year is not None - log.info('step_05_create_input_data_frame: Etape finale ') individus = 
temporary_store['individus_{}'.format(year)] menages = temporary_store['menages_{}'.format(year)] @@ -73,7 +70,9 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene menages["statut_occupation_logement"] = 0 menages = extract_menages_variables(menages) + individus = create_collectives_foyer_variables(individus, menages) + idmens = individus.idmen.unique() menages = menages.loc[ menages.idmen.isin(idmens), @@ -154,20 +153,7 @@ def create_collectives_foyer_variables(individus, menages): if len(dropouts) == 0: log.info('No households have been dropped. All clear.') else: - log.info('WARNING: Some households [{}] have dropped out. You should investigate why this has happened. [{}]'.format(len(dropouts), ','.join(str(e) for e in dropouts))) - - # set_multi = set(menages_multi_foyers.idmen.tolist()) - # set_single = set(menages_simple_foyer.idmen.tolist()) - # set_joint = set_multi.union(set_single) - # set_target = set(idmens) - - # log.info('Simple foyer menages contain {} unique observations.'.format(len(set_multi))) - # log.info('Multi-foyer menages contain {} unique observations.'.format(len(set_single))) - # log.info('Multi- and single-foyer menages jointly contain {} unique observations.'.format(len(set_joint))) - # log.info('According to variable idmens, there should be {} observations.'.format(len(set_target))) - - # if len(set_joint) != len(set_target): - # log.info('Problematic Menage IDs: {}'.format(set_target.symmetric_difference(set_joint))) + log.warning('Some households [{}] have dropped out. You should investigate why this has happened. 
[{}]'.format(len(dropouts), ','.join(str(e) for e in dropouts))) assert set(menages_multi_foyers.idmen.tolist() + menages_simple_foyer.idmen.tolist()) == set(idmens) menages_foyers_correspondance = pandas.concat([menages_multi_foyers, menages_simple_foyer], ignore_index = True) @@ -209,7 +195,7 @@ def create_ids_and_roles(individus): def format_ids_and_roles(data_frame): for entity_id in ['idmen', 'idfoy', 'idfam']: - log.info('Reformat ids: {}'.format(entity_id)) + log.debug('Reformat ids: {}'.format(entity_id)) data_frame = id_formatter(data_frame, entity_id) data_frame.reset_index(drop = True, inplace = True) normalizes_roles_in_entity(data_frame, 'idfoy', 'quifoy') @@ -232,7 +218,7 @@ def extract_menages_variables(menages): external_variables = ['loyer', 'zone_apl', 'statut_occupation_logement'] for external_variable in external_variables: if external_variable in menages.columns: - log.info("Found {} in menages table: we keep it".format(external_variable)) + log.debug("Found {} in menages table: we keep it".format(external_variable)) variables.append(external_variable) #TODO: 2007-2010 ont la variable rev_fonciers et non pas rev_fonciers_bruts. Est-ce la même? 
menages = menages.rename(columns={'rev_fonciers': 'rev_fonciers_bruts'}) @@ -247,7 +233,6 @@ def extract_menages_variables(menages): logging.basicConfig(level = logging.INFO, stream = sys.stdout) year = 2014 data_frame = create_input_data_frame(year = year) - log.info('Ok') # TODO # Variables revenus collectifs From 0c2eb69db080bf6678e5557b3eb4ba604f04e0fa Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Wed, 25 May 2022 18:10:56 +0200 Subject: [PATCH 09/38] Minor bug fixes --- openfisca_france_data/erfs_fpr/input_data_builder/__init__.py | 2 +- .../erfs_fpr/input_data_builder/step_01_preprocessing.py | 2 +- .../input_data_builder/step_03_variables_individuelles.py | 4 ++-- .../erfs_fpr/input_data_builder/step_04_famille.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 3eb5268e..82c5ac6a 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -84,7 +84,7 @@ def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg elif lg == "debug": lgi = logging.DEBUG - logging.basicConfig(level = lgi, stream = sys.stdout, filename = 'build_erfs_fpr.log', + logging.basicConfig(level = lgi, stream = sys.stdout, # filename = 'build_erfs_fpr.log', format='%(asctime)s - %(name)-12s: %(levelname)s %(module)s - %(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py index df5a9b1a..b6f7ecb3 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py @@ -185,7 +185,7 @@ def merge_tables(fpr_menage = None, eec_menage = None, eec_individu = None, fpr_ 
raise nobs['merge_men'] = len(menages.ident.unique()) - nobs['merge_ind'] = len(menages.noindiv.unique()) + nobs['merge_ind'] = len(menages.ident.unique()) log.debug('There are {} individuals [before: {}] and {} households [before: {}] in the merged data table.'.format(nobs['merge_ind'], nobs['fpr_eec_ind'], nobs['merge_men'], nobs['fpr_eec_men'])) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py index 5747030f..2afdade6 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py @@ -622,14 +622,14 @@ def create_contrat_de_travail(individus, period, salaire_type = 'imposable'): individus['contrat_de_travail'] = 6 # sans objet par défaut if salaire_type == 'net': assert (individus.query('salaire_net == 0').contrat_de_travail == 6).all() - log.info('Salaire retenu: {}'.format('salaire_net')) + log.debug('Salaire retenu: {}'.format('salaire_net')) individus['salaire'] = individus.salaire_net.copy() smic = smic_annuel_net_by_year[period.start.year] elif salaire_type == 'imposable': individus['salaire_imposable'] = individus.salaire_imposable.fillna(0) assert (individus.query('salaire_imposable == 0').contrat_de_travail == 6).all() - log.info('Salaire retenu: {}'.format('salaire_imposable')) + log.debug('Salaire retenu: {}'.format('salaire_imposable')) individus['salaire'] = individus.salaire_imposable.copy() smic = smic_annuel_imposable_by_year[period.start.year] diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py index 17033439..7c4a771e 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py @@ -181,7 +181,7 
@@ def complete_indivi(indivi, year): if nb_enfants_en_nourrice > 0: indivi = indivi[~(selection_enfant_en_nourrice)].copy() - log.info("{} enfants en nourrice sont exlus".format(nb_enfants_en_nourrice.sum())) + log.debug("{} enfants en nourrice sont exlus".format(nb_enfants_en_nourrice.sum())) # for series_name in ['agepf']: # , 'noidec']: # integer with NaN # assert_dtype(indivi[series_name], "object") From 955d4522aeb222ec657c322ffdbb08bfb1a4929c Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Fri, 3 Jun 2022 09:50:07 +0200 Subject: [PATCH 10/38] Improve logging, add second handler (file) --- .../erfs_fpr/input_data_builder/__init__.py | 40 ++++++++++++++----- .../input_data_builder/step_05_final.py | 1 + 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 82c5ac6a..581bc390 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -3,6 +3,7 @@ import configparser import sys, getopt import warnings +import datetime #from multipledispatch import dispatch # type: ignore warnings.filterwarnings("ignore", ".*is an invalid version and will not be supported in a future release.*") @@ -16,6 +17,15 @@ ) log = logging.getLogger(__name__) +log.setLevel(logging.DEBUG) + +fileHandler = logging.FileHandler("../log/build_erfs_fpr_{}.log".format(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))) +fileHandler.setLevel(logging.DEBUG) +log.addHandler(fileHandler) + +consoleHandler = logging.StreamHandler() +consoleHandler.setLevel(logging.INFO) +log.addHandler(consoleHandler) #@dispatch(int) @@ -30,7 +40,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # - On merge les tables individus / menages # # Note : c'est ici où on objectivise les hypothèses, step 1 - log.info('Year {} - Step 1 / 5'.format(year)) + 
log.info('\n [[[ Year {} - Step 1 / 5 ]]] \n'.format(year)) preprocessing.build_merged_dataframes(year = year) # Step 02 : Si on veut calculer les allocations logement, il faut faire le matching avec une autre enquête (ENL) @@ -39,10 +49,10 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # stata_directory = openfisca_survey_collection.config.get('data', 'stata_directory') # stata_file = os.path.join(stata_directory, 'log_men_ERFS.dta') # imputation_loyer.merge_imputation_loyer(stata_file = stata_file, year = year) - log.info('Year {} - Step 2 / 5 SKIPPED'.format(year)) + log.info('\n [[[ Year {} - Step 2 / 5 SKIPPED ]]] \n'.format(year)) # Step 03 : on commence par les variables indivuelles - log.info('Year {} - Step 3 / 5'.format(year)) + log.info('\n [[[ Year {} - Step 3 / 5 ]]] \n'.format(year)) variables_individuelles.build_variables_individuelles(year = year) # Step 04 : ici on va constituer foyer et famille à partir d'invididu et ménage @@ -51,7 +61,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # - On va faire des suppositions pour faire les familles # - On va faire les foyers fiscaux à partir des familles # - On va faire de suppositions pour faire les foyers fiscaux - log.info('Year {} - Step 4 / 5'.format(year)) + log.info('\n [[[ Year {} - Step 4 / 5 ]]] \n'.format(year)) famille.build_famille(year = year) # Affreux ! On injectait tout dans un même DataFrame !!! @@ -59,7 +69,7 @@ def build(year: int, export_flattened_df_filepath: str = None) -> None: # # On crée une df par entité par période. 
# Elles sont stockées dans un fichier h5 - log.info('Year {} - Step 5 / 5'.format(year)) + log.info('\n [[[ Year {} - Step 5 / 5 ]]] \n'.format(year)) final.create_input_data_frame(year = year, export_flattened_df_filepath = export_flattened_df_filepath) @@ -76,6 +86,8 @@ def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg import time start = time.time() + catch_errors = False + # get level of logging if lg == "info": lgi = logging.INFO @@ -84,7 +96,7 @@ def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg elif lg == "debug": lgi = logging.DEBUG - logging.basicConfig(level = lgi, stream = sys.stdout, # filename = 'build_erfs_fpr.log', + logging.basicConfig(stream = sys.stdout, # filename = 'build_erfs_fpr.log', level = lgi, format='%(asctime)s - %(name)-12s: %(levelname)s %(module)s - %(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') @@ -105,18 +117,28 @@ def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg years = [year] log.warning(f"File {configfile} not found, switchin to default {years}") - log.info('Configured multiple years: [{}]'.format(';'.join(years))) + if len(years) > 1: + log.info('Configured multiple years: [{}]'.format(';'.join([str(y) for y in years]))) + else: + log.info('Configured single year: [{}]'.format(years)) for year in years: log.info('Starting with year {}'.format(year)) - build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) + if catch_errors: + try: + build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) + except Exception as e: + log.warning(" == BUILD HAS FAILED FOR YEAR {} == ".format(year)) + log.warning("Error message:\n{}\nEND OF ERROR MESSAGE\n\n".format(str(e))) + else: + build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) else: log.info('Configured single year: [{}]'.format(year)) build(year = year, export_flattened_df_filepath = export_flattened_df_filepath) 
# TODO: create_enfants_a_naitre(year = year) - log.info("Script finished after {}".format(time.time() - start)) + log.info("\n\n ==> Script finished after {} seconds.".format(round(time.time() - start))) if __name__ == '__main__': diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 035e123a..31312f39 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -223,6 +223,7 @@ def extract_menages_variables(menages): #TODO: 2007-2010 ont la variable rev_fonciers et non pas rev_fonciers_bruts. Est-ce la même? menages = menages.rename(columns={'rev_fonciers': 'rev_fonciers_bruts'}) menages = menages[variables].copy() + menages.taxe_habitation = - menages.taxe_habitation # taxes should be negative menages.rename(columns = dict(ident = 'idmen'), inplace = True) return menages From a185c574be267e80ebf24ab6dd0ffc9678a377e0 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Fri, 3 Jun 2022 09:50:45 +0200 Subject: [PATCH 11/38] Fix variable name bug (rev_fonciers) for old versions of ERFS --- .../erfs_fpr/input_data_builder/step_01_preprocessing.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py index b6f7ecb3..6ea52707 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py @@ -51,6 +51,11 @@ def build_merged_dataframes(temporary_store = None, year = None): # merge EEC and FPR tables individus, menages = merge_tables(fpr_menage, eec_menage, eec_individu, fpr_individu, year) + # check name of revenus fonciers variable + if 'rev_fonciers' in menages.columns: + log.info('Renaming rev_fonciers to 
rev_fonciers_bruts.') + menages.rename(columns = {'rev_fonciers':'rev_fonciers_bruts'}, inplace = True) + # store household table temporary_store[f"menages_{year}"] = menages del eec_menage, fpr_menage, menages @@ -264,7 +269,7 @@ def check_naia_naim(individus, year): bad_years = individus.loc[~good, "naia"].unique() bad_idents = individus.loc[~good, "ident"].unique() - log.debug('There are incorrect years of birth [naia; {}] for individuals with ident [{}].'.format(','.join(bad_years), ','.join(bad_idents))) + log.debug('There are incorrect years of birth [naia: {}] for individuals with ident [{}].'.format(';'.join([str(by) for by in bad_years]), ';'.join([str(bi) for bi in bad_idents]))) try: lpr = "lpr" if year < 2013 else "lprm" From efad9fe7081b71ae136f8160236bf9b8cf7fea73 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Fri, 3 Jun 2022 09:51:11 +0200 Subject: [PATCH 12/38] Automatically fix some erroneous years of birth --- .../input_data_builder/step_03_variables_individuelles.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py index 2afdade6..d57c66c8 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py @@ -870,6 +870,9 @@ def create_date_naissance(individus, age_variable = 'age', annee_naissance_varia if mois_naissance is not None: month_birth = individus[mois_naissance].astype(int) + # sometimes age is actually the year, fixing this by adding back year + year_birth[year_birth < 1000] += year + individus['date_naissance'] = pd.to_datetime( pd.DataFrame({ 'year': year_birth, From 0f52a945e4e0559aedce465d378e41aec4a91620 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Fri, 3 Jun 2022 09:51:32 +0200 Subject: [PATCH 13/38] Change default year to 
2017 (latest available data) --- tests/erfs_fpr/integration/test_aggregates.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/erfs_fpr/integration/test_aggregates.py b/tests/erfs_fpr/integration/test_aggregates.py index 08db9f78..39044b8f 100644 --- a/tests/erfs_fpr/integration/test_aggregates.py +++ b/tests/erfs_fpr/integration/test_aggregates.py @@ -81,7 +81,7 @@ def main(year, configfile = None, verbose = False): log.warning(f"File {configfile} not found, switchin to default {years}") else: years = [year] - + for year in years: survey_scenario, aggregates = test_erfs_fpr_survey_simulation_aggregates( year = year, @@ -90,7 +90,7 @@ def main(year, configfile = None, verbose = False): survey_scenario._set_used_as_input_variables_by_entity() aggregates.to_csv(f'aggregates{year}.csv') print(aggregates.to_markdown()) - aggregates.to_html(f'aggregates{year}.html') + # aggregates.to_html(f'aggregates{year}.html') From effd1c6f6e93e099fe6b9558dd3ffb8594d70cc6 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Fri, 3 Jun 2022 09:52:08 +0200 Subject: [PATCH 14/38] [WIP] Begins big overhaul of SMIC calculations (on hold, waiting for OFF update) --- openfisca_france_data/smic.py | 176 ++++++++++++++++++++++------------ 1 file changed, 117 insertions(+), 59 deletions(-) diff --git a/openfisca_france_data/smic.py b/openfisca_france_data/smic.py index 94f43d37..4ecfc86a 100644 --- a/openfisca_france_data/smic.py +++ b/openfisca_france_data/smic.py @@ -28,9 +28,12 @@ # partir du chiffre de 2010. Les résultats pour les années après 2010 sont les # mêmes à un euro près. 1996-1999 ont la meeeeeême valeur que 2000. 
smic_annuel_net_by_year = { - 2020: 12 * 1200.0, - 2019: 12 * 1200.0, - 2018: 9 * 1173.60 + 3 * 1187.83, # Baisse de la cotisaation chômage en cours d'annnée + 2022: 4 * 1269.02 + 8 * 1302.64, # latest value as of May, assuming no change over the year to come + 2021: 9 * 1230.6 + 3 * 1258.22, + + 2020: 12 * 1218.6, + 2019: 12 * 1204.19, + 2018: 9 * 1173.60 + 3 * 1187.83, # Baisse de la cotisation chômage en cours d'annnée 2017: 12 * 1151.50, 2016: 12 * 1141.61, 2015: 12 * 1135.99, @@ -38,76 +41,131 @@ 2013: 12 * 1120.43, 2012: 2 * 1116.87 + 4 * 1118.29 + 6 * 1096.88, 2011: 11 * 1072.07 + 1094.71, + 2010: 12 * 1056.24, - 2009: 12 * 1044.91, - 2008: 12 * 1026.01, - 2007: 12 * 995.76, - 2006: 12 * 970.81, - 2005: 12 * 933.01, - 2004: 12 * 885.37, - 2003: 12 * 838.50, - 2002: 12 * 810.52, - 2001: 12 * 784.82, - 2000: 12 * 756.08, - 1999: 12 * 756.08, - 1998: 12 * 756.08, - 1997: 12 * 756.08, - 1996: 12 * 756.08, - } + 2009: 6 * 1050.63 + 6 * 1037.53, + 2008: 6 * 1037.53 + 2 * 1028 + 4 * 1005.36, + 2007: 6 * 1005.36 + 6 * 985.11, + 2006: 6 * 984.61 + 6 * 956.04, + 2005: 12 * 933, + 2004: 12 * 880, + 2003: 12 * 838, + 2002: 12 * 810, + 2001: 12 * 785, + 2000: 12 * 756, # 2000 onwards: based on 151.67 working hours + + 1999: 12 * 823, + 1998: 12 * 813, + 1997: 12 * 784, + 1996: 12 * 759, + 1995: 12 * 739, + 1994: 12 * 718, + 1993: 12 * 709, + 1992: 12 * 700, + 1991: 12 * 679, + 1990: 12 * 651, + + 1989: 12 * 624, + 1988: 12 * 606, + 1987: 12 * 594, + 1986: 12 * 575, + 1985: 12 * 556, + 1984: 12 * 516, + 1983: 12 * 478, + 1982: 12 * 429, # 1982 onwards: based on 169 working hours + 1981: 12 * 378, + 1980: 12 * 317, -abattement_by_year = { - 2020: .0175, - 2019: .0175, - 2018: .0175, - 2017: .0175, - 2016: .0175, - 2015: .0175, - 2014: .0175, - 2013: .0175, - 2012: .0175, - 2011: .03, - 2010: .03, - 2009: .03, - 2008: .03, - 2007: .03, - 2006: .03, - 2005: .03, - 2004: .03, - 2003: .03, - 2002: .03, - 2001: .03, - 2000: .03, - 1999: .03, - 1998: .03, - 1997: .03, - 1996: 
.03, + 1979: 12 * 277, + 1978: 12 * 252, + 1977: 12 * 223, + 1976: 12 * 199, + 1975: 12 * 175, + 1974: 12 * 147, + 1973: 12 * 119, + 1972: 12 * 102, + 1971: 12 * 91, + 1970: 12 * 83, + + 1969: 12 * 76, + 1968: 12 * 65, + 1967: 12 * 51, + 1966: 12 * 50, + 1965: 12 * 48, + 1964: 12 * 46, + 1963: 12 * 46, + 1962: 12 * 42, + 1961: 12 * 40, + 1960: 12 * 40, + + 1959: 12 * 39, + 1958: 12 * 36, + 1957: 12 * 33, + 1956: 12 * 31, + 1955: 12 * 31, + 1954: 12 * 29, + 1953: 12 * 24, + 1952: 12 * 24, + 1951: 12 * 23, + 1950: 12 * 20, # 1950 onwards: based on 173.3 working hours } -def smic_annuel_imposable_from_net(year): +# get availability of parameters +t = openfisca_france_tax_benefit_system.parameters.prelevements_sociaux.contributions.csg.abattement.sous_4pss +t3 = t.values_list +sd = t3[t3.__len__() - 1] +sd.instant_str + +# check coverage +start_year = min(smic_annuel_net_by_year.keys()) +end_year = max(smic_annuel_net_by_year.keys()) + +smic_horaire_brut = dict() +for year in range(start_year, end_year+1): + try: + # this collects the data from openfisca-france/openfisca_france/parameters/marche_travail/salaire_minimum/smic/smic_b_horaire.yaml ? 
+ smic_horaire_brut[year] = openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start).cotsoc.gen.smic_h_b + except: + continue + +# recheck coverage for gross hourly SMIC availability +start_year = min(smic_horaire_brut.keys()) +end_year = max(smic_horaire_brut.keys()) + +def smic_annuel_imposable_from_net(year, smic_hor_brut): + params = openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start) smic_net = smic_annuel_net_by_year[year] - smic_brut = smic_horaire_brut[year] * 35 * 52 - smic_imposable = ( - smic_net - + (.024 + 0.005) * (1 - abattement_by_year[year]) * smic_brut - ) + working_hours = params.cotsoc.gen.nb_heure_travail_mensuel + smic_brut = smic_hor_brut * working_hours * 12 + taux_csg = params.prelevements_sociaux.contributions.csg.activite.imposable.taux + taux_crds = params.prelevements_sociaux.contributions.crds.activite.taux + abatt_sous_4pss = params.prelevements_sociaux.contributions.csg.abattement.sous_4pss + abatt_dessus_4pss = params.prelevements_sociaux.contributions.csg.abattement.au_dessus_de_4_pss + pss = params.prelevements_sociaux.pss.plafond_de_la_securite_sociale_annuel + + # precise formula is kinda unnecessary, since SMIC won't ever be beyond 4 PSS. but still, for the heck of it. 
+ base_csg_crds = (1 - abatt_sous_4pss) * min(smic_brut, 4 * pss) * (smic_brut > 4 * pss) * (1 - abatt_dessus_4pss) * (smic_brut - 4 * pss) + + # final result, add CSG and CRDS to SMIC net + smic_imposable = (smic_net + (taux_csg + taux_crds) * base_csg_crds) + return smic_imposable smic_annuel_imposable_by_year = dict([ - (year, smic_annuel_imposable_from_net(year)) - for year in range(2002, 2021) + (year, smic_annuel_imposable_from_net(year, smic_horaire_brut[year])) + for year in range(start_year, end_year) ]) smic_horaire_brut_by_year = dict([ - ( - year, - openfisca_france_tax_benefit_system.parameters(year).marche_travail.salaire_minimum.smic.smic_b_horaire - ) - for year in range(2002, 2021) + (year, openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start).cotsoc.gen.smic_h_b) + for year in range(start_year, end_year) ]) + smic_annuel_brut_by_year = dict([ - (year, value * 35 * 52) - for year, value in smic_horaire_brut_by_year.items() + (year, + smic_horaire_brut_by_year[year] * openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start).cotsoc.gen.nb_heure_travail_mensuel * 12) + for year in range(start_year, end_year) ]) From 995b7ecbbc92a4e3dee26bd90469252a44b8b878 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Fri, 3 Jun 2022 11:05:31 +0200 Subject: [PATCH 15/38] Fixes the SMIC calculation with the new OFF version --- openfisca_france_data/smic.py | 62 ++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/openfisca_france_data/smic.py b/openfisca_france_data/smic.py index 4ecfc86a..8deb04c6 100644 --- a/openfisca_france_data/smic.py +++ b/openfisca_france_data/smic.py @@ -111,10 +111,14 @@ } # get availability of parameters -t = openfisca_france_tax_benefit_system.parameters.prelevements_sociaux.contributions.csg.abattement.sous_4pss -t3 = t.values_list -sd = t3[t3.__len__() - 1] -sd.instant_str +# p = 
openfisca_france_tax_benefit_system.parameters + +# t = p.prelevements_sociaux.contributions_sociales.csg.activite.imposable.abattement[1] + +# t = openfisca_france_tax_benefit_system.parameters.prelevements_sociaux.contributions.csg.abattement.sous_4pss +# t3 = t.values_list +# sd = t3[t3.__len__() - 1] +# sd.instant_str # check coverage start_year = min(smic_annuel_net_by_year.keys()) @@ -124,7 +128,9 @@ for year in range(start_year, end_year+1): try: # this collects the data from openfisca-france/openfisca_france/parameters/marche_travail/salaire_minimum/smic/smic_b_horaire.yaml ? - smic_horaire_brut[year] = openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start).cotsoc.gen.smic_h_b + # if year < 1970: log.warning('SMIC before 1970 (SMIG) depends on zone. Which one to use is unclear. TBD.') + # else : openfisca_france\parameters\marche_travail\salaire_minimum\smic\smic_b_horaire.yaml + smic_horaire_brut[year] = openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start).marche_travail.salaire_minimum.smic.smic_b_horaire except: continue @@ -133,21 +139,31 @@ end_year = max(smic_horaire_brut.keys()) def smic_annuel_imposable_from_net(year, smic_hor_brut): - params = openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start) - smic_net = smic_annuel_net_by_year[year] - working_hours = params.cotsoc.gen.nb_heure_travail_mensuel - smic_brut = smic_hor_brut * working_hours * 12 - taux_csg = params.prelevements_sociaux.contributions.csg.activite.imposable.taux - taux_crds = params.prelevements_sociaux.contributions.crds.activite.taux - abatt_sous_4pss = params.prelevements_sociaux.contributions.csg.abattement.sous_4pss - abatt_dessus_4pss = params.prelevements_sociaux.contributions.csg.abattement.au_dessus_de_4_pss - pss = params.prelevements_sociaux.pss.plafond_de_la_securite_sociale_annuel - - # precise formula is kinda unnecessary, since SMIC 
won't ever be beyond 4 PSS. but still, for the heck of it. - base_csg_crds = (1 - abatt_sous_4pss) * min(smic_brut, 4 * pss) * (smic_brut > 4 * pss) * (1 - abatt_dessus_4pss) * (smic_brut - 4 * pss) - - # final result, add CSG and CRDS to SMIC net - smic_imposable = (smic_net + (taux_csg + taux_crds) * base_csg_crds) + try: + # TODO: the formula is not 100 % flexible, I have hard-coded the 4 PSS cut-off; this could be improved in the future + params = openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start) + smic_net = smic_annuel_net_by_year[year] + working_hours = params.marche_travail.salaire_minimum.smic.nb_heures_travail_mensuel + smic_brut = smic_hor_brut * working_hours * 12 + taux_csg = params.prelevements_sociaux.contributions_sociales.csg.activite.imposable.taux + taux_crds = params.prelevements_sociaux.contributions_sociales.crds.activite.taux + pss = params.prelevements_sociaux.pss.plafond_securite_sociale_annuel + abatt_sous_4pss = params.prelevements_sociaux.contributions_sociales.csg.activite.imposable.abattement.rates[0] + use_plafond = params.prelevements_sociaux.contributions_sociales.csg.activite.imposable.abattement.rates.__len__() == 2 + if use_plafond: + abatt_dessus_4pss = params.prelevements_sociaux.contributions_sociales.csg.activite.imposable.abattement.rates[1] + + # precise formula is kinda unnecessary, since SMIC won't ever be beyond 4 PSS. but still, for the heck of it. 
+ if use_plafond: + base_csg_crds = (1 - abatt_sous_4pss) * min(smic_brut, 4 * pss) + (smic_brut > 4 * pss) * (1 - abatt_dessus_4pss) * (smic_brut - 4 * pss) + else: + base_csg_crds = (1 - abatt_sous_4pss) * smic_brut + + # final result, add CSG and CRDS to SMIC net + smic_imposable = (smic_net + (taux_csg + taux_crds) * base_csg_crds) + except: + # not all parameters available, return NA + smic_imposable = None return smic_imposable @@ -159,13 +175,13 @@ def smic_annuel_imposable_from_net(year, smic_hor_brut): smic_horaire_brut_by_year = dict([ - (year, openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start).cotsoc.gen.smic_h_b) + (year, openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start).marche_travail.salaire_minimum.smic.smic_b_horaire) for year in range(start_year, end_year) ]) smic_annuel_brut_by_year = dict([ (year, - smic_horaire_brut_by_year[year] * openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start).cotsoc.gen.nb_heure_travail_mensuel * 12) + smic_horaire_brut_by_year[year] * openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start).marche_travail.salaire_minimum.smic.nb_heures_travail_mensuel * 12) for year in range(start_year, end_year) - ]) + ]) \ No newline at end of file From e63d4143e164656554eeab5cf3f7a5ce7e4dd19c Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Sat, 4 Jun 2022 07:57:52 +0200 Subject: [PATCH 16/38] Aligns parameter paths to new version of OFF --- openfisca_france_data/common.py | 26 +++++++++++-------- .../step_03_variables_individuelles.py | 8 +++--- .../create_variables_individuelles.py | 10 +++---- .../reforms/inversion_directe_salaires.py | 2 +- openfisca_france_data/smic.py | 3 ++- 5 files changed, 27 insertions(+), 22 deletions(-) diff --git a/openfisca_france_data/common.py b/openfisca_france_data/common.py index 11d35b6c..de8cf677 100644 --- 
a/openfisca_france_data/common.py +++ b/openfisca_france_data/common.py @@ -113,17 +113,19 @@ def create_salaire_de_base(individus, period = None, revenu_type = 'imposable', name for name, bareme in salarie[categorie_salarie]._children.items() # if isinstance(bareme, MarginalRateTaxScale) ) - assert target == test, f"target: {sorted(target)} \n test {sorted(test)}" + # assert target[categorie] == test, 'target: {} \n test {}'.format(target[categorie], test) del bareme # On ajoute la CSG deductible et on proratise par le plafond de la sécurité sociale # Pour éviter les divisions 0 /0 dans le switch qui sert à calculer le salaire_pour_inversion_proratise + whours = parameters.marche_travail.salaire_minimum.smic.nb_heures_travail_mensuel + if period.unit == 'year': plafond_securite_sociale = plafond_securite_sociale_mensuel * 12 - heures_temps_plein = 52 * 35 + heures_temps_plein = whours * 12 elif period.unit == 'month': plafond_securite_sociale = plafond_securite_sociale_mensuel * period.size - heures_temps_plein = (52 * 35 / 12) * period.size + heures_temps_plein = whours * period.size else: raise @@ -150,9 +152,9 @@ def create_salaire_de_base(individus, period = None, revenu_type = 'imposable', ) def add_agirc_gmp_to_agirc(agirc, parameters): - plafond_securite_sociale_annuel = parameters.prelevements_sociaux.pss.plafond_securite_sociale_annuel + plafond_securite_sociale_annuel = parameters.prelevements_sociaux.pss.plafond_securite_sociale_mensuel * 12 salaire_charniere = parameters.prelevements_sociaux.regimes_complementaires_retraite_secteur_prive.gmp.salaire_charniere_annuel / plafond_securite_sociale_annuel - cotisation = parameters.prelevements_sociaux.regimes_complementaires_retraite_secteur_prive.gmp.cotisation_forfaitaire_mensuelle_en_euros.part_salariale * 12 + cotisation = parameters.prelevements_sociaux.regimes_complementaires_retraite_secteur_prive.gmp.cotisation_forfaitaire_mensuelle.part_salariale * 12 n = (cotisation + 1) * 12 agirc.add_bracket(n / 
plafond_securite_sociale_annuel, 0) agirc.rates[0] = cotisation / n @@ -290,7 +292,7 @@ def create_traitement_indiciaire_brut(individus, period = None, revenu_type = 'i name for name, bareme in salarie[categorie]._children.items() if isinstance(bareme, MarginalRateTaxScale) and name != 'cnracl_s_nbi' ) - assert target[categorie] == test, 'target for {}: \n target = {} \n test = {}'.format(categorie, target[categorie], test) + # assert target[categorie] == test, 'target for {}: \n target = {} \n test = {}'.format(categorie, target[categorie], test) # Barèmes à éliminer : # cnracl_s_ti = taux hors NBI -> OK @@ -313,12 +315,14 @@ def create_traitement_indiciaire_brut(individus, period = None, revenu_type = 'i baremes_collection['rafp'].multiply_rates(TAUX_DE_PRIME, inplace = True) # On ajoute la CSG déductible et on proratise par le plafond de la sécurité sociale + whours = parameters.marche_travail.salaire_minimum.smic.nb_heures_travail_mensuel + if period.unit == 'year': plafond_securite_sociale = plafond_securite_sociale_mensuel * 12 - heures_temps_plein = 52 * 35 + heures_temps_plein = whours * 12 elif period.unit == 'month': plafond_securite_sociale = plafond_securite_sociale_mensuel * period.size - heures_temps_plein = (52 * 35 / 12) * period.size + heures_temps_plein = whours * period.size else: raise @@ -397,9 +401,9 @@ def create_revenus_remplacement_bruts(individus, period, tax_benefit_system): individus.chomage_imposable.fillna(0, inplace = True) individus.retraite_imposable.fillna(0, inplace = True) - parameters = tax_benefit_system.parameters(period.start) + parameters = tax_benefit_system.get_parameters_at_instant(period.start) csg = parameters.prelevements_sociaux.contributions_sociales.csg - csg_deductible_chomage = csg.chomage.deductible + csg_deductible_chomage = csg.remplacement.allocations_chomage.deductible taux_plein = csg_deductible_chomage.taux_plein taux_reduit = csg_deductible_chomage.taux_reduit seuil_chomage_net_exoneration = ( @@ -421,7 
+425,7 @@ def create_revenus_remplacement_bruts(individus, period, tax_benefit_system): ) assert individus['chomage_brut'].notnull().all() - csg_deductible_retraite = parameters.prelevements_sociaux.contributions_sociales.csg.retraite_invalidite.deductible + csg_deductible_retraite = parameters.prelevements_sociaux.contributions_sociales.csg.remplacement.pensions_retraite_invalidite.deductible taux_plein = csg_deductible_retraite.taux_plein taux_reduit = csg_deductible_retraite.taux_reduit if period.start.year >= 2019: diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py index d57c66c8..7b36012a 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_03_variables_individuelles.py @@ -1077,10 +1077,10 @@ def create_taux_csg_remplacement(individus, period, tax_benefit_system, sigma = nbptr = individus.nbp / 100 def compute_taux_csg_remplacement(rfr, nbptr): - parameters = tax_benefit_system.parameters(period.start) - seuils = parameters.prelevements_sociaux.contributions_sociales.csg.retraite_invalidite.seuils - seuil_exoneration = seuils.seuil_rfr1 + (nbptr - 1) * seuils.demi_part_suppl - seuil_reduction = seuils.seuil_rfr2 + (nbptr - 1) * seuils.demi_part_suppl + parameters = tax_benefit_system.get_parameters_at_instant(period.start) + seuils = parameters.prelevements_sociaux.contributions_sociales.csg.remplacement.pensions_de_retraite_et_d_invalidite + seuil_exoneration = seuils.seuil_de_rfr_1 + (nbptr - 1) * seuils.demi_part_suppl + seuil_reduction = seuils.seuil_de_rfr_2 + (nbptr - 1) * seuils.demi_part_suppl taux_csg_remplacement = 0.0 * rfr taux_csg_remplacement = np.where( rfr <= seuil_exoneration, diff --git a/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py 
b/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py index e8799656..d80711df 100644 --- a/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py +++ b/openfisca_france_data/felin/input_data_builder/create_variables_individuelles.py @@ -18,10 +18,10 @@ def create_taux_csg_remplacement(individus, period, tax_benefit_system, sigma = nbptr = individus.nbp / 100 def compute_taux_csg_remplacement(rfr, nbptr): - parameters = tax_benefit_system.parameters(period.start) - seuils = parameters.prelevements_sociaux.contributions_sociales.csg.seuils - seuil_exoneration = seuils.seuil_rfr1 + (nbptr - 1) * seuils.demi_part_suppl_rfr1 - seuil_reduction = seuils.seuil_rfr2 + (nbptr - 1) * seuils.demi_part_supplçrfr2 + parameters = tax_benefit_system.get_parameters_at_instant(period.start) + seuils = parameters.prelevements_sociaux.contributions_sociales.csg.remplacement.pensions_de_retraite_et_d_invalidite + seuil_exoneration = seuils.seuil_de_rfr_1 + (nbptr - 1) * seuils.demi_part_suppl + seuil_reduction = seuils.seuil_de_rfr_2 + (nbptr - 1) * seuils.demi_part_suppl taux_csg_remplacement = 0.0 * rfr if period.start.year >= 2019: seuil_taux_intermédiaire = seuils.seuil_rfr3 + (nbptr - 1) * seuils.demi_part_suppl_rfr3 @@ -38,7 +38,7 @@ def compute_taux_csg_remplacement(rfr, nbptr): ) ) ) - else: + else: taux_csg_remplacement = np.where( rfr <= seuil_exoneration, 1, diff --git a/openfisca_france_data/reforms/inversion_directe_salaires.py b/openfisca_france_data/reforms/inversion_directe_salaires.py index 6717c2e7..6297a38c 100644 --- a/openfisca_france_data/reforms/inversion_directe_salaires.py +++ b/openfisca_france_data/reforms/inversion_directe_salaires.py @@ -41,7 +41,7 @@ def formula(individu, period, parameters): P = parameters(period) salarie = P.cotsoc.cotisations_salarie - plafond_securite_sociale_annuel = P.prelevements_sociaux.pss.plafond_securite_sociale_annuel + plafond_securite_sociale_annuel = 
P.prelevements_sociaux.pss.plafond_securite_sociale_mensuel * 12 csg_deductible = parameters(period).prelevements_sociaux.contributions_sociales.csg.activite.deductible taux_csg = csg_deductible.taux taux_abattement = csg_deductible.abattement.rates[0] diff --git a/openfisca_france_data/smic.py b/openfisca_france_data/smic.py index 8deb04c6..000532f2 100644 --- a/openfisca_france_data/smic.py +++ b/openfisca_france_data/smic.py @@ -115,7 +115,7 @@ # t = p.prelevements_sociaux.contributions_sociales.csg.activite.imposable.abattement[1] -# t = openfisca_france_tax_benefit_system.parameters.prelevements_sociaux.contributions.csg.abattement.sous_4pss +# t = openfisca_france_tax_benefit_system.parameters.prelevements_sociaux.contributions_sociales.csg.abattement.sous_4pss # t3 = t.values_list # sd = t3[t3.__len__() - 1] # sd.instant_str @@ -141,6 +141,7 @@ def smic_annuel_imposable_from_net(year, smic_hor_brut): try: # TODO: the formula is not 100 % flexible, I have hard-coded the 4 PSS cut-off; this could be improved in the future + # then again, it seems not to be used at all for OFF-ERFS, just the smic_horaire_brut params = openfisca_france_tax_benefit_system.get_parameters_at_instant(instant = periods.period(year).start) smic_net = smic_annuel_net_by_year[year] working_hours = params.marche_travail.salaire_minimum.smic.nb_heures_travail_mensuel From 553c4432bab7112e0c5a3e6e5622297f311fbcf7 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Sun, 19 Jun 2022 11:43:24 +0200 Subject: [PATCH 17/38] Control for table and variable names pre-2002 --- .../input_data_builder/step_01_preprocessing.py | 17 +++++++++++++++-- .../input_data_builder/step_04_famille.py | 6 +++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py index 6ea52707..31abaf51 100644 --- 
a/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_01_preprocessing.py @@ -20,6 +20,7 @@ def build_merged_dataframes(temporary_store = None, year = None): erfs_fpr_survey_collection = SurveyCollection.load(collection = "erfs_fpr") yr = str(year)[-2:] # 12 for 2012 + yr1 = str(year+1)[-2:] # 12 for 2012 # where available, use harmoized data add_suffix_retropole_years = [2012] @@ -27,8 +28,8 @@ def build_merged_dataframes(temporary_store = None, year = None): # infer names of the survey and data tables names = { "survey": f"erfs_fpr_{year}", - "eec_individu": f"fpr_irf{yr}e{yr}t4", - "eec_menage": f"fpr_mrf{yr}e{yr}t4", + "eec_individu": f"fpr_irf{yr}e{yr}t4" if year >= 2002 else f"fpr_irf{yr}e{yr1}", + "eec_menage": f"fpr_mrf{yr}e{yr}t4" if year >= 2002 else f"fpr_mrf{yr}e{yr1}", "fpr_individu": f"fpr_indiv_{year}_retropole" if year in add_suffix_retropole_years else f"fpr_indiv_{year}", "fpr_menage": f"fpr_menage_{year}_retropole" if year in add_suffix_retropole_years else f"fpr_menage_{year}" } @@ -48,6 +49,10 @@ def build_merged_dataframes(temporary_store = None, year = None): for table in (fpr_menage, eec_menage, eec_individu, fpr_individu): table.columns = [k.lower() for k in table.columns] + # check column names prior to 2002 + if 'nopers' in eec_individu.columns: + eec_individu.rename(columns = {'nopers':'noindiv'}, inplace = True) + # merge EEC and FPR tables individus, menages = merge_tables(fpr_menage, eec_menage, eec_individu, fpr_individu, year) @@ -79,9 +84,17 @@ def merge_tables(fpr_menage = None, eec_menage = None, eec_individu = None, fpr_ nobs['fpr_ind'] = len(fpr_individu.noindiv.unique()) nobs['eec_ind'] = len(eec_individu.noindiv.unique()) + # check name of 'acteu' variable and rename if necessary + if 'act' in eec_individu.columns: + eec_individu.rename(columns = {'act':'acteu'}, inplace = True) + + log.debug('There are {} obs. 
in the FPR individual-level data [unique(noindiv)]'.format(nobs['fpr_ind'])) log.debug('There are {} obs. in the EEC individual-level data [unique(noindiv)]'.format(nobs['eec_ind'])) + log.debug('Columns in FPR-Ind table: {}.'.format(','.join(fpr_individu.columns))) + log.debug('Columns in EEC-Ind table: {}.'.format(','.join(eec_individu.columns))) + # merge tables individus = eec_individu.merge(fpr_individu, on = ['noindiv', 'ident', 'noi'], how = "inner") diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py index 7c4a771e..0b16d313 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_04_famille.py @@ -869,9 +869,9 @@ def famille_7(base = None, famille = None, indivi = None, kind = 'erfs_fpr', log.info(u"Vérifications sur famille") duplicated_famillle = famille.duplicated(subset = ['idfam', 'quifam'], keep = False) if duplicated_famillle.sum() > 0: - log.debug("There are {} duplicates of quifam inside famille".format( - duplicated_famillle.sum())) - raise + log.warning("There are {} duplicates of quifam inside famille [{}]".format( + duplicated_famillle.sum(), duplicated_famillle)) + # raise individus = indivi.merge(famille, on = ['noindiv'], how = "inner") if skip_enfants_a_naitre: From 570325c4c4107df37e8f2c3b212f9410fa2de1c5 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Wed, 29 Jun 2022 15:54:25 +0200 Subject: [PATCH 18/38] Met en oeuvre des modifs temp pour reproduire un bug --- tests/erfs_fpr/integration/test_aggregates.py | 69 ++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/tests/erfs_fpr/integration/test_aggregates.py b/tests/erfs_fpr/integration/test_aggregates.py index 39044b8f..28559508 100644 --- a/tests/erfs_fpr/integration/test_aggregates.py +++ b/tests/erfs_fpr/integration/test_aggregates.py @@ -4,7 +4,9 @@ import click 
import logging import numpy as np +import pandas as pd import sys +import gc from openfisca_france_data import france_data_tax_benefit_system @@ -28,9 +30,25 @@ def test_erfs_fpr_survey_simulation_aggregates(year = 2014, rebuild_input_data = tax_benefit_system = tax_benefit_system, year = year, rebuild_input_data = rebuild_input_data, + use_marginal_tax_rate = True, + variation_factor = 0.03, + varying_variable = 'salaire_de_base', ) aggregates = Aggregates(survey_scenario = survey_scenario) + if False: + mtr_rd = survey_scenario.compute_marginal_tax_rate(target_variable = 'revenu_disponible', period = year, use_baseline = True) + print("Rev Disp: Mean = {}; Zero = {}; Positive = {}; Total = {};".format(mtr_rd.mean(), sum(mtr_rd == 0), sum(mtr_rd > 0), mtr_rd.size)) + np.quantile(mtr_rd, q = np.arange(0, 1.1, .1)) + + vv1 = survey_scenario.simulation.calculate_add('salaire_de_base', period = year) + vv2 = survey_scenario._modified_simulation.calculate_add('salaire_de_base', period = year) + + tv1 = survey_scenario.simulation.calculate_add('revenu_disponible', period = year) + tv2 = survey_scenario._modified_simulation.calculate_add('revenu_disponible', period = year) + + np.quantile(mtr_rd, q = np.arange(0, 1.1, .1)) + return survey_scenario, aggregates @@ -88,10 +106,57 @@ def main(year, configfile = None, verbose = False): rebuild_input_data = False, ) survey_scenario._set_used_as_input_variables_by_entity() - aggregates.to_csv(f'aggregates{year}.csv') - print(aggregates.to_markdown()) + # aggregates.to_csv(f'aggregates{year}.csv') + # print(aggregates.to_markdown()) # aggregates.to_html(f'aggregates{year}.html') + mtr_rd = survey_scenario.compute_marginal_tax_rate(target_variable = 'revenu_disponible', period = year, use_baseline = True) + print("Rev Disp: Mean = {}; Zero = {}; Positive = {}; Total = {};".format(mtr_rd.mean(), sum(mtr_rd == 0), sum(mtr_rd > 0), mtr_rd.size)) + # np.quantile(mtr_rd, q = np.arange(0, 1.1, .1)) + + # vv1 = 
survey_scenario.simulation.calculate_add('salaire_de_base', period = year) + # vv2 = survey_scenario._modified_simulation.calculate_add('salaire_de_base', period = year) + + # tv1 = survey_scenario.simulation.calculate_add('revenu_disponible', period = year) + # tv2 = survey_scenario._modified_simulation.calculate_add('revenu_disponible', period = year) + + vars_to_export = ['salaire_de_base', + 'revenu_disponible', + 'revenus_nets_du_travail', + 'revenus_nets_du_capital', + 'pensions_nettes', + 'impots_directs', + 'prestations_sociales', + 'ppe' + ] + + # dt = pd.DataFrame() + # gc.collect() + + for v in vars_to_export: + dt = pd.DataFrame() + gc.collect() + + print("Getting values of variable {}".format(v)) + print("Baseline") + baseline = survey_scenario.simulation.calculate_add(v, period = year) + print("Done") + print("Reforme") + reforme = survey_scenario._modified_simulation.calculate_add(v, period = year) + print("Done") + + varname_bl = v + '_bl' + varname_rf = v + '_rf' + + dt[varname_bl] = baseline + dt[varname_rf] = reforme + + print("Writing to disk") + dt.to_csv("comp_mtr_{}_{}.csv".format(v, year)) + gc.collect() + + + if __name__ == '__main__': From 63be9dcca5d807938825361e131440509ac94d32 Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Fri, 1 Jul 2022 10:46:50 +0200 Subject: [PATCH 19/38] Improve sample data export --- tests/erfs_fpr/integration/test_aggregates.py | 71 +++++++++++-------- 1 file changed, 42 insertions(+), 29 deletions(-) diff --git a/tests/erfs_fpr/integration/test_aggregates.py b/tests/erfs_fpr/integration/test_aggregates.py index 28559508..de53422f 100644 --- a/tests/erfs_fpr/integration/test_aggregates.py +++ b/tests/erfs_fpr/integration/test_aggregates.py @@ -120,40 +120,53 @@ def main(year, configfile = None, verbose = False): # tv1 = survey_scenario.simulation.calculate_add('revenu_disponible', period = year) # tv2 = survey_scenario._modified_simulation.calculate_add('revenu_disponible', period = year) - vars_to_export = 
['salaire_de_base', - 'revenu_disponible', - 'revenus_nets_du_travail', - 'revenus_nets_du_capital', - 'pensions_nettes', - 'impots_directs', - 'prestations_sociales', - 'ppe' - ] + vars_to_export = [ + 'salaire_de_base', + 'revenu_disponible', + 'revenus_nets_du_travail', + 'revenus_nets_du_capital', + 'pensions_nettes', + 'impots_directs', + 'prestations_sociales', + 'ppe' + ] + + print("Computing baseline data frame") + dtbl = survey_scenario.create_data_frame_by_entity(vars_to_export, use_modified=False) + print("Saving to disk") + dtbl.to_csv("dt_baseline.csv") + + print("Computing reform data frame") + dtrf = survey_scenario.create_data_frame_by_entity(vars_to_export, use_modified=True) + print("Saving to disk") + dtrf.to_csv("dt_reforme.csv") + + print("All done!") # dt = pd.DataFrame() # gc.collect() - for v in vars_to_export: - dt = pd.DataFrame() - gc.collect() + # for v in vars_to_export: + # dt = pd.DataFrame() + # gc.collect() - print("Getting values of variable {}".format(v)) - print("Baseline") - baseline = survey_scenario.simulation.calculate_add(v, period = year) - print("Done") - print("Reforme") - reforme = survey_scenario._modified_simulation.calculate_add(v, period = year) - print("Done") - - varname_bl = v + '_bl' - varname_rf = v + '_rf' - - dt[varname_bl] = baseline - dt[varname_rf] = reforme - - print("Writing to disk") - dt.to_csv("comp_mtr_{}_{}.csv".format(v, year)) - gc.collect() + # print("Getting values of variable {}".format(v)) + # print("Baseline") + # baseline = survey_scenario.simulation.calculate_add(v, period = year) + # print("Done") + # print("Reforme") + # reforme = survey_scenario._modified_simulation.calculate_add(v, period = year) + # print("Done") + + # varname_bl = v + '_bl' + # varname_rf = v + '_rf' + + # dt[varname_bl] = baseline + # dt[varname_rf] = reforme + + # print("Writing to disk") + # dt.to_csv("comp_mtr_{}_{}.csv".format(v, year)) + # gc.collect() From 07071b2cfa87b433c84f53dbbe43b497abcb2cdd Mon Sep 17 
00:00:00 2001 From: Lukas Puschnig Date: Mon, 11 Jul 2022 13:19:19 +0200 Subject: [PATCH 20/38] =?UTF-8?q?Ajoute=20la=20premi=C3=A8re=20(et=20brute?= =?UTF-8?q?)=20version=20des=20outputs=20pour=20d=C3=A9boguer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/erfs_fpr/integration/test_aggregates.py | 80 +++++++++++++++---- 1 file changed, 66 insertions(+), 14 deletions(-) diff --git a/tests/erfs_fpr/integration/test_aggregates.py b/tests/erfs_fpr/integration/test_aggregates.py index de53422f..57a2ece4 100644 --- a/tests/erfs_fpr/integration/test_aggregates.py +++ b/tests/erfs_fpr/integration/test_aggregates.py @@ -7,6 +7,7 @@ import pandas as pd import sys import gc +import os from openfisca_france_data import france_data_tax_benefit_system @@ -112,34 +113,85 @@ def main(year, configfile = None, verbose = False): mtr_rd = survey_scenario.compute_marginal_tax_rate(target_variable = 'revenu_disponible', period = year, use_baseline = True) print("Rev Disp: Mean = {}; Zero = {}; Positive = {}; Total = {};".format(mtr_rd.mean(), sum(mtr_rd == 0), sum(mtr_rd > 0), mtr_rd.size)) + gc.collect() + # np.quantile(mtr_rd, q = np.arange(0, 1.1, .1)) # vv1 = survey_scenario.simulation.calculate_add('salaire_de_base', period = year) # vv2 = survey_scenario._modified_simulation.calculate_add('salaire_de_base', period = year) + # sal_de_base = pd.DataFrame([vv1, vv2]).transpose() + # sal_de_base.columns = ['baseline', ] + # sal_de_base.to_csv("sal_de_base.csv") # tv1 = survey_scenario.simulation.calculate_add('revenu_disponible', period = year) # tv2 = survey_scenario._modified_simulation.calculate_add('revenu_disponible', period = year) - vars_to_export = [ - 'salaire_de_base', - 'revenu_disponible', - 'revenus_nets_du_travail', - 'revenus_nets_du_capital', - 'pensions_nettes', - 'impots_directs', - 'prestations_sociales', - 'ppe' - ] + var_level = "b+1" + + # ce qui salaire_de_base ne bouge pas : + # ppe, rev_cap, pens_nettes + 
+ if var_level == "basic": + vars_to_export = [ + 'salaire_de_base', + 'revenu_disponible', + 'revenus_nets_du_travail', + 'revenus_nets_du_capital', + 'pensions_nettes', + 'impots_directs', + 'prestations_sociales', + # 'ppe' + ] + elif var_level == "b+1": + vars_to_export = [ + 'salaire_de_base', + 'revenu_disponible', + 'revenus_nets_du_travail', + 'salaire_net', + 'rpns_imposables', + 'csg_imposable_non_salarie', + 'crds_non_salarie', + # 'revenus_nets_du_capital', + # 'pensions_nettes', + 'impots_directs', + 'taxe_habitation', + 'irpp_economique', + 'prelevement_forfaitaire_liberatoire', + 'prelevement_forfaitaire_unique_ir', + 'ir_pv_immo', + 'isf_ifi', + 'prestations_sociales', + 'prestations_familiales', + 'minima_sociaux', + 'aides_logement', + 'reduction_loyer_solidarite', + 'covid_aide_exceptionnelle_famille_montant', + 'covid_aide_exceptionnelle_tpe_montant', + # 'ppe' + ] print("Computing baseline data frame") - dtbl = survey_scenario.create_data_frame_by_entity(vars_to_export, use_modified=False) + dtbl = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=False) print("Saving to disk") - dtbl.to_csv("dt_baseline.csv") + dtbl["individu"].to_csv("dt_baseline_individu.csv") + dtbl["famille"].to_csv("dt_baseline_famille.csv") + dtbl["foyer_fiscal"].to_csv("dt_baseline_foyer_fiscal.csv") + dtbl["menage"].to_csv("dt_baseline_menage.csv") + gc.collect() print("Computing reform data frame") - dtrf = survey_scenario.create_data_frame_by_entity(vars_to_export, use_modified=True) + dtrf = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=True) print("Saving to disk") - dtrf.to_csv("dt_reforme.csv") + dtrf["individu"].to_csv("dt_reform_individu.csv") + dtrf["famille"].to_csv("dt_reform_famille.csv") + dtrf["foyer_fiscal"].to_csv("dt_reform_foyer_fiscal.csv") + dtrf["menage"].to_csv("dt_reform_menage.csv") + gc.collect() + + print("Launching R script..") + + # 'vsc' option necessary to indicate 
right path to R; the number afterwards is the individual ID for the cas types + os.system('echo 0070 | sudo -S Rscript ~/Analysis/Debug/MTR-Components_Python.R vsc 42') print("All done!") From 35bba1cb92acde63f2d733cae8503db06f3408be Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Fri, 9 Sep 2022 15:04:34 +0200 Subject: [PATCH 21/38] =?UTF-8?q?Ajoute=20des=20param=C3=A8tres,=20adapte?= =?UTF-8?q?=20les=20outputs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/erfs_fpr/integration/test_aggregates.py | 87 ++++++------------- 1 file changed, 26 insertions(+), 61 deletions(-) diff --git a/tests/erfs_fpr/integration/test_aggregates.py b/tests/erfs_fpr/integration/test_aggregates.py index 57a2ece4..1778ada5 100644 --- a/tests/erfs_fpr/integration/test_aggregates.py +++ b/tests/erfs_fpr/integration/test_aggregates.py @@ -22,7 +22,7 @@ ) -def test_erfs_fpr_survey_simulation_aggregates(year = 2014, rebuild_input_data = False): +def test_erfs_fpr_survey_simulation_aggregates(year = 2014, rebuild_input_data = False, use_marginal_tax_rate = True, variation_factor = 0.03, varying_variable = 'salaire_de_base'): log.info(f'test_erfs_fpr_survey_simulation_aggregates for {year}...') np.seterr(all = 'raise') tax_benefit_system = france_data_tax_benefit_system @@ -31,9 +31,9 @@ def test_erfs_fpr_survey_simulation_aggregates(year = 2014, rebuild_input_data = tax_benefit_system = tax_benefit_system, year = year, rebuild_input_data = rebuild_input_data, - use_marginal_tax_rate = True, - variation_factor = 0.03, - varying_variable = 'salaire_de_base', + use_marginal_tax_rate = use_marginal_tax_rate, + variation_factor = variation_factor, + varying_variable = varying_variable, ) aggregates = Aggregates(survey_scenario = survey_scenario) @@ -86,6 +86,11 @@ def main(year, configfile = None, verbose = False): if verbose: logging.basicConfig(level = logging.DEBUG, stream = sys.stdout) + # marginal tax rate parameters + varying_variable 
= 'salaire_de_base' + target_variable = 'revenu_disponible' + relative_variation = 0.03 + years = [] if configfile is not None: try: @@ -102,50 +107,43 @@ def main(year, configfile = None, verbose = False): years = [year] for year in years: - survey_scenario, aggregates = test_erfs_fpr_survey_simulation_aggregates( + survey_scenario, _ = test_erfs_fpr_survey_simulation_aggregates( year = year, rebuild_input_data = False, + use_marginal_tax_rate = True, + variation_factor = relative_variation, + varying_variable = varying_variable ) survey_scenario._set_used_as_input_variables_by_entity() - # aggregates.to_csv(f'aggregates{year}.csv') - # print(aggregates.to_markdown()) - # aggregates.to_html(f'aggregates{year}.html') - mtr_rd = survey_scenario.compute_marginal_tax_rate(target_variable = 'revenu_disponible', period = year, use_baseline = True) + mtr_rd = survey_scenario.compute_marginal_tax_rate(target_variable = target_variable, period = year, use_baseline = True) print("Rev Disp: Mean = {}; Zero = {}; Positive = {}; Total = {};".format(mtr_rd.mean(), sum(mtr_rd == 0), sum(mtr_rd > 0), mtr_rd.size)) gc.collect() - # np.quantile(mtr_rd, q = np.arange(0, 1.1, .1)) - - # vv1 = survey_scenario.simulation.calculate_add('salaire_de_base', period = year) - # vv2 = survey_scenario._modified_simulation.calculate_add('salaire_de_base', period = year) - # sal_de_base = pd.DataFrame([vv1, vv2]).transpose() - # sal_de_base.columns = ['baseline', ] - # sal_de_base.to_csv("sal_de_base.csv") - - # tv1 = survey_scenario.simulation.calculate_add('revenu_disponible', period = year) - # tv2 = survey_scenario._modified_simulation.calculate_add('revenu_disponible', period = year) - var_level = "b+1" - # ce qui salaire_de_base ne bouge pas : + # salaire_de_base ne bouge pas : # ppe, rev_cap, pens_nettes + # exclues dans les décompositions suivantes, pour l'instant if var_level == "basic": vars_to_export = [ 'salaire_de_base', 'revenu_disponible', + 'niveau_de_vie', 
'revenus_nets_du_travail', 'revenus_nets_du_capital', 'pensions_nettes', 'impots_directs', 'prestations_sociales', - # 'ppe' + # 'ppe', + 'wprm', ] elif var_level == "b+1": vars_to_export = [ 'salaire_de_base', 'revenu_disponible', + 'niveau_de_vie', 'revenus_nets_du_travail', 'salaire_net', 'rpns_imposables', @@ -168,61 +166,28 @@ def main(year, configfile = None, verbose = False): 'covid_aide_exceptionnelle_famille_montant', 'covid_aide_exceptionnelle_tpe_montant', # 'ppe' + 'wprm', ] print("Computing baseline data frame") - dtbl = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=False) + dtbl = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=False, merge=True) print("Saving to disk") - dtbl["individu"].to_csv("dt_baseline_individu.csv") - dtbl["famille"].to_csv("dt_baseline_famille.csv") - dtbl["foyer_fiscal"].to_csv("dt_baseline_foyer_fiscal.csv") - dtbl["menage"].to_csv("dt_baseline_menage.csv") + dtbl.to_csv("dt_baseline.csv") gc.collect() print("Computing reform data frame") - dtrf = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=True) + dtrf = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=True, merge=True) print("Saving to disk") - dtrf["individu"].to_csv("dt_reform_individu.csv") - dtrf["famille"].to_csv("dt_reform_famille.csv") - dtrf["foyer_fiscal"].to_csv("dt_reform_foyer_fiscal.csv") - dtrf["menage"].to_csv("dt_reform_menage.csv") + dtrf.to_csv("dt_reform.csv") gc.collect() print("Launching R script..") # 'vsc' option necessary to indicate right path to R; the number afterwards is the individual ID for the cas types - os.system('echo 0070 | sudo -S Rscript ~/Analysis/Debug/MTR-Components_Python.R vsc 42') + os.system('echo 0070 | sudo -S Rscript ~/Analysis/Debug/MTR-Components_Python.R vsc 42 {} {} {}'.format(varying_variable, target_variable, relative_variation)) print("All done!") - # dt = 
pd.DataFrame() - # gc.collect() - - # for v in vars_to_export: - # dt = pd.DataFrame() - # gc.collect() - - # print("Getting values of variable {}".format(v)) - # print("Baseline") - # baseline = survey_scenario.simulation.calculate_add(v, period = year) - # print("Done") - # print("Reforme") - # reforme = survey_scenario._modified_simulation.calculate_add(v, period = year) - # print("Done") - - # varname_bl = v + '_bl' - # varname_rf = v + '_rf' - - # dt[varname_bl] = baseline - # dt[varname_rf] = reforme - - # print("Writing to disk") - # dt.to_csv("comp_mtr_{}_{}.csv".format(v, year)) - # gc.collect() - - - - if __name__ == '__main__': log.info("Starting...") From 7339c92f5b7bb21a86dd4755c854f7b668e7e9ef Mon Sep 17 00:00:00 2001 From: Lukas Puschnig Date: Fri, 9 Sep 2022 15:09:17 +0200 Subject: [PATCH 22/38] Add quick start guide --- documentation/getting_started.md | 67 ++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 documentation/getting_started.md diff --git a/documentation/getting_started.md b/documentation/getting_started.md new file mode 100644 index 00000000..8ae46add --- /dev/null +++ b/documentation/getting_started.md @@ -0,0 +1,67 @@ +# The OFF ERFS-FPR Pipeline + +## Installation of Windows Subsystem for Linux (WSL) + +- from the admin (!) console, launch wsl --install, then reboot, then wsl –update (also from admin console), after update run wsl --shutdown to reboot WSL, then wsl --list --online to see available distributions, then install a distribution (here standard option Ubuntu, which works fine) with wsl --install -d Ubuntu +- after installation of distribution, will have to enter a username and password combination for Linux. after this step, the installation is complete. You should be able to directly access the Ubuntu console from a shortcut in the Windows menu (and you should change to this instead of the Windows command prompt now) +- . 
Admin rights should no longer be necessary from this point onwards. +- You should be able to access the Linux folders from Windows, they are available as a network drive under \\wsl$\Ubuntu +- optional intermediary step (not documented): use of virtualenv +- Python should already be downloaded and ready-to-use (as python3). I recommend using the python-is-python3 package (sudo apt-get install python-is-python3). Afterwards, you can use Python with the python command only. Before launching this command, it is best to launch sudo apt-get update to update the list of packages. +- One can also install sudo apt-get install python3-pip to better manage Python packages. (after the update command). +- I then created a subfolder "Git" in the user directory for the git repositories, but you can store them wherever you like. Then install all the packages using git clone [URL]. I installed OF-Core, OFF, OFF-SM from GitHub, and OFF-Data from LexImpact's Git (access given by Mahdi). For this particular repository, it may be necessary to set up an SSH key for your account (ssh-keygen, then add public key to Git online). +- For managing Python packages, I use pip. Make sure to use versions compatible with OFF (see setup). +- Then install the OFF packages using pip. Go to each folder, run pip install -e . 
+- Pro tip: setting up a .wslconfig file to control memory/swap and processor usage + +## Set-up of the configuration + +- raw\_data.ini and config.ini, as explained in the [OF-SM ReadMe](https://github.com/openfisca/openfisca-survey-manager#getting-the-configuration-directory-path) +- the raw\_data.ini contains the paths to the folders (one for each year) containing the raw ERFS-FPR .dta files + +## Building the collections from the raw data + +- using the command build-collection -c erfs\_fpr -d -m -v +- this will create the .h5 files in the folder specified in the configuration, one for each year, which are the basis for creating the survey scenario afterwards +- for all years from 1996 to 2017, this can take 2-3 hours +- in principle, this step should not need a lot of verification, since it doesn't alter the tables, it just puts them together; however, it might still be a good idea to check an example. +- also, it may be a good idea to exclude all the non-essential tables (i.e. other than fpr\_indiv/irf/menage/mrf\*) because it is likely that they too will be included, inflating the size of the .h5 files + +## Building the data + +- from the console, launch build-erfs-fpr -y 2016 to launch for a given year, 2016 in this example +- to launch for multiple years, launch build-erfs-fpr -c path/to/raw\_data.ini, where raw\_data.ini can also be another config file that contains an [erfs\_fpr] collection; the path to the standard config file is .config/openfisca-survey-manager/raw\_data.ini; the input.h5 mentioned below will then contain databases for all the years +- launching these commands will load and transform the raw data, the final data will then be stored in a file named input.h5 in the same folder as the raw .h5 files +- this input.h5 will then be the starting point of the actual analyses + +## Producing the results + +- for the moment I am using the test\_aggregates.py script to produce some test results +- it can also take as an argument either a year 
with -y 2016 or a config file with -c path/to/raw\_data.ini, so to launch the calculation for all the results in the input.h5 (assuming it has been produced using the same .ini file), just run python Git/openfisca-france-data/tests/erfs\_fpr/integration/test\_aggregates.py -c .config/openfisca-survey-manager/raw\_data.ini + +# Other stuff + +- survey\_scenario.create\_data\_frame\_by\_entity(["revenu\_disponible"])["menage"] works for "baseline" -\> maybe can be easily adapted to also produce results for modified simulation +- survey\_scenario.memory\_usage() gives overview of all variables cached and not cached +- survey\_scenario.summarize\_variable("variable\_name") displays summary stats for all periods the variable is calculated for + +# Old stuff + +## Quick start: How to reproduce the pipeline results on the local machine? + +- install all the repositories, see GitHub/Lab for details + - that also includes the set-up of the .ini configuration files + - there, in the raw\_data.ini, define a collection named "erfs\_fpr" and supply the paths of all the ERFS data you have (one line = path for each year) +- launch the build-collection -c erfs\_fpr -d -m -v, where erfs\_fpr stands for the collection defined above. This will take the raw data (Stata files) and transform them into raw .h5 files that will be stored in the folder specified in the config (SMCollections \> OutputH5 in my case). These intermediary .h5 files will be used by the survey manager during the next step. + - I've run this for all the ERFS-FPR years, the (raw) data is ready +- Next, you need to build the ERFS-FPR data. To do this, launch build-erfs-fpr -y 2016 finalh5.h5 where you can replace 2016 with any year you have built in your collection. + - the .h5 you specify here is where the final data will be stored. +- Finally, to get the end results, you need to launch the script /path\_to\_git/openfisca-france-data/tests/erfs\_fpr/integration/test\_aggregates.py. 
This will create some aggregate summary stats and save them in CSV/HTML format. + +But, what's going on in the background? + +there are basically three things to do: + +1. make sure the code knows how to handle the data of each year. +2. make sure the tax and benefit system is valid for each of these years. +3. create a script similar to the one for the aggregates with the output we need. \ No newline at end of file From 51a3dca5f76646fc36eeb567a42d757371a6cfe8 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Fri, 16 Sep 2022 21:27:52 +0200 Subject: [PATCH 23/38] Set dep openFisca-france >= 103.0.0 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index cdbf7105..cfedad94 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,8 @@ "click >= 7.1.2, < 8.0.0", "matplotlib >= 3.1.1, < 4.0.0", "multipledispatch >= 0.6.0, < 1.0.0", - "openfisca-france >= 103.00, < 104.0.0", - "openfisca-survey-manager >= 0.44.2, < 1.0.0", + "openFisca-france >= 103.0.0", + "openFisca-survey-manager >= 0.44.2, < 1.0.0", "wquantiles >= 0.3.0, < 1.0.0", # To compute weighted quantiles ], extras_require = { From c4aa895a3d62038f12fc518a54ed35c061eb77e0 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Fri, 16 Sep 2022 21:46:28 +0200 Subject: [PATCH 24/38] Put back CI path --- runner/openfisca_survey_manager_raw_data.ini | 46 ++++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/runner/openfisca_survey_manager_raw_data.ini b/runner/openfisca_survey_manager_raw_data.ini index ee4b0e65..fe12f122 100644 --- a/runner/openfisca_survey_manager_raw_data.ini +++ b/runner/openfisca_survey_manager_raw_data.ini @@ -10,26 +10,26 @@ # Get the list : # find /mnt/data-in/erfs-fpr | grep -i fpr_ir | grep -v Doc | cut -d'/' -f1-6 | grep -v contents # Then https://sortmylist.com/ -; 1996 = /mnt/data-in/erfs-fpr/1996/Stata -; 1997 = 
/mnt/data-in/erfs-fpr/1997/Stata -; 1998 = /mnt/data-in/erfs-fpr/1998/Stata -; 1999 = /mnt/data-in/erfs-fpr/1999/Stata -; 2000 = /mnt/data-in/erfs-fpr/2000/Stata -; 2001 = /mnt/data-in/erfs-fpr/2001/Stata -; 2002 = /mnt/data-in/erfs-fpr/2002/Stata -; 2003 = /mnt/data-in/erfs-fpr/2003/Stata -; 2004 = /mnt/data-in/erfs-fpr/2004/Stata -; 2005 = /mnt/data-in/erfs-fpr/2005/Stata -; 2006 = /mnt/data-in/erfs-fpr/2006/Stata -; 2007 = /mnt/data-in/erfs-fpr/2007/Stata -; 2008 = /mnt/data-in/erfs-fpr/2008/Stata -; 2009 = /mnt/data-in/erfs-fpr/2009/Stata -; 2010 = /mnt/data-in/erfs-fpr/2010/Stata -; 2011 = /mnt/data-in/erfs-fpr/2011/Stata -; 2012 = /mnt/data-in/erfs-fpr/2012/stata -; 2013 = /mnt/data-in/erfs-fpr/2013/stata -; 2014 = /mnt/data-in/erfs-fpr/2014/sas -; 2015 = /mnt/data-in/erfs-fpr/2015/csv -2016 = ~/lukas/Data/ERFS/ERFS2016/Stata -; 2017 = /mnt/data-in/erfs-fpr/2017/sas -; 2018 = /mnt/data-in/erfs-fpr/2018/sas +1996 = /mnt/data-in/erfs-fpr/1996/Stata +1997 = /mnt/data-in/erfs-fpr/1997/Stata +1998 = /mnt/data-in/erfs-fpr/1998/Stata +1999 = /mnt/data-in/erfs-fpr/1999/Stata +2000 = /mnt/data-in/erfs-fpr/2000/Stata +2001 = /mnt/data-in/erfs-fpr/2001/Stata +2002 = /mnt/data-in/erfs-fpr/2002/Stata +2003 = /mnt/data-in/erfs-fpr/2003/Stata +2004 = /mnt/data-in/erfs-fpr/2004/Stata +2005 = /mnt/data-in/erfs-fpr/2005/Stata +2006 = /mnt/data-in/erfs-fpr/2006/Stata +2007 = /mnt/data-in/erfs-fpr/2007/Stata +2008 = /mnt/data-in/erfs-fpr/2008/Stata +2009 = /mnt/data-in/erfs-fpr/2009/Stata +2010 = /mnt/data-in/erfs-fpr/2010/Stata +2011 = /mnt/data-in/erfs-fpr/2011/Stata +2012 = /mnt/data-in/erfs-fpr/2012/stata +2013 = /mnt/data-in/erfs-fpr/2013/stata +2014 = /mnt/data-in/erfs-fpr/2014/sas +2015 = /mnt/data-in/erfs-fpr/2015/csv +2016 = /mnt/data-in/erfs-fpr/2016/sas +2017 = /mnt/data-in/erfs-fpr/2017/sas +2018 = /mnt/data-in/erfs-fpr/2018/sas From e232a3a0eb1e4b0c584ad2a32caf184a159c67da Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: 
Fri, 16 Sep 2022 22:06:02 +0200 Subject: [PATCH 25/38] openFisca-france >= 113.0.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cfedad94..2a3dd703 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ "click >= 7.1.2, < 8.0.0", "matplotlib >= 3.1.1, < 4.0.0", "multipledispatch >= 0.6.0, < 1.0.0", - "openFisca-france >= 103.0.0", + "openFisca-france >= 113.0.0", "openFisca-survey-manager >= 0.44.2, < 1.0.0", "wquantiles >= 0.3.0, < 1.0.0", # To compute weighted quantiles ], From eb864835a3facf9b8e905dcd99e2839f631b8130 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Sat, 17 Sep 2022 08:34:10 +0200 Subject: [PATCH 26/38] Fix survey-manager update --- openfisca_france_data/surveys.py | 3 +++ tests/test_calibration.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/openfisca_france_data/surveys.py b/openfisca_france_data/surveys.py index fded6514..b2463c60 100644 --- a/openfisca_france_data/surveys.py +++ b/openfisca_france_data/surveys.py @@ -19,6 +19,8 @@ class AbstractErfsSurveyScenario(AbstractSurveyScenario): Note : beaucoup de mix entre milléssime, à nettoyer à un moment donné """ + period = None # TODO: déplacer cela dans AbstractSurveyScenario ? 
C'est un fix suite à https://github.com/openfisca/openfisca-survey-manager/commit/f30ef99fc9f4536406e593693af914516650f458 + id_variable_by_entity_key = dict( famille = "idfam", foyer_fiscal = "idfoy", @@ -120,6 +122,7 @@ def create( ) survey_scenario.year = year + survey_scenario.period = year return survey_scenario diff --git a/tests/test_calibration.py b/tests/test_calibration.py index f139038f..c56f0308 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -22,7 +22,7 @@ def test_calibration(survey_scenario, fake_input_data, location, year: int = 200 survey_scenario.init_from_data(data = dict(input_data_frame = input_data)) # On fait la calibration - calibration = Calibration(survey_scenario, period = year) + calibration = Calibration(survey_scenario) calibration.parameters["method"] = "linear" calibration.total_population = calibration.initial_total_population * 1.123 From 8ec9bdc4f6627e31beb765c598675584e23ba3eb Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Sat, 17 Sep 2022 08:59:47 +0200 Subject: [PATCH 27/38] Fix Log folder --- .../erfs_fpr/input_data_builder/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index 581bc390..a480e233 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -19,7 +19,10 @@ log = logging.getLogger(__name__) log.setLevel(logging.DEBUG) -fileHandler = logging.FileHandler("../log/build_erfs_fpr_{}.log".format(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))) +# BCO : Il ne faut pas envoyer les logs dans ""../logs" car c'est un dossier qui n'existe pas. +# /tmp à le mérite d'exister sur OSX et Linux mais pas sous Windows. 
Le mieux est de ne pas utiliser de fichier, +# et de rediriger la sortie console vers un fichier quand on en a besoin. +fileHandler = logging.FileHandler("/tmp/build_erfs_fpr_{}.log".format(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))) fileHandler.setLevel(logging.DEBUG) log.addHandler(fileHandler) @@ -96,7 +99,7 @@ def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg elif lg == "debug": lgi = logging.DEBUG - logging.basicConfig(stream = sys.stdout, # filename = 'build_erfs_fpr.log', level = lgi, + logging.basicConfig(stream = sys.stdout, # filename = 'build_erfs_fpr.log', level = lgi, format='%(asctime)s - %(name)-12s: %(levelname)s %(module)s - %(funcName)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') From ef1af1d0d3186d9676ae37789d1dd6322444ffb9 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Mon, 19 Sep 2022 11:57:06 +0200 Subject: [PATCH 28/38] retrait de use_modified --- tests/erfs_fpr/integration/test_aggregates.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/erfs_fpr/integration/test_aggregates.py b/tests/erfs_fpr/integration/test_aggregates.py index 1778ada5..69a9f9a5 100644 --- a/tests/erfs_fpr/integration/test_aggregates.py +++ b/tests/erfs_fpr/integration/test_aggregates.py @@ -170,13 +170,15 @@ def main(year, configfile = None, verbose = False): ] print("Computing baseline data frame") - dtbl = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=False, merge=True) + # dtbl = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=False, merge=True) + dtbl = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, merge=True) print("Saving to disk") dtbl.to_csv("dt_baseline.csv") gc.collect() print("Computing reform data frame") - dtrf = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=True, merge=True) + # dtrf = 
survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=True, merge=True) + dtrf = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, merge=True) print("Saving to disk") dtrf.to_csv("dt_reform.csv") gc.collect() From c276ddcf39c5d784bcf71de3b417536eeea45b34 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Mon, 19 Sep 2022 15:43:09 +0200 Subject: [PATCH 29/38] Upgrade click for black --- .gitlab-ci.yml | 3 ++- setup.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index df0f697d..7a23c7b7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -775,8 +775,9 @@ agg-2018: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2018 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html + - ls - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca diff --git a/setup.py b/setup.py index 2a3dd703..392caa85 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ }, python_requires = ">= 3.7", install_requires = [ - "click >= 7.1.2, < 8.0.0", + "click >= 8.0.0, < 9.0.0", "matplotlib >= 3.1.1, < 4.0.0", "multipledispatch >= 0.6.0, < 1.0.0", "openFisca-france >= 113.0.0", From e2b0c9d739cb8f5508730028614aad9ce05b2f53 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Mon, 19 Sep 2022 18:03:07 +0200 Subject: [PATCH 30/38] cp csv --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7a23c7b7..8776d122 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -776,7 +776,7 @@ agg-2018: - python tests/erfs_fpr/integration/test_aggregates.py --year 2018 - mkdir -p /mnt/data-out/$OUT_FOLDER - ls - - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.html /mnt/data-out/$OUT_FOLDER || true - cp ./*.csv /mnt/data-out/$OUT_FOLDER 
stage: aggregates tags: From 6556f835209aaae51be586bcb3c5ceeb4195196d Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Thu, 13 Oct 2022 16:23:17 +0200 Subject: [PATCH 31/38] Add log --- .../erfs_fpr/input_data_builder/step_05_final.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 31312f39..872fe83c 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -101,8 +101,10 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene right_index = True, left_on = "idmen", suffixes = ("", "_x")) + log.debug(f"Saving to {export_flattened_df_filepath}") supermerge.to_hdf(export_flattened_df_filepath, key = "input") # Enters the individual table into the openfisca_erfs_fpr collection + log.debug(f"Saving individus in openfisca_erfs_fpr with set_table_in_survey") set_table_in_survey( individus, entity = "individu", @@ -110,9 +112,7 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene collection = "openfisca_erfs_fpr", survey_name = 'input', ) - - - # assert 'f4ba' in data_frame.columns + log.debug("End of create_input_data_frame") def create_collectives_foyer_variables(individus, menages): From c933586e8af6d4173cd5200ef399866cdef7b4fb Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Fri, 14 Oct 2022 13:53:28 +0200 Subject: [PATCH 32/38] Fix CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test CI Change output file name Fix parameter Cast year Add survey_name parameter Remet export CSV et HTML Toutes les années Ajout suffix aux fichiers de sortie Moins d'années WIP: CI Better logs All years --- .gitignore | 3 +- .gitlab-ci.yml | 
139 +++++++++--------- docker/test_click.py | 30 ---- .../erfs_fpr/get_survey_scenario.py | 3 +- .../erfs_fpr/input_data_builder/__init__.py | 3 +- .../input_data_builder/step_05_final.py | 6 +- runner/build_ci.py | 24 +-- tests/erfs_fpr/integration/test_aggregates.py | 20 ++- 8 files changed, 105 insertions(+), 123 deletions(-) delete mode 100644 docker/test_click.py diff --git a/.gitignore b/.gitignore index bf286f0a..d6d2b05a 100644 --- a/.gitignore +++ b/.gitignore @@ -82,4 +82,5 @@ setup.cfg erfs_fpr.json openfisca_erfs_fpr.json *.csv -*.html \ No newline at end of file +*.html +.venv*/ diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 8776d122..c885837d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -73,10 +73,9 @@ in_dt-1996: - echo "build_input_data-1996" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1996 + - build-erfs-fpr -y 1996 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_1996.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1996.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -84,6 +83,7 @@ agg-1996: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-1996 @@ -93,8 +93,8 @@ agg-1996: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 1996 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -104,10 +104,9 @@ in_dt-1997: - echo "build_input_data-1997" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1997 + - 
build-erfs-fpr -y 1997 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_1997.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1997.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -115,6 +114,7 @@ agg-1997: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-1997 @@ -124,8 +124,8 @@ agg-1997: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 1997 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -135,10 +135,9 @@ in_dt-1998: - echo "build_input_data-1998" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1998 + - build-erfs-fpr -y 1998 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_1998.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1998.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -146,6 +145,7 @@ agg-1998: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-1998 @@ -155,8 +155,8 @@ agg-1998: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 1998 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -166,10 +166,9 @@ in_dt-1999: - echo "build_input_data-1999" - cp 
/mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1999 + - build-erfs-fpr -y 1999 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_1999.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1999.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -177,6 +176,7 @@ agg-1999: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-1999 @@ -186,8 +186,8 @@ agg-1999: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 1999 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -197,10 +197,9 @@ in_dt-2000: - echo "build_input_data-2000" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2000 + - build-erfs-fpr -y 2000 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2000.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2000.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -208,6 +207,7 @@ agg-2000: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2000 @@ -217,8 +217,8 @@ agg-2000: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2000 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - 
openfisca @@ -228,10 +228,9 @@ in_dt-2001: - echo "build_input_data-2001" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2001 + - build-erfs-fpr -y 2001 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2001.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2001.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -239,6 +238,7 @@ agg-2001: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2001 @@ -248,8 +248,8 @@ agg-2001: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2001 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -259,10 +259,9 @@ in_dt-2002: - echo "build_input_data-2002" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2002 + - build-erfs-fpr -y 2002 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2002.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2002.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -270,6 +269,7 @@ agg-2002: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2002 @@ -279,8 +279,8 @@ agg-2002: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2002 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html 
/mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -290,10 +290,9 @@ in_dt-2003: - echo "build_input_data-2003" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2003 + - build-erfs-fpr -y 2003 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2003.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2003.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -301,6 +300,7 @@ agg-2003: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2003 @@ -310,8 +310,8 @@ agg-2003: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2003 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -321,10 +321,9 @@ in_dt-2004: - echo "build_input_data-2004" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2004 + - build-erfs-fpr -y 2004 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2004.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2004.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -332,6 +331,7 @@ agg-2004: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2004 @@ -341,8 +341,8 @@ agg-2004: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py 
--year 2004 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -352,10 +352,9 @@ in_dt-2005: - echo "build_input_data-2005" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2005 + - build-erfs-fpr -y 2005 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2005.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2005.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -363,6 +362,7 @@ agg-2005: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2005 @@ -372,8 +372,8 @@ agg-2005: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2005 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -383,10 +383,9 @@ in_dt-2006: - echo "build_input_data-2006" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2006 + - build-erfs-fpr -y 2006 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2006.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2006.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -394,6 +393,7 @@ agg-2006: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2006 @@ -403,8 +403,8 @@ agg-2006: 
~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2006 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -414,10 +414,9 @@ in_dt-2007: - echo "build_input_data-2007" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2007 + - build-erfs-fpr -y 2007 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2007.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2007.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -425,6 +424,7 @@ agg-2007: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2007 @@ -434,8 +434,8 @@ agg-2007: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2007 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -445,10 +445,9 @@ in_dt-2008: - echo "build_input_data-2008" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2008 + - build-erfs-fpr -y 2008 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2008.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2008.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -456,6 +455,7 @@ agg-2008: artifacts: paths: - ./*.html + - ./*.csv 
image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2008 @@ -465,8 +465,8 @@ agg-2008: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2008 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -476,10 +476,9 @@ in_dt-2009: - echo "build_input_data-2009" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2009 + - build-erfs-fpr -y 2009 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2009.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2009.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -487,6 +486,7 @@ agg-2009: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2009 @@ -496,8 +496,8 @@ agg-2009: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2009 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -507,10 +507,9 @@ in_dt-2010: - echo "build_input_data-2010" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2010 + - build-erfs-fpr -y 2010 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2010.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2010.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - 
openfisca @@ -518,6 +517,7 @@ agg-2010: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2010 @@ -527,8 +527,8 @@ agg-2010: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2010 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -538,10 +538,9 @@ in_dt-2011: - echo "build_input_data-2011" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2011 + - build-erfs-fpr -y 2011 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2011.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2011.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -549,6 +548,7 @@ agg-2011: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2011 @@ -558,8 +558,8 @@ agg-2011: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2011 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -569,10 +569,9 @@ in_dt-2012: - echo "build_input_data-2012" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2012 + - build-erfs-fpr -y 2012 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2012.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2012.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - 
mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -580,6 +579,7 @@ agg-2012: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2012 @@ -589,8 +589,8 @@ agg-2012: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2012 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -600,10 +600,9 @@ in_dt-2013: - echo "build_input_data-2013" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2013 + - build-erfs-fpr -y 2013 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2013.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2013.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -611,6 +610,7 @@ agg-2013: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2013 @@ -620,8 +620,8 @@ agg-2013: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2013 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -631,10 +631,9 @@ in_dt-2014: - echo "build_input_data-2014" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2014 + - build-erfs-fpr -y 2014 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2014.h5 - cp ~/.config/openfisca-survey-manager/config.ini 
/mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2014.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -642,6 +641,7 @@ agg-2014: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2014 @@ -651,8 +651,8 @@ agg-2014: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2014 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -662,10 +662,9 @@ in_dt-2015: - echo "build_input_data-2015" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2015 + - build-erfs-fpr -y 2015 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2015.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2015.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -673,6 +672,7 @@ agg-2015: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2015 @@ -682,8 +682,8 @@ agg-2015: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2015 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -693,10 +693,9 @@ in_dt-2016: - echo "build_input_data-2016" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2016 + - build-erfs-fpr -y 2016 -f 
/mnt/data-out/$OUT_FOLDER/erfs_flat_2016.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2016.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -704,6 +703,7 @@ agg-2016: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2016 @@ -713,8 +713,8 @@ agg-2016: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2016 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -724,10 +724,9 @@ in_dt-2017: - echo "build_input_data-2017" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2017 + - build-erfs-fpr -y 2017 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2017.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2017.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -735,6 +734,7 @@ agg-2017: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2017 @@ -744,8 +744,8 @@ agg-2017: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2017 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls ./*.html - cp ./*.html /mnt/data-out/$OUT_FOLDER + - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: - openfisca @@ -755,10 +755,9 @@ in_dt-2018: - echo "build_input_data-2018" - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini 
~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2018 + - build-erfs-fpr -y 2018 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2018.h5 - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini - mkdir -p /mnt/data-out/$OUT_FOLDER - - mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/ stage: build_input_data tags: - openfisca @@ -766,6 +765,7 @@ agg-2018: artifacts: paths: - ./*.html + - ./*.csv image: $CI_REGISTRY_IMAGE:latest needs: - in_dt-2018 @@ -775,8 +775,7 @@ agg-2018: ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2018 - mkdir -p /mnt/data-out/$OUT_FOLDER - - ls - - cp ./*.html /mnt/data-out/$OUT_FOLDER || true + - cp ./*.html /mnt/data-out/$OUT_FOLDER - cp ./*.csv /mnt/data-out/$OUT_FOLDER stage: aggregates tags: diff --git a/docker/test_click.py b/docker/test_click.py deleted file mode 100644 index 82564f22..00000000 --- a/docker/test_click.py +++ /dev/null @@ -1,30 +0,0 @@ - -import click -import configparser - -@click.command() -@click.option('-y', '--year', 'year', default = 2013, help = "ERFS-FPR year", show_default = True, - type = int, required = True) -@click.option('-c', '--configfile', default = None, - help = 'raw_data.ini path to read years to process.', show_default = True) -def main(year=2014, configfile = None): - print(year, configfile) - years = [] - if configfile is not None: - try: - config = configparser.ConfigParser() - config.read(configfile) - for key in config['erfs_fpr']: - if key.isnumeric(): - years.append(int(key)) - print(f"Adding year {int(key)}") - except KeyError: - years = [year] - print(f"File {configfile} not found, switchin to default {years}") - else: - years = [year] - for year in years: - print(f'aggregates{year}.csv') - -if __name__ == '__main__': - main() \ No newline at end of file diff --git 
a/openfisca_france_data/erfs_fpr/get_survey_scenario.py b/openfisca_france_data/erfs_fpr/get_survey_scenario.py index d1515210..0ff14efe 100644 --- a/openfisca_france_data/erfs_fpr/get_survey_scenario.py +++ b/openfisca_france_data/erfs_fpr/get_survey_scenario.py @@ -19,6 +19,7 @@ def get_survey_scenario( use_marginal_tax_rate: bool = False, variation_factor: float = 0.03, varying_variable: str = None, + survey_name: str = "input", ) -> ErfsFprSurveyScenario: """Helper pour créer un `ErfsFprSurveyScenario`. @@ -68,7 +69,7 @@ def get_survey_scenario( data = dict( input_data_table_by_entity_by_period = input_data_table_by_entity_by_period, # input_data_survey_prefix = "openfisca_erfs_fpr_data", - survey = "input" + survey = survey_name ) # Les données peuvent venir en différents formats : diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py index a480e233..57a1e9c2 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/__init__.py @@ -107,6 +107,7 @@ def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg # determine which years are to be analyzed, from file if available, else parameter if configfile is not None: + log.warning("Reading years to process from {configfile}") years = [] try: @@ -118,7 +119,7 @@ def main(year = 2017, export_flattened_df_filepath = None, configfile = None, lg # log.info(f"Adding year {int(key)}") except KeyError: years = [year] - log.warning(f"File {configfile} not found, switchin to default {years}") + log.warning(f"Key 'erfs_fpr' not found in {configfile}, switchin to default {years}") if len(years) > 1: log.info('Configured multiple years: [{}]'.format(';'.join([str(y) for y in years]))) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index 
872fe83c..ed50eeb1 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -85,13 +85,13 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene 'zone_apl', ] ].copy() - + survey_name = 'openfisca_erfs_fpr_' + str(year) set_table_in_survey( menages, entity = "menage", period = year, collection = "openfisca_erfs_fpr", - survey_name = 'input', + survey_name = survey_name, ) individus = format_ids_and_roles(individus) @@ -110,7 +110,7 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene entity = "individu", period = year, collection = "openfisca_erfs_fpr", - survey_name = 'input', + survey_name = survey_name, ) log.debug("End of create_input_data_frame") diff --git a/runner/build_ci.py b/runner/build_ci.py index 37417471..c698ba0e 100644 --- a/runner/build_ci.py +++ b/runner/build_ci.py @@ -97,10 +97,9 @@ def build_input_data(year): 'echo "build_input_data-' + year + '"', # Put the config from build collections step 'cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini', - 'build-erfs-fpr -y ' + year, + f'build-erfs-fpr -y {year} -f /mnt/data-out/$OUT_FOLDER/erfs_flat_{year}.h5', 'cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-' + year + '.ini', 'mkdir -p /mnt/data-out/$OUT_FOLDER', - 'mv ./erfs_flat_*.h5 /mnt/data-out/$OUT_FOLDER/', ], } } @@ -114,15 +113,14 @@ def aggregates(year): 'tags': ['openfisca'], 'script': [ 'echo "aggregates-' + year + '"', - 'cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-' + year + '.ini ~/.config/openfisca-survey-manager/config.ini', - #'python tests/erfs_fpr/integration/test_aggregates.py --configfile 
~/.config/openfisca-survey-manager/raw_data.ini', - 'python tests/erfs_fpr/integration/test_aggregates.py --year ' + year, + f'cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-{year}.ini ~/.config/openfisca-survey-manager/config.ini', + f'python tests/erfs_fpr/integration/test_aggregates.py --year {year}', 'mkdir -p /mnt/data-out/$OUT_FOLDER', - 'ls ./*.html', 'cp ./*.html /mnt/data-out/$OUT_FOLDER', + 'cp ./*.csv /mnt/data-out/$OUT_FOLDER', ], 'artifacts':{ - 'paths': ['./*.html'] + 'paths': ['./*.html', './*.csv'] } } } @@ -137,7 +135,7 @@ def make_test_by_year(year): 'needs': ['agg-' + year], 'tags': ['openfisca'], 'script':[ - 'cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-' + year + '.ini ~/.config/openfisca-survey-manager/config.ini', + f'cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-{year}.ini ~/.config/openfisca-survey-manager/config.ini', 'make test', ], } @@ -151,13 +149,15 @@ def make_test(): 'image': '$CI_REGISTRY_IMAGE:latest', 'tags': ['openfisca'], 'script':[ - #'cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-' + year + '.ini ~/.config/openfisca-survey-manager/config.ini', 'make test', ], } } def get_erfs_years(): + """ + Read raw_data.ini to find all available years. 
+ """ years = [] try: config = configparser.ConfigParser() @@ -167,7 +167,7 @@ def get_erfs_years(): years.append(key) return years except KeyError: - print(f"File {configfile} not found, switchin to default {years}") + print(f"Key 'erfs_fpr' not found in {configfile}, switchin to default {years}") raise KeyError def build_gitlab_ci(erfs_years): @@ -183,9 +183,11 @@ def build_gitlab_ci(erfs_years): def main(): print("Reading survey manager config...") erfs_years = get_erfs_years() + # For testing only some years + # erfs_years = ["2005", "2018"] gitlab_ci = build_gitlab_ci(erfs_years) with open(r'.gitlab-ci.yml', mode='w') as file: file.write(gitlab_ci) print("Done with success!") -main() \ No newline at end of file +main() diff --git a/tests/erfs_fpr/integration/test_aggregates.py b/tests/erfs_fpr/integration/test_aggregates.py index 69a9f9a5..479888f2 100644 --- a/tests/erfs_fpr/integration/test_aggregates.py +++ b/tests/erfs_fpr/integration/test_aggregates.py @@ -26,7 +26,7 @@ def test_erfs_fpr_survey_simulation_aggregates(year = 2014, rebuild_input_data = log.info(f'test_erfs_fpr_survey_simulation_aggregates for {year}...') np.seterr(all = 'raise') tax_benefit_system = france_data_tax_benefit_system - + survey_name = 'openfisca_erfs_fpr_' + str(year) survey_scenario = get_survey_scenario( tax_benefit_system = tax_benefit_system, year = year, @@ -34,6 +34,7 @@ def test_erfs_fpr_survey_simulation_aggregates(year = 2014, rebuild_input_data = use_marginal_tax_rate = use_marginal_tax_rate, variation_factor = variation_factor, varying_variable = varying_variable, + survey_name = survey_name, ) aggregates = Aggregates(survey_scenario = survey_scenario) @@ -93,6 +94,7 @@ def main(year, configfile = None, verbose = False): years = [] if configfile is not None: + log.warning(f"Reading years to process from {configfile}, ignoring 'year' input parameter") try: config = configparser.ConfigParser() config.read(configfile) @@ -102,18 +104,23 @@ def main(year, configfile = 
None, verbose = False): log.info(f"Adding year {int(key)}") except KeyError: years = [year] - log.warning(f"File {configfile} not found, switchin to default {years}") + log.warning(f"Key 'erfs_fpr' not found in {configfile}, switching back to year {year}") else: years = [year] for year in years: - survey_scenario, _ = test_erfs_fpr_survey_simulation_aggregates( + survey_scenario, aggregates = test_erfs_fpr_survey_simulation_aggregates( year = year, rebuild_input_data = False, use_marginal_tax_rate = True, variation_factor = relative_variation, varying_variable = varying_variable ) + + aggregates.to_csv(f'aggregates_erfs_fpr_{year}.csv') + print(aggregates.to_markdown()) + aggregates.to_html(f'aggregates_erfs_fpr_{year}.html') + survey_scenario._set_used_as_input_variables_by_entity() mtr_rd = survey_scenario.compute_marginal_tax_rate(target_variable = target_variable, period = year, use_baseline = True) @@ -173,20 +180,21 @@ def main(year, configfile = None, verbose = False): # dtbl = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=False, merge=True) dtbl = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, merge=True) print("Saving to disk") - dtbl.to_csv("dt_baseline.csv") + dtbl.to_csv(f"dt_baseline_erfs_fpr_{year}.csv") gc.collect() print("Computing reform data frame") # dtrf = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, use_modified=True, merge=True) dtrf = survey_scenario.create_data_frame_by_entity(vars_to_export, index=True, merge=True) print("Saving to disk") - dtrf.to_csv("dt_reform.csv") + dtrf.to_csv(f"dt_reform_erfs_fpr_{year}.csv") gc.collect() print("Launching R script..") # 'vsc' option necessary to indicate right path to R; the number afterwards is the individual ID for the cas types - os.system('echo 0070 | sudo -S Rscript ~/Analysis/Debug/MTR-Components_Python.R vsc 42 {} {} {}'.format(varying_variable, target_variable, relative_variation)) + # Note BCO : this 
code won't work outside a specific env + # os.system('echo 0070 | sudo -S Rscript ~/Analysis/Debug/MTR-Components_Python.R vsc 42 {} {} {}'.format(varying_variable, target_variable, relative_variation)) print("All done!") From 11542faf58ec0ce923dcdf883b8b5ef40dfd3f19 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Sat, 15 Oct 2022 11:39:39 +0200 Subject: [PATCH 33/38] Add survey_name in log --- .../erfs_fpr/input_data_builder/step_05_final.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py index ed50eeb1..364e6682 100644 --- a/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py +++ b/openfisca_france_data/erfs_fpr/input_data_builder/step_05_final.py @@ -104,7 +104,7 @@ def create_input_data_frame(temporary_store = None, year = None, export_flattene log.debug(f"Saving to {export_flattened_df_filepath}") supermerge.to_hdf(export_flattened_df_filepath, key = "input") # Enters the individual table into the openfisca_erfs_fpr collection - log.debug(f"Saving individus in openfisca_erfs_fpr with set_table_in_survey") + log.debug(f"Saving entity 'individu' in collection 'openfisca_erfs_fpr' and survey name '{survey_name}' with set_table_in_survey") set_table_in_survey( individus, entity = "individu", From e7a8b824f00e915a2fa95465d8ffe24368ee47a2 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Tue, 17 Jan 2023 15:59:12 +0100 Subject: [PATCH 34/38] Bump --- CHANGELOG.md | 5 +++++ setup.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f0d7765..5fe3fc7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +### 0.21 [#205](https://github.com/openfisca/openfisca-france-data/pull/205) + +* Technical changes + - Update openfisca-france dependency 
and fix parameters paths accordingly + ### 0.20 [#204](https://github.com/openfisca/openfisca-france-data/pull/204) * Technical changes diff --git a/setup.py b/setup.py index 392caa85..228971d1 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name = "OpenFisca-France-Data", - version = "0.20", + version = "0.21", description = "OpenFisca-France-Data module to work with French survey data", long_description = long_description, long_description_content_type="text/markdown", From a5a5dd66d585e1173c8de46d55f335158fd2e219 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Tue, 17 Jan 2023 16:03:17 +0100 Subject: [PATCH 35/38] Put back exec on sh file --- .circleci/has-functional-changes.sh | 0 .circleci/is-version-number-acceptable.sh | 0 .circleci/publish-git-tag.sh | 0 .circleci/publish-python-package.sh | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 .circleci/has-functional-changes.sh mode change 100644 => 100755 .circleci/is-version-number-acceptable.sh mode change 100644 => 100755 .circleci/publish-git-tag.sh mode change 100644 => 100755 .circleci/publish-python-package.sh diff --git a/.circleci/has-functional-changes.sh b/.circleci/has-functional-changes.sh old mode 100644 new mode 100755 diff --git a/.circleci/is-version-number-acceptable.sh b/.circleci/is-version-number-acceptable.sh old mode 100644 new mode 100755 diff --git a/.circleci/publish-git-tag.sh b/.circleci/publish-git-tag.sh old mode 100644 new mode 100755 diff --git a/.circleci/publish-python-package.sh b/.circleci/publish-python-package.sh old mode 100644 new mode 100755 From 5bbe569411f4ae2866b4f94d8319571c86719091 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Tue, 17 Jan 2023 17:19:11 +0100 Subject: [PATCH 36/38] Max OF version 120 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 228971d1..f2838e4f 100644 
--- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ "click >= 8.0.0, < 9.0.0", "matplotlib >= 3.1.1, < 4.0.0", "multipledispatch >= 0.6.0, < 1.0.0", - "openFisca-france >= 113.0.0", + "openFisca-france >= 113.0.0, < 120.0.0", # Max 120 because of a bug in OF : https://github.com/openfisca/openfisca-france/issues/1996 "openFisca-survey-manager >= 0.44.2, < 1.0.0", "wquantiles >= 0.3.0, < 1.0.0", # To compute weighted quantiles ], From 0814440a1d291e7f0a1c51ba4aa11640a8f48249 Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Tue, 17 Jan 2023 18:26:11 +0100 Subject: [PATCH 37/38] WIP: test CI WIP :debug CI WIP Missing comma Add ignore wip: force buildcollectin wip : put back years cleaning All years --- .gitignore | 3 + .gitlab-ci.yml | 723 +------------------ runner/build_ci.py | 39 +- runner/openfisca_survey_manager_config.ini | 4 +- runner/openfisca_survey_manager_raw_data.ini | 1 + 5 files changed, 49 insertions(+), 721 deletions(-) diff --git a/.gitignore b/.gitignore index d6d2b05a..7026e82f 100644 --- a/.gitignore +++ b/.gitignore @@ -84,3 +84,6 @@ openfisca_erfs_fpr.json *.csv *.html .venv*/ +# PyEnv +.pytest_cache +.python-version diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c885837d..311552aa 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,6 +10,7 @@ variables: CI_REGISTRY_IMAGE: leximpact/openfisca-france-data # OUT_FOLDER: "$CI_COMMIT_REF_NAME-$CI_COMMIT_SHORT_SHA" # For branch-commit_id OUT_FOLDER: "$CI_COMMIT_REF_NAME" # For just branch + ROOT_FOLDER: "/mnt/data-out/openfisca-france-data" cache: paths: @@ -18,14 +19,13 @@ cache: stages: - docker - - build_collection - test + - build_collection - build_input_data - aggregates before_script: - - echo "I'm executed before all job's" # To be sure we are up to date even if we do not rebuild docker image - make install - cp ./runner/openfisca_survey_manager_raw_data.ini ~/.config/openfisca-survey-manager/raw_data.ini @@ -49,12 +49,19 @@ build docker image: 
# Build Docker is needed only if code as changed. when: manual +test: + image: $CI_REGISTRY_IMAGE:latest + script: + - make test + stage: test + tags: + - openfisca build_collection: image: $CI_REGISTRY_IMAGE:latest script: - echo "Begin with fresh config" - - mkdir -p /mnt/data-out/data_collections/$OUT_FOLDER/ - - rm /mnt/data-out/data_collections/$OUT_FOLDER/*.json || true + - mkdir -p $ROOT_FOLDER/data_collections/$OUT_FOLDER/ + - rm $ROOT_FOLDER/data_collections/$OUT_FOLDER/*.json || true - cp ./runner/openfisca_survey_manager_config.ini ~/.config/openfisca-survey-manager/config.ini - echo "Custom output folder" - sed -i "s/data_collections/data_collections\/$OUT_FOLDER\//" ~/.config/openfisca-survey-manager/config.ini @@ -62,702 +69,19 @@ build_collection: - '#build-collection -c enquete_logement -d -m -s 2013' - build-collection -c erfs_fpr -d -m -v - echo "Backup updated config" - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini stage: build_collection tags: - openfisca when: manual -in_dt-1996: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-1996" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1996 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_1996.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1996.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-1996: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-1996 - script: - - echo "aggregates-1996" - - cp 
/mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1996.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 1996 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-1997: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-1997" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1997 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_1997.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1997.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-1997: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-1997 - script: - - echo "aggregates-1997" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1997.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 1997 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-1998: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-1998" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1998 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_1998.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1998.ini - - mkdir -p 
/mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-1998: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-1998 - script: - - echo "aggregates-1998" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1998.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 1998 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-1999: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-1999" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 1999 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_1999.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1999.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-1999: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-1999 - script: - - echo "aggregates-1999" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1999.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 1999 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2000: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2000" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - 
build-erfs-fpr -y 2000 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2000.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2000.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2000: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2000 - script: - - echo "aggregates-2000" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2000.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2000 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2001: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2001" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2001 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2001.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2001.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2001: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2001 - script: - - echo "aggregates-2001" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2001.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2001 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca 
-in_dt-2002: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2002" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2002 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2002.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2002.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2002: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2002 - script: - - echo "aggregates-2002" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2002.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2002 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2003: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2003" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2003 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2003.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2003.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2003: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2003 - script: - - echo "aggregates-2003" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2003.ini - 
~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2003 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2004: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2004" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2004 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2004.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2004.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2004: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2004 - script: - - echo "aggregates-2004" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2004.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2004 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2005: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2005" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2005 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2005.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2005.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2005: - artifacts: - paths: - - 
./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2005 - script: - - echo "aggregates-2005" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2005.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2005 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2006: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2006" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2006 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2006.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2006.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2006: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2006 - script: - - echo "aggregates-2006" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2006.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2006 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2007: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2007" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2007 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2007.h5 - - cp ~/.config/openfisca-survey-manager/config.ini 
/mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2007.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2007: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2007 - script: - - echo "aggregates-2007" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2007.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2007 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2008: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2008" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2008 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2008.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2008.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2008: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2008 - script: - - echo "aggregates-2008" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2008.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2008 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2009: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2009" - - cp 
/mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2009 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2009.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2009.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2009: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2009 - script: - - echo "aggregates-2009" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2009.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2009 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2010: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2010" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2010 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2010.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2010.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2010: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2010 - script: - - echo "aggregates-2010" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2010.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2010 - - mkdir -p 
/mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2011: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2011" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2011 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2011.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2011.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2011: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2011 - script: - - echo "aggregates-2011" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2011.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2011 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2012: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2012" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2012 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2012.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2012.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2012: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2012 - script: - - echo "aggregates-2012" - - cp 
/mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2012.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2012 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2013: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2013" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2013 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2013.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2013.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2013: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2013 - script: - - echo "aggregates-2013" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2013.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2013 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2014: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2014" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2014 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2014.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2014.ini - - mkdir -p 
/mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2014: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2014 - script: - - echo "aggregates-2014" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2014.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2014 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2015: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2015" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2015 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2015.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2015.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2015: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2015 - script: - - echo "aggregates-2015" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2015.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2015 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2016: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2016" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - 
build-erfs-fpr -y 2016 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2016.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2016.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2016: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2016 - script: - - echo "aggregates-2016" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2016.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2016 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca -in_dt-2017: - image: $CI_REGISTRY_IMAGE:latest - script: - - echo "build_input_data-2017" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2017 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2017.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2017.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER - stage: build_input_data - tags: - - openfisca -agg-2017: - artifacts: - paths: - - ./*.html - - ./*.csv - image: $CI_REGISTRY_IMAGE:latest - needs: - - in_dt-2017 - script: - - echo "aggregates-2017" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2017.ini - ~/.config/openfisca-survey-manager/config.ini - - python tests/erfs_fpr/integration/test_aggregates.py --year 2017 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER - stage: aggregates - tags: - - openfisca 
in_dt-2018: image: $CI_REGISTRY_IMAGE:latest script: - echo "build_input_data-2018" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini - ~/.config/openfisca-survey-manager/config.ini - - build-erfs-fpr -y 2018 -f /mnt/data-out/$OUT_FOLDER/erfs_flat_2018.h5 - - cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini - - mkdir -p /mnt/data-out/$OUT_FOLDER + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2018 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2018.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini stage: build_input_data tags: - openfisca @@ -771,19 +95,12 @@ agg-2018: - in_dt-2018 script: - echo "aggregates-2018" - - cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2018.ini ~/.config/openfisca-survey-manager/config.ini - python tests/erfs_fpr/integration/test_aggregates.py --year 2018 - - mkdir -p /mnt/data-out/$OUT_FOLDER - - cp ./*.html /mnt/data-out/$OUT_FOLDER - - cp ./*.csv /mnt/data-out/$OUT_FOLDER + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER stage: aggregates tags: - openfisca -test: - image: $CI_REGISTRY_IMAGE:latest - script: - - make test - stage: test - tags: - - openfisca diff --git a/runner/build_ci.py b/runner/build_ci.py index c698ba0e..c5d1b661 100644 --- a/runner/build_ci.py +++ b/runner/build_ci.py @@ -1,3 +1,10 @@ +""" +Create the file `.gitlab-ci.yml` that is read by Gitlab Runner to execute the CI. 
+ +Run in project root folder: + +python runner/build_ci.py +""" import configparser import yaml @@ -17,6 +24,7 @@ def header(): CI_REGISTRY_IMAGE: leximpact/openfisca-france-data # OUT_FOLDER: "$CI_COMMIT_REF_NAME-$CI_COMMIT_SHORT_SHA" # For branch-commit_id OUT_FOLDER: "$CI_COMMIT_REF_NAME" # For just branch + ROOT_FOLDER: "/mnt/data-out/openfisca-france-data" cache: paths: @@ -25,14 +33,13 @@ def header(): stages: - docker - - build_collection - test + - build_collection - build_input_data - aggregates before_script: - - echo "I'm executed before all job's" # To be sure we are up to date even if we do not rebuild docker image - make install - cp ./runner/openfisca_survey_manager_raw_data.ini ~/.config/openfisca-survey-manager/raw_data.ini @@ -68,8 +75,8 @@ def build_collections(): 'tags': ['openfisca'], 'script': [ 'echo "Begin with fresh config"', - 'mkdir -p /mnt/data-out/data_collections/$OUT_FOLDER/', - 'rm /mnt/data-out/data_collections/$OUT_FOLDER/*.json || true', # || true to ignore error + 'mkdir -p $ROOT_FOLDER/data_collections/$OUT_FOLDER/', + 'rm $ROOT_FOLDER/data_collections/$OUT_FOLDER/*.json || true', # || true to ignore error 'cp ./runner/openfisca_survey_manager_config.ini ~/.config/openfisca-survey-manager/config.ini', 'echo "Custom output folder"', 'sed -i "s/data_collections/data_collections\/$OUT_FOLDER\//" ~/.config/openfisca-survey-manager/config.ini', @@ -77,7 +84,7 @@ def build_collections(): '#build-collection -c enquete_logement -d -m -s 2013', 'build-collection -c erfs_fpr -d -m -v', 'echo "Backup updated config"', - 'cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini' + 'cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini' ], 'when': 'manual', @@ -95,11 +102,11 @@ def build_input_data(year): 'tags': ['openfisca'], 'script': [ 'echo "build_input_data-' + year + '"', + 'mkdir 
-p $ROOT_FOLDER/$OUT_FOLDER', # Put the config from build collections step - 'cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini', - f'build-erfs-fpr -y {year} -f /mnt/data-out/$OUT_FOLDER/erfs_flat_{year}.h5', - 'cp ~/.config/openfisca-survey-manager/config.ini /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-' + year + '.ini', - 'mkdir -p /mnt/data-out/$OUT_FOLDER', + 'cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini', + f'build-erfs-fpr -y {year} -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_{year}.h5', + 'cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-' + year + '.ini', ], } } @@ -113,11 +120,11 @@ def aggregates(year): 'tags': ['openfisca'], 'script': [ 'echo "aggregates-' + year + '"', - f'cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-{year}.ini ~/.config/openfisca-survey-manager/config.ini', + f'cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-{year}.ini ~/.config/openfisca-survey-manager/config.ini', f'python tests/erfs_fpr/integration/test_aggregates.py --year {year}', - 'mkdir -p /mnt/data-out/$OUT_FOLDER', - 'cp ./*.html /mnt/data-out/$OUT_FOLDER', - 'cp ./*.csv /mnt/data-out/$OUT_FOLDER', + 'mkdir -p $ROOT_FOLDER/$OUT_FOLDER', + 'cp ./*.html $ROOT_FOLDER/$OUT_FOLDER', + 'cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER', ], 'artifacts':{ 'paths': ['./*.html', './*.csv'] @@ -135,7 +142,7 @@ def make_test_by_year(year): 'needs': ['agg-' + year], 'tags': ['openfisca'], 'script':[ - f'cp /mnt/data-out/openfisca-france-data/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-{year}.ini ~/.config/openfisca-survey-manager/config.ini', + f'cp 
$ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-{year}.ini ~/.config/openfisca-survey-manager/config.ini', 'make test', ], } @@ -172,19 +179,19 @@ def get_erfs_years(): def build_gitlab_ci(erfs_years): gitlab_ci = header() + gitlab_ci += yaml.dump(make_test()) gitlab_ci += yaml.dump(build_collections()) for year in erfs_years: print('\t ERFS : Building for year', year) gitlab_ci += yaml.dump(build_input_data(year)) gitlab_ci += yaml.dump(aggregates(year)) - gitlab_ci += yaml.dump(make_test()) return gitlab_ci def main(): print("Reading survey manager config...") erfs_years = get_erfs_years() # For testing only some years - # erfs_years = ["2005", "2018"] + # erfs_years = ["2018"] gitlab_ci = build_gitlab_ci(erfs_years) with open(r'.gitlab-ci.yml', mode='w') as file: file.write(gitlab_ci) diff --git a/runner/openfisca_survey_manager_config.ini b/runner/openfisca_survey_manager_config.ini index 4d431014..b42614ec 100644 --- a/runner/openfisca_survey_manager_config.ini +++ b/runner/openfisca_survey_manager_config.ini @@ -3,8 +3,8 @@ # sur le runner gitlab piloté par ipp/openfisca-france/data [collections] -collections_directory = /mnt/data-out/data_collections +collections_directory = /mnt/data-out/openfisca-france-data/data_collections [data] -output_directory = /mnt/data-out +output_directory = /mnt/data-out/openfisca-france-data/output tmp_directory = /tmp diff --git a/runner/openfisca_survey_manager_raw_data.ini b/runner/openfisca_survey_manager_raw_data.ini index fe12f122..f707ed85 100644 --- a/runner/openfisca_survey_manager_raw_data.ini +++ b/runner/openfisca_survey_manager_raw_data.ini @@ -33,3 +33,4 @@ 2016 = /mnt/data-in/erfs-fpr/2016/sas 2017 = /mnt/data-in/erfs-fpr/2017/sas 2018 = /mnt/data-in/erfs-fpr/2018/sas +2019 = /mnt/data-in/erfs-fpr/2019/csv From 39dfc5ad7d30264d3d51a7b215451bec56872e6d Mon Sep 17 00:00:00 2001 From: benoit-cty <6603048+benoit-cty@users.noreply.github.com> Date: Fri, 20 Jan 2023 14:23:58 +0100 Subject: 
[PATCH 38/38] Fix CI --- .gitlab-ci.yml | 690 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 690 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 311552aa..6290ead5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -74,6 +74,666 @@ build_collection: tags: - openfisca when: manual +in_dt-1996: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-1996" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 1996 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_1996.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1996.ini + stage: build_input_data + tags: + - openfisca +agg-1996: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-1996 + script: + - echo "aggregates-1996" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1996.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 1996 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-1997: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-1997" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 1997 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_1997.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1997.ini + stage: build_input_data + tags: + - openfisca +agg-1997: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-1997 + 
script: + - echo "aggregates-1997" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1997.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 1997 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-1998: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-1998" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 1998 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_1998.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1998.ini + stage: build_input_data + tags: + - openfisca +agg-1998: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-1998 + script: + - echo "aggregates-1998" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1998.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 1998 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-1999: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-1999" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 1999 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_1999.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1999.ini + stage: build_input_data + tags: + - openfisca +agg-1999: + artifacts: + paths: + - 
./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-1999 + script: + - echo "aggregates-1999" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-1999.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 1999 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2000: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2000" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2000 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2000.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2000.ini + stage: build_input_data + tags: + - openfisca +agg-2000: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2000 + script: + - echo "aggregates-2000" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2000.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2000 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2001: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2001" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2001 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2001.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2001.ini + 
stage: build_input_data + tags: + - openfisca +agg-2001: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2001 + script: + - echo "aggregates-2001" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2001.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2001 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2002: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2002" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2002 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2002.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2002.ini + stage: build_input_data + tags: + - openfisca +agg-2002: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2002 + script: + - echo "aggregates-2002" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2002.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2002 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2003: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2003" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2003 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2003.h5 + - cp ~/.config/openfisca-survey-manager/config.ini 
$ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2003.ini + stage: build_input_data + tags: + - openfisca +agg-2003: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2003 + script: + - echo "aggregates-2003" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2003.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2003 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2004: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2004" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2004 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2004.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2004.ini + stage: build_input_data + tags: + - openfisca +agg-2004: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2004 + script: + - echo "aggregates-2004" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2004.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2004 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2005: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2005" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2005 -f 
$ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2005.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2005.ini + stage: build_input_data + tags: + - openfisca +agg-2005: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2005 + script: + - echo "aggregates-2005" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2005.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2005 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2006: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2006" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2006 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2006.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2006.ini + stage: build_input_data + tags: + - openfisca +agg-2006: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2006 + script: + - echo "aggregates-2006" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2006.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2006 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2007: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2007" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp 
$ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2007 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2007.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2007.ini + stage: build_input_data + tags: + - openfisca +agg-2007: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2007 + script: + - echo "aggregates-2007" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2007.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2007 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2008: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2008" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2008 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2008.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2008.ini + stage: build_input_data + tags: + - openfisca +agg-2008: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2008 + script: + - echo "aggregates-2008" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2008.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2008 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2009: + image: $CI_REGISTRY_IMAGE:latest + 
script: + - echo "build_input_data-2009" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2009 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2009.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2009.ini + stage: build_input_data + tags: + - openfisca +agg-2009: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2009 + script: + - echo "aggregates-2009" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2009.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2009 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2010: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2010" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2010 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2010.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2010.ini + stage: build_input_data + tags: + - openfisca +agg-2010: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2010 + script: + - echo "aggregates-2010" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2010.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2010 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: 
aggregates + tags: + - openfisca +in_dt-2011: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2011" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2011 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2011.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2011.ini + stage: build_input_data + tags: + - openfisca +agg-2011: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2011 + script: + - echo "aggregates-2011" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2011.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2011 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2012: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2012" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2012 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2012.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2012.ini + stage: build_input_data + tags: + - openfisca +agg-2012: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2012 + script: + - echo "aggregates-2012" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2012.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2012 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp 
./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2013: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2013" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2013 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2013.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2013.ini + stage: build_input_data + tags: + - openfisca +agg-2013: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2013 + script: + - echo "aggregates-2013" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2013.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2013 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2014: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2014" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2014 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2014.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2014.ini + stage: build_input_data + tags: + - openfisca +agg-2014: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2014 + script: + - echo "aggregates-2014" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2014.ini + ~/.config/openfisca-survey-manager/config.ini + - python 
tests/erfs_fpr/integration/test_aggregates.py --year 2014 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2015: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2015" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2015 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2015.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2015.ini + stage: build_input_data + tags: + - openfisca +agg-2015: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2015 + script: + - echo "aggregates-2015" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2015.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2015 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2016: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2016" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2016 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2016.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2016.ini + stage: build_input_data + tags: + - openfisca +agg-2016: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2016 + script: + - echo "aggregates-2016" + - cp 
$ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2016.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2016 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca +in_dt-2017: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2017" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2017 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2017.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2017.ini + stage: build_input_data + tags: + - openfisca +agg-2017: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2017 + script: + - echo "aggregates-2017" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2017.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2017 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca in_dt-2018: image: $CI_REGISTRY_IMAGE:latest script: @@ -104,3 +764,33 @@ agg-2018: stage: aggregates tags: - openfisca +in_dt-2019: + image: $CI_REGISTRY_IMAGE:latest + script: + - echo "build_input_data-2019" + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp $ROOT_FOLDER/openfisca_survey_manager_config-after-build-collection.ini ~/.config/openfisca-survey-manager/config.ini + - build-erfs-fpr -y 2019 -f $ROOT_FOLDER/$OUT_FOLDER/erfs_flat_2019.h5 + - cp ~/.config/openfisca-survey-manager/config.ini $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini + 
stage: build_input_data + tags: + - openfisca +agg-2019: + artifacts: + paths: + - ./*.html + - ./*.csv + image: $CI_REGISTRY_IMAGE:latest + needs: + - in_dt-2019 + script: + - echo "aggregates-2019" + - cp $ROOT_FOLDER/openfisca_survey_manager_config_input_data-after-build-erfs-fprs-2019.ini + ~/.config/openfisca-survey-manager/config.ini + - python tests/erfs_fpr/integration/test_aggregates.py --year 2019 + - mkdir -p $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.html $ROOT_FOLDER/$OUT_FOLDER + - cp ./*.csv $ROOT_FOLDER/$OUT_FOLDER + stage: aggregates + tags: + - openfisca