Skip to content

Commit

Permalink
Merge pull request #241 from openfisca/rebase-refacto-scenario
Browse files Browse the repository at this point in the history
Rebase refacto scenario
  • Loading branch information
clallemand authored Nov 8, 2023
2 parents ea1d6f8 + ea591f1 commit 502c9c7
Show file tree
Hide file tree
Showing 17 changed files with 185 additions and 157 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

### 3.0.0 [#241](https://github.com/openfisca/openfisca-france-data/pull/241)
- Breaking changes

Adapte le dépôt au passage à openfisca-survey-manager 2.0.0, qui constitue une refactorisation de l'objet survey-scenario et des simulations qu'il contient. Cela concerne donc les parties de ce dépôt qui héritent d'objets d'openfisca-survey-manager :
- `openfisca_france_data/aggregates.py`
- `openfisca_france_data/surveys.py`
Les autres modifications sont des adaptations syntaxiques mineures découlant de ce changement.

### 2.0.7 [#239](https://github.com/openfisca/openfisca-france-data/pull/239/files)
* New features
- Ajoute des nouveaux agrégats pour FranceAggregates
Expand Down
12 changes: 9 additions & 3 deletions openfisca_france_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import inspect
from importlib import metadata
import logging
import os
import pkg_resources
import pandas
from pathlib import Path

from openfisca_core import reforms # type: ignore
from openfisca_core.errors import VariableNameConflictError

import openfisca_france # type: ignore

Expand All @@ -13,6 +15,9 @@
from openfisca_france_data.model.base import * # noqa analysis:ignore


openfisca_france_data_location = Path(__file__).parent.parent


log = logging.getLogger(__name__)


Expand Down Expand Up @@ -141,7 +146,8 @@ def apply(self):
continue
try:
self.add_variable(variable)
except AttributeError:
except VariableNameConflictError:
# log.debug(f"{variable.__name__} has been updated in openfisca-france-data")
self.update_variable(variable)


Expand Down Expand Up @@ -206,7 +212,7 @@ def apply(self):

COUNTRY_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(
pkg_resources.get_distribution('openfisca-france-data').location,
openfisca_france_data_location,
'openfisca_france_data',
'plugins',
'aggregates',
Expand Down
42 changes: 22 additions & 20 deletions openfisca_france_data/aggregates.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@
import json
from pathlib import Path

import numpy as np
import pandas as pd
import pkg_resources
import os
from datetime import datetime
import pandas as pd

from openfisca_survey_manager.aggregates import AbstractAggregates
from openfisca_france_data import AGGREGATES_DEFAULT_VARS # type: ignore
from openfisca_france_data import openfisca_france_data_location, AGGREGATES_DEFAULT_VARS # type: ignore


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -38,14 +36,14 @@ def __init__(self, survey_scenario = None, target_source = None):
super().__init__(survey_scenario = survey_scenario)
self.target_source = target_source

def load_actual_data(self, year = None):
def load_actual_data(self, period = None):
target_source = self.target_source
assert target_source in ["ines", "taxipp", "france_entiere"], "les options possible pour source_cible sont ines, taxipp ou france_entiere"
assert year is not None
assert period is not None

if target_source == "taxipp":
taxipp_aggregates_file = Path(
pkg_resources.get_distribution("openfisca-france_data").location,
openfisca_france_data_location,
"openfisca_france_data",
"assets",
"aggregats",
Expand All @@ -62,41 +60,41 @@ def load_actual_data(self, year = None):
.rename(columns = {"unnamed: 0": "description"})
.dropna(subset = ["annee 2019", "annee 2018", "annee 2017", "annee 2016"], how = "all")
)
if f"annee {year}" not in df:
if f"annee {period}" not in df:
return

df = (
df[["variable_openfisca", f"annee {year}"]]
df[["variable_openfisca", f"annee {period}"]]
.dropna()
.rename(columns = {
"variable_openfisca": "variable",
f"annee {year}": year,
f"annee {period}": period,
})
)

beneficiaries = (
df.loc[df.variable.str.startswith("nombre")]
.set_index("variable")
.rename(index = lambda x : x.replace("nombre_", ""))
.rename(columns = {year: "actual_beneficiaries"})
.rename(columns = {period: "actual_beneficiaries"})
) / self.beneficiaries_unit

amounts = (
df.loc[~df.variable.str.startswith("nombre")]
.set_index("variable")
.rename(columns = {year: "actual_amount"})
.rename(columns = {period: "actual_amount"})
) / self.amount_unit

result = amounts.merge(beneficiaries, on = "variable", how = "outer").drop("PAS SIMULE")

elif target_source == "ines":
ines_aggregates_file = Path(
pkg_resources.get_distribution("openfisca-france_data").location,
openfisca_france_data_location,
"openfisca_france_data",
"assets",
"aggregats",
"ines",
f"ines_{year}.json"
f"ines_{period}.json"
)

with open(ines_aggregates_file, 'r') as f:
Expand All @@ -110,33 +108,37 @@ def load_actual_data(self, year = None):

elif target_source == "france_entiere":
ines_aggregates_file = Path(
pkg_resources.get_distribution("openfisca-france_data").location,
openfisca_france_data_location,
"openfisca_france_data",
"assets",
"aggregats",
"france_entiere",
f"france_entiere_{year}.json"
f"france_entiere_{period}.json"
)

with open(ines_aggregates_file, 'r') as f:
data = json.load(f)

result = pd.DataFrame(data['data']).drop(['source'], axis = 1)
result['actual_beneficiaries'] = result. actual_beneficiaries / self.beneficiaries_unit
result['actual_amount'] = result. actual_amount / self.amount_unit
result['actual_amount'] = result.actual_amount / self.amount_unit

result = result[["variable","actual_amount","actual_beneficiaries"]].set_index("variable")
result = result[[
"variable",
"actual_amount",
"actual_beneficiaries",
]].set_index("variable")

return result

def to_csv(self, path = None, absolute = True, amount = True, beneficiaries = True, default = 'actual',
relative = True, target = "reform"):
"""Saves the table to csv."""
assert path is not None

if os.path.isdir(path):
now = datetime.now()
file_path = os.path.join(path, 'Aggregates_%s_%s_%s.%s' % (self.target_source,self.year,now.strftime('%d-%m-%Y'), "csv"))
file_path = os.path.join(path, 'Aggregates_%s_%s_%s.%s' % (self.target_source, self.period, now.strftime('%d-%m-%Y'), "csv"))
else:
file_path = path

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@


from openfisca_survey_manager.temporary import temporary_store_decorator
from openfisca_france_data import openfisca_france_data_location
from openfisca_france_data.utils import (
check_structure,
control,
Expand All @@ -20,6 +21,7 @@
set_variables_default_value,
)


log = logging.getLogger(__name__)


Expand Down Expand Up @@ -202,8 +204,6 @@ def final(temporary_store = None, year = None, check = True):
print_id(final2)
# # TODO: merging with patrimoine
log.info(' traitement des zones apl')
import pkg_resources
openfisca_france_data_location = pkg_resources.get_distribution('openfisca-france-data').location
zone_apl_imputation_data_file_path = os.path.join(
openfisca_france_data_location,
'openfisca_france_data',
Expand Down
6 changes: 3 additions & 3 deletions openfisca_france_data/erfs/old/datatable.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os
import pkg_resources
import sys
import gc

Expand All @@ -18,8 +17,9 @@

#from openfisca_france.data.sources.config import DATA_DIR

openfisca_france_location = pkg_resources.get_distribution('openfisca-france-data').location
CONFIG_DIR = os.path.join(openfisca_france_location)
from openfisca_france_data import openfisca_france_data_location

CONFIG_DIR = os.path.join(openfisca_france_data_location)

#ERF_HDF5_DATA_DIR = os.path.join(SRC_PATH,'countries','france','data', 'erf')

Expand Down
4 changes: 2 additions & 2 deletions openfisca_france_data/erfs/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ class ErfsSurveyScenario(AbstractErfsSurveyScenario):
'zone_apl',
]

def __init__(self, year: int) -> None:
self.year = year
def __init__(self, period: int) -> None:
self.period = period
8 changes: 6 additions & 2 deletions openfisca_france_data/erfs_fpr/get_survey_scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from openfisca_france_data.erfs_fpr.scenario import ErfsFprSurveyScenario
from openfisca_france_data import france_data_tax_benefit_system

from openfisca_survey_manager import default_config_files_directory

from openfisca_france_data.model.id_variables import (
idmen_original,
Expand Down Expand Up @@ -102,6 +103,7 @@ def get_survey_scenario(
variation_factor: float = 0.03,
varying_variable: str = None,
survey_name: str = "input",
config_files_directory : str = default_config_files_directory,
) -> ErfsFprSurveyScenario:
"""Helper pour créer un `ErfsFprSurveyScenario`.
Expand All @@ -126,14 +128,14 @@ def get_survey_scenario(
survey_scenario = ErfsFprSurveyScenario.create(
tax_benefit_system = tax_benefit_system,
baseline_tax_benefit_system = baseline_tax_benefit_system,
year = year,
period = year,
)
else:
assert varying_variable is not None, "You need to specify the varying variable."
survey_scenario = ErfsFprSurveyScenario.create(
tax_benefit_system = tax_benefit_system,
baseline_tax_benefit_system = baseline_tax_benefit_system,
year = year,
period = year,
)
# taux marginaux !!
survey_scenario.variation_factor = variation_factor
Expand All @@ -153,6 +155,8 @@ def get_survey_scenario(
input_data_table_by_entity_by_period = input_data_table_by_entity_by_period,
survey = survey_name
)
data["config_files_directory"] = config_files_directory


# Les données peuvent venir en différents formats :
#
Expand Down
5 changes: 3 additions & 2 deletions openfisca_france_data/erfs_fpr/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ class ErfsFprSurveyScenario(AbstractErfsSurveyScenario):
"wprm_init",
]

def __init__(self, year: int) -> None:
self.year = year
def __init__(self, period: int) -> None:
# self.year = period
self.period = period

@classmethod
def build_input_data(cls, year: int) -> None:
Expand Down
14 changes: 6 additions & 8 deletions openfisca_france_data/model/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,10 @@ def formula(foyer_fiscal, period):
weight_foyers = foyer_fiscal('weight_foyers', period)
menage_ordinaire_foyers_fiscaux = foyer_fiscal('menage_ordinaire_foyers_fiscaux', period)
labels = arange(1, 11)
method = 2
decile, values = mark_weighted_percentiles(rfr, labels, weight_foyers * menage_ordinaire_foyers_fiscaux, method, return_quantiles = True)
# Alternative method
# method = 2
# decile, values = mark_weighted_percentiles(niveau_de_vie, labels, pondmen, method, return_quantiles = True)
decile, values = weighted_quantiles(rfr, labels, weight_foyers * menage_ordinaire_foyers_fiscaux, return_quantiles = True)
# decile, values = weighted_quantiles(rfr, labels, weight_foyers * menage_ordinaire_foyers_fiscaux, return_quantiles = True)
return decile


Expand Down Expand Up @@ -228,11 +228,9 @@ def formula(foyer_fiscal, period):
weight_foyers = foyer_fiscal('weight_foyers', period)
menage_ordinaire_foyers_fiscaux = foyer_fiscal('menage_ordinaire_foyers_fiscaux', period)
labels = arange(1, 11)
# Alternative method
# method = 2
# decile, values = mark_weighted_percentiles(niveau_de_vie, labels, pondmen, method, return_quantiles = True)
decile, values = weighted_quantiles(
rfr / nbptr, labels, weight_foyers * menage_ordinaire_foyers_fiscaux, return_quantiles = True)
method = 2
decile, values = mark_weighted_percentiles(
rfr / nbptr, labels, weight_foyers * menage_ordinaire_foyers_fiscaux, method, return_quantiles = True)
return decile


Expand Down
2 changes: 1 addition & 1 deletion openfisca_france_data/model/survey_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
class menage_ordinaire(Variable):
value_type = int
is_period_size_independent = True
default_value = True
default_value = 1
entity = Menage
definition_period = YEAR

Expand Down
Loading

0 comments on commit 502c9c7

Please sign in to comment.