Skip to content

Commit

Permalink
📊 Update WB poverty projections (#3843)
Browse files Browse the repository at this point in the history
* 📊 Update WB poverty projections

* ✨ new version

* ✨ snapshot

* update snapshot

* ✨ use more efficient zip load

* 🐛 fix poverty lines being in a long format

* 🚧 delete povertyline edit

* 📊 Round povertyline to 2 decimal places

* 🐛 correct old steps

* 🐛 update grapher step in old version

* 🐝 archive old steps

---------

Co-authored-by: lucasrodes <lucasrodes@users.noreply.github.com>
Co-authored-by: Marigold <mojmir.vinkler@gmail.com>
  • Loading branch information
3 people authored and Tuna Acisu committed Feb 5, 2025
1 parent 4302c45 commit c8da8fc
Show file tree
Hide file tree
Showing 11 changed files with 434 additions and 8 deletions.
8 changes: 8 additions & 0 deletions dag/archive/poverty_inequality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,11 @@ steps:
- data://meadow/wb/2024-06-26/poverty_projections
data://grapher/wb/2024-06-26/poverty_projections:
- data://garden/wb/2024-06-26/poverty_projections

# Poverty projections from the Poverty, Prosperity and Planet Report 2024 (old version)
data://meadow/wb/2024-12-03/poverty_projections:
- snapshot://wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.zip
data://garden/wb/2024-12-03/poverty_projections:
- data://meadow/wb/2024-12-03/poverty_projections
data://grapher/wb/2024-12-03/poverty_projections:
- data://garden/wb/2024-12-03/poverty_projections
12 changes: 6 additions & 6 deletions dag/poverty_inequality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ steps:
- data://garden/igh/2024-07-05/better_data_homelessness

# Poverty projections from the Poverty, Prosperity and Planet Report 2024
data://meadow/wb/2024-12-03/poverty_projections:
- snapshot://wb/2024-12-03/reproducibility_package_poverty_prosperity_planet.zip
data://garden/wb/2024-12-03/poverty_projections:
- data://meadow/wb/2024-12-03/poverty_projections
data://grapher/wb/2024-12-03/poverty_projections:
- data://garden/wb/2024-12-03/poverty_projections
data://meadow/wb/2025-01-15/poverty_projections:
- snapshot://wb/2025-01-15/reproducibility_package_poverty_prosperity_planet.zip
data://garden/wb/2025-01-15/poverty_projections:
- data://meadow/wb/2025-01-15/poverty_projections
data://grapher/wb/2025-01-15/poverty_projections:
- data://garden/wb/2025-01-15/poverty_projections
3 changes: 3 additions & 0 deletions etl/steps/data/garden/wb/2024-12-03/poverty_projections.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def run(dest_dir: str) -> None:
# Concatenate tables
tb = pr.concat(tables, ignore_index=True)

# Round povertyline to 2 decimal places
tb["povertyline"] = tb["povertyline"].round(2)

# Multiply poorpop by 1_000_000
tb["poorpop"] = tb["poorpop"] * 1_000_000

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"EAP": "East Asia and Pacific (PIP)",
"ECA": "Europe and Central Asia (PIP)",
"LAC": "Latin America and the Caribbean (PIP)",
"MNA": "Middle East and North Africa (PIP)",
"OHI": "Other high income countries (PIP)",
"SAS": "South Asia (PIP)",
"SSA": "Sub-Saharan Africa (PIP)",
"World": "World"
}
108 changes: 108 additions & 0 deletions etl/steps/data/garden/wb/2025-01-15/poverty_projections.meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
processing_level: minor
display: &common-display
tolerance: 0
entityAnnotationsMap: |-
Other high income countries (PIP): e.g. US, Western Europe, Australia, Japan, South Korea and Saudi Arabia
presentation:
topic_tags:
- Poverty

description_key_povertyline: |-
<% if povertyline == "2.15" %>
Extreme poverty here is defined as living below the International Poverty Line of $2.15 per day.
<% elif povertyline == "3.65" %>
A poverty line of $3.65 a day represents definitions of national poverty lines in lower-middle-income countries.
<% elif povertyline == "6.85" %>
A poverty line of $6.85 a day represents definitions of national poverty lines in upper-middle-income countries.
<%- endif -%>
description_key_ppp: |-
The data is measured in international-$ at 2017 prices – this adjusts for inflation and for differences in the cost of living between countries.
description_key_income_consumption: |-
Depending on the country and year, the data relates to income measured after taxes and benefits, or to consumption, per capita. "Per capita" means that the income of each household is attributed equally to each member of the household (including children).
description_key_nonmarket_income: |-
Non-market sources of income, including food grown by subsistence farmers for their own consumption, are taken into account.
description_key_scenarios: |-
<% if scenario == "Historical estimates" %>
Estimates are based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts. For more details about the methodology, please refer to the [World Bank PIP documentation](https://datanalytics.worldbank.org/PIP-Methodology/lineupestimates.html#nowcasts).
<% elif scenario == "Current forecast + historical growth projections" %>
This data is a projection of the estimates based on GDP growth projections from the World Bank's Global Economic Prospects and the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019.
<% elif scenario == "Historical estimates + projections" %>
This data combines data based on household surveys or extrapolated up until the year of the data release using GDP growth estimates and forecasts, with projections based on GDP growth projections from the World Bank's Global Economic Prospects and the Macro Poverty Outlook, together with IMF's World Economic Outlook, in the period 2025-2029. For the period 2030-2050, the data is projected using the average annual historical GDP per capita growth over 2010-2019.
<% elif scenario == "2% growth projections" %>
This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while keeping income inequality constant.
<% elif scenario == "2% growth + Gini reduction 1% projections" %>
This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while reducing income inequality by 1% of the Gini coefficient per year.
<% elif scenario == "2% growth + Gini reduction 2% projections" %>
This data is a projection of the estimates based on a scenario of 2% average GDP per capita growth, while reducing income inequality by 2% of the Gini coefficient per year.
<% elif scenario == "4% growth projections" %>
This data is a projection of the estimates based on a scenario of 4% average GDP per capita growth, while keeping income inequality constant.
<% elif scenario == "6% growth projections" %>
This data is a projection of the estimates based on a scenario of 6% average GDP per capita growth, while keeping income inequality constant.
<% elif scenario == "8% growth projections" %>
This data is a projection of the estimates based on a scenario of 8% average GDP per capita growth, while keeping income inequality constant.
<%- endif -%>
isprojection_by_scenario: |-
<% if scenario == "Historical estimates" or scenario == "Historical estimates + projections" %>
false
<% else %>
true
<%- endif -%>
# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
title: Poverty projections by the World Bank
update_period_days: 681


tables:
poverty_projections:
variables:
fgt0:
title: $<<povertyline>> a day - Share of population in poverty (<<scenario>>)
unit: "%"
short_unit: "%"
description_short: "Percentage of population living in households with an income or consumption per person below $<<povertyline>> a day"
description_key:
- "{definitions.description_key_povertyline}"
- "{definitions.description_key_ppp}"
- "{definitions.description_key_income_consumption}"
- "{definitions.description_key_nonmarket_income}"
- "{definitions.description_key_scenarios}"
presentation:
title_public: Share of population living in poverty
title_variant: $<<povertyline>> a day, <<scenario>>
display:
name: Share of population living below $<<povertyline>> a day (<<scenario>>)
numDecimalPlaces: 1
isProjection: "{definitions.isprojection_by_scenario}"
<<: *common-display

poorpop:
title: $<<povertyline>> a day - Number of people in poverty (<<scenario>>)
unit: "people"
short_unit: ""
description_short: "Number of people living in households with an income or consumption per person below $<<povertyline>> a day"
description_key:
- "{definitions.description_key_povertyline}"
- "{definitions.description_key_ppp}"
- "{definitions.description_key_income_consumption}"
- "{definitions.description_key_nonmarket_income}"
- "{definitions.description_key_scenarios}"
presentation:
title_public: Number of people living in poverty
title_variant: $<<povertyline>> a day, <<scenario>>
display:
name: Number of people living below $<<povertyline>> a day (<<scenario>>)
numDecimalPlaces: 0
isProjection: "{definitions.isprojection_by_scenario}"
<<: *common-display
139 changes: 139 additions & 0 deletions etl/steps/data/garden/wb/2025-01-15/poverty_projections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""Load a meadow dataset and create a garden dataset."""

import owid.catalog.processing as pr
from owid.catalog import Table
from owid.datautils.dataframes import map_series

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)

# Latest year without projections. connect_estimates_with_projections uses it as the
# year at which the other scenarios are filled in with the "historical" value.
LATEST_YEAR_WITHOUT_PROJECTIONS = 2024

# Meadow tables to be loaded and concatenated in run().
# The country-level table is deliberately not processed: according to the original
# author's note, it was created for the aggregations and not to be highlighted.
TABLES = ["region", "global"]

# Mapping from the scenario codes found in the "scenario" column to the descriptive
# names that replace them in run().
SCENARIOS = {
    "historical": "Historical estimates",
    "current_forecast": "Current forecast + historical growth projections",
    "2pct": "2% growth projections",
    "2pct_gini1": "2% growth + Gini reduction 1% projections",
    "2pct_gini2": "2% growth + Gini reduction 2% projections",
    "4pct": "4% growth projections",
    "6pct": "6% growth projections",
    "8pct": "8% growth projections",
}

# Index columns: passed to Table.format() in run() and kept when building the
# combined scenario in connect_estimates_with_projections.
INDEX_COLUMNS = ["country", "year", "povertyline", "scenario"]

# Indicator columns: the value columns that are pivoted by scenario.
INDICATOR_COLUMNS = ["fgt0", "poorpop"]


def run(dest_dir: str) -> None:
    """Create the garden `poverty_projections` dataset from the meadow dataset.

    The tables listed in TABLES are read from meadow and concatenated; the result is
    rescaled, harmonized, connected with the projections, given descriptive scenario
    names, and saved as a garden dataset under `dest_dir`.
    """
    #
    # Load inputs.
    #
    # Meadow dataset that holds one table per entry in TABLES.
    meadow_dataset = paths.load_dataset("poverty_projections")

    #
    # Process data.
    #
    # Read every table in TABLES and stack them into a single table.
    table = pr.concat(
        [meadow_dataset.read(name) for name in TABLES],
        ignore_index=True,
    )

    # Poverty lines are rounded to 2 decimal places.
    table["povertyline"] = table["povertyline"].round(2)

    # poorpop is multiplied by 1_000_000.
    table["poorpop"] = table["poorpop"] * 1_000_000

    # Harmonize entity names with the step's country mapping file.
    table = geo.harmonize_countries(df=table, countries_file=paths.country_mapping_path)

    # Anchor the scenarios at the latest estimate and add the combined scenario.
    table = connect_estimates_with_projections(table)

    # Replace scenario codes with their descriptive names.
    table["scenario"] = map_series(series=table["scenario"], mapping=SCENARIOS)

    # Recover origins by copying the metadata of the country column.
    table["scenario"] = table["scenario"].copy_metadata(table["country"])

    table = table.format(INDEX_COLUMNS, short_name="poverty_projections")

    #
    # Save outputs.
    #
    # The garden dataset takes the meadow dataset's metadata as its default.
    garden_dataset = create_dataset(
        dest_dir,
        tables=[table],
        check_variables_metadata=True,
        default_metadata=meadow_dataset.metadata,
    )

    # Write the new garden dataset.
    garden_dataset.save()


def connect_estimates_with_projections(tb: Table) -> Table:
    """Connect estimates with projections for visualizations in Grapher.

    Two things are done:

    1. For year LATEST_YEAR_WITHOUT_PROJECTIONS, every non-historical scenario in
       SCENARIOS that has no value for an indicator receives the value of the
       "historical" scenario, i.e. the latest estimate is repeated in the rest of the
       scenarios. Values that already exist are kept.
    2. A combined scenario named "Historical estimates + projections" is appended. It
       is the concatenation of the "historical" and "current_forecast" rows as they
       were in the input table (before step 1).

    Parameters
    ----------
    tb : Table
        Long-format table with the columns in INDEX_COLUMNS and INDICATOR_COLUMNS, where
        "scenario" still holds the scenario codes (the keys of SCENARIOS).

    Returns
    -------
    Table
        Long-format table with the anchored scenarios plus the combined scenario.
    """

    tb = tb.copy()

    # Keep the original rows of the two scenarios that make up the combined series.
    tb_historical = tb[tb["scenario"] == "historical"].copy().reset_index(drop=True)
    tb_current_forecast = tb[tb["scenario"] == "current_forecast"].copy().reset_index(drop=True)

    # Make table wider, by using scenario as columns
    tb = tb.pivot(index=["country", "year", "povertyline"], columns="scenario", values=INDICATOR_COLUMNS)

    # The index does not change inside the loop, so the mask selecting the rows of
    # LATEST_YEAR_WITHOUT_PROJECTIONS is built only once.
    is_latest_year = tb.index.get_level_values("year") == LATEST_YEAR_WITHOUT_PROJECTIONS

    for indicator in INDICATOR_COLUMNS:
        # The historical column is never written to below, so it is read once per indicator.
        historical_values = tb.loc[is_latest_year, (indicator, "historical")]

        for scenario in SCENARIOS:
            if scenario == "historical":
                continue

            # Fill only the missing values of the scenario with the historical ones.
            tb.loc[is_latest_year, (indicator, scenario)] = tb.loc[
                is_latest_year, (indicator, scenario)
            ].combine_first(historical_values)

    # Make table long again, by creating a scenario column.
    # NOTE(review): per the pandas documentation, future_stack=True does not drop rows
    # that are entirely NaN, so index/scenario combinations without data presumably
    # remain in the table as NaN rows - confirm this is intended.
    tb = tb.stack(level="scenario", future_stack=True).reset_index()

    # Recover origins
    for indicator in INDICATOR_COLUMNS:
        tb[indicator] = tb[indicator].copy_metadata(tb["country"])

    # Combine historical and current_forecast, by concatenating tb_historical and tb_current_forecast
    tb_connected = pr.concat([tb_historical, tb_current_forecast], ignore_index=True)

    # Label the combined series as "Historical estimates + projections"
    tb_connected["scenario"] = "Historical estimates + projections"

    # Keep only the columns in INDEX_COLUMNS and INDICATOR_COLUMNS
    tb_connected = tb_connected[INDEX_COLUMNS + INDICATOR_COLUMNS]

    # Concatenate tb and tb_connected
    tb = pr.concat([tb, tb_connected], ignore_index=True)

    return tb
12 changes: 10 additions & 2 deletions etl/steps/data/grapher/wb/2024-12-03/poverty_projections.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# Get paths and naming conventions for current step.
paths = PathFinder(__file__)

# Define index columns
INDEX_COLUMNS = ["country", "year", "povertyline", "scenario"]


def run(dest_dir: str) -> None:
#
Expand All @@ -14,12 +17,17 @@ def run(dest_dir: str) -> None:
ds_garden = paths.load_dataset("poverty_projections")

# Read table from garden dataset.
tb = ds_garden.read("poverty_projections", reset_index=False)
tb = ds_garden.read("poverty_projections")

# Round povertyline to 2 decimal places
tb["povertyline"] = tb["povertyline"].round(2)

tb = tb.format(INDEX_COLUMNS)

#
# Save outputs.
#
# Create a new grapher dataset with the same metadata as the garden dataset.
# Create a new grapher dataset with the same metadata as the garden dataset..
ds_grapher = create_dataset(
dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata
)
Expand Down
36 changes: 36 additions & 0 deletions etl/steps/data/grapher/wb/2025-01-15/poverty_projections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Load a garden dataset and create a grapher dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)

# Define index columns
INDEX_COLUMNS = ["country", "year", "povertyline", "scenario"]


def run(dest_dir: str) -> None:
    """Create the grapher `poverty_projections` dataset from the garden dataset.

    The garden table is read, its poverty lines are rounded to 2 decimal places, it is
    formatted with INDEX_COLUMNS, and the result is saved under `dest_dir`.
    """
    #
    # Load inputs.
    #
    # Garden dataset this step depends on.
    garden_dataset = paths.load_dataset("poverty_projections")

    # The single table of the garden dataset.
    table = garden_dataset.read("poverty_projections")

    # Poverty lines are rounded to 2 decimal places.
    table["povertyline"] = table["povertyline"].round(2)

    table = table.format(INDEX_COLUMNS)

    #
    # Save outputs.
    #
    # The grapher dataset takes the garden dataset's metadata as its default.
    grapher_dataset = create_dataset(
        dest_dir,
        tables=[table],
        check_variables_metadata=True,
        default_metadata=garden_dataset.metadata,
    )

    # Write the new grapher dataset.
    grapher_dataset.save()
Loading

0 comments on commit c8da8fc

Please sign in to comment.