Browse Source

read emissions from crops [skip ci]

Daniel Busch 4 months ago
parent
commit
6be433dace
2 changed files with 166 additions and 74 deletions
  1. 96 0
      src/faostat_data_primap/helper/definitions.py
  2. 70 74
      src/faostat_data_primap/read.py

+ 96 - 0
src/faostat_data_primap/helper/definitions.py

@@ -1,4 +1,5 @@
 """definitions like folders, mappings etc."""
+
 from pathlib import Path
 
 domains = {
@@ -74,3 +75,98 @@ root_path = get_root_path()
 code_path = root_path / "src" / "faostat_data_primap"
 extracted_data_path = root_path / "extracted_data"
 downloaded_data_path = root_path / "downloaded_data"
+
+# data reading
+areas_to_remove_base = [
+    "World",
+    "Africa",
+    "Eastern Africa",
+    "Middle Africa",
+    "Northern Africa",
+    "Southern Africa",
+    "Western Africa",
+    "Americas",
+    "Northern America",
+    "Central America",
+    "Caribbean",
+    "South America",
+    "Asia",
+    "Central Asia",
+    "Eastern Asia",
+    "Southern Asia",
+    "South-eastern Asia",
+    "Western Asia",
+    "Europe",
+    "Eastern Europe",
+    "Northern Europe",
+    "Southern Europe",
+    "Western Europe",
+    "Oceania",
+    "Australia and New Zealand",
+    "Melanesia",
+    "Micronesia",
+    "Polynesia",
+    "Least Developed Countries",
+    "Land Locked Developing Countries",
+    "Small Island Developing States",
+    "Low Income Food Deficit Countries",
+    "Net Food Importing Developing Countries",
+    "Annex I countries",
+    "Non-Annex I countries",
+    "OECD",
+]
+
+read_config_all = {
+    "farm_gate_agriculture_energy": {
+        "2024-11-14": {
+            "units_to_remove" : ["TJ"],
+            "areas_to_remove": [
+                *areas_to_remove_base,
+            ],
+            "entity_mapping": {
+                "Emissions (CO2)": "CO2",
+                "Emissions (CH4)": "CH4",
+                "Emissions (N2O)": "N2O",
+            },
+            "columns_to_drop" : ["Element", "Element Code", "Item", "Item Code", "Area Code (M49)", "Area", "Area Code"],
+        }
+    },
+    "farm_gate_emissions_crops": {
+        "2024-11-14": {
+            "areas_to_remove": [
+                *areas_to_remove_base,
+                "European Union (27)",
+                # This seems to be data for a Belgian province,
+                # I don't think we need it
+                "Belgium-Luxembourg",
+                # We cannot split combined country data
+                "Serbia and Montenegro",
+            ],
+            "elements_to_remove": [
+                "Crop residues (N content)",
+                "Burning crop residues (Biomass burned, dry matter)",
+                "Area harvested",
+                "Nitrogen fertilizer content applied that leaches",
+                "Nitrogen fertilizer content applied that volatilises",
+                "Synthetic fertilizers (Agricultural use)",
+            ],
+            "entity_mapping": {
+                "Crop residues (Emissions N2O)": "N2O",
+                "Crop residues (Direct emissions N2O)": "N2O",
+                "Crop residues (Indirect emissions N2O)": "N2O",
+                "Burning crop residues (Emissions N2O)": "N2O",
+                "Burning crop residues (Emissions CH4)": "CH4",
+                "Rice cultivation (Emissions CH4)": "CH4",
+                "Crops total (Emissions N2O)": "N2O",
+                "Crops total (Emissions CH4)": "CH4",
+                "Synthetic fertilizers (Emissions N2O)": "N2O",
+                "Synthetic fertilizers (Direct emissions N2O)": "N2O",
+                "Indirect emissions (N2O that leaches) (Synthetic fertilizers)": "N2O",
+                "Indirect emissions (N2O that volatilises) (Synthetic fertilizers)": "N2O",
+            },
+            "columns_to_drop" : ["Element", "Element Code", "Item", "Item Code", "Area Code (M49)", "Area",
+                                 "Area Code", 'Item Code (CPC)', 'Source Code'],
+
+        }
+    },
+}

+ 70 - 74
src/faostat_data_primap/read.py

@@ -1,16 +1,18 @@
 """read data set"""
 
-
 import pandas as pd
 import primap2 as pm2
 import pycountry
 
-from src.faostat_data_primap.helper.definitions import downloaded_data_path
+from src.faostat_data_primap.helper.definitions import (
+    downloaded_data_path,
+    read_config_all,
+)
 
 custom_country_mapping_code = {}
 
 custom_country_mapping_name = {
-    # FAO
+    # farm gate agriculture energy
     "Bolivia (Plurinational State of)": "BOL",
     "China, Hong Kong SAR": "HKG",
     "China, Macao SAR": "MAC",
@@ -28,17 +30,19 @@ custom_country_mapping_name = {
     "Yugoslav SFR": "YUG",
     "World": "EARTH",
     # Andrew cement (probably not needed)
-    "Bonaire, Saint Eustatius and Saba": "BES",
-    "Cape Verde": "CPV",
+    # "Bonaire, Saint Eustatius and Saba": "BES",
+    # "Cape Verde": "CPV",
     "Democratic Republic of the Congo": "COD",
-    "Faeroe Islands": "FRO",
+    # "Faeroe Islands": "FRO",
     "Micronesia (Federated States of)": "FSM",
-    "Iran": "IRN",
-    "Laos": "LAO",
-    "Occupied Palestinian Territory": "PSE",
-    "Swaziland": "SWZ",
-    "Taiwan": "TWN",
+    # "Iran": "IRN",
+    # "Laos": "LAO",
+    # "Occupied Palestinian Territory": "PSE",
+    # "Swaziland": "SWZ",
+    # "Taiwan": "TWN",
     "Wallis and Futuna Islands": "WLF",
+    # farm gate emissions crops
+    "United States Virgin Islands": "VIR",
 }
 
 
@@ -109,75 +113,67 @@ files_to_read = (
     ),
 )
 
+df_all = None
 for domain, release, filename in files_to_read:
     dataset_path = downloaded_data_path / domain / release / filename
-    data_pd = pd.read_csv(dataset_path)
-
-    # remove in entries with unit TJ
-    data_pd = data_pd[data_pd["Unit"] != "TJ"]
-
-    # remove the country aggegrates
-    areas_to_remove = [
-        "World",
-        "Africa",
-        "Eastern Africa",
-        "Middle Africa",
-        "Northern Africa",
-        "Southern Africa",
-        "Western Africa",
-        "Americas",
-        "Northern America",
-        "Central America",
-        "Caribbean",
-        "South America",
-        "Asia",
-        "Central Asia",
-        "Eastern Asia",
-        "Southern Asia",
-        "South-eastern Asia",
-        "Western Asia",
-        "Europe",
-        "Eastern Europe",
-        "Northern Europe",
-        "Southern Europe",
-        "Western Europe",
-        "Oceania",
-        "Australia and New Zealand",
-        "Melanesia",
-        "Micronesia",
-        "Polynesia",
-        "Least Developed Countries",
-        "Land Locked Developing Countries",
-        "Small Island Developing States",
-        "Low Income Food Deficit Countries",
-        "Net Food Importing Developing Countries",
-        "Annex I countries",
-        "Non-Annex I countries",
-        "OECD",
-    ]
-
-    data_pd = data_pd[~data_pd["Area"].isin(areas_to_remove)]
-    country_mapping = {c: get_country_code(c) for c in data_pd["Area"].unique()}
-
-    data_pd["country (ISO3)"] = data_pd["Area"].map(country_mapping)
-
-    entity_mapping = {
-        "Emissions (CO2)": "CO2",
-        "Emissions (CH4)": "CH4",
-        "Emissions (N2O)": "N2O",
-    }
-
-    data_pd["entity"] = data_pd["Element"].map(entity_mapping)
-
-    # todo can we do this in primap2 function?
-    data_pd = data_pd.drop(
-        ["Element", "Element Code", "Item Code", "Area Code (M49)", "Area Code"], axis=1
+    read_config = read_config_all[domain][release]
+
+    df_domain = pd.read_csv(dataset_path)
+
+    # remove rows by unit
+    # todo align pattern with below
+    # df_domain = df_domain[df_domain["Unit"] != "TJ"]
+    if "units_to_remove" in read_config.keys():
+        df_domain = df_domain[~df_domain["Unit"].isin(read_config["units_to_remove"])]
+
+    # remove rows by element
+    if "elements_to_remove" in read_config.keys():
+        df_domain = df_domain[
+            ~df_domain["Element"].isin(read_config["elements_to_remove"])
+        ]
+
+    # remove rows by area
+    if "areas_to_remove" in read_config.keys():
+        df_domain = df_domain[~df_domain["Area"].isin(read_config["areas_to_remove"])]
+
+    # todo we shouldn't re-compute this every time
+    country_mapping = {c: get_country_code(c) for c in df_domain["Area"].unique()}
+
+    # create country column
+    df_domain["country (ISO3)"] = df_domain["Area"].map(country_mapping)
+
+    # create entity column
+    df_domain["entity"] = df_domain["Element"].map(read_config["entity_mapping"])
+
+    # create category column (combination of Item and Element works best)
+    df_domain["category"] = df_domain["Item"] + " " + df_domain["Element"]
+
+    # drop columns we don't need
+    df_domain = df_domain.drop(
+        read_config["columns_to_drop"],
+        axis=1,
     )
 
+    if df_all is None:
+        df_all = df_domain
+    else:
+        # makes sure there are no duplicate category names
+        if any(
+            [
+                category in df_all["category"].unique()
+                for category in df_domain["category"].unique()
+            ]
+        ):
+            msg = f"Duplicate category names for {domain}"
+            raise ValueError(msg)
+        df_all = pd.concat(
+            [df_all, df_domain],
+            axis=0,
+            join="outer",
+        ).reset_index(drop=True)
 
 coords_cols = {
     "area": "country (ISO3)",
-    "category": "Item",
     "unit": "Unit",
     "entity": "entity",
 }
@@ -202,7 +198,7 @@ meta_data = {
 }
 
 data_if = pm2.pm2io.convert_wide_dataframe_if(
-    data_pd,
+    df_all,
     coords_cols=coords_cols,
     coords_defaults=coords_defaults,
     coords_terminologies=coords_terminologies,