Browse Source

all land use domains [skip ci]

Daniel Busch 4 months ago
parent
commit
5b28d9d465
2 changed files with 208 additions and 19 deletions
  1. 167 6
      src/faostat_data_primap/helper/definitions.py
  2. 41 13
      src/faostat_data_primap/read.py

+ 167 - 6
src/faostat_data_primap/helper/definitions.py

@@ -78,6 +78,7 @@ downloaded_data_path = root_path / "downloaded_data"
 
 # data reading
 areas_to_remove_base = [
+    # We can aggregate these country groups ourselves if we need to
     "World",
     "Africa",
     "Eastern Africa",
@@ -119,7 +120,11 @@ areas_to_remove_base = [
 read_config_all = {
     "farm_gate_agriculture_energy": {
         "2024-11-14": {
-            "units_to_remove" : ["TJ"],
+            # todo is NOFLAG the right choice?
+            "filename": "Emissions_Agriculture_Energy_E_All_Data_NOFLAG.csv",
+            # we don't need energy in Joule
+            # todo maybe explicitly deleting elements is better
+            "units_to_remove": ["TJ"],
             "areas_to_remove": [
                 *areas_to_remove_base,
             ],
@@ -128,21 +133,31 @@ read_config_all = {
                 "Emissions (CH4)": "CH4",
                 "Emissions (N2O)": "N2O",
             },
-            "columns_to_drop" : ["Element", "Element Code", "Item", "Item Code", "Area Code (M49)", "Area", "Area Code"],
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+            ],
         }
     },
     "farm_gate_emissions_crops": {
         "2024-11-14": {
+            "filename": "Emissions_crops_E_All_Data_NOFLAG.csv",
             "areas_to_remove": [
                 *areas_to_remove_base,
                 "European Union (27)",
                 # This seems to be data for a Belgian province,
                 # I don't think we need it
                 "Belgium-Luxembourg",
-                # We cannot split combined country data
+                # I'm not sure if we can downscale these two
                 "Serbia and Montenegro",
             ],
             "elements_to_remove": [
+                # all these elements are not emissions
                 "Crop residues (N content)",
                 "Burning crop residues (Biomass burned, dry matter)",
                 "Area harvested",
@@ -164,9 +179,155 @@ read_config_all = {
                 "Indirect emissions (N2O that leaches) (Synthetic fertilizers)": "N2O",
                 "Indirect emissions (N2O that volatilises) (Synthetic fertilizers)": "N2O",
             },
-            "columns_to_drop" : ["Element", "Element Code", "Item", "Item Code", "Area Code (M49)", "Area",
-                                 "Area Code", 'Item Code (CPC)', 'Source Code'],
-
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+                "Item Code (CPC)",
+                "Source Code",
+            ],
+        }
+    },
+    "farm_gate_livestock": {
+        "2024-11-14": {
+            "filename": "Emissions_livestock_E_All_Data_NOFLAG.csv",
+            "areas_to_remove": [
+                *areas_to_remove_base,
+                "Belgium-Luxembourg",
+                "Serbia and Montenegro",
+                "European Union (27)",
+            ],
+            "elements_to_remove": [
+                "Stocks",  # number of animals
+                "Manure management (manure treated, N content)",
+                "Manure left on pasture (N content)",
+                "Manure left on pasture that leaches (N content)",
+                "Manure left on pasture that volatilises (N content)",
+                "Manure applied to soils (N content)",
+                "Manure applied to soils that leaches (N content)",
+                "Manure applied to soils that volatilises (N content)",
+            ],
+            "entity_mapping": {
+                # todo we could make this smarter and get the entity from the string
+                "Livestock total (Emissions N2O)": "N2O",
+                "Livestock total (Emissions CH4)": "CH4",
+                "Enteric fermentation (Emissions CH4)": "CH4",
+                "Manure management (Emissions CH4)": "CH4",
+                "Manure management (Emissions N2O)": "N2O",
+                "Manure management (Direct emissions N2O)": "N2O",
+                "Manure management (Indirect emissions N2O)": "N2O",
+                "Manure left on pasture (Emissions N2O)": "N2O",
+                "Manure left on pasture (Direct emissions N2O)": "N2O",
+                "Indirect emissions (N2O that leaches) (Manure on pasture)": "N2O",
+                "Indirect emissions (N2O that volatilises) (Manure on pasture)": "N2O",
+                "Manure left on pasture (Indirect emissions N2O)": "N2O",
+                "Emissions (N2O) (Manure applied)": "N2O",
+                "Manure applied to soils (Direct emissions N2O)": "N2O",
+                "Indirect emissions (N2O that leaches) (Manure applied)": "N2O",
+                "Indirect emissions (N2O that volatilises) (Manure applied)": "N2O",
+                "Manure applied to soils (Indirect emissions N2O)": "N2O",
+            },
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+                "Item Code (CPC)",
+                "Source Code",
+            ],
+        }
+    },
+    "land_use_drained_organic_soils": {
+        "2023-11-09": {
+            "filename": "Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv",
+            "areas_to_remove": [
+                *areas_to_remove_base,
+                "Belgium-Luxembourg",
+                "Serbia and Montenegro",
+                "European Union (27)",
+                # todo check whether the Channel Islands belong to the UK
+                "Channel Islands",
+            ],
+            "elements_to_remove": [
+                "Area",
+                # todo can we convert this into emissions?
+                "Net stock change (C)",
+            ],
+            "entity_mapping": {
+                "Emissions (N2O)": "N2O",
+                "Emissions (CO2)": "CO2",
+            },
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+                "Source Code",
+            ],
+        }
+    },
+    "land_use_fires": {
+        "2023-11-09": {
+            "filename": "Emissions_Land_Use_Fires_E_All_Data_NOFLAG.csv",
+            "areas_to_remove": [
+                *areas_to_remove_base,
+                "Belgium-Luxembourg",
+                "Serbia and Montenegro",
+                "European Union (27)",
+                # todo check whether the Channel Islands belong to the UK
+                "Channel Islands",
+            ],
+            "elements_to_remove": ["Biomass burned (dry matter)", "Burned Area"],
+            "entity_mapping": {
+                "Emissions (CH4)": "CH4",
+                "Emissions (N2O)": "N2O",
+                "Emissions (CO2)": "CO2",
+            },
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+                "Source Code",
+            ],
+        }
+    },
+    "land_use_forests": {
+        "2024-11-14": {
+            "filename": "Emissions_Land_Use_Forests_E_All_Data_NOFLAG.csv",
+            "areas_to_remove": [
+                *areas_to_remove_base,
+                "Belgium-Luxembourg",
+                "Serbia and Montenegro",
+                "European Union (27)",
+            ],
+            "elements_to_remove": [
+                "Area",
+            ],
+            "entity_mapping": {"Net emissions/removals (CO2) (Forest land)": "CO2"},
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+                "Source Code",
+            ],
         }
     },
 }

+ 41 - 13
src/faostat_data_primap/read.py

@@ -8,9 +8,7 @@ from src.faostat_data_primap.helper.definitions import (
     downloaded_data_path,
     read_config_all,
 )
-
 custom_country_mapping_code = {}
-
 custom_country_mapping_name = {
     # farm gate agriculture energy
     "Bolivia (Plurinational State of)": "BOL",
@@ -29,7 +27,7 @@ custom_country_mapping_name = {
     "Venezuela (Bolivarian Republic of)": "VEN",
     "Yugoslav SFR": "YUG",
     "World": "EARTH",
-    # Andrew cement (probably not needed)
+    # todo Andrew's cement list below (deleted commented lines)
     # "Bonaire, Saint Eustatius and Saba": "BES",
     # "Cape Verde": "CPV",
     "Democratic Republic of the Congo": "COD",
@@ -43,6 +41,14 @@ custom_country_mapping_name = {
     "Wallis and Futuna Islands": "WLF",
     # farm gate emissions crops
     "United States Virgin Islands": "VIR",
+    # todo is this relevant to us?
+    'Pacific Islands Trust Territory' : 'PIC',
+    'Svalbard and Jan Mayen Islands' : "SJM",  # Norway
+    # something goes wrong with French characters in land_use_forests
+    "Côte d'Ivoire" : "CIV",
+'Curaçao' : "CUW",
+   "Réunion" : "REU",
+'Türkiye' : "TUR",
 }
 
 
@@ -104,25 +110,42 @@ files_to_read = (
     (
         "farm_gate_agriculture_energy",
         "2024-11-14",
-        "Emissions_Agriculture_Energy_E_All_Data_NOFLAG.csv",
     ),
     (
         "farm_gate_emissions_crops",
         "2024-11-14",
-        "Emissions_crops_E_All_Data_NOFLAG.csv",
     ),
+    (
+        "farm_gate_livestock",
+        "2024-11-14",
+    ),
+    (
+        "land_use_drained_organic_soils",
+        "2023-11-09",
+    ),
+    (
+        "land_use_fires",
+        "2023-11-09",
+    ),
+    (
+    "land_use_forests",
+        "2024-11-14",
+    )
 )
 
 df_all = None
-for domain, release, filename in files_to_read:
-    dataset_path = downloaded_data_path / domain / release / filename
+country_mapping = {}
+# todo remove reversed, I'm using it to get the new domain first in the debugger
+for domain, release in reversed(files_to_read):
     read_config = read_config_all[domain][release]
 
-    df_domain = pd.read_csv(dataset_path)
+    print(f"Read {read_config["filename"]}")
+    dataset_path = downloaded_data_path / domain / release / read_config["filename"]
+    # There are some non-utf8 characters in Emissions_Drained_Organic_Soils_E_All_Data_NOFLAG.csv
+    df_domain = pd.read_csv(dataset_path, encoding = "ISO-8859-1")
 
     # remove rows by unit
-    # todo align pattern with below
-    # df_domain = df_domain[df_domain["Unit"] != "TJ"]
+    # todo this is maybe not a good idea as it hides the elements to be removed
     if "units_to_remove" in read_config.keys():
         df_domain = df_domain[~df_domain["Unit"].isin(read_config["units_to_remove"])]
 
@@ -136,8 +159,10 @@ for domain, release, filename in files_to_read:
     if "areas_to_remove" in read_config.keys():
         df_domain = df_domain[~df_domain["Area"].isin(read_config["areas_to_remove"])]
 
-    # todo we shouldn't re-compute this everytime
-    country_mapping = {c: get_country_code(c) for c in df_domain["Area"].unique()}
+    # country name to ISO3 country code mapping
+    countries_to_map = [c for c in df_domain["Area"].unique() if c not in country_mapping.keys()]
+    for country_to_map in countries_to_map:
+        country_mapping[country_to_map] = get_country_code(country_to_map)
 
     # create country columns
     df_domain["country (ISO3)"] = df_domain["Area"].map(country_mapping)
@@ -156,6 +181,7 @@ for domain, release, filename in files_to_read:
 
     if df_all is None:
         df_all = df_domain
+        break
     else:
         # makes sure there are no duplicate category names
         if any(
@@ -176,12 +202,13 @@ coords_cols = {
     "area": "country (ISO3)",
     "unit": "Unit",
     "entity": "entity",
+    "source" : "Source"
 }
 
 coords_terminologies = {"area": "ISO3", "category": "FAOSTAT"}
 
 coords_defaults = {
-    "source": "FAO",
+    # "source": "FAO",
     "scenario": release,
 }
 
@@ -208,6 +235,7 @@ data_if = pm2.pm2io.convert_wide_dataframe_if(
     meta_data=meta_data,
 )
 
+pass
 # steps:
 # convert to primap2 format
 # save raw data set