Bläddra i källkod

[DATALAD] Recorded changes

Daniel Busch 3 veckor sedan

+ 1 - 1

@@ -1 +1 @@

+ 1 - 1

@@ -1 +1 @@

+ 48 - 22

@@ -378,20 +378,22 @@ agg_info_fao = {
-agg_info_ipcc2006_primap = {
+agg_info_ipcc2006_primap_N2O = {
     "category (IPCC2006_PRIMAP)": {
         "3.C.1": {  # Emissions from Biomass Burning
             "sources": [
-                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not there in 2023 release
+                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not included in 2023 release
                 "3.C.1.b",  # Biomass Burning In Croplands
                 "3.C.1.c",  # Biomass Burning in Grasslands
+            "sel": {"variable": ["N2O"]},
-        "M.3.C.1.AG": {  # Emissions from Biomass Burning
+        "M.3.C.1.AG": {  # AG-related emissions from Biomass Burning
             "sources": [
                 "3.C.1.b",  # Biomass Burning In Croplands
                 "3.C.1.c",  # Biomass Burning in Grasslands
+            "sel": {"variable": ["N2O"]},
         "M.3.C.AG": {
             "sources": [
@@ -423,7 +425,8 @@ agg_info_ipcc2006_primap = {
                 "3.C.7",  # rice cultivation
                 "3.B.2",  # Drained grassland, was in LULUCF originally
                 "3.B.3",  # Drained cropland, was in LULUCF originally
-            ]
+            ],
+            "sel": {"variable": ["N2O"]},
         "3.A.1.a": {  # enteric fermentation
             "sources": [
@@ -491,19 +494,28 @@ agg_info_ipcc2006_primap = {
 agg_info_ipcc2006_primap_CO2 = {
     "category (IPCC2006_PRIMAP)": {
+        "3.C.1": {  # Emissions from Biomass Burning
+            "sources": [
+                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not included in 2023 release
+                "3.C.1.b",  # Biomass Burning In Croplands
+                "3.C.1.c",  # Biomass Burning in Grasslands
+            ],
+            "sel": {"variable": ["CO2"]},
+        },
+        "M.3.C.1.AG": {  # AG-related emissions from Biomass Burning
+            "sources": [
+                "3.C.1.b",  # Biomass Burning In Croplands
+                "3.C.1.c",  # Biomass Burning in Grasslands
+            ],
+            "sel": {"variable": ["CO2"]},
+        },
         "M.3.C.AG": {
+            "sources": ["M.3.C.1.AG"],
+            "sel": {"variable": ["CO2"]},
+        },
+        "3.C": {
             "sources": [
-                # "3.C.1.b",  # Biomass Burning In Croplands - looks good (CH4, N2O)
-                # "3.C.1.c",  # Biomass Burning in Grasslands - looks good (CH4)
-                # "3.C.4",  # Direct N2O Emissions from Managed Soils, only N2O
-                # "M.3.C.4.SF",  # synthetic fertilisers direct, only N2O
-                # "3.C.5",  # Indirect N2O Emissions from Managed Soils, only N2O
-                # "M.3.C.5.SF",  # synthetic fertilisers indirect, only N2O
-                # "3.C.6",  # Indirect N2O Emissions from Manure Management, only N2O
-                # "3.C.7",  # rice cultivation, only CH4
-                # "3.B.2",  # Drained grassland, is already in LULUCF and seems to fit
-                # "3.B.3",  # Drained cropland, is already in LULUCF and seems to fit
-                # "2.G",  # pesticides and fertilisers manufacturing, doesn't match
+                "M.3.C.1.AG",
             "sel": {"variable": ["CO2"]},
@@ -540,18 +552,32 @@ agg_info_ipcc2006_primap_CO2 = {
 agg_info_ipcc2006_primap_CH4 = {
     "category (IPCC2006_PRIMAP)": {
+        "3.C.1": {  # Emissions from Biomass Burning
+            "sources": [
+                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not included in 2023 release
+                "3.C.1.b",  # Biomass Burning In Croplands
+                "3.C.1.c",  # Biomass Burning in Grasslands
+            ],
+            "sel": {"variable": ["CH4"]},
+        },
+        "M.3.C.1.AG": {  # AG-related emissions from Biomass Burning
+            "sources": [
+                "3.C.1.b",  # Biomass Burning In Croplands
+                "3.C.1.c",  # Biomass Burning in Grasslands
+            ],
+            "sel": {"variable": ["CH4"]},
+        },
         "M.3.C.AG": {
             "sources": [
                 "3.C.1.b",  # Biomass Burning In Croplands - looks good (CH4, N2O)
                 "3.C.1.c",  # Biomass Burning in Grasslands - looks good (CH4)
-                # "3.C.4",  # Direct N2O Emissions from Managed Soils
-                # "M.3.C.4.SF",  # synthetic fertilisers direct
-                # "3.C.5",  # Indirect N2O Emissions from Managed Soils, empty
-                # "M.3.C.5.SF",  # synthetic fertilisers indirect
-                # "3.C.6",  # Indirect N2O Emissions from Manure Management
                 "3.C.7",  # rice cultivation
-                # "3.B.2",  # Drained grassland, was in LULUCF orginally
-                # "3.B.3",  # Drained cropland, was in LULUCF originally
+            ],
+            "sel": {"variable": ["CH4"]},
+        },
+        "3.C": {
+            "sources": [
+                "M.3.C.1.AG",
             "sel": {"variable": ["CH4"]},

+ 144 - 18

@@ -4,9 +4,9 @@ import xarray as xr
 from src.faostat_data_primap.helper.category_aggregation import (
-    agg_info_ipcc2006_primap,
+    agg_info_ipcc2006_primap_N2O,
 from src.faostat_data_primap.helper.paths import (
@@ -15,6 +15,131 @@ from src.faostat_data_primap.helper.paths import (
 from import read_data
+def test_conversion_from_FAO_to_IPCC2006_PRIMAP_output_equal():
+    # make categorisation A from yaml
+    categorisation_a = cc.FAO
+    # make categorisation B from yaml
+    categorisation_b = cc.IPCC2006_PRIMAP
+    # category FAOSTAT not yet part of climate categories, so we need to add it manually
+    cats = {
+        "FAO": categorisation_a,
+        "IPCC2006_PRIMAP": categorisation_b,
+    }
+    # release_name = "v2024-11-14"
+    release_name = "v2023-12-13"
+    # reproduce 2023 data set
+    reproduce23 = True
+    ds_fao = (
+        extracted_data_path
+        # / "v2024-11-14/"
+        / f"{release_name}/FAOSTAT_Agrifood_system_emissions_{release_name}"
+    )
+    ds = pm2.open_dataset(ds_fao)
+    # drop UNFCCC data
+    ds = ds.drop_sel(source="UNFCCC")
+    # consistency check in original categorisation
+    ds_checked =  # noqa: F841
+    # ds_checked_if =
+    # We need a conversion CSV file for each entity
+    # That's a temporary workaround until convert function can filter for data variables (entities)
+    conv = {}
+    gases = ["CO2", "CH4", "N2O"]
+    if reproduce23:
+        reproduce23_filename = "_reproduce23"
+    else:
+        reproduce23_filename = ""
+    for var in gases:
+        conv[var] = cc.Conversion.from_csv(
+            f"../../conversion_FAO_IPPCC2006_PRIMAP_{var}{reproduce23_filename}.csv",
+            cats=cats,
+        )
+    # convert for each entity
+    da_dict = {}
+    for var in gases:
+        da_dict[var] = ds[var].pr.convert(
+            dim="category (FAO)",
+            conversion=conv[var],
+        )
+    result = xr.Dataset(da_dict)
+    result.attrs = ds.attrs
+    result.attrs["cat"] = "category (IPCC2006_PRIMAP)"
+    # convert to interchange format and back to get rid of empty categories
+    # TODO there may be a better way to do this
+    result_if =
+    result = pm2.pm2io.from_interchange_format(result_if)
+    # aggregation for each gas for better understanding
+    # TODO creates some duplicate code, we can combine maybe
+    result_proc =
+        agg_info=agg_info_ipcc2006_primap_N2O
+    )
+    result_proc =
+        agg_info=agg_info_ipcc2006_primap_CO2
+    )
+    result_proc =
+        agg_info=agg_info_ipcc2006_primap_CH4
+    )
+    # get processed data
+    output_filename = f"FAOSTAT_Agrifood_system_emissions_{release_name}"
+    output_folder = extracted_data_path / release_name
+    filepath = output_folder / (output_filename + ".nc")
+    ds_original = pm2.open_dataset(filepath)
+    # result_proc_if =
+    assert ds_original.broadcast_equals(result_proc)
+    # result_proc_if =
+    #
+    #
+    #
+    # if not output_folder.exists() :
+    #     output_folder.mkdir()
+    #
+    # filepath = output_folder / (output_filename + ".csv")
+    # print(f"Writing processed primap2 file to {filepath}")
+    # pm2.pm2io.write_interchange_format(
+    #     filepath,
+    #     result_proc_if,
+    # )
+    #
+    # compression = dict(zlib=True, complevel=9)
+    # encoding = {var : compression for var in result_proc.data_vars}
+    # filepath = output_folder / (output_filename + ".nc")
+    # print(f"Writing netcdf file to {filepath}")
+    #, encoding=encoding)
+def test_read(tmp_path):
+    domains_and_releases_to_read = [
+        # ("farm_gate_agriculture_energy", "2024-11-14"),
+        # ("farm_gate_emissions_crops", "2024-11-14"),
+        # ("farm_gate_livestock", "2024-11-14"),
+        # ("land_use_drained_organic_soils", "2024-11-14"),
+        ("land_use_fires", "2023-11-09"),
+        # ("land_use_forests", "2024-11-14"),
+        # ("pre_post_agricultural_production", "2024-11-14"),
+    ]
+    read_data(
+        domains_and_releases_to_read=domains_and_releases_to_read,
+        read_path=downloaded_data_path,
+        save_path=tmp_path,
+    )
 def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
     # make categorisation A from yaml
     categorisation_a = cc.FAO
@@ -79,8 +204,9 @@ def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
     result = pm2.pm2io.from_interchange_format(result_if)
     # aggregation for each gas for better understanding
+    # TODO creates some duplicate code, we can combine maybe
     result_proc =
-        agg_info=agg_info_ipcc2006_primap
+        agg_info=agg_info_ipcc2006_primap_N2O
     result_proc =
@@ -114,22 +240,22 @@ def test_conversion_from_FAO_to_IPCC2006_PRIMAP():, encoding=encoding)
-def test_read(tmp_path):
-    domains_and_releases_to_read = [
-        # ("farm_gate_agriculture_energy", "2024-11-14"),
-        # ("farm_gate_emissions_crops", "2024-11-14"),
-        # ("farm_gate_livestock", "2024-11-14"),
-        # ("land_use_drained_organic_soils", "2024-11-14"),
-        ("land_use_fires", "2023-11-09"),
-        # ("land_use_forests", "2024-11-14"),
-        # ("pre_post_agricultural_production", "2024-11-14"),
-    ]
-    read_data(
-        domains_and_releases_to_read=domains_and_releases_to_read,
-        read_path=downloaded_data_path,
-        save_path=tmp_path,
-    )
+# def test_read(tmp_path):
+#     domains_and_releases_to_read = [
+#         # ("farm_gate_agriculture_energy", "2024-11-14"),
+#         # ("farm_gate_emissions_crops", "2024-11-14"),
+#         # ("farm_gate_livestock", "2024-11-14"),
+#         # ("land_use_drained_organic_soils", "2024-11-14"),
+#         ("land_use_fires", "2023-11-09"),
+#         # ("land_use_forests", "2024-11-14"),
+#         # ("pre_post_agricultural_production", "2024-11-14"),
+#     ]
+#     read_data(
+#         domains_and_releases_to_read=domains_and_releases_to_read,
+#         read_path=downloaded_data_path,
+#         save_path=tmp_path,
+#     )
 def test_read_2023():