Переглянути джерело

[DATALAD] Recorded changes

Daniel Busch 2 тижні тому
батько
коміт
0d300faa1c

+ 1 - 1
extracted_data/v2023-12-13/FAOSTAT_Agrifood_system_emissions_v2023-12-13.csv

@@ -1 +1 @@
-../../.git/annex/objects/47/1F/MD5E-s5763420--61fa57295aeb26589f60f650fffc0826.csv/MD5E-s5763420--61fa57295aeb26589f60f650fffc0826.csv
+../../.git/annex/objects/X3/xx/MD5E-s5982112--6d968cecd8f39c3d537ea0b6de011f94.csv/MD5E-s5982112--6d968cecd8f39c3d537ea0b6de011f94.csv

+ 1 - 1
extracted_data/v2023-12-13/FAOSTAT_Agrifood_system_emissions_v2023-12-13.nc

@@ -1 +1 @@
-../../.git/annex/objects/WJ/g4/MD5E-s1798444--e8ba00d8e0d59fc8ee4972903afcc4f8.nc/MD5E-s1798444--e8ba00d8e0d59fc8ee4972903afcc4f8.nc
+../../.git/annex/objects/9m/KJ/MD5E-s1887931--11354706000e8ef7c8c5e643f060f5e5.nc/MD5E-s1887931--11354706000e8ef7c8c5e643f060f5e5.nc

+ 48 - 22
src/faostat_data_primap/helper/category_aggregation.py

@@ -378,20 +378,22 @@ agg_info_fao = {
     }
 }
 
-agg_info_ipcc2006_primap = {
+agg_info_ipcc2006_primap_N2O = {
     "category (IPCC2006_PRIMAP)": {
         "3.C.1": {  # Emissions from Biomass Burning
             "sources": [
-                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not there in 2023 release
+                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not included in 2023 release
                 "3.C.1.b",  # Biomass Burning In Croplands
                 "3.C.1.c",  # Biomass Burning in Grasslands
             ],
+            "sel": {"variable": ["N2O"]},
         },
-        "M.3.C.1.AG": {  # Emissions from Biomass Burning
+        "M.3.C.1.AG": {  # AG-related emissions from Biomass Burning
             "sources": [
                 "3.C.1.b",  # Biomass Burning In Croplands
                 "3.C.1.c",  # Biomass Burning in Grasslands
             ],
+            "sel": {"variable": ["N2O"]},
         },
         "M.3.C.AG": {
             "sources": [
@@ -423,7 +425,8 @@ agg_info_ipcc2006_primap = {
                 "3.C.7",  # rice cultivation
                 "3.B.2",  # Drained grassland, was in LULUCF originally
                 "3.B.3",  # Drained cropland, was in LULUCF originally
-            ]
+            ],
+            "sel": {"variable": ["N2O"]},
         },
         "3.A.1.a": {  # enteric fermentation
             "sources": [
@@ -491,19 +494,28 @@ agg_info_ipcc2006_primap = {
 
 agg_info_ipcc2006_primap_CO2 = {
     "category (IPCC2006_PRIMAP)": {
+        "3.C.1": {  # Emissions from Biomass Burning
+            "sources": [
+                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not included in 2023 release
+                "3.C.1.b",  # Biomass Burning In Croplands
+                "3.C.1.c",  # Biomass Burning in Grasslands
+            ],
+            "sel": {"variable": ["CO2"]},
+        },
+        "M.3.C.1.AG": {  # AG-related emissions from Biomass Burning
+            "sources": [
+                "3.C.1.b",  # Biomass Burning In Croplands
+                "3.C.1.c",  # Biomass Burning in Grasslands
+            ],
+            "sel": {"variable": ["CO2"]},
+        },
         "M.3.C.AG": {
+            "sources": ["M.3.C.1.AG"],
+            "sel": {"variable": ["CO2"]},
+        },
+        "3.C": {
             "sources": [
-                # "3.C.1.b",  # Biomass Burning In Croplands - looks good (CH4, N2O)
-                # "3.C.1.c",  # Biomass Burning in Grasslands - looks good (CH4)
-                # "3.C.4",  # Direct N2O Emissions from Managed Soils, only N2O
-                # "M.3.C.4.SF",  # synthetic fertilisers direct, only N2O
-                # "3.C.5",  # Indirect N2O Emissions from Managed Soils, only N2O
-                # "M.3.C.5.SF",  # synthetic fertilisers indirect, only N2O
-                # "3.C.6",  # Indirect N2O Emissions from Manure Management, only N2O
-                # "3.C.7",  # rice cultivation, only CH4
-                # "3.B.2",  # Drained grassland, is already in LULUCF and seems to fit
-                # "3.B.3",  # Drained cropland, is already in LULUCF and seems to fit
-                # "2.G",  # pesticides and fertilisers manufacturing, doesn't match
+                "M.3.C.1.AG",
             ],
             "sel": {"variable": ["CO2"]},
         },
@@ -540,18 +552,32 @@ agg_info_ipcc2006_primap_CO2 = {
 
 agg_info_ipcc2006_primap_CH4 = {
     "category (IPCC2006_PRIMAP)": {
+        "3.C.1": {  # Emissions from Biomass Burning
+            "sources": [
+                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not included in 2023 release
+                "3.C.1.b",  # Biomass Burning In Croplands
+                "3.C.1.c",  # Biomass Burning in Grasslands
+            ],
+            "sel": {"variable": ["CH4"]},
+        },
+        "M.3.C.1.AG": {  # AG-related emissions from Biomass Burning
+            "sources": [
+                "3.C.1.b",  # Biomass Burning In Croplands
+                "3.C.1.c",  # Biomass Burning in Grasslands
+            ],
+            "sel": {"variable": ["CH4"]},
+        },
         "M.3.C.AG": {
             "sources": [
                 "3.C.1.b",  # Biomass Burning In Croplands - looks good (CH4, N2O)
                 "3.C.1.c",  # Biomass Burning in Grasslands - looks good (CH4)
-                # "3.C.4",  # Direct N2O Emissions from Managed Soils
-                # "M.3.C.4.SF",  # synthetic fertilisers direct
-                # "3.C.5",  # Indirect N2O Emissions from Managed Soils, empty
-                # "M.3.C.5.SF",  # synthetic fertilisers indirect
-                # "3.C.6",  # Indirect N2O Emissions from Manure Management
                 "3.C.7",  # rice cultivation
-                # "3.B.2",  # Drained grassland, was in LULUCF orginally
-                # "3.B.3",  # Drained cropland, was in LULUCF originally
+            ],
+            "sel": {"variable": ["CH4"]},
+        },
+        "3.C": {
+            "sources": [
+                "M.3.C.1.AG",
             ],
             "sel": {"variable": ["CH4"]},
         },

+ 144 - 18
tests/unit/test_conversion.py

@@ -4,9 +4,9 @@ import xarray as xr
 
 from src.faostat_data_primap.helper.category_aggregation import (
     agg_info_fao,
-    agg_info_ipcc2006_primap,
     agg_info_ipcc2006_primap_CH4,
     agg_info_ipcc2006_primap_CO2,
+    agg_info_ipcc2006_primap_N2O,
 )
 from src.faostat_data_primap.helper.paths import (
     downloaded_data_path,
@@ -15,6 +15,131 @@ from src.faostat_data_primap.helper.paths import (
 from src.faostat_data_primap.read import read_data
 
 
+def test_conversion_from_FAO_to_IPCC2006_PRIMAP_output_equal():
+    # make categorisation A from yaml
+    categorisation_a = cc.FAO
+    # make categorisation B from yaml
+    categorisation_b = cc.IPCC2006_PRIMAP
+
+    # category FAOSTAT not yet part of climate categories, so we need to add it manually
+    cats = {
+        "FAO": categorisation_a,
+        "IPCC2006_PRIMAP": categorisation_b,
+    }
+    # release_name = "v2024-11-14"
+    release_name = "v2023-12-13"
+
+    # reproduce 2023 data set
+    reproduce23 = True
+
+    ds_fao = (
+        extracted_data_path
+        # / "v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14_raw.nc"
+        / f"{release_name}/FAOSTAT_Agrifood_system_emissions_{release_name}_raw.nc"
+    )
+    ds = pm2.open_dataset(ds_fao)
+
+    # drop UNFCCC data
+    ds = ds.drop_sel(source="UNFCCC")
+
+    # consistency check in original categorisation
+    ds_checked = ds.pr.add_aggregates_coordinates(agg_info=agg_info_fao)  # noqa: F841
+    # ds_checked_if = ds_checked.pr.to_interchange_format()
+
+    # We need a conversion CSV file for each entity
+    # That's a temporary workaround until convert function can filter for data variables (entities)
+    conv = {}
+    gases = ["CO2", "CH4", "N2O"]
+
+    if reproduce23:
+        reproduce23_filename = "_reproduce23"
+    else:
+        reproduce23_filename = ""
+
+    for var in gases:
+        conv[var] = cc.Conversion.from_csv(
+            f"../../conversion_FAO_IPPCC2006_PRIMAP_{var}{reproduce23_filename}.csv",
+            cats=cats,
+        )
+
+    # convert for each entity
+    da_dict = {}
+    for var in gases:
+        da_dict[var] = ds[var].pr.convert(
+            dim="category (FAO)",
+            conversion=conv[var],
+        )
+    result = xr.Dataset(da_dict)
+    result.attrs = ds.attrs
+    result.attrs["cat"] = "category (IPCC2006_PRIMAP)"
+
+    # convert to interchange format and back to get rid of empty categories
+    # TODO there may be a better way to do this
+    result_if = result.pr.to_interchange_format()
+    result = pm2.pm2io.from_interchange_format(result_if)
+
+    # aggregation for each gas for better understanding
+    # TODO: the per-gas aggregation calls below duplicate code; consider combining them
+    result_proc = result.pr.add_aggregates_coordinates(
+        agg_info=agg_info_ipcc2006_primap_N2O
+    )
+
+    result_proc = result_proc.pr.add_aggregates_coordinates(
+        agg_info=agg_info_ipcc2006_primap_CO2
+    )
+
+    result_proc = result_proc.pr.add_aggregates_coordinates(
+        agg_info=agg_info_ipcc2006_primap_CH4
+    )
+
+    # get processed data
+    output_filename = f"FAOSTAT_Agrifood_system_emissions_{release_name}"
+    output_folder = extracted_data_path / release_name
+    filepath = output_folder / (output_filename + ".nc")
+    ds_original = pm2.open_dataset(filepath)
+
+    # result_proc_if = result_proc.pr.to_interchange_format()
+
+    assert ds_original.broadcast_equals(result_proc)
+    # result_proc_if = result_proc.pr.to_interchange_format()
+    #
+    #
+    #
+    # if not output_folder.exists() :
+    #     output_folder.mkdir()
+    #
+    # filepath = output_folder / (output_filename + ".csv")
+    # print(f"Writing processed primap2 file to {filepath}")
+    # pm2.pm2io.write_interchange_format(
+    #     filepath,
+    #     result_proc_if,
+    # )
+    #
+    # compression = dict(zlib=True, complevel=9)
+    # encoding = {var : compression for var in result_proc.data_vars}
+    # filepath = output_folder / (output_filename + ".nc")
+    # print(f"Writing netcdf file to {filepath}")
+    # result_proc.pr.to_netcdf(filepath, encoding=encoding)
+
+
+def test_read(tmp_path):
+    domains_and_releases_to_read = [
+        # ("farm_gate_agriculture_energy", "2024-11-14"),
+        # ("farm_gate_emissions_crops", "2024-11-14"),
+        # ("farm_gate_livestock", "2024-11-14"),
+        # ("land_use_drained_organic_soils", "2024-11-14"),
+        ("land_use_fires", "2023-11-09"),
+        # ("land_use_forests", "2024-11-14"),
+        # ("pre_post_agricultural_production", "2024-11-14"),
+    ]
+
+    read_data(
+        domains_and_releases_to_read=domains_and_releases_to_read,
+        read_path=downloaded_data_path,
+        save_path=tmp_path,
+    )
+
+
 def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
     # make categorisation A from yaml
     categorisation_a = cc.FAO
@@ -79,8 +204,9 @@ def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
     result = pm2.pm2io.from_interchange_format(result_if)
 
     # aggregation for each gas for better understanding
+    # TODO: the per-gas aggregation calls below duplicate code; consider combining them
     result_proc = result.pr.add_aggregates_coordinates(
-        agg_info=agg_info_ipcc2006_primap
+        agg_info=agg_info_ipcc2006_primap_N2O
     )
 
     result_proc = result_proc.pr.add_aggregates_coordinates(
@@ -114,22 +240,22 @@ def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
     result_proc.pr.to_netcdf(filepath, encoding=encoding)
 
 
-def test_read(tmp_path):
-    domains_and_releases_to_read = [
-        # ("farm_gate_agriculture_energy", "2024-11-14"),
-        # ("farm_gate_emissions_crops", "2024-11-14"),
-        # ("farm_gate_livestock", "2024-11-14"),
-        # ("land_use_drained_organic_soils", "2024-11-14"),
-        ("land_use_fires", "2023-11-09"),
-        # ("land_use_forests", "2024-11-14"),
-        # ("pre_post_agricultural_production", "2024-11-14"),
-    ]
-
-    read_data(
-        domains_and_releases_to_read=domains_and_releases_to_read,
-        read_path=downloaded_data_path,
-        save_path=tmp_path,
-    )
+# def test_read(tmp_path):
+#     domains_and_releases_to_read = [
+#         # ("farm_gate_agriculture_energy", "2024-11-14"),
+#         # ("farm_gate_emissions_crops", "2024-11-14"),
+#         # ("farm_gate_livestock", "2024-11-14"),
+#         # ("land_use_drained_organic_soils", "2024-11-14"),
+#         ("land_use_fires", "2023-11-09"),
+#         # ("land_use_forests", "2024-11-14"),
+#         # ("pre_post_agricultural_production", "2024-11-14"),
+#     ]
+#
+#     read_data(
+#         domains_and_releases_to_read=domains_and_releases_to_read,
+#         read_path=downloaded_data_path,
+#         save_path=tmp_path,
+#     )
 
 
 def test_read_2023():