Browse Source

[DATALAD] Recorded changes

Daniel Busch 2 months ago
parent
commit
1d4368d52b

+ 1 - 0
conversion_FAO_IPPCC2006_PRIMAP_CH4_2024.csv

@@ -0,0 +1 @@
+.git/annex/objects/zV/Pf/MD5E-s480--335765bea6f835d1e2f7176ee7f00791.csv/MD5E-s480--335765bea6f835d1e2f7176ee7f00791.csv

+ 1 - 0
conversion_FAO_IPPCC2006_PRIMAP_CO2_2024.csv

@@ -0,0 +1 @@
+.git/annex/objects/5g/kM/MD5E-s522--e1bd0f6d079272418da3e654e2fad31e.csv/MD5E-s522--e1bd0f6d079272418da3e654e2fad31e.csv

+ 1 - 0
conversion_FAO_IPPCC2006_PRIMAP_N2O_2024.csv

@@ -0,0 +1 @@
+.git/annex/objects/Jw/08/MD5E-s686--b20376d03ca0f946f240180f5f10c5ac.csv/MD5E-s686--b20376d03ca0f946f240180f5f10c5ac.csv

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.csv

@@ -1 +1 @@
-../../.git/annex/objects/Zq/Xx/MD5E-s10459474--d82eda959df55e229320cb05012d7853.csv/MD5E-s10459474--d82eda959df55e229320cb05012d7853.csv
+../../.git/annex/objects/fq/31/MD5E-s6106149--8ac5dc82f1efde02f221637477a3147b.csv/MD5E-s6106149--8ac5dc82f1efde02f221637477a3147b.csv

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.nc

@@ -1 +1 @@
-../../.git/annex/objects/V4/pG/MD5E-s3341536--53e447409452734a496b3ffea3647f59.nc/MD5E-s3341536--53e447409452734a496b3ffea3647f59.nc
+../../.git/annex/objects/1m/xF/MD5E-s1917874--847a6ef01320ee0c265bbbbf0220ac3a.nc/MD5E-s1917874--847a6ef01320ee0c265bbbbf0220ac3a.nc

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14_raw.nc

@@ -1 +1 @@
-../../.git/annex/objects/mw/x3/MD5E-s15066036--f6b379d199f0f0de6d96e2294d6a7b56.nc/MD5E-s15066036--f6b379d199f0f0de6d96e2294d6a7b56.nc
+../../.git/annex/objects/vg/xf/MD5E-s15070132--3f6ad29b3a9140e506c33b6a1be88aa8.nc/MD5E-s15070132--3f6ad29b3a9140e506c33b6a1be88aa8.nc

+ 28 - 1
src/faostat_data_primap/read.py

@@ -280,8 +280,35 @@ def read_data(  # noqa: PLR0915 PLR0912
     print(f"Writing netcdf file to {filepath}")
     data_pm2.pr.to_netcdf(filepath, encoding=encoding)
 
+    # process data - conversion and category aggregation
+    # TODO: improve variable naming
+    result_proc = process(data_pm2)
 
-def process(ds: xarray.Dataset, year: str):
+    # save processed data
+    result_proc_if = result_proc.pr.to_interchange_format()
+
+    output_filename = f"FAOSTAT_Agrifood_system_emissions_{release_name}"
+    output_folder = extracted_data_path / release_name
+
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    filepath = output_folder / (output_filename + ".csv")
+    print(f"Writing processed primap2 file to {filepath}")
+    pm2.pm2io.write_interchange_format(
+        filepath,
+        result_proc_if,
+    )
+
+    compression = dict(zlib=True, complevel=9)
+    encoding = {var: compression for var in result_proc.data_vars}
+    filepath = output_folder / (output_filename + ".nc")
+    print(f"Writing netcdf file to {filepath}")
+    result_proc.pr.to_netcdf(filepath, encoding=encoding)
+
+
+# TODO: drop the `year` parameter; it is not needed because the conversion should remain the same
+def process(ds: xarray.Dataset, year: str = "2024"):
     """
     Process dataset.
 

+ 22 - 92
tests/unit/test_conversion.py

@@ -1,13 +1,6 @@
 import climate_categories as cc
 import primap2 as pm2
-import xarray as xr
 
-from src.faostat_data_primap.helper.category_aggregation import (
-    agg_info_fao,
-    agg_info_ipcc2006_primap_CH4,
-    agg_info_ipcc2006_primap_CO2,
-    agg_info_ipcc2006_primap_N2O,
-)
 from src.faostat_data_primap.helper.paths import (
     downloaded_data_path,
     extracted_data_path,
@@ -15,7 +8,7 @@ from src.faostat_data_primap.helper.paths import (
 from src.faostat_data_primap.read import process, read_data
 
 
-def test_process_output_remains_the_same():
+def test_processed_output_remains_the_same():
     # get processed data
     # release_name = "v2024-11-14"
     release_name = "v2023-12-13"
@@ -56,81 +49,18 @@ def test_read(tmp_path):
 
 
 def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
-    # make categorisation A from yaml
-    categorisation_a = cc.FAO
-    # make categorisation B from yaml
-    categorisation_b = cc.IPCC2006_PRIMAP
-
-    # category FAOSTAT not yet part of climate categories, so we need to add it manually
-    cats = {
-        "FAO": categorisation_a,
-        "IPCC2006_PRIMAP": categorisation_b,
-    }
-    # release_name = "v2024-11-14"
-    release_name = "v2023-12-13"
-
-    # reproduce 2023 data set
-    reproduce23 = True
+    release_name = "v2024-11-14"
+    # release_name = "v2023-12-13"
 
-    ds_fao = (
+    # get raw data
+    filename_raw_ds = (
         extracted_data_path
-        # / "v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14_raw.nc"
         / f"{release_name}/FAOSTAT_Agrifood_system_emissions_{release_name}_raw.nc"
     )
-    ds = pm2.open_dataset(ds_fao)
-
-    # drop UNFCCC data
-    ds = ds.drop_sel(source="UNFCCC")
-
-    # consistency check in original categorisation
-    ds_checked = ds.pr.add_aggregates_coordinates(agg_info=agg_info_fao)  # noqa: F841
-    # ds_checked_if = ds_checked.pr.to_interchange_format()
-
-    # We need a conversion CSV file for each entity
-    # That's a temporary workaround until convert function can filter for data variables (entities)
-    conv = {}
-    gases = ["CO2", "CH4", "N2O"]
-
-    if reproduce23:
-        reproduce23_filename = "_reproduce23"
-    else:
-        reproduce23_filename = ""
-
-    for var in gases:
-        conv[var] = cc.Conversion.from_csv(
-            f"../../conversion_FAO_IPPCC2006_PRIMAP_{var}{reproduce23_filename}.csv",
-            cats=cats,
-        )
-
-    # convert for each entity
-    da_dict = {}
-    for var in gases:
-        da_dict[var] = ds[var].pr.convert(
-            dim="category (FAO)",
-            conversion=conv[var],
-        )
-    result = xr.Dataset(da_dict)
-    result.attrs = ds.attrs
-    result.attrs["cat"] = "category (IPCC2006_PRIMAP)"
-
-    # convert to interchange format and back to get rid of empty categories
-    # TODO there may be a better way to do this
-    result_if = result.pr.to_interchange_format()
-    result = pm2.pm2io.from_interchange_format(result_if)
-
-    # aggregation for each gas for better understanding
-    # TODO creates some duplicate code, we can combine maybe
-    result_proc = result.pr.add_aggregates_coordinates(
-        agg_info=agg_info_ipcc2006_primap_N2O
-    )
-
-    result_proc = result_proc.pr.add_aggregates_coordinates(
-        agg_info=agg_info_ipcc2006_primap_CO2
-    )
+    ds_raw = pm2.open_dataset(filename_raw_ds)
 
-    result_proc = result_proc.pr.add_aggregates_coordinates(
-        agg_info=agg_info_ipcc2006_primap_CH4
-    )
+    # process raw data
+    result_proc = process(ds=ds_raw, year="2024")
 
     result_proc_if = result_proc.pr.to_interchange_format()
 
@@ -175,20 +105,20 @@ def test_conversion_from_FAO_to_IPCC2006_PRIMAP():
 
 def test_read_2023():
     domains_and_releases_to_read = [
-        ("farm_gate_agriculture_energy", "2023-12-13"),
-        ("farm_gate_emissions_crops", "2023-11-09"),
-        ("farm_gate_livestock", "2023-11-09"),
-        ("land_use_drained_organic_soils", "2023-11-09"),
-        ("land_use_fires", "2023-11-09"),
-        ("land_use_forests", "2023-11-09"),
-        ("pre_post_agricultural_production", "2023-11-09"),
-        # ("farm_gate_agriculture_energy", "2024-11-14"),
-        # ("farm_gate_emissions_crops", "2024-11-14"),
-        # ("farm_gate_livestock", "2024-11-14"),
-        # ("land_use_drained_organic_soils", "2024-11-14"),
-        # ("land_use_fires", "2024-11-14"),
-        # ("land_use_forests", "2024-11-14"),
-        # ("pre_post_agricultural_production", "2024-11-14"),
+        # ("farm_gate_agriculture_energy", "2023-12-13"),
+        # ("farm_gate_emissions_crops", "2023-11-09"),
+        # ("farm_gate_livestock", "2023-11-09"),
+        # ("land_use_drained_organic_soils", "2023-11-09"),
+        # ("land_use_fires", "2023-11-09"),
+        # ("land_use_forests", "2023-11-09"),
+        # ("pre_post_agricultural_production", "2023-11-09"),
+        ("farm_gate_agriculture_energy", "2024-11-14"),
+        ("farm_gate_emissions_crops", "2024-11-14"),
+        ("farm_gate_livestock", "2024-11-14"),
+        ("land_use_drained_organic_soils", "2024-11-14"),
+        ("land_use_fires", "2024-11-14"),
+        ("land_use_forests", "2024-11-14"),
+        ("pre_post_agricultural_production", "2024-11-14"),
     ]
 
     read_data(