Browse Source

[DATALAD] Recorded changes

Daniel Busch 3 months ago
parent
commit
d134059c45

+ 1 - 0
conversion_FAO_IPPCC2006_PRIMAP_CH4_2023.csv

@@ -0,0 +1 @@
+.git/annex/objects/3m/PQ/MD5E-s439--3a3c3c747faacd58172b97058b513a28.csv/MD5E-s439--3a3c3c747faacd58172b97058b513a28.csv

+ 1 - 0
conversion_FAO_IPPCC2006_PRIMAP_CO2_2023.csv

@@ -0,0 +1 @@
+.git/annex/objects/5g/kM/MD5E-s522--e1bd0f6d079272418da3e654e2fad31e.csv/MD5E-s522--e1bd0f6d079272418da3e654e2fad31e.csv

+ 1 - 0
conversion_FAO_IPPCC2006_PRIMAP_N2O_2023.csv

@@ -0,0 +1 @@
+.git/annex/objects/1w/6J/MD5E-s667--c1223392c238ed9d3e38a0fe1cb62565.csv/MD5E-s667--c1223392c238ed9d3e38a0fe1cb62565.csv

+ 5 - 21
src/faostat_data_primap/read.py

@@ -281,7 +281,7 @@ def read_data(  # noqa: PLR0915 PLR0912
     data_pm2.pr.to_netcdf(filepath, encoding=encoding)
 
 
-def process(ds: xarray.Dataset):
+def process(ds: xarray.Dataset, year: str):
     """
     Process dataset.
 
@@ -308,39 +308,22 @@ def process(ds: xarray.Dataset):
         "FAO": categorisation_a,
         "IPCC2006_PRIMAP": categorisation_b,
     }
-    # # release_name = "v2024-11-14"
-    # release_name = "v2023-12-13"
-    #
-    # # reproduce 2023 data set
-    reproduce23 = True
-    #
-    # ds_fao = (
-    #         extracted_data_path
-    #         # / "v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14_raw.nc"
-    #         / f"{release_name}/FAOSTAT_Agrifood_system_emissions_{release_name}_raw.nc"
-    # )
-    # ds = pm2.open_dataset(ds_fao)
 
     # drop UNFCCC data
     ds = ds.drop_sel(source="UNFCCC")
 
     # consistency check in original categorisation
     ds_checked = ds.pr.add_aggregates_coordinates(agg_info=agg_info_fao)  # noqa: F841
-    # ds_checked_if = ds_checked.pr.to_interchange_format()
 
     # We need a conversion CSV file for each entity.
     # That's a temporary workaround until the convert function can filter for data variables (entities).
+    # TODO: the "year" variable is not a great approach for handling configurations
     conv = {}
     gases = ["CO2", "CH4", "N2O"]
 
-    if reproduce23:
-        reproduce23_filename = "_reproduce23"
-    else:
-        reproduce23_filename = ""
-
     for var in gases:
         conv[var] = cc.Conversion.from_csv(
-            f"../../conversion_FAO_IPPCC2006_PRIMAP_{var}{reproduce23_filename}.csv",
+            f"../../conversion_FAO_IPPCC2006_PRIMAP_{var}_{year}.csv",
             cats=cats,
         )
 
@@ -351,6 +334,7 @@ def process(ds: xarray.Dataset):
             dim="category (FAO)",
             conversion=conv[var],
         )
+
     result = xr.Dataset(da_dict)
     result.attrs = ds.attrs
     result.attrs["cat"] = "category (IPCC2006_PRIMAP)"
@@ -361,7 +345,7 @@ def process(ds: xarray.Dataset):
     result = pm2.pm2io.from_interchange_format(result_if)
 
     # aggregation for each gas for better understanding
-    # TODO creates some duplicate code, we can combine maybe
+    # TODO: this creates some duplicate code; we can combine it again later
     result_proc = result.pr.add_aggregates_coordinates(
         agg_info=agg_info_ipcc2006_primap_N2O
     )

+ 7 - 7
tests/unit/test_conversion.py

@@ -31,7 +31,7 @@ def test_process_output_remains_the_same():
     ds_raw = pm2.open_dataset(filename_raw_ds)
 
     # process raw data
-    ds_processed_new = process(ds_raw)
+    ds_processed_new = process(ds=ds_raw, year="2023")
 
     # compare
     assert ds_processed.broadcast_equals(ds_processed_new)
@@ -39,13 +39,13 @@ def test_process_output_remains_the_same():
 
 def test_read(tmp_path):
     domains_and_releases_to_read = [
-        # ("farm_gate_agriculture_energy", "2024-11-14"),
-        # ("farm_gate_emissions_crops", "2024-11-14"),
-        # ("farm_gate_livestock", "2024-11-14"),
-        # ("land_use_drained_organic_soils", "2024-11-14"),
+        ("farm_gate_agriculture_energy", "2024-11-14"),
+        ("farm_gate_emissions_crops", "2024-11-14"),
+        ("farm_gate_livestock", "2024-11-14"),
+        ("land_use_drained_organic_soils", "2024-11-14"),
         ("land_use_fires", "2023-11-09"),
-        # ("land_use_forests", "2024-11-14"),
-        # ("pre_post_agricultural_production", "2024-11-14"),
+        ("land_use_forests", "2024-11-14"),
+        ("pre_post_agricultural_production", "2024-11-14"),
     ]
 
     read_data(