10 コミット ab7e05e335 ... 12547cc232

作者 SHA1 メッセージ 日付
  Daniel Busch 12547cc232 [DATALAD] Recorded changes 1 ヶ月 前
  Daniel Busch 899cdf8dbc [DATALAD] Recorded changes 1 ヶ月 前
  crdanielbusch 8a9b79c88a Merge pull request #12 from primap-community/category-clean-up 1 ヶ月 前
  Daniel Busch b664815b48 [DATALAD] Recorded changes 1 ヶ月 前
  Daniel Busch 67d9836b3f [DATALAD] Recorded changes 1 ヶ月 前
  Daniel Busch 714673f746 [DATALAD] Recorded changes 1 ヶ月 前
  Daniel Busch cddab90b73 [DATALAD] Recorded changes 1 ヶ月 前
  Daniel Busch 70dbcd294c [DATALAD] Recorded changes 1 ヶ月 前
  Daniel Busch 5764a51ced [DATALAD] Recorded changes 1 ヶ月 前
  Daniel Busch 89adcbffe9 [DATALAD] Recorded changes 1 ヶ月 前

+ 9 - 8
.pre-commit-config.yaml

@@ -36,13 +36,14 @@ repos:
       - id: ruff
         args: [ --fix, --exit-non-zero-on-fix ]
       - id: ruff-format
-  - repo: https://github.com/python-poetry/poetry
-    rev: '1.8.3'
-    hooks:
-      - id: poetry-check
+# poetry tasks have network issues on GitHub
+#  - repo: https://github.com/python-poetry/poetry
+#    rev: '1.8.3'
+#    hooks:
+#      - id: poetry-check
 #      - id: poetry-lock
 #        args: [--no-update]
-  - repo: https://github.com/python-poetry/poetry-plugin-export
-    rev: '1.8.0'
-    hooks:
-      - id: poetry-export
+#  - repo: https://github.com/python-poetry/poetry-plugin-export
+#    rev: '1.9.0'
+#    hooks:
+#      - id: poetry-export

+ 4 - 3
conv_FAO_IPPCC2006_PRIMAP_CH4.csv

@@ -1,9 +1,10 @@
 # references: CH4 emissions only
 # last_update: 2024-10-14
 FAO,IPCC2006_PRIMAP,comment
-1.A.2.c,3.C.7,Rice cultivation
-1.A.1.b + 1.A.2.b + 1.A.6.b + 1.A.7.b, 3.C.1.b, FAO 1.A.X.b Burning of crop residues to IPCC 3.C.1.b Biomass Burning In Croplands
+1.A.2.c,3.C.7, FAO 1.A.2.c Rice cultivation to IPCC 3.C.7 Rice Cultivation
+M.1.BCR, 3.C.1.b, FAO M.1.BCR (or 1.A.1.b + 1.A.2.b + 1.A.6.b + 1.A.7.b) Burning of crop residues to IPCC 3.C.1.b Biomass Burning In Croplands
 M.3.EF, 3.A.1, Enteric fermentation
 M.3.MM, 3.A.2, Manure Management
-6.A,3.C.1.a, FAO 6.A forest fires to IPCC 3.C.1.a Biomass burning in forest lands
+6.A,M.3.C.1.a, FAO 6.A forest fires to IPCC M.3.C.1.a Biomass burning in forest lands (M category because it is not contained in the 3.C.1 sum)
 6.B,3.C.1.c, FAO 6.B savanna fires to IPCC 3.C.1.c Biomass burning in grasslands
+6.C, M.3.B.2.FOS, FAO 6.C Fires in organic soils to IPCC 3.B.2 cropland (fires in organic soils)

+ 6 - 6
conv_FAO_IPPCC2006_PRIMAP_CO2.csv

@@ -1,11 +1,11 @@
 # references: CO2 emissions only
 # last_update: 2024-10-14
 FAO,IPCC2006_PRIMAP,comment
-4,3.B.1,Carbon stock change in forests
-4.B,M.NFC,Net Forest conversion
-5.A,3.B.3,FAO 5.A Drained grassland to IPCC 3.B.3 Grassland
+4,3.B.1,FAO 4 Carbon stock change in forests to IPCC 3.B.1 Forest land
+4.B,M.NFC,FAO 4.B Net forest conversion to IPCC Net forest conversion
+5.A,M.3.B.3.DOS,FAO 5.A Drained grassland to IPCC 3.B.3 Grassland (drained organic soils)
 5.B,M.3.B.2.DOS,FAO 5.B Drained cropland to IPCC 3.B.2 Cropland (drained organic soils)
-6.A,3.C.1.a, FAO 6.A forest fires to IPCC 3.C.1.a Biomass burning in forest lands
-6.B, 3.C.1.c, FAO 6.B savanna fires to IPCC 3.C.1.c Biomass burning in grasslands
+6.A,M.3.C.1.a, FAO 6.A forest fires to IPCC M.3.C.1.a Biomass burning in forest lands (M category because it is not contained in the 3.C.1 sum)
+6.B, M.3.C.1.c, FAO 6.B savanna fires to IPCC M.3.C.1.c Biomass burning in grasslands (M category because it is not contained in the 3.C.1 sum) - This category is empty in FAOSTAT data explorer - we could consider deleting it
 6.C, M.3.B.2.FOS, FAO 6.C Fires in organic soils to IPCC 3.B.2 cropland (fires in organic soils)
-7.M + 7.A, 2.G, Manufacturing fertlisers and pesticides
+7.M + 7.A, 2.G, Manufacturing fertilisers and pesticides (not used for primap-hist)

+ 10 - 8
conv_FAO_IPPCC2006_PRIMAP_N2O.csv

@@ -1,12 +1,14 @@
 # references: N2O emissions only
 # last_update: 2024-10-14
 FAO,IPCC2006_PRIMAP,comment
-M.3.MM, 3.A.2, Manure management
-1.A.1.b + 1.A.2.b + 1.A.6.b + 1.A.7.b, 3.C.1.b, FAO 1.A.X.b Burning of crop residues to IPCC 3.C.1.b Biomass Burning In Croplands
-M.3.MP + M.3.MA + M.1.CR, 3.C.4
-1.B.1,M.3.C.4.SF,Better to map indivudually in case it covers different range of years
-1.B.2.a + 1.B.2.b,M.3.C.5.SF,Better to map individually in case it covers different range of years
-5.A,3.B.3,FAO 5.A Drained grassland to IPCC 3.B.3 Grassland
-5.B,3.B.2,Drained cropland
-6.A,3.C.1.a, FAO 6.A forest fires to IPCC 3.C.1.a Biomass burning in forest lands
+M.3.MM, 3.A.2, FAO All animals manure management to IPCC 3.A.2 Manure Management
+M.1.BCR, 3.C.1.b, All Crops - Burning crop residues (Emissions N2O) (or the sum of FAO 1.A.X.b) to IPCC 3.C.1.b Biomass Burning In Croplands
+M.3.MP, M.3.C.45.MP, FAO Manure left on pasture to IPCC Direct and indirect N2O emissions from manure left on pasture
+M.3.MA, M.3.C.45.MA, FAO Manure applied to soils to IPCC Direct and indirect N2O emissions from manure applied to soils
+M.1.CR, M.3.C.45.CR, FAO Crop residues to IPCC Direct and indirect emissions N2O from crop residues
+1.B.1,3.C.4.a, FAO Synthetic fertilisers direct emissions to IPCC Synthetic fertiliser emissions from direct N2O Emissions from Managed Soils
+1.B.2.a + 1.B.2.b,M.3.C.5.SF, FAO Synthetic fertilisers indirect emissions to IPCC Synthetic fertiliser emissions from indirect N2O Emissions from Managed Soils
+5.A,M.3.C.4.DOS.GL,FAO 5.A Drained grassland to IPCC 3.C.4 share of drained organic soils in grass land
+5.B,M.3.C.4.DOS.CL,FAO 5.B Drained cropland to IPCC 3.C.4 share of drained organic soils in cropland
+6.A,M.3.C.1.a, FAO 6.A forest fires to IPCC M.3.C.1.a Biomass burning in forest lands (M category because it is not contained in the 3.C.1 sum)
 6.B, 3.C.1.c, FAO 6.B savanna fires to IPCC 3.C.1.c Biomass burning in grasslands

+ 1 - 1
extracted_data/v2023-12-13/FAOSTAT_Agrifood_system_emissions_v2023-12-13.csv

@@ -1 +1 @@
-../../.git/annex/objects/z3/Mq/MD5E-s5980799--3043dd6bf8a609e041a5243794bcc826.csv/MD5E-s5980799--3043dd6bf8a609e041a5243794bcc826.csv
+../../.git/annex/objects/84/Jg/MD5E-s6104450--9149191fea1c8852b3236979b2092381.csv/MD5E-s6104450--9149191fea1c8852b3236979b2092381.csv

+ 1 - 1
extracted_data/v2023-12-13/FAOSTAT_Agrifood_system_emissions_v2023-12-13.nc

@@ -1 +1 @@
-../../.git/annex/objects/9g/Ff/MD5E-s1859181--f564c003c41b542bb3520e9151f85e16.nc/MD5E-s1859181--f564c003c41b542bb3520e9151f85e16.nc
+../../.git/annex/objects/kp/vZ/MD5E-s2060452--d7732a88ad51e73b64bc605e9a4a861d.nc/MD5E-s2060452--d7732a88ad51e73b64bc605e9a4a861d.nc

+ 1 - 1
extracted_data/v2023-12-13/FAOSTAT_Agrifood_system_emissions_v2023-12-13_raw.nc

@@ -1 +1 @@
-../../.git/annex/objects/20/P0/MD5E-s14450580--2e044a03684998487ff734df894273c1.nc/MD5E-s14450580--2e044a03684998487ff734df894273c1.nc
+../../.git/annex/objects/Q6/F6/MD5E-s14450580--a43c6824e8130ab4c56b300043374e93.nc/MD5E-s14450580--a43c6824e8130ab4c56b300043374e93.nc

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.csv

@@ -1 +1 @@
-../../.git/annex/objects/4m/5w/MD5E-s6104711--3aa09937e311620fd1a6e5f32e845935.csv/MD5E-s6104711--3aa09937e311620fd1a6e5f32e845935.csv
+../../.git/annex/objects/xK/xP/MD5E-s6236738--50418baa31366f55c691f7191c0edd4e.csv/MD5E-s6236738--50418baa31366f55c691f7191c0edd4e.csv

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14.nc

@@ -1 +1 @@
-../../.git/annex/objects/V7/Q8/MD5E-s1885319--34b97929ef46e16f92251c442368df1c.nc/MD5E-s1885319--34b97929ef46e16f92251c442368df1c.nc
+../../.git/annex/objects/m3/kK/MD5E-s2312541--ff444b4069808a639ac2bb785296921a.nc/MD5E-s2312541--ff444b4069808a639ac2bb785296921a.nc

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14_raw.csv

@@ -1 +1 @@
-../../.git/annex/objects/xp/61/MD5E-s34032099--9dfa52731d2cd7d035ea8aafa2a8f650.csv/MD5E-s34032099--9dfa52731d2cd7d035ea8aafa2a8f650.csv
+../../.git/annex/objects/vp/pV/MD5E-s34289102--5198b88b05708f715b0db35ed579d6af.csv/MD5E-s34289102--5198b88b05708f715b0db35ed579d6af.csv

+ 1 - 1
extracted_data/v2024-11-14/FAOSTAT_Agrifood_system_emissions_v2024-11-14_raw.nc

@@ -1 +1 @@
-../../.git/annex/objects/Zm/QM/MD5E-s15105077--c6b011fcb6728c549b85b749f175e1cb.nc/MD5E-s15105077--c6b011fcb6728c549b85b749f175e1cb.nc
+../../.git/annex/objects/VV/k7/MD5E-s15222746--49d853c6b5d16eb09b0ecae01cb7bc55.nc/MD5E-s15222746--49d853c6b5d16eb09b0ecae01cb7bc55.nc

+ 96 - 152
src/faostat_data_primap/helper/category_aggregation.py

@@ -6,6 +6,15 @@ Definitions for category aggregation.
 # There are discrepancies of up to 100% due to rounding errors for small values,
 # for example, 0.0001 (rounded from 0.00006) + 0.0004 (rounded from 0.00036)
 # = 0.00042 which is then rounded to 0.0004, while the consistency check expects 0.0005
+# There are even more extreme examples, where we need a tolerance of 100%:
+# Eswatini, 1976:
+# 1.A.1.a Crop residues (emissions N2O) = 0.0001
+# 1.A.1.a.i Crop residues (Indirect emissions N2O) = 0
+# 1.A.1.a.ii Crop residues (Direct emissions N2O) = 0
+# Our way to deal with it was to set the tolerance to 1% and look at the
+# countries / sectors that yielded an error. If only a few countries and years
+# are affected, it is likely just a rounding error. If all years are affected,
+# there may be something wrong with the data.
 agg_info_fao = {
     "category (FAO)": {
         "1.A.1.a": {  # wheat
@@ -203,6 +212,18 @@ agg_info_fao = {
             ],
             "sel": {"variable": ["N2O"]},
         },
+        # Only some crop types are burned on the field
+        # Rounding errors up to 100% (see explanation above)
+        "M.1.BCR": {
+            "tolerance": 1,
+            "sources": [
+                "1.A.1.b",  # wheat
+                "1.A.2.b",  # rice
+                "1.A.6.b",  # maize (corn)
+                "1.A.7.b",  # sugar cane
+            ],
+            "sel": {"variable": ["N2O", "CH4"]},
+        },
         "1.A": {
             # crops
             "tolerance": 1,
@@ -374,83 +395,66 @@ agg_info_fao = {
 }
 
 # aggregating each gas separately to make this easier to understand
-# We can change it back to one dict once it's all organised
+# We can change it back to one dict, once it's all sorted out
 agg_info_ipcc2006_primap_N2O = {
     "category (IPCC2006_PRIMAP)": {
-        "3.C.1": {  # Emissions from Biomass Burning
+        "M.3.C.1.AG": {  # AG-related emissions from Biomass Burning
             "sources": [
-                # "3.C.1.a",  # leaving out "Biomass Burning In Forest Lands", because not included in 2023 release
-                "3.C.1.b",  # Biomass Burning In Croplands
-                "3.C.1.c",  # Biomass Burning in Grasslands
+                "3.C.1.b",  # Biomass Burning In Croplands (FAO M.1.BCR All Crops - Burning crop residues)
+                "3.C.1.c",  # Biomass Burning in Grasslands (FAO 6.B savanna fires)
             ],
             "sel": {"variable": ["N2O"]},
         },
-        "M.3.C.1.AG": {  # AG-related emissions from Biomass Burning
+        "3.C.1": {  # Emissions from Biomass Burning (the same as M.3.C.1.AG)
+            "sources": ["M.3.C.1.AG"],
+            "sel": {"variable": ["N2O"]},
+        },
+        "3.C.4": {  # Direct N2O Emissions from Managed Soils
+            # We currently only have direct and indirect emissions combined in one category.
+            # Therefore, we need to make a decision how to classify it. We decided to map it all to
+            # direct emissions. It does not make a difference for the primap-hist sectors,
+            # but TODO direct / indirect should be mapped individually
+            "sources": [
+                "M.3.C.45.MP",  # Direct and indirect emissions from manure left on pasture (FAO M.3.MP)
+                "M.3.C.45.MA",  # Direct and indirect emissions from manure applied to soils (FAO M.3.MA)
+                "M.3.C.45.CR",  # Direct and indirect emissions from crop residues (FAO M.1.CR)
+                "3.C.4.a",  # synthetic fertilisers direct (FAO 1.B.1)
+                "M.3.C.4.DOS.CL",  # Drained cropland (FAO 5.B drained cropland)
+                "M.3.C.4.DOS.GL",  # Drained grassland (FAO 5.A drained grassland)
+            ],
+            "sel": {"variable": ["N2O"]},
+        },
+        "3.C.5": {  # Indirect N2O Emissions from Managed Soils
             "sources": [
-                "3.C.1.b",  # Biomass Burning In Croplands
-                "3.C.1.c",  # Biomass Burning in Grasslands
+                # Similarly to 3.C.4, 3.C.5 does not accurately represent the IPCC categories
+                # There should be only indirect emissions in this category, but we only have direct and indirect combined,
+                # except for "M.3.C.5.SF" which is only indirect
+                "M.3.C.5.SF",  # synthetic fertilisers indirect - there is no IPCC sub-category for this
             ],
             "sel": {"variable": ["N2O"]},
         },
         "M.3.C.AG": {
             "sources": [
-                "3.C.1.b",  # Biomass Burning In Croplands - looks good (CH4, N2O)
-                "3.C.1.c",  # Biomass Burning in Grasslands - looks good (CH4)
+                "M.3.C.1.AG",  # AG-related emissions from Biomass Burning, same as 3.C.1
                 "3.C.4",  # Direct N2O Emissions from Managed Soils
-                "M.3.C.4.SF",  # synthetic fertilisers direct
-                # "3.C.5",  # Indirect N2O Emissions from Managed Soils, currently empty
-                "M.3.C.5.SF",  # synthetic fertilisers indirect
-                # "3.C.6",  # Indirect N2O Emissions from Manure Management, currently empty
-                "3.C.7",  # rice cultivation
-                "3.B.2",  # Drained grassland, was in LULUCF orginally
-                "3.B.3",  # Drained cropland, was in LULUCF originally
+                "3.C.5",  # Indirect N2O Emissions from Managed Soils
             ],
             "sel": {"variable": ["N2O"]},
         },
         "M.AG.ELV": {
             "sources": ["M.3.C.AG"],
-            "sel": {"variable": ["N2O"]},  # "M.3.D.AG" is zero
+            "sel": {
+                "variable": ["N2O"]
+            },  # "M.3.D.AG" would be part of this, but does not exist
         },
         "3.C": {
-            "sources": [
-                "M.3.C.1.AG",  # TODO 3.C.1 would be correct, but doesn't match 2023
-                "3.C.4",  # Direct N2O Emissions from Managed Soils
-                "M.3.C.4.SF",  # synthetic fertilisers direct
-                # "3.C.5",  # Indirect N2O Emissions from Managed Soils, empty
-                "M.3.C.5.SF",  # synthetic fertilisers indirect
-                # "3.C.6",  # Indirect N2O Emissions from Manure Management, empty
-                "3.C.7",  # rice cultivation
-                "3.B.2",  # Drained grassland, was in LULUCF orginally
-                "3.B.3",  # Drained cropland, was in LULUCF originally
-            ],
-            "sel": {"variable": ["N2O"]},
-        },
-        # TODO 3.A.2.x are currently not read in
-        # "3.A.2.a": {  # decomposition of manure - CH4, N2O
-        #     "sources": [
-        #         "3.A.2.a.i",  # cattle (dairy)
-        #         "3.A.2.a.ii",  # cattle (non-dairy)
-        #     ],
-        #     "sel": {"variable": ["N2O"]},
-        # },
-        # # consistency check
-        # "3.A.2": {  # decomposition of manure - CH4, N2O
-        #     "sources": [
-        #         "3.A.2.a",
-        #         "3.A.2.b",
-        #         "3.A.2.c",
-        #         "3.A.2.d",
-        #         "3.A.2.e",
-        #         "3.A.2.f",
-        #         "3.A.2.g",
-        #         "3.A.2.h",
-        #         "3.A.2.i",
-        #         "3.A.2.j",
-        #     ],
-        #     "sel": {"variable": ["N2O"]},
-        # },
+            "sources": ["M.3.C.AG"],
+            "sel": {"variable": ["N2O"]},
+        },
         "3.A": {
-            "sources": ["3.A.1", "3.A.2"],
+            "sources": [
+                "3.A.2"
+            ],  # Manure management (3.A.1 is enteric fermentation and CH4 only)
             "sel": {"variable": ["N2O"]},
         },
         "M.AG": {
@@ -463,10 +467,10 @@ agg_info_ipcc2006_primap_N2O = {
         "M.LULUCF": {
             "sources": [
                 "3.B.1",  # Carbon stock change in forests (FAO 4, or 4.A and 4.B)
-                "M.NFC",
-                # "3.B.2",  # Drained grassland
-                # "3.B.3",  # Drained cropland
-                "3.C.1.a",  # Biomass Burning In Forests
+                # Note that IPCC M.3.C.1.a biomass burning in forest goes to LULUCF and not to AG. According to the FAO
+                # mapping document it is part of forest land.
+                # M.3.C.1.a for CO2 is an exception, because it is already included in the carbon stock change (FAO 4)
+                "M.3.C.1.a",  # Biomass Burning In Forests (FAO 6.A forest fires)
             ],
             "sel": {"variable": ["N2O"]},
         },
@@ -479,55 +483,42 @@ agg_info_ipcc2006_primap_N2O = {
 
 agg_info_ipcc2006_primap_CO2 = {
     "category (IPCC2006_PRIMAP)": {
-        "3.C.1": {  # Emissions from Biomass Burning
-            "sources": [
-                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not there in 2023 release
-                "3.C.1.b",  # Biomass Burning In Croplands
-                "3.C.1.c",  # Biomass Burning in Grasslands
-            ],
-            "sel": {"variable": ["CO2"]},
-        },
-        "M.3.C.1.AG": {  # AG-related emissions from Biomass Burning
-            "sources": [
-                # "3.C.1.b",  # Biomass Burning In Croplands
-                "3.C.1.c",  # Biomass Burning in Grasslands
-            ],
-            "sel": {"variable": ["CO2"]},
-        },
-        "M.3.C.AG": {
-            "sources": ["M.3.C.1.AG"],
-            "sel": {"variable": ["CO2"]},
-        },
-        "3.C": {
-            "sources": [
-                "M.3.C.1.AG",
-            ],
-            "sel": {"variable": ["CO2"]},
-        },
-        "M.AG.ELV": {
+        # CO2 is LULUCF only
+        # To see which CO2 categories are mapped to LULUCF, go to the FAOSTAT data explorer
+        # and select for item: IPCC aggregates -> LULUCF list, elements: Emissions (CO2), years: any, country: any
+        # There will be four items for each country: Forestland, Net Forest Conversion, Fires in Organic Soils,
+        # and Drained Organic Soils (CO2)
+        # The selection for IPCC Agriculture will be empty, that means CO2 is all LULUCF
+        "3.B.2": {
             "sources": [
-                "M.3.C.AG",
+                "M.3.B.2.FOS",  # crop land - fires in organic soils (6.C Fires in organic soils)
+                "M.3.B.2.DOS",  # crop land - drained organic soils (FAO 5.B Drained cropland)
             ],
             "sel": {"variable": ["CO2"]},
         },
-        "M.AG": {
+        "3.B.3": {
             "sources": [
-                "3.A",
-                "M.AG.ELV",
+                "M.3.B.3.DOS",  # grass land - drained organic soils (FAO 5.A Drained grassland)
             ],
             "sel": {"variable": ["CO2"]},
         },
         "M.LULUCF": {
             "sources": [
+                # Note that IPCC 3.B.1 comes from FAO 4 Carbon stock change in forests,
+                # which is the sum of forestland and net forest conversion.
+                # Also note that the category M.3.C.1.a (or FAO 6.A) forest fires would theoretically go
+                # into LULUCF. However, according to https://files-faostat.fao.org/production/GT/GT_en.pdf
+                # "The estimates from Forest fires exclude CO2, since these are
+                # already covered in the carbon stock changes calculations carried out in the FAOSTAT
+                # Forests domain." -> meaning CO2 is computed but not added to the LULUCF totals
                 "3.B.1",  # Carbon stock change in forests (FAO 4, or 4.A and 4.B)
-                "M.3.B.2.DOS",  # crop land - drained organic soils
-                "M.3.B.2.FOS",  # crop land - fires in organic soils
+                "3.B.2",  # crop land
                 "3.B.3",  # grass land
             ],
             "sel": {"variable": ["CO2"]},
         },
         "3": {
-            "sources": ["M.AG", "M.LULUCF"],
+            "sources": ["M.LULUCF"],
             "sel": {"variable": ["CO2"]},
         },
     }
@@ -535,75 +526,30 @@ agg_info_ipcc2006_primap_CO2 = {
 
 agg_info_ipcc2006_primap_CH4 = {
     "category (IPCC2006_PRIMAP)": {
-        "3.A.1.a": {  # enteric fermentation
-            "sources": [
-                "3.A.1.a.i",  # cattle (dairy)
-                "3.A.1.a.ii",  # cattle (non-dairy)
-            ],
-            "sel": {"variable": ["CH4"]},
-        },
-        "3.A.1": {  # enteric fermentation
+        "3.A": {  # Livestock
             "sources": [
-                "3.A.1.a",
-                "3.A.1.b",
-                "3.A.1.c",
-                "3.A.1.d",
-                "3.A.1.e",
-                "3.A.1.f",
-                "3.A.1.g",
-                "3.A.1.h",
-                "3.A.1.j",
+                "3.A.1",  # enteric fermentation (FAO M.3.EF)
+                "3.A.2",  # manure management (FAO M.3.MM)
             ],
             "sel": {"variable": ["CH4"]},
         },
-        # TODO 3.A.2.x are currently not read in
-        # "3.A.2.a": {  # decomposition of manure - CH4, N2O
-        #     "sources": [
-        #         "3.A.2.a.i",  # cattle (dairy)
-        #         "3.A.2.a.ii",  # cattle (non-dairy)
-        #     ],
-        #     "sel": {"variable": ["CH4"]},
-        # },
-        # # consistency check
-        # "3.A.2": {  # decomposition of manure - CH4, N2O
-        #     "sources": [
-        #         "3.A.2.a",
-        #         "3.A.2.b",
-        #         "3.A.2.c",
-        #         "3.A.2.d",
-        #         "3.A.2.e",
-        #         "3.A.2.f",
-        #         "3.A.2.g",
-        #         "3.A.2.h",
-        #         "3.A.2.i",
-        #         "3.A.2.j",
-        #     ],
-        #     "sel": {"variable": ["CH4"]},
-        # },
-        "3.A": {
-            "sources": ["3.A.1", "3.A.2"],
-            "sel": {"variable": ["CH4"]},
-        },
         "3.C.1": {  # Emissions from Biomass Burning
             "sources": [
-                # "3.C.1.a",  # Biomass Burning In Forest Lands, because not there in 2023 release
-                "3.C.1.b",  # Biomass Burning In Croplands
-                "3.C.1.c",  # Biomass Burning in Grasslands
+                "3.C.1.b",  # Biomass Burning In Croplands (FAO M.1.BCR)
+                "3.C.1.c",  # Biomass Burning in Grasslands (FAO 6.B savanna fires)
             ],
             "sel": {"variable": ["CH4"]},
         },
         "M.3.C.1.AG": {  # AG-related emissions from Biomass Burning
             "sources": [
-                "3.C.1.b",  # Biomass Burning In Croplands
-                "3.C.1.c",  # Biomass Burning in Grasslands
+                "3.C.1",  # Emissions from Biomass Burning
             ],
             "sel": {"variable": ["CH4"]},
         },
         "M.3.C.AG": {
             "sources": [
-                "3.C.1.b",  # Biomass Burning In Croplands - looks good (CH4, N2O)
-                "3.C.1.c",  # Biomass Burning in Grasslands - looks good (CH4)
-                "3.C.7",  # rice cultivation
+                "M.3.C.1.AG",  # AG-related emissions from Biomass Burning
+                "3.C.7",  # rice cultivation (FAO 1.A.2.c Rice cultivation)
             ],
             "sel": {"variable": ["CH4"]},
         },
@@ -621,17 +567,15 @@ agg_info_ipcc2006_primap_CH4 = {
         },
         "M.AG": {
             "sources": [
-                "3.A",
-                "M.AG.ELV",
+                "3.A",  # Livestock
+                "M.AG.ELV",  # Agriculture excluding livestock
             ],
             "sel": {"variable": ["CH4"]},
         },
         "M.LULUCF": {
             "sources": [
-                "3.B.1",  # Carbon stock change in forests
-                "3.B.2",  # Drained grassland
-                "3.B.3",  # Drained cropland
-                "3.C.1.a",  # Biomass Burning In Forests
+                "M.3.C.1.a",  # Biomass Burning In Forests (FAO 6.A forest fires)
+                "M.3.B.2.FOS",  # cropland - fires in organic soils (FAO 6.C fires in organic soils)
             ],
             "sel": {"variable": ["CH4"]},
         },

+ 1 - 0
src/faostat_data_primap/helper/country_mapping.py

@@ -260,6 +260,7 @@ country_to_iso3_mapping = {
     "Rwanda": "RWA",
     "Saint Barthélemy": "BLM",
     "Saint Helena, Ascension and Tristan da Cunha": "SHN",
+    "Ascension, Saint Helena and Tristan da Cunha": "SHN",
     "Saint Kitts and Nevis": "KNA",
     "Saint Lucia": "LCA",
     "Saint Martin (French part)": "MAF",

+ 141 - 2
src/faostat_data_primap/helper/definitions.py

@@ -424,8 +424,8 @@ read_config_all: Any = {
             "items-elements_to_remove": [
                 "All Crops - Crop residues (Direct emissions N2O)",
                 "All Crops - Crop residues (Indirect emissions N2O)",
-                "All Crops - Burning crop residues (Emissions N2O)",
-                "All Crops - Burning crop residues (Emissions CH4)",
+                # "All Crops - Burning crop residues (Emissions N2O)",
+                # "All Crops - Burning crop residues (Emissions CH4)",
             ],
             "columns_to_drop": [
                 "Element",
@@ -719,6 +719,145 @@ read_config_all: Any = {
                 "Source Code",
             ],
         },
+        "2025-02-12": {
+            "filename": "Emissions_livestock_E_All_Data_NOFLAG.csv",
+            "areas_to_remove": [
+                *areas_to_remove_base,
+                "Belgium-Luxembourg",
+                "Serbia and Montenegro",
+                "European Union (27)",
+            ],
+            "items_to_remove": [
+                # we don't need aggregates
+                "Camels and Llamas",
+                "Cattle",  # dairy and non-dairy
+                # mistake by FAO, should be "Mules, hinnies, and asses"
+                "Mules and Asses",
+                "Sheep and Goats",
+                "Swine",  # breeding and market
+            ],
+            "elements_to_remove": [
+                "Stocks",  # number of animals
+                "Manure management (manure treated, N content)",
+                "Manure left on pasture (N content)",
+                "Manure left on pasture that leaches (N content)",
+                "Manure left on pasture that volatilises (N content)",
+                "Manure applied to soils (N content)",
+                "Manure applied to soils that leaches (N content)",
+                "Manure applied to soils that volatilises (N content)",
+                # TODO ?
+                # sum of direct and indirect manure management emissions
+                # would add another level in the category tree, but
+                # is not needed (see miro)
+                # "Manure management (Emissions N2O)",
+            ],
+            "entity_mapping": {
+                "Livestock total (Emissions N2O)": "N2O",
+                "Livestock total (Emissions CH4)": "CH4",
+                "Enteric fermentation (Emissions CH4)": "CH4",
+                "Manure management (Emissions CH4)": "CH4",
+                "Manure management (Emissions N2O)": "N2O",
+                "Manure management (Direct emissions N2O)": "N2O",
+                "Manure management (Indirect emissions N2O)": "N2O",
+                "Manure left on pasture (Emissions N2O)": "N2O",
+                "Manure left on pasture (Direct emissions N2O)": "N2O",
+                "Indirect emissions (N2O that leaches) (Manure on pasture)": "N2O",
+                "Indirect emissions (N2O that volatilises) (Manure on pasture)": "N2O",
+                "Manure left on pasture (Indirect emissions N2O)": "N2O",
+                "Emissions (N2O) (Manure applied)": "N2O",
+                "Manure applied to soils (Direct emissions N2O)": "N2O",
+                "Indirect emissions (N2O that leaches) (Manure applied)": "N2O",
+                "Indirect emissions (N2O that volatilises) (Manure applied)": "N2O",
+                "Manure applied to soils (Indirect emissions N2O)": "N2O",
+            },
+            "category_mapping_item_element": {
+                "All Animals - Enteric fermentation (Emissions CH4)": "M.3.EF",
+                "All Animals - Manure management (Emissions CH4)": "M.3.MM",
+                "All Animals - Manure management (Emissions N2O)": "M.3.MM",
+                "All Animals - Manure left on pasture (Emissions N2O)": "M.3.MP",
+                "All Animals - Emissions (N2O) (Manure applied)": "M.3.MA",
+            },
+            "category_mapping_item": {
+                "All Animals": "3",
+                "Asses": "3.A",
+                "Camels": "3.B",
+                "Cattle, dairy": "3.C",
+                "Cattle, non-dairy": "3.D",
+                "Chickens, broilers": "3.E",
+                "Chickens, layers": "3.F",
+                "Goats": "3.G",
+                "Horses": "3.H",
+                "Mules and hinnies": "3.I",
+                "Sheep": "3.J",
+                "Llamas": "3.K",
+                "Chickens": "3.L",
+                "Poultry Birds": "3.M",
+                "Buffalo": "3.N",
+                "Ducks": "3.O",
+                "Swine, breeding": "3.P",
+                "Swine, market": "3.Q",
+                "Turkeys": "3.R",
+            },
+            "category_mapping_element": {
+                "Livestock total (Emissions N2O)": "",
+                "Livestock total (Emissions CH4)": "",
+                "Enteric fermentation (Emissions CH4)": ".4",
+                "Manure management (Emissions CH4)": ".1.a",
+                # TODO we need to aggregate 3.X.1 for CH4
+                "Manure management (Emissions N2O)": ".1",
+                "Manure management (Direct emissions N2O)": ".1.b",
+                "Manure management (Indirect emissions N2O)": ".1.c",
+                "Manure left on pasture (Emissions N2O)": ".2",
+                "Manure left on pasture (Direct emissions N2O)": ".2.a",
+                "Indirect emissions (N2O that leaches) (Manure on pasture)": ".2.b.i",
+                (
+                    "Indirect emissions (N2O that volatilises) " "(Manure on pasture)"
+                ): ".2.b.ii",
+                "Manure left on pasture (Indirect emissions N2O)": ".2.b",
+                "Emissions (N2O) (Manure applied)": ".3",
+                "Manure applied to soils (Direct emissions N2O)": ".3.a",
+                ("Indirect emissions (N2O that leaches) " "(Manure applied)"): ".3.b.i",
+                "Indirect emissions (N2O that volatilises) (Manure applied)": ".3.b.ii",
+                "Manure applied to soils (Indirect emissions N2O)": ".3.b",
+            },
+            "items-elements_to_remove": [
+                "All Animals - Manure left on pasture (Direct emissions N2O)",
+                (
+                    "All Animals - Indirect emissions (N2O that leaches) "
+                    "(Manure on pasture)"
+                ),
+                (
+                    "All Animals - Indirect emissions (N2O that volatilises) "
+                    "(Manure on pasture)"
+                ),
+                "All Animals - Manure left on pasture (Indirect emissions N2O)",
+                "All Animals - Manure applied to soils (Direct emissions N2O)",
+                "All Animals - Indirect emissions (N2O that leaches) (Manure applied)",
+                (
+                    "All Animals - Indirect emissions (N2O that volatilises) "
+                    "(Manure applied)"
+                ),
+                "All Animals - Manure applied to soils (Indirect emissions N2O)",
+                "All Animals - Manure management (Direct emissions N2O)",
+                "All Animals - Manure management (Indirect emissions N2O)",
+                # "All Animals - Enteric fermentation (Emissions CH4)",
+                # "All Animals - Manure management (Emissions CH4)",
+                # "All Animals - Manure management (Emissions N2O)",
+                # "All Animals - Manure left on pasture (Emissions N2O)",
+                # "All Animals - Emissions (N2O) (Manure applied)",
+            ],
+            "columns_to_drop": [
+                "Element",
+                "Element Code",
+                "Item",
+                "Item Code",
+                "Area Code (M49)",
+                "Area",
+                "Area Code",
+                "Item Code (CPC)",
+                "Source Code",
+            ],
+        },
     },
     "land_use_drained_organic_soils": {
         "2023-11-09": {

+ 3 - 22
src/faostat_data_primap/read.py

@@ -23,7 +23,6 @@ from faostat_data_primap.helper.definitions import (
 from faostat_data_primap.helper.paths import (
     downloaded_data_path,
     extracted_data_path,
-    root_path,
 )
 
 
@@ -323,41 +322,23 @@ def process(ds: xarray.Dataset) -> xarray.Dataset:
         The processed dataset
 
     """
-    # make categorisation A from yaml
-    categorisation_a = cc.FAO
-    # make categorisation B from yaml
-    categorisation_b = cc.IPCC2006_PRIMAP
-
-    # category FAOSTAT not yet part of climate categories, so we need to add it manually
-    cats = {
-        "FAO": categorisation_a,
-        "IPCC2006_PRIMAP": categorisation_b,
-    }
-
     # drop UNFCCC data
     ds = ds.drop_sel(source="UNFCCC")
 
     # consistency check in original categorisation
     ds_checked = ds.pr.add_aggregates_coordinates(agg_info=agg_info_fao)  # noqa: F841
 
-    # We need a conversion CSV file for each entity
-    # That's a temporary workaround until the filter function in climate categories works
-    conv = {}
     gases = ["CO2", "CH4", "N2O"]
 
-    for var in gases:
-        conversion_path = root_path / f"conv_FAO_IPPCC2006_PRIMAP_{var}.csv"
-        conv[var] = cc.Conversion.from_csv(
-            conversion_path,
-            cats=cats,  # type: ignore
-        )
+    conv = cc.FAO.conversion_to(cc.IPCC2006_PRIMAP)
 
     # convert for each entity
     da_dict = {}
     for var in gases:
+        conv_for_gas = conv.filter(aux_dim="gas", values=[var])
         da_dict[var] = ds[var].pr.convert(
             dim="category (FAO)",
-            conversion=conv[var],
+            conversion=conv_for_gas,
         )
 
     result = xr.Dataset(da_dict)

+ 2 - 0
tests/integration/test_download_and_read.py

@@ -5,6 +5,8 @@ from faostat_data_primap.read import read_latest_data
 
 
 # test the complete download and read process
+# This test will fail when a new release is published that does
+# not yet have a corresponding configuration
 def test_download_all_domains(tmp_path):
     downloaded_data_path = tmp_path / "downloaded_data"
     download_all_domains(downloaded_data_path=downloaded_data_path)

+ 13 - 1
tests/unit/test_conversion.py

@@ -1,6 +1,7 @@
 """Note that these tests only run locally, because they require the downloaded data"""
 import primap2 as pm2
 import pytest
+import xarray as xr
 
 from faostat_data_primap.helper.paths import (
     downloaded_data_path,
@@ -28,8 +29,19 @@ def test_processed_output_remains_the_same():
     # process raw data
     ds_processed_new = process(ds=ds_raw)
 
+    # filter by primap categories (sub-categories can change)
+    primap_sectors = ["3", "3.A", "M.AG", "M.AG.ELV", "M.LULUCF"]
+    ds_processed = ds_processed.loc[{"category (IPCC2006_PRIMAP)": primap_sectors}]
+    ds_processed_new = ds_processed_new.loc[
+        {"category (IPCC2006_PRIMAP)": primap_sectors}
+    ]
+
     # compare
-    assert ds_processed.broadcast_equals(ds_processed_new)
+    xr.testing.assert_allclose(
+        ds_processed, ds_processed_new, rtol=1e-10, check_dim_order=False
+    )
+
+    # assert ds_processed.broadcast_equals(ds_processed_new)
 
 
 @pytest.mark.parametrize(