فهرست منبع

Merge pull request #92 from JGuetschow/CCPI_nAI_2024

China, United Arab Emirates, primap2 update
Johannes Gütschow 7 ماه پیش
والد
کامیت
fd9a586009
58فایلهای تغییر یافته به همراه3117 افزوده شده و 514 حذف شده
  1. 1 0
      .gitignore
  2. 1 0
      downloaded_data/UNFCCC/00_new_downloads_BUR-2024-05-27.csv
  3. 1 0
      downloaded_data/UNFCCC/China/BUR3/China_BUR3_Chinese.pdf
  4. 1 0
      downloaded_data/UNFCCC/China/BUR3/China_BUR3_English.pdf
  5. 1 0
      downloaded_data/UNFCCC/Fiji/BUR1/Fiji_GHG_NIR_2023_Final.pdf
  6. 1 0
      downloaded_data/UNFCCC/United_Arab_Emirates/BUR1/all_data_manual.csv
  7. 1 0
      downloaded_data/UNFCCC/United_Arab_Emirates/BUR1/tables_manual.ods
  8. 1 1
      downloaded_data/UNFCCC/submissions-bur.csv
  9. 1 0
      extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC1996_2006_CHN_Inv.csv
  10. 1 0
      extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC1996_2006_CHN_Inv.nc
  11. 22 0
      extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC1996_2006_CHN_Inv.yaml
  12. 1 0
      extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC2006_PRIMAP.csv
  13. 1 0
      extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC2006_PRIMAP.nc
  14. 24 0
      extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC2006_PRIMAP.yaml
  15. 1 0
      extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC1996_2006_CHN_Inv.csv
  16. 1 0
      extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC1996_2006_CHN_Inv.nc
  17. 22 0
      extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC1996_2006_CHN_Inv.yaml
  18. 1 0
      extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC2006_PRIMAP.csv
  19. 1 0
      extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC2006_PRIMAP.nc
  20. 22 0
      extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC2006_PRIMAP.yaml
  21. 1 0
      extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC1996_2006_ARE_Inv.csv
  22. 1 0
      extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC1996_2006_ARE_Inv.nc
  23. 23 0
      extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC1996_2006_ARE_Inv.yaml
  24. 1 0
      extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC2006_PRIMAP.csv
  25. 1 0
      extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC2006_PRIMAP.nc
  26. 25 0
      extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC2006_PRIMAP.yaml
  27. 714 120
      poetry.lock
  28. 4 1
      pyproject.toml
  29. 2 0
      src/unfccc_ghg_data/helper/__init__.py
  30. 77 139
      src/unfccc_ghg_data/helper/functions.py
  31. 21 16
      src/unfccc_ghg_data/unfccc_di_reader/unfccc_di_reader_config.py
  32. 3 1
      src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_bur.py
  33. 203 98
      src/unfccc_ghg_data/unfccc_reader/Argentina/config_arg_bur5.py
  34. 0 1
      src/unfccc_ghg_data/unfccc_reader/Argentina/read_ARG_BUR5_from_csv.py
  35. 20 8
      src/unfccc_ghg_data/unfccc_reader/Burundi/config_bdi_bur1.py
  36. 7 7
      src/unfccc_ghg_data/unfccc_reader/Chile/config_chl_bur4.py
  37. 3 1
      src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR4_from_xlsx.py
  38. 3 1
      src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR5_from_xlsx.py
  39. 30 0
      src/unfccc_ghg_data/unfccc_reader/China/__init__.py
  40. 755 0
      src/unfccc_ghg_data/unfccc_reader/China/config_chn_bur3_nc4.py
  41. 230 0
      src/unfccc_ghg_data/unfccc_reader/China/read_CHN_BUR3_from_pdf.py
  42. 227 0
      src/unfccc_ghg_data/unfccc_reader/China/read_CHN_NC4_from_pdf.py
  43. 20 8
      src/unfccc_ghg_data/unfccc_reader/Guinea/config_gin_bur1.py
  44. 17 10
      src/unfccc_ghg_data/unfccc_reader/Indonesia/read_IDN_BUR3_from_pdf.py
  45. 55 22
      src/unfccc_ghg_data/unfccc_reader/Israel/config_isr_bur2.py
  46. 20 8
      src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur3.py
  47. 20 8
      src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur4.py
  48. 35 17
      src/unfccc_ghg_data/unfccc_reader/Morocco/config_mar_bur3.py
  49. 11 5
      src/unfccc_ghg_data/unfccc_reader/Nigeria/config_nga_bur2.py
  50. 11 5
      src/unfccc_ghg_data/unfccc_reader/Peru/config_per_bur3.py
  51. 16 10
      src/unfccc_ghg_data/unfccc_reader/Singapore/config_sgp_bur5.py
  52. 30 12
      src/unfccc_ghg_data/unfccc_reader/Taiwan/config_twn_nir2023.py
  53. 1 1
      src/unfccc_ghg_data/unfccc_reader/Taiwan/read_TWN_2023_Inventory_from_pdf.py
  54. 34 13
      src/unfccc_ghg_data/unfccc_reader/Thailand/config_tha_bur3.py
  55. 30 0
      src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/__init__.py
  56. 201 0
      src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/config_are_bur1.py
  57. 155 0
      src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/read_ARE_BUR1_from_csv.py
  58. 4 1
      src/unfccc_ghg_data/unfccc_reader/folder_mapping.json

+ 1 - 0
.gitignore

@@ -158,3 +158,4 @@ dmypy.json
 
 # Mac stuff
 *.DS_Store
+/stubs/xarray/

+ 1 - 0
downloaded_data/UNFCCC/00_new_downloads_BUR-2024-05-27.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/fM/QF/MD5E-s434--4e092d9f2f3567558f1f8b5dd8b36974.csv/MD5E-s434--4e092d9f2f3567558f1f8b5dd8b36974.csv

+ 1 - 0
downloaded_data/UNFCCC/China/BUR3/China_BUR3_Chinese.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/58/9z/MD5E-s943178--6c3c7cf95c2b8d2596c43117b6f5666a.pdf/MD5E-s943178--6c3c7cf95c2b8d2596c43117b6f5666a.pdf

+ 1 - 0
downloaded_data/UNFCCC/China/BUR3/China_BUR3_English.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/28/XP/MD5E-s969227--646f2b07b58c484b937a4afe6745913b.pdf/MD5E-s969227--646f2b07b58c484b937a4afe6745913b.pdf

+ 1 - 0
downloaded_data/UNFCCC/Fiji/BUR1/Fiji_GHG_NIR_2023_Final.pdf

@@ -0,0 +1 @@
+../../../../.git/annex/objects/fP/zK/MD5E-s6506574--b053162c016b58f13629cba244556c11.pdf/MD5E-s6506574--b053162c016b58f13629cba244556c11.pdf

+ 1 - 0
downloaded_data/UNFCCC/United_Arab_Emirates/BUR1/all_data_manual.csv

@@ -0,0 +1 @@
+../../../../.git/annex/objects/3F/Zw/MD5E-s1726--a09d9e4d2d62e49c1687f1667a341118.csv/MD5E-s1726--a09d9e4d2d62e49c1687f1667a341118.csv

+ 1 - 0
downloaded_data/UNFCCC/United_Arab_Emirates/BUR1/tables_manual.ods

@@ -0,0 +1 @@
+../../../../.git/annex/objects/PX/fj/MD5E-s57283--e9b971b7d617721b136e6873569205a2.ods/MD5E-s57283--e9b971b7d617721b136e6873569205a2.ods

+ 1 - 1
downloaded_data/UNFCCC/submissions-bur.csv

@@ -1 +1 @@
-../../.git/annex/objects/qj/XZ/MD5E-s54279--b2771d5ad902ce77fd39778f6eb2a6f0.csv/MD5E-s54279--b2771d5ad902ce77fd39778f6eb2a6f0.csv
+../../.git/annex/objects/JQ/2z/MD5E-s54690--0b7ba139c85a90d0544cc01f96860df6.csv/MD5E-s54690--0b7ba139c85a90d0544cc01f96860df6.csv

+ 1 - 0
extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC1996_2006_CHN_Inv.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/gX/v2/MD5E-s27071--1efd3e637878243b3da7f74de33ebb51.csv/MD5E-s27071--1efd3e637878243b3da7f74de33ebb51.csv

+ 1 - 0
extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC1996_2006_CHN_Inv.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/55/48/MD5E-s131783--7313dfa5a5f041b126275949edd9cab0.nc/MD5E-s131783--7313dfa5a5f041b126275949edd9cab0.nc

+ 22 - 0
extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC1996_2006_CHN_Inv.yaml

@@ -0,0 +1,22 @@
+attrs:
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: ''
+  comment: Read from pdf file by Johannes Gütschow
+  institution: United Nations Framework Convention on Climate Change (UNFCCC)
+  references: https://unfccc.int/documents/636696
+  cat: category (IPCC1996_2006_CHN_Inv)
+  scen: scenario (PRIMAP)
+  area: area (ISO3)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - provenance
+  - scenario (PRIMAP)
+  - area (ISO3)
+  - category (IPCC1996_2006_CHN_Inv)
+  - source
+  - entity
+  - unit
+data_file: CHN_BUR3_2023_IPCC1996_2006_CHN_Inv.csv

+ 1 - 0
extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/1J/80/MD5E-s85499--715c4ab7911a86da0585150a7e46be6e.csv/MD5E-s85499--715c4ab7911a86da0585150a7e46be6e.csv

+ 1 - 0
extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/XQ/j4/MD5E-s218783--a3d8c2c1952a4e9366b8ee23191e29fa.nc/MD5E-s218783--a3d8c2c1952a4e9366b8ee23191e29fa.nc

+ 24 - 0
extracted_data/UNFCCC/China/CHN_BUR3_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,24 @@
+attrs:
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: ' Processed on 2024-06-05'
+  comment: Read from pdf file by Johannes Gütschow Processed on 2024-06-05
+  institution: United Nations Framework Convention on Climate Change (UNFCCC)
+  references: https://unfccc.int/documents/636696
+  cat: category (IPCC2006_PRIMAP)
+  scen: scenario (PRIMAP)
+  area: area (ISO3)
+  entity: PFCS
+  gwp_context: AR6GWP100
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - provenance
+  - scenario (PRIMAP)
+  - area (ISO3)
+  - category (IPCC2006_PRIMAP)
+  - source
+  - entity
+  - unit
+data_file: CHN_BUR3_2023_IPCC2006_PRIMAP.csv

+ 1 - 0
extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC1996_2006_CHN_Inv.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/g5/ZG/MD5E-s21493--47630227996a0fca69c6eabdb309e11d.csv/MD5E-s21493--47630227996a0fca69c6eabdb309e11d.csv

+ 1 - 0
extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC1996_2006_CHN_Inv.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/m5/38/MD5E-s118587--df371ff4ad2e92f5f5488c3705a2298e.nc/MD5E-s118587--df371ff4ad2e92f5f5488c3705a2298e.nc

+ 22 - 0
extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC1996_2006_CHN_Inv.yaml

@@ -0,0 +1,22 @@
+attrs:
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: ''
+  comment: Read from pdf file by Johannes Gütschow
+  institution: United Nations Framework Convention on Climate Change (UNFCCC)
+  references: https://unfccc.int/documents/636695
+  cat: category (IPCC1996_2006_CHN_Inv)
+  scen: scenario (PRIMAP)
+  area: area (ISO3)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - source
+  - area (ISO3)
+  - scenario (PRIMAP)
+  - category (IPCC1996_2006_CHN_Inv)
+  - provenance
+  - entity
+  - unit
+data_file: CHN_NC4_2023_IPCC1996_2006_CHN_Inv.csv

+ 1 - 0
extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/64/Z2/MD5E-s78563--182c0627938ca4475c48bd9be4f6c0d4.csv/MD5E-s78563--182c0627938ca4475c48bd9be4f6c0d4.csv

+ 1 - 0
extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/2P/g7/MD5E-s218011--d2697e2e3195e12bc7b1d2020642abdd.nc/MD5E-s218011--d2697e2e3195e12bc7b1d2020642abdd.nc

+ 22 - 0
extracted_data/UNFCCC/China/CHN_NC4_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,22 @@
+attrs:
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: ' Processed on 2024-06-05'
+  comment: Read from pdf file by Johannes Gütschow Processed on 2024-06-05
+  institution: United Nations Framework Convention on Climate Change (UNFCCC)
+  references: https://unfccc.int/documents/636695
+  cat: category (IPCC2006_PRIMAP)
+  scen: scenario (PRIMAP)
+  area: area (ISO3)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - source
+  - area (ISO3)
+  - scenario (PRIMAP)
+  - category (IPCC2006_PRIMAP)
+  - provenance
+  - entity
+  - unit
+data_file: CHN_NC4_2023_IPCC2006_PRIMAP.csv

+ 1 - 0
extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC1996_2006_ARE_Inv.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/xG/71/MD5E-s10659--3b28e2e6b92044cb845ef86094a097ca.csv/MD5E-s10659--3b28e2e6b92044cb845ef86094a097ca.csv

+ 1 - 0
extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC1996_2006_ARE_Inv.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/Fj/jG/MD5E-s49491--e8e4cf5aa7e39de5e0b6e24e23d69148.nc/MD5E-s49491--e8e4cf5aa7e39de5e0b6e24e23d69148.nc

+ 23 - 0
extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC1996_2006_ARE_Inv.yaml

@@ -0,0 +1,23 @@
+attrs:
+  references: https://unfccc.int/documents/635318
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: United Arab Emirates. National Communication (NC). NC 5. Biennial Update
+    Report (BUR). BUR 1.
+  comment: Read from pdf by Johannes Gütschow
+  institution: UNFCCC
+  cat: category (IPCC1996_2006_ARE_Inv)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - provenance
+  - area (ISO3)
+  - category (IPCC1996_2006_ARE_Inv)
+  - scenario (PRIMAP)
+  - source
+  - entity
+  - unit
+data_file: ARE_BUR1_2023_IPCC1996_2006_ARE_Inv.csv

+ 1 - 0
extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/kJ/px/MD5E-s25915--6516846a2177717a5ae871df0cb5b2aa.csv/MD5E-s25915--6516846a2177717a5ae871df0cb5b2aa.csv

+ 1 - 0
extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/8f/7z/MD5E-s133092--8c16a354ad1bb876c0a71438466d6d0c.nc/MD5E-s133092--8c16a354ad1bb876c0a71438466d6d0c.nc

+ 25 - 0
extracted_data/UNFCCC/United_Arab_Emirates/ARE_BUR1_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,25 @@
+attrs:
+  references: https://unfccc.int/documents/635318
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: United Arab Emirates. National Communication (NC). NC 5. Biennial Update
+    Report (BUR). BUR 1. Processed on 2024-06-05
+  comment: Read from pdf by Johannes Gütschow Processed on 2024-06-05
+  institution: UNFCCC
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+  entity: HFCS
+  gwp_context: AR6GWP100
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - provenance
+  - area (ISO3)
+  - category (IPCC2006_PRIMAP)
+  - scenario (PRIMAP)
+  - source
+  - entity
+  - unit
+data_file: ARE_BUR1_2023_IPCC2006_PRIMAP.csv

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 714 - 120
poetry.lock


+ 4 - 1
pyproject.toml

@@ -12,7 +12,7 @@ include = ["LICENCE"]  # poetry uses US English so assumes it will be spelt LICE
 python = ">=3.10, <3.11"
 matplotlib = { version = "^3.7.1", optional = true }
 doit = "^0.36.0"
-primap2 = ">=0.9.8"
+primap2 = ">=0.11.0"
 pycountry = "^22.3.5"
 datalad = "^0.19.3"
 treelib = "^1.7.0"
@@ -49,6 +49,9 @@ ruff = "^0.1.8"
 pre-commit = "^3.3.1"
 towncrier = "^23.6.0"
 liccheck = "^0.9.1"
+notebook = "^7.2.0"
+ipywidgets = "^8.1.2"
+ipympl = "^0.9.4"
 
 [build-system]
 requires = ["poetry-core"]

+ 2 - 0
src/unfccc_ghg_data/helper/__init__.py

@@ -34,6 +34,7 @@ from .functions import (
     get_country_name,
     make_wide_table,
     process_data_for_country,
+    set_to_nan_in_ds,
 )
 
 __all__ = [
@@ -63,4 +64,5 @@ __all__ = [
     "nAI_countries",
     "AI_countries",
     "all_countries",
+    "set_to_nan_in_ds",
 ]

+ 77 - 139
src/unfccc_ghg_data/helper/functions.py

@@ -8,6 +8,7 @@ import copy
 import json
 import re
 import warnings
+from collections.abc import Hashable
 from copy import deepcopy
 from datetime import date
 from pathlib import Path
@@ -70,6 +71,8 @@ def process_data_for_country(  # noqa PLR0913, PLR0912, PLR0915
         Categories to return
     processing_info_country
         more detailed processing info TODO: explain format
+        The "aggregate_cats" flag is deprecated and will be removed in a future
+        version. Please use "aggregate_coord" with key "category" instead
 
     Returns
     -------
@@ -263,58 +266,43 @@ def process_data_for_country(  # noqa PLR0913, PLR0912, PLR0915
                     )
 
         # aggregate categories
-        # TODO replace by primap2 function once it is in primap2 stable
         if "aggregate_cats" in processing_info_country:
-            data_country = data_country.pr.dequantify()
+            warnings.warn(
+                'The "aggregate_cats" flag is deprecated and will '
+                "be removed in a future version. Please use "
+                '"aggregate_coord" with key "category" instead',
+                category=DeprecationWarning,
+            )
+            print(
+                f"Aggregating categories for country {country_code}, source {source}, "
+                f"scenario {scenario}"
+            )
+
+            # prep input to add_aggregates_coordinates
+            agg_info = {"category": processing_info_country["aggregate_cats"]}
+
             if "agg_tolerance" in processing_info_country:
                 agg_tolerance = processing_info_country["agg_tolerance"]
             else:
                 agg_tolerance = tolerance
-            aggregate_cats_current = processing_info_country["aggregate_cats"]
+
+            data_country = data_country.pr.add_aggregates_coordinates(
+                agg_info=agg_info,
+                tolerance=agg_tolerance,
+                skipna=True,
+                min_count=1,
+            )
+
+        if "aggregate_coord" in processing_info_country:
             print(
-                f"Aggregating categories for country {country_code}, source {source}, "
+                f"Aggregating data for country {country_code}, source {source}, "
                 f"scenario {scenario}"
             )
-            for cat_to_agg in aggregate_cats_current:
-                print(f"Category: {cat_to_agg}")
-                source_cats = aggregate_cats_current[cat_to_agg]["sources"]
-                data_agg = data_country.pr.loc[{"category": source_cats}].pr.sum(
-                    dim="category", skipna=True, min_count=1
-                )
-                nan_vars = [
-                    var
-                    for var in data_agg.data_vars
-                    if data_agg[var].isnull().all().data is True  # noqa: PD003
-                ]
-                data_agg = data_agg.drop(nan_vars)
-                if len(data_agg.data_vars) > 0:
-                    data_agg = data_agg.expand_dims(
-                        [f"category (" f"{cat_terminology_in})"]
-                    )
-                    data_agg = data_agg.assign_coords(
-                        coords={
-                            f"category ({cat_terminology_in})": (
-                                f"category ({cat_terminology_in})",
-                                [cat_to_agg],
-                            )
-                        }
-                    )
-                    if cat_name_present:
-                        cat_name = aggregate_cats_current[cat_to_agg]["name"]
-                        data_agg = data_agg.assign_coords(
-                            coords={
-                                "orig_cat_name": (
-                                    f"category ({cat_terminology_in})",
-                                    [cat_name],
-                                )
-                            }
-                        )
-                    data_country = data_country.pr.merge(
-                        data_agg, tolerance=agg_tolerance
-                    )
-                else:
-                    print(f"no data to aggregate category {cat_to_agg}")
-            data_country = data_country.pr.quantify()
+            data_country = data_country.pr.add_aggregates_coordinates(
+                agg_info=processing_info_country["aggregate_coords"],
+                skipna=True,
+                min_count=1,
+            )
 
         # copy HFCs and PFCs with default factors
         if "basket_copy" in processing_info_country:
@@ -333,13 +321,9 @@ def process_data_for_country(  # noqa PLR0913, PLR0912, PLR0915
 
         # aggregate gases if desired
         if "aggregate_gases" in processing_info_country:
-            # TODO: why use different code here than below. Can this fill non-existen
-            #  gas baskets?
-            for case in processing_info_country["aggregate_gases"].keys():
-                case_info = processing_info_country["aggregate_gases"][case]
-                data_country[
-                    case_info["basket"]
-                ] = data_country.pr.fill_na_gas_basket_from_contents(**case_info)
+            data_country = data_country.pr.add_aggregates_variables(
+                gases=processing_info_country["aggregate_gases"],
+            )
 
     # 3: map categories
     if category_conversion is not None:
@@ -366,40 +350,10 @@ def process_data_for_country(  # noqa PLR0913, PLR0912, PLR0915
         data_country = data_country.pr.loc[{"category": cats_to_keep}]
 
     # create gas baskets
-    entities_present = set(data_country.data_vars)
-    for basket in gas_baskets.keys():
-        basket_contents_present = [
-            gas for gas in gas_baskets[basket] if gas in entities_present
-        ]
-        if len(basket_contents_present) > 0:
-            if basket in list(data_country.data_vars):
-                data_country[basket] = data_country.pr.fill_na_gas_basket_from_contents(
-                    basket=basket,
-                    basket_contents=basket_contents_present,
-                    skipna=True,
-                    min_count=1,
-                )
-            else:
-                try:
-                    # print(data_country.data_vars)
-                    data_country[basket] = xr.full_like(
-                        data_country["CO2"], np.nan
-                    ).pr.quantify(units="Gg CO2 / year")
-                    data_country[basket].attrs = {
-                        "entity": basket.split(" ")[0],
-                        "gwp_context": basket.split(" ")[1][1:-1],
-                    }
-                    data_country[basket] = data_country.pr.gas_basket_contents_sum(
-                        basket=basket,
-                        basket_contents=basket_contents_present,
-                        min_count=1,
-                    )
-                    entities_present.add(basket)
-                except Exception as ex:
-                    print(
-                        f"No gas basket created for {country_code}, {source}, "
-                        f"{scenario}: {ex}"
-                    )
+    if gas_baskets:
+        data_country = data_country.pr.add_aggregates_variables(
+            gas_baskets=gas_baskets, skipna=True, min_count=1
+        )
 
     # amend title and comment
     data_country.attrs["comment"] = (
@@ -423,14 +377,9 @@ def convert_categories(
     """
     convert data from one category terminology to another
 
-    # TODO rewrite to use aggregate_coordinates functions
     """
     print(f"converting categories to {terminology_to}")
 
-    if "orig_cat_name" in ds_input.coords:
-        cat_name_present = True
-    else:
-        cat_name_present = False
     ds_converted = ds_input.copy(deep=True)
     ds_converted.attrs = deepcopy(ds_input.attrs)
     # TODO: change attrs for additional coordinates
@@ -456,57 +405,17 @@ def convert_categories(
             {f"category ({terminology_to})": (f"category ({terminology_to})", to_cats)}
         )
 
-    # redo the list of present cats after mapping, as we have new categories in the
-    # target terminology now
-    cats_present_mapped = list(
-        ds_converted.coords[f"category (" f"{terminology_to})"].values
-    )
     # aggregate categories
     if "aggregate" in conversion:
-        aggregate_cats = conversion["aggregate"]
-        for cat_to_agg in aggregate_cats:
-            if debug:
-                print(f"Category: {cat_to_agg}")
-            source_cats = [
-                cat
-                for cat in aggregate_cats[cat_to_agg]["sources"]
-                if cat in cats_present_mapped
-            ]
-            if debug:
-                print(source_cats)
-            data_agg = ds_converted.pr.loc[{"category": source_cats}].pr.sum(
-                dim="category", skipna=True, min_count=1
-            )
-            nan_vars = [
-                var
-                for var in data_agg.data_vars
-                if data_agg[var].isnull().all().data is True  # noqa: PD003
-            ]
-            data_agg = data_agg.drop(nan_vars)
-            if len(data_agg.data_vars) > 0:
-                data_agg = data_agg.expand_dims([f"category ({terminology_to})"])
-                data_agg = data_agg.assign_coords(
-                    coords={
-                        f"category ({terminology_to})": (
-                            f"category ({terminology_to})",
-                            [cat_to_agg],
-                        )
-                    }
-                )
-                if cat_name_present:
-                    data_agg = data_agg.assign_coords(
-                        coords={
-                            "orig_cat_name": (
-                                f"category ({terminology_to})",
-                                [aggregate_cats[cat_to_agg]["name"]],
-                            )
-                        }
-                    )
-                ds_converted = ds_converted.pr.merge(data_agg, tolerance=tolerance)
-                cats_present_mapped.append(cat_to_agg)
-            else:
-                print(f"no data to aggregate category {cat_to_agg}")
-
+        agg_info = {
+            "category": conversion["aggregate"],
+        }
+        ds_converted = ds_converted.pr.add_aggregates_coordinates(
+            agg_info=agg_info,
+            tolerance=tolerance,
+            skipna=True,
+            min_count=1,
+        )
     return ds_converted
 
 
@@ -1154,6 +1063,35 @@ def find_and_replace_values(
     return df
 
 
+def set_to_nan_in_ds(
+    ds_in: xr.Dataset,
+    entities: list[Hashable],
+    filter: dict[str, any],
+) -> xr.Dataset:
+    """
+    Set selected values of selected entities to NaN in a data set.
+
+    Parameters
+    ----------
+    ds_in
+        input dataset
+    entities
+        list of entities (variables selected via ``ds_in[entities]``) to work on
+    filter
+        .pr.loc type selector which selects the elements that should be replaced
+        with nan
+
+    Returns
+    -------
+        xr.Dataset with the desired values set to nan
+    """
+    # NOTE(review): the annotation ``dict[str, any]`` refers to the builtin
+    # function ``any``; ``typing.Any`` was presumably intended - confirm.
+    # The parameter name ``filter`` also shadows the builtin of the same name
+    # inside this function.
+
+    # Build a mask: zeros for the selected entities / elements, combined with a
+    # dataset of ones shaped like ``ds_in`` for everything the selection does
+    # not cover.
+    ds_mask = xr.zeros_like(ds_in[entities].pr.loc[filter]).combine_first(
+        xr.ones_like(ds_in)
+    )
+
+    # ``where`` keeps values where the mask is non-zero and replaces the rest
+    # by its default fill value (NaN).
+    return ds_in.where(ds_mask)
+
+
 def assert_values(
     df: pd.DataFrame,
     test_case: tuple[str | float | int],

+ 21 - 16
src/unfccc_ghg_data/unfccc_di_reader/unfccc_di_reader_config.py

@@ -333,33 +333,36 @@ cat_conversion = {
             "7": "5",
         },  # 5.A-D ignored as not fitting 2006 cats
         "aggregate": {
-            "2.B": {"sources": ["M.2.B_2.B", "M.2.B_2.E"], "name": "Chemical Industry"},
-            "2.H": {"sources": ["M.2.H.1_2", "2.H.3"], "name": "Other"},
+            "2.B": {
+                "sources": ["M.2.B_2.B", "M.2.B_2.E"],
+                "orig_cat_name": "Chemical Industry",
+            },
+            "2.H": {"sources": ["M.2.H.1_2", "2.H.3"], "orig_cat_name": "Other"},
             #'2': {'sources': ['2.A', '2.B', '2.C', '2.F', '2.H'],
             #      'name': 'Industrial Processes and Product Use'},
-            "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+            "3.A": {"sources": ["3.A.1", "3.A.2"], "orig_cat_name": "Livestock"},
             "3.C.1": {
                 "sources": ["3.C.1.b", "3.C.1.c"],
-                "name": "Emissions from biomass burning",
+                "orig_cat_name": "Emissions from biomass burning",
             },
             "M.3.C.1.AG": {
                 "sources": ["3.C.1.b", "3.C.1.c"],
-                "name": "Emissions from biomass burning (Agriculture)",
+                "orig_cat_name": "Emissions from biomass burning (Agriculture)",
             },
             "3.C": {
                 "sources": ["3.C.1", "M.3.C.45.AG", "3.C.7", "3.C.8"],
-                "name": "Aggregate sources and non-CO2 emissions sources on land",
+                "orig_cat_name": "Aggregate sources and non-CO2 emissions sources on land",
             },
             "M.3.C.AG": {
                 "sources": ["M.3.C.1.AG", "M.3.C.45.AG", "3.C.7", "3.C.8"],
-                "name": "Aggregate sources and non-CO2 emissions sources on land ("
+                "orig_cat_name": "Aggregate sources and non-CO2 emissions sources on land ("
                 "Agriculture)",
             },
             "M.AG.ELV": {
                 "sources": ["M.3.C.AG"],
-                "name": "Agriculture excluding livestock",
+                "orig_cat_name": "Agriculture excluding livestock",
             },
-            "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
+            "3": {"sources": ["M.AG", "M.LULUCF"], "orig_cat_name": "AFOLU"},
         },
     },
 }
@@ -541,15 +544,15 @@ di_processing_templates = {
             "aggregate_cats": {
                 "2": {
                     "sources": ["2.A", "2.B", "2.C"],
-                    "name": "2.  Industrial Processes",
+                    "orig_cat_name": "2.  Industrial Processes",
                 },
                 "15163": {
                     "sources": ["1", "2", "4", "6"],
-                    "name": "Total GHG emissions excluding LULUCF/LUCF",
+                    "orig_cat_name": "Total GHG emissions excluding LULUCF/LUCF",
                 },
                 "24540": {
                     "sources": ["1", "2", "5", "4", "6"],
-                    "name": "Total GHG emissions including LULUCF/LUCF",
+                    "orig_cat_name": "Total GHG emissions including LULUCF/LUCF",
                 },
             },
         },
@@ -949,7 +952,7 @@ di_processing_templates = {
             "aggregate_cats": {
                 "14637": {
                     "sources": ["14423", "14424"],
-                    "name": "International Bunkers",
+                    "orig_cat_name": "International Bunkers",
                 },
             },
             # downscaling in two steps
@@ -1137,7 +1140,7 @@ di_processing_templates = {
                 "5": {
                     "parent": "24540",
                     "subtract": ["15163"],
-                    "name": "5.  Land-Use Change and Forestry",
+                    "orig_cat_name": "5.  Land-Use Change and Forestry",
                 },
             },
             "downscale": {  # not tested yet
@@ -1454,7 +1457,9 @@ di_processing_templates = {
         },
     },
     "ERI": {
-        "DI2023-05-24": {  # 1994 1995-1999 (partial coverage, KYOTOGHG and total are incomplete), 2000
+        "DI2023-05-24": {
+            # 1994 1995-1999 (partial coverage, KYOTOGHG
+            # and total are incomplete), 2000
             "remove_ts": {
                 "energy_N2O": {  # very high in 1994
                     "category": ["1", "1.A", "15163", "24540"],
@@ -1785,7 +1790,7 @@ di_processing_templates = {
             "aggregate_cats": {
                 "1.B": {
                     "sources": ["1.B.2"],
-                    "name": "1.B  Fugitive Emissions from Fuels",
+                    "orig_cat_name": "1.B  Fugitive Emissions from Fuels",
                 },
             },
         },

+ 3 - 1
src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_bur.py

@@ -53,13 +53,15 @@ if __name__ == "__main__":
         if "href" not in link.attrs:
             continue
         href = link.attrs["href"]
-        if "/documents/" in href:
+        if "documents/" in href:
             if "title" in link.attrs.keys():
                 title = link.attrs["title"]
             else:
                 title = link.contents[0]
             if href.startswith("/documents"):
                 href = "https://unfccc.int" + href
+            elif href.startswith("documents"):
+                href = "https://unfccc.int/" + href
             # Only add pages in the format https://unfccc.int/documents/65587
             # to further downloads
             if str(Path(href).parent).endswith("documents"):

+ 203 - 98
src/unfccc_ghg_data/unfccc_reader/Argentina/config_arg_bur5.py

@@ -73,14 +73,14 @@ meta_data = {
 
 # many custom categories which are not in climate categories, so automatic
 # aggregation would be a lot of coding work
-cats_to_agg = {  # name is just for readability, not used
+cats_to_agg = {
     "1.A.1.c": {
         "sources": ["1.A.1.c.ii"],
-        "name": "Manufacture of Solid Fuels and Other Energy Industries",
+        # "orig_cat_name": "Manufacture of Solid Fuels and Other Energy Industries",
     },
     "1.A.1": {
         "sources": ["1.A.1.a", "1.A.1.b", "1.A.1.c"],
-        "name": "Energy Industries",
+        # "orig_cat_name": "Energy Industries",
     },
     "1.A.2": {
         "sources": [
@@ -95,38 +95,59 @@ cats_to_agg = {  # name is just for readability, not used
             "1.A.2.l",
             "1.A.2.m",
         ],
-        "name": "Manufacturing Industries and Construction",
+        # "orig_cat_name": "Manufacturing Industries and Construction",
+    },
+    "1.A.3.a": {
+        "sources": ["1.A.3.a.ii"],
+        # "orig_cat_name": "Civil Aviation"
     },
-    "1.A.3.a": {"sources": ["1.A.3.a.ii"], "name": "Civil Aviation"},
     "1.A.3.b": {
         "sources": ["1.A.3.b.iii", "1.A.3.b.vii"],
-        "name": "Road Transportation",
+        # "orig_cat_name": "Road Transportation",
+    },
+    "1.A.3.d": {
+        "sources": ["1.A.3.d.ii"],
+        # "orig_cat_name": "Water-Borne Navigation"
+    },
+    "1.A.3.e": {
+        "sources": ["1.A.3.e.i"],
+        # "orig_cat_name": "Other Transportation"
     },
-    "1.A.3.d": {"sources": ["1.A.3.d.ii"], "name": "Water-Borne Navigation"},
-    "1.A.3.e": {"sources": ["1.A.3.e.i"], "name": "Other Transportation"},
     "1.A.3": {
         "sources": ["1.A.3.a", "1.A.3.b", "1.A.3.c", "1.A.3.d", "1.A.3.e"],
-        "name": "Transport",
+        # "orig_cat_name": "Transport",
     },
     "1.A.4.a": {
         "sources": ["1.A.4.a.i", "1.A.4.a.ii", "1.A.4.a.iii"],
-        "name": "Commercial/Institutional",
+        # "orig_cat_name": "Commercial/Institutional",
+    },
+    "1.A.4": {
+        "sources": ["1.A.4.a", "1.A.4.b", "1.A.4.c"],
+        # "orig_cat_name": "Other Sectors"
     },
-    "1.A.4": {"sources": ["1.A.4.a", "1.A.4.b", "1.A.4.c"], "name": "Other Sectors"},
     "1.A": {
         "sources": ["1.A.1", "1.A.2", "1.A.3", "1.A.4"],
-        "name": "Fuel Combustion Activities",
+        # "orig_cat_name": "Fuel Combustion Activities",
     },
     "1.B.1.a.i": {
         "sources": ["1.B.1.a.i.1", "1.B.1.a.i.2"],
-        "name": "Underground mines",
+        # "orig_cat_name": "Underground mines",
+    },
+    "1.B.1.a": {
+        "sources": ["1.B.1.a.i"],
+        # "orig_cat_name": "Coal Mining and Handling"
+    },
+    "1.B.1.c": {
+        "sources": ["1.B.1.c.i"],
+        # "orig_cat_name": "Solid Fuel Transformation"
+    },
+    "1.B.1": {
+        "sources": ["1.B.1.a", "1.B.1.c"],
+        # "orig_cat_name": "Solid Fuels"
     },
-    "1.B.1.a": {"sources": ["1.B.1.a.i"], "name": "Coal Mining and Handling"},
-    "1.B.1.c": {"sources": ["1.B.1.c.i"], "name": "Solid Fuel Transformation"},
-    "1.B.1": {"sources": ["1.B.1.a", "1.B.1.c"], "name": "Solid Fuels"},
     "1.B.2.a": {
         "sources": ["1.B.2.a.i", "1.B.2.a.ii", "1.B.2.a.iii", "1.B.2.a.iv"],
-        "name": "Oil",
+        # "orig_cat_name": "Oil",
     },
     "1.B.2.b": {
         "sources": [
@@ -137,42 +158,66 @@ cats_to_agg = {  # name is just for readability, not used
             "1.B.2.b.v",
             "1.B.2.b.vi",
         ],
-        "name": "Natural Gas",
+        # "orig_cat_name": "Natural Gas",
+    },
+    "1.B.2": {
+        "sources": ["1.B.2.a", "1.B.2.b"],
+        # "orig_cat_name": "Oil and Natural Gas"
+    },
+    "1.B": {
+        "sources": ["1.B.1", "1.B.2"],
+        # "orig_cat_name": "Fugitive Emissions from Fuels"
+    },
+    "1": {
+        "sources": ["1.A", "1.B"],
+        # "orig_cat_name": "Energy"
     },
-    "1.B.2": {"sources": ["1.B.2.a", "1.B.2.b"], "name": "Oil and Natural Gas"},
-    "1.B": {"sources": ["1.B.1", "1.B.2"], "name": "Fugitive Emissions from Fuels"},
-    "1": {"sources": ["1.A", "1.B"], "name": "Energy"},
     "2.A.4": {
         "sources": ["2.A.4.a", "2.A.4.b", "2.A.4.d"],
-        "name": "Other Process Uses of Carbonates",
+        # "orig_cat_name": "Other Process Uses of Carbonates",
+    },
+    "2.A": {
+        "sources": ["2.A.1", "2.A.2", "2.A.4"],
+        # "orig_cat_name": "Mineral Industry"
     },
-    "2.A": {"sources": ["2.A.1", "2.A.2", "2.A.4"], "name": "Mineral Industry"},
     "2.B.8": {
         "sources": ["2.B.8.a", "2.B.8.b", "2.B.8.c", "2.B.8.f"],
-        "name": "Petrochemical and Carbon Black Production",
+        # "orig_cat_name": "Petrochemical and Carbon Black Production",
+    },
+    "2.B.9": {
+        "sources": ["2.B.9.a"],
+        # "orig_cat_name": "Fluorochemical Production"
     },
-    "2.B.9": {"sources": ["2.B.9.a"], "name": "Fluorochemical Production"},
     "2.B": {
         "sources": ["2.B.1", "2.B.2", "2.B.5", "2.B.7", "2.B.8", "2.B.9"],
-        "name": "Chemical Industry",
+        # "orig_cat_name": "Chemical Industry",
+    },
+    "2.C": {
+        "sources": ["2.C.1", "2.C.2", "2.C.3", "2.C.6"],
+        # "orig_cat_name": "Metal Industry"
     },
-    "2.C": {"sources": ["2.C.1", "2.C.2", "2.C.3", "2.C.6"], "name": "Metal Industry"},
     "2.D": {
         "sources": ["2.D.1", "2.D.2"],
-        "name": "Non-Energy Products from Fuels and Solvent Use",
+        # "orig_cat_name": "Non-Energy Products from Fuels and Solvent Use",
     },
     "2.F.1": {
         "sources": ["2.F.1.a", "2.F.1.b"],
-        "name": "Refrigeration and Air Conditioning",
+        # "orig_cat_name": "Refrigeration and Air Conditioning",
     },
     "2.F": {
         "sources": ["2.F.1", "2.F.2", "2.F.3", "2.F.4"],
-        "name": "Product Uses as Substitutes for Ozone Depleting Substances",
+        # "orig_cat_name": "Product Uses as Substitutes for Ozone Depleting Substances",
+    },
+    "2": {
+        "sources": ["2.A", "2.B", "2.C", "2.D", "2.F"],
+        # "orig_cat_name": "IPPU"
     },
-    "2": {"sources": ["2.A", "2.B", "2.C", "2.D", "2.F"], "name": "IPPU"},
     # AFOLU
     # 3.A - Livestock
-    "3.A.1.a": {"sources": ["3.A.1.a.i", "3.A.1.a.ii"], "name": "Cattle"},
+    "3.A.1.a": {
+        "sources": ["3.A.1.a.i", "3.A.1.a.ii"],
+        # "orig_cat_name": "Cattle"
+    },
     "3.A.1": {
         "sources": [
             "3.A.1.a",
@@ -184,9 +229,12 @@ cats_to_agg = {  # name is just for readability, not used
             "3.A.1.g",
             "3.A.1.h",
         ],
-        "name": "Enteric Fermentation",
+        # "orig_cat_name": "Enteric Fermentation",
+    },
+    "3.A.2.a": {
+        "sources": ["3.A.2.a.i", "3.A.2.a.ii"],
+        # "orig_cat_name": "Cattle"
     },
-    "3.A.2.a": {"sources": ["3.A.2.a.i", "3.A.2.a.ii"], "name": "Cattle"},
     "3.A.2": {
         "sources": [
             "3.A.2.a",
@@ -199,73 +247,91 @@ cats_to_agg = {  # name is just for readability, not used
             "3.A.2.h",
             "3.A.2.i",
         ],
-        "name": "Enteric Fermentation",
+        # "orig_cat_name": "Enteric Fermentation",
+    },
+    "3.A": {
+        "sources": ["3.A.1", "3.A.2"],
+        # "orig_cat_name": "Livestock"
     },
-    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
     # 3.B - Land
     "3.B.1.a.i": {
         "sources": ["3.B.1.a.i.1", "3.B.1.a.i.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # no name, not the normal IPCC category
     "3.B.1.a.ii": {
         "sources": ["3.B.1.a.ii.1", "3.B.1.a.ii.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # no name, not the normal IPCC category
     "3.B.1.a": {
         "sources": ["3.B.1.a.i", "3.B.1.a.ii"],
-        "name": "Forest Land Remaining Forest Land",
+        # "orig_cat_name": "Forest Land Remaining Forest Land",
     },
     # '3.B.1.b': {'sources': ['3.B.1.b.i', '3.B.1.b.ii'],
     #             'name': 'Land Converted to Forest Land'},
-    "3.B.1": {"sources": ["3.B.1.a"], "name": "Forest Land"},  # , '3.B.1.b'],
+    "3.B.1": {
+        "sources": ["3.B.1.a"],
+        # "orig_cat_name": "Forest Land"
+    },  # , '3.B.1.b'],
     "3.B.2.b": {
         "sources": ["3.B.2.b.i", "3.B.2.b.ii"],
-        "name": "Land Converted to Cropland",
+        # "orig_cat_name": "Land Converted to Cropland",
+    },
+    "3.B.2": {
+        "sources": ["3.B.2.b"],
+        # "orig_cat_name": "Cropland"
     },
-    "3.B.2": {"sources": ["3.B.2.b"], "name": "Cropland"},
     "3.B.3.b": {
         "sources": ["3.B.3.b.i", "3.B.3.b.ii"],
-        "name": "Land Converted to Grassland",
+        # "orig_cat_name": "Land Converted to Grassland",
+    },
+    "3.B.3": {
+        "sources": ["3.B.3.b"],
+        # "orig_cat_name": "Grassland"
+    },
+    "3.B": {
+        "sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.7"],
+        # "orig_cat_name": "Land"
     },
-    "3.B.3": {"sources": ["3.B.3.b"], "name": "Grassland"},
-    "3.B": {"sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.7"], "name": "Land"},
     # 3.C - Aggregate Sources and Non-CO2 Emissions Sources on Land
     "3.C.1.a": {
         "sources": ["3.C.1.a.i", "3.C.1.a.ii"],
-        "name": "Biomass Burning in Forest Lands",
+        # "orig_cat_name": "Biomass Burning in Forest Lands",
     },
     "3.C.1.b": {
         "sources": ["3.C.1.b.i", "3.C.1.b.ii"],
-        "name": "Biomass Burning in Croplands",
+        # "orig_cat_name": "Biomass Burning in Croplands",
     },
     "M.3.C.1.b.AG": {
         "sources": ["3.C.1.b.i"],
-        "name": "Biomass Burning in Croplands - Agriculture",
+        # "orig_cat_name": "Biomass Burning in Croplands - Agriculture",
     },
     "M.3.C.1.b.LU": {
         "sources": ["3.C.1.b.ii"],
-        "name": "Biomass Burning in Croplands - LULUCF",
+        # "orig_cat_name": "Biomass Burning in Croplands - LULUCF",
     },
     "3.C.1.c": {
         "sources": ["3.C.1.c.i", "3.C.1.c.ii"],
-        "name": "Biomass Burning in Grasslands",
+        # "orig_cat_name": "Biomass Burning in Grasslands",
     },
     "M.3.C.1.c.AG": {
         "sources": ["3.C.1.c.i"],
-        "name": "Biomass Burning in Grasslands - Agriculture",
+        # "orig_cat_name": "Biomass Burning in Grasslands - Agriculture",
     },
     "M.3.C.1.c.LU": {
         "sources": ["3.C.1.c.ii"],
-        "name": "Biomass Burning in Grasslands - LULUCF",
+        # "orig_cat_name": "Biomass Burning in Grasslands - LULUCF",
+    },
+    "3.C.1": {
+        "sources": ["3.C.1.a", "3.C.1.b", "3.C.1.c"],
+        # "orig_cat_name": "Biomass Burning"
     },
-    "3.C.1": {"sources": ["3.C.1.a", "3.C.1.b", "3.C.1.c"], "name": "Biomass Burning"},
     "M.3.C.1.AG": {
         "sources": ["M.3.C.1.b.AG", "M.3.C.1.c.AG"],
-        "name": "Biomass Burning - Agriculture",
+        # "orig_cat_name": "Biomass Burning - Agriculture",
     },
     "M.3.C.1.LU": {
         "sources": ["3.C.1.a", "M.3.C.1.b.LU", "M.3.C.1.c.LU"],
-        "name": "Biomass Burning",
+        # "orig_cat_name": "Biomass Burning",
     },
     "3.C.4.d": {
         "sources": [
@@ -277,11 +343,11 @@ cats_to_agg = {  # name is just for readability, not used
             "3.C.4.d.vi",
             "3.C.4.d.vii",
         ],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.4.g": {
         "sources": ["3.C.4.g.i", "3.C.4.g.ii"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.4": {
         "sources": [
@@ -295,47 +361,47 @@ cats_to_agg = {  # name is just for readability, not used
             "3.C.4.n",
             "3.C.4.o",
         ],
-        "name": "Direct N2O Emissions from Managed Soils",
+        # "orig_cat_name": "Direct N2O Emissions from Managed Soils",
     },
     "3.C.5.a": {
         "sources": ["3.C.5.a.i", "3.C.5.a.ii"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.b": {
         "sources": ["3.C.5.b.i", "3.C.5.b.ii"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.c": {
         "sources": ["3.C.5.c.i", "3.C.5.c.ii"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.d.i": {
         "sources": ["3.C.5.d.i.1", "3.C.5.d.i.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.d.ii": {
         "sources": ["3.C.5.d.ii.1", "3.C.5.d.ii.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.d.iii": {
         "sources": ["3.C.5.d.iii.1", "3.C.5.d.iii.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.d.iv": {
         "sources": ["3.C.5.d.iv.1", "3.C.5.d.iv.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.d.v": {
         "sources": ["3.C.5.d.v.1", "3.C.5.d.v.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.d.vi": {
         "sources": ["3.C.5.d.vi.1", "3.C.5.d.vi.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.d.vii": {
         "sources": ["3.C.5.d.vii.1", "3.C.5.d.vii.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.d": {
         "sources": [
@@ -347,28 +413,31 @@ cats_to_agg = {  # name is just for readability, not used
             "3.C.5.d.vi",
             "3.C.5.d.vii",
         ],
-        "name": "",
+        # "orig_cat_name": "",
+    },  # not standard IPCC2006
+    "3.C.5.f": {
+        "sources": ["3.C.5.f.ii"],
+        # "orig_cat_name": ""
     },  # not standard IPCC2006
-    "3.C.5.f": {"sources": ["3.C.5.f.ii"], "name": ""},  # not standard IPCC2006
     "3.C.5.g.i": {
         "sources": ["3.C.5.g.i.1", "3.C.5.g.i.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.g.ii": {
         "sources": ["3.C.5.g.ii.1", "3.C.5.g.ii.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.g": {
         "sources": ["3.C.5.g.i", "3.C.5.g.ii"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.n": {
         "sources": ["3.C.5.n.i", "3.C.5.n.ii"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5.o": {
         "sources": ["3.C.5.o.i", "3.C.5.o.ii"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.5": {
         "sources": [
@@ -382,60 +451,96 @@ cats_to_agg = {  # name is just for readability, not used
             "3.C.5.n",
             "3.C.5.o",
         ],
-        "name": "Indirect N2O Emissions from Managed Soils",
+        # "orig_cat_name": "Indirect N2O Emissions from Managed Soils",
     },
-    "3.C.6.a.i": {"sources": ["3.C.6.a.i.1"], "name": ""},  # not standard IPCC2006
+    "3.C.6.a.i": {
+        "sources": ["3.C.6.a.i.1"],
+        # "orig_cat_name": ""
+    },  # not standard IPCC2006
     "3.C.6.a.ii": {
         "sources": ["3.C.6.a.ii.1", "3.C.6.a.ii.2"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.6.a": {
         "sources": ["3.C.6.a.i", "3.C.6.a.ii"],
-        "name": "",
+        # "orig_cat_name": "",
     },  # not standard IPCC2006
     "3.C.6.h": {
         "sources": ["3.C.6.h.i", "3.C.6.h.ii"],
-        "name": "",
+        # "orig_cat_name": "",
+    },  # not standard IPCC2006
+    "3.C.6.i": {
+        "sources": ["3.C.6.i.i"],
+        # "orig_cat_name": ""
     },  # not standard IPCC2006
-    "3.C.6.i": {"sources": ["3.C.6.i.i"], "name": ""},  # not standard IPCC2006
     "3.C.6": {
         "sources": ["3.C.6.a", "3.C.6.h", "3.C.6.i"],
-        "name": "Indirect N2O Emissions from Manure Management",
+        # "orig_cat_name": "Indirect N2O Emissions from Manure Management",
     },
     "3.C": {
         "sources": ["3.C.1", "3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
-        "name": "Emissions from Biomass Burning",
+        # "orig_cat_name": "Emissions from Biomass Burning",
     },
     "M.3.C.AG": {
         "sources": ["M.3.C.1.AG", "3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
-        "name": "Emissions from Biomass Burning - Agriculture",
+        # "orig_cat_name": "Emissions from Biomass Burning - Agriculture",
+    },
+    "M.AG.ELV": {
+        "sources": ["M.3.C.AG"],
+        # "orig_cat_name": "Agriculture Excluding Livestock"
     },
-    "M.AG.ELV": {"sources": ["M.3.C.AG"], "name": "Agriculture Excluding Livestock"},
     "M.3.C.LU": {
         "sources": ["M.3.C.1.LU"],
-        "name": "Emissions from Biomass Burning - LULUCF",
+        # "orig_cat_name": "Emissions from Biomass Burning - LULUCF",
+    },
+    "3.D": {
+        "sources": ["3.D.1"],
+        # "orig_cat_name": "Other"
+    },
+    "M.3.D.LU": {
+        "sources": ["3.D.1"],
+        # "orig_cat_name": "Other - LULUCF"
+    },
+    "3": {
+        "sources": ["3.A", "3.B", "3.C", "3.D"],
+        # "orig_cat_name": "AFOLU"
+    },
+    "M.AG": {
+        "sources": ["3.A", "M.3.C.AG"],
+        # "orig_cat_name": "Agriculture"
+    },
+    "M.LULUCF": {
+        "sources": ["3.B", "M.3.C.LU", "3.D"],
+        # "orig_cat_name": "LULUCF"
     },
-    "3.D": {"sources": ["3.D.1"], "name": "Other"},
-    "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other - LULUCF"},
-    "3": {"sources": ["3.A", "3.B", "3.C", "3.D"], "name": "AFOLU"},
-    "M.AG": {"sources": ["3.A", "M.3.C.AG"], "name": "Agriculture"},
-    "M.LULUCF": {"sources": ["3.B", "M.3.C.LU", "3.D"], "name": "LULUCF"},
     # waste
-    "4.A": {"sources": ["4.A.1", "4.A.3"], "name": "Solid Waste Disposal"},
-    "4.C": {"sources": ["4.C.1"], "name": "Incineration and Open Burning of Waste"},
+    "4.A": {
+        "sources": ["4.A.1", "4.A.3"],
+        # "orig_cat_name": "Solid Waste Disposal"
+    },
+    "4.C": {
+        "sources": ["4.C.1"],
+        # "orig_cat_name": "Incineration and Open Burning of Waste"
+    },
     "4.D.2": {
         "sources": ["4.D.2.a", "4.D.2.b", "4.D.2.c", "4.D.2.d", "4.D.2.e"],
-        "name": "Industrial Wastewater Treatment and Discharge",
+        # "orig_cat_name": "Industrial Wastewater Treatment and Discharge",
     },
     "4.D": {
         "sources": ["4.D.1", "4.D.2"],
-        "name": "Wastewater Treatment and Discharge",
+        # "orig_cat_name": "Wastewater Treatment and Discharge",
+    },
+    "4": {
+        "sources": ["4.A", "4.B", "4.C", "4.D"],
+        # "orig_cat_name": "Waste"
     },
-    "4": {"sources": ["4.A", "4.B", "4.C", "4.D"], "name": "Waste"},
     # national totals
-    "0": {"sources": ["1", "2", "3", "4"], "name": "National Total"},
+    "0": {
+        "sources": ["1", "2", "3", "4"],
+        # "orig_cat_name": "National Total"
+    },
     "M.0.EL": {
         "sources": ["1", "2", "M.AG", "4"],
-        "name": "National Total Excluding LULUCF",
+        # "orig_cat_name": "National Total Excluding LULUCF",
     },
 }

+ 0 - 1
src/unfccc_ghg_data/unfccc_reader/Argentina/read_ARG_BUR5_from_csv.py

@@ -98,7 +98,6 @@ if __name__ == "__main__":
         data_if,
     )
 
-    data_pm2 = pm2.pm2io.from_interchange_format(data_if)
     encoding = {var: compression for var in data_pm2.data_vars}
     data_pm2.pr.to_netcdf(
         output_folder

+ 20 - 8
src/unfccc_ghg_data/unfccc_reader/Burundi/config_bdi_bur1.py

@@ -187,20 +187,32 @@ country_processing_step1 = {
                 "3.C.7",
                 "3.C.8",
             ],
-            "name": "Aggregate sources and non-CO2 emissions sources on land "
-            "(Agriculture)",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land "
+            # "(Agriculture)",
+        },
+        "M.3.D.AG": {
+            "sources": ["3.D.2"],
+            # "name": "Other (Agriculture)"
         },
-        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
         "M.AG.ELV": {
             "sources": ["M.3.C.AG", "M.3.D.AG"],
-            "name": "Agriculture excluding livestock",
+            # "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {
+            "sources": ["3.A", "M.AG.ELV"],
+            # "name": "Agriculture"
+        },
+        "M.3.D.LU": {
+            "sources": ["3.D.1"],
+            # "name": "Other (LULUCF)"
+        },
+        "M.LULUCF": {
+            "sources": ["3.B", "M.3.D.LU"],
+            # "name": "LULUCF"
         },
-        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
-        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
-        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
         "M.0.EL": {
             "sources": ["1", "2", "M.AG", "4", "5"],
-            "name": "National total emissions excluding LULUCF",
+            # "name": "National total emissions excluding LULUCF",
         },
     },
     "basket_copy": {

+ 7 - 7
src/unfccc_ghg_data/unfccc_reader/Chile/config_chl_bur4.py

@@ -305,16 +305,16 @@ cat_mapping = {  # categories not listed here have the same UNFCCC_GHG_data as i
 # '3.A.4.g.ii.',
 
 aggregate_cats = {
-    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+    "3.A": {"sources": ["3.A.1", "3.A.2"], "orig_cat_name": "Livestock"},
     "3.B": {
         "sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"],
-        "name": "Land",
+        "orig_cat_name": "Land",
     },
     "3.C.1": {
         "sources": ["3.C.1.b", "3.C.1.c"],
-        "name": "Emissions from Biomass Burning",
+        "orig_cat_name": "Emissions from Biomass Burning",
     },
-    "3.C.8": {"sources": ["3.C.8.a", "3.C.8.b"], "name": "Other"},
+    "3.C.8": {"sources": ["3.C.8.a", "3.C.8.b"], "orig_cat_name": "Other"},
     "3.C": {
         "sources": [
             "3.C.1",
@@ -326,8 +326,8 @@ aggregate_cats = {
             "3.C.7",
             "3.C.8",
         ],
-        "name": "Aggregate sources and non-CO2 emissions sources on land",
+        "orig_cat_name": "Aggregate sources and non-CO2 emissions sources on land",
     },
-    "3.D": {"sources": ["3.D.1", "3.D.2"], "name": "Other"},
-    "3": {"sources": ["3.A", "3.B", "3.C", "3.D"], "name": "AFOLU"},
+    "3.D": {"sources": ["3.D.1", "3.D.2"], "orig_cat_name": "Other"},
+    "3": {"sources": ["3.A", "3.B", "3.C", "3.D"], "orig_cat_name": "AFOLU"},
 }

+ 3 - 1
src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR4_from_xlsx.py

@@ -303,7 +303,9 @@ if __name__ == "__main__":
             ).sum()
 
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_cats[cat_to_agg]["orig_cat_name"]
+            )
 
             df_combine = df_combine.reset_index()
 

+ 3 - 1
src/unfccc_ghg_data/unfccc_reader/Chile/read_CHL_BUR5_from_xlsx.py

@@ -311,7 +311,9 @@ if __name__ == "__main__":
             )
 
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_cats[cat_to_agg]["orig_cat_name"]
+            )
 
             df_combine = df_combine.reset_index()
 

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/China/__init__.py

@@ -0,0 +1,30 @@
+"""Read China's BURs, NIRs, NCs
+
+Scripts and configurations to read China's submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'CHN'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=CHN
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 755 - 0
src/unfccc_ghg_data/unfccc_reader/China/config_chn_bur3_nc4.py

@@ -0,0 +1,755 @@
+"""Config for China BUR3 and NC4
+
+Configuration for reading China's NC4 and BUR3 from pdf.
+Full configuration is contained here including configuration for conversions to
+primap2 data format.
+
+NOTE: GWPs are a mixture of AR4 and SAR values (SAR except for HFC-245fa and HFC-365mfc)
+Thus the Kyoto GHG gas basket is not fully consistent with SAR GWPs and is
+re-generated for the processed version of the data.
+"""
+
+## general config
+gwp_to_use = "SARGWP100"  # see note above
+terminology_raw = "IPCC1996_2006_CHN_Inv"
+terminology_proc = "IPCC2006_PRIMAP"
+
+
+config_general = {
+    "time_format": "%Y",
+    "coords_cols": {
+        "category": "category",
+        "unit": "unit",
+        "entity": "entity",
+    },
+    "coords_defaults": {
+        "source": "CHN-GHG-Inventory",
+        "provenance": "measured",
+    },
+    "coords_terminologies": {
+        "area": "ISO3",
+        "category": terminology_raw,
+        "scenario": "PRIMAP",
+    },
+    "filter_remove": {
+        "f1": {
+            "category": ["6. Memo Items", "6. Memo items", "Memo Items"],
+        }
+    },
+    "coords_value_mapping": {
+        "unit": "PRIMAP1",
+        "entity": {
+            "CF4": "CF4",
+            "CH4": "CH4",
+            "CO2": "CO2",
+            "HFC-125": "HFC125",
+            "HFC-134a": "HFC134a",
+            "HFC-143a": "HFC143a",
+            "HFC-227ea": "HFC227ea",
+            "HFC-23": "HFC23",
+            "HFC-236fa": "HFC236fa",
+            "HFC-245fa": "HFC245fa",
+            "HFC-32": "HFC32",
+            "HFC-365mfc": "HFC365MFC",
+            "HFCs": f"HFCS ({gwp_to_use})",
+            "HFCs HFC-134a": "HFC134a",
+            "HFCs HFC-152a": "HFC152a",
+            "N2O": "N2O",
+            "PFCs": f"PFCS ({gwp_to_use})",
+            "PFCs C2F6": "C2F6",
+            "SF6": "SF6",
+            "Total": f"KYOTOGHG ({gwp_to_use})",
+            "合计": f"KYOTOGHG ({gwp_to_use})",
+        },
+        "category": {
+            "1. Energy": "1",
+            "1. Energy Sector": "1",
+            "Energy Sector": "1",
+            "2. Industrial Processes": "2",
+            "2. Industrial processes": "2",
+            "3. Agriculture": "3",
+            "4. LULUCF": "4",
+            "4. Land-use change and forestry (LUCF)": "4",
+            "5. Waste": "5",
+            "Waste Disposal": "5",
+            "6. Memo Items": "IGNORE",
+            "6. Memo items": "IGNORE",
+            "Agricultural land": "4.B",
+            "Farmland": "4.B",
+            "Agricultural soils": "3.D",
+            "Agricultural Soils": "3.D",
+            "Agriculture": "3",
+            "Biological treatment": "M.5.A.BIO",  #  HKG, 2006category 4.B
+            "Biomass combustion": "M.BIO",
+            "CO2 emissions from biomass": "M.BIO",
+            "Biomass burning": "M.BIO",
+            "Cement production": "2.A.1",  # sum to 2.A, HKG
+            "Changes in forest and other woody biomass stocks": "4.A.1",  # sum for HKG
+            "Chemical industry": "2.B",
+            "Consumption of halocarbons and SF6": "2.F",
+            "Consumption of Halocarbons and SF6": "2.F",
+            "Cropland": "4.B",
+            "Energy": "1",
+            "Energy industries": "1.A.1",
+            "Energy industry": "1.A.1",
+            "Enteric fermentation": "3.A",
+            "Field burning of agricultural residues": "3.F",
+            "Forest conversion": "M.DEF",
+            "Forest land": "4.A",
+            "Fuel combustion": "1.A",
+            "Fugitive emission": "1.B",
+            "Fugitive emissions": "1.B",
+            "Grassland": "4.C",
+            "Harvested wood products": "4.G.1",
+            "Incineration": "5.C",
+            "Industrial Processes": "2",
+            "Industrial processes": "2",
+            "International aviation": "M.1.A",
+            "International marine": "M.1.B",
+            "International navigation": "M.1.B",
+            "LUCF": "4",
+            "LULUCF": "4",
+            "Land-UseChangeand Forestry (LUCF)": "4",
+            "Landfill": "5.A",
+            "Manufacturing and construction industries": "1.A.2",
+            "Manufacturing industries and construction": "1.A.2",
+            "Manure management": "3.B",
+            "Memo Items": "IGNORE",
+            "Metal industry": "2.C",
+            "Metal production": "2.C",
+            "Mineral industry": "2.A",
+            "Mineral products": "2.A",
+            "Non-energy products from fuels and solvent use": "2.D",  # mixture of 1996 and 2006 categories
+            "Non-Energy Products from Fuels and Solvent Use": "2.D",
+            "Oil and natural gas": "1.B.2",
+            "Oil and natural gas system": "1.B.2",
+            "Other factors": "1.A.4",
+            "Other land": "4.F",
+            "Other sectors": "1.A.4",
+            "Prescribed burning of savannas": "3.E",
+            "Production of halocarbons and SF6": "2.E",
+            "Rice cultivation": "3.C",
+            "Settlements": "4.E",
+            "Solid fuel": "1.B.1",
+            "Solid fuels": "1.B.1",
+            "Solid waste disposal": "5.A",
+            "Treatment of solid waste": "5.A",
+            "Special regional aviation": "M.SR.A",  # MAC only
+            "Special regional marine": "M.SR.M",  # MAC only
+            "Total": "0",
+            "Total (with LUCF)": "0",
+            "Total (with LULUCF)": "0",
+            "Total (without LUCF)": "M.0.EL",
+            "Total (without LULUCF)": "M.0.EL",
+            "Total emissions": "0",
+            "Transport": "1.A.3",
+            "Waste": "5",
+            "Wastewater handling": "5.B",
+            "Wastewater treatment": "5.B",
+            "Treatment of wastewater": "5.B",
+            "Wetlands": "4.D",
+        },
+    },
+    "meta_data": {
+        "rights": "",
+        "contact": "mail@johannes-guetschow.de",
+        "title": "",
+        "comment": "Read fom pdf file by Johannes Gütschow",
+        "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
+    },
+}
+
+category_conversion = {
+    "CHN": {
+        "mapping": {
+            "0": "0",
+            "1": "1",
+            "1.A": "1.A",
+            "1.A.1": "1.A.1",
+            "1.A.2": "1.A.2",
+            "1.A.3": "1.A.3",
+            "1.A.4": "1.A.4",
+            "1.B": "1.B",
+            "1.B.1": "1.B.1",
+            "1.B.2": "1.B.2",
+            "2": "2",
+            "2.A": "2.A",
+            "2.A.1": "2.A.1",
+            "2.B": "2.B",
+            "2.C": "2.C",
+            "2.D": "2.D",
+            "2.E": "2.B.9",
+            "2.F": "2.F",
+            "3": "M.AG",
+            "3.A": "3.A.1",
+            "3.B": "3.A.2",
+            "3.C": "3.C.7",
+            "3.D": "M.3.C.45.AG",
+            "3.E": "3.C.1.c",
+            "3.F": "3.C.1.b",
+            "4": "M.LULUCF",
+            "4.A": "3.B.1",
+            "4.B": "3.B.2",
+            "4.C": "3.B.3",
+            "4.D": "3.B.4",
+            "4.E": "3.B.5",
+            "4.F": "3.B.6",
+            "4.G.1": "3.D.1",
+            "5": "4",
+            "5.A": "4.A",
+            "5.B": "4.D",
+            "5.C": "4.C",
+            "M.0.EL": "M.0.EL",
+            "M.1.A": "M.BK.A",
+            "M.1.B": "M.BK.M",
+            "M.5.A.BIO": "4.B",
+            "M.BIO": "M.BIO",
+            #'M.DEF': '', #
+            #'M.SR.A': '',
+            #'M.SR.M': '',
+        },
+        "aggregate": {
+            "3.A": {
+                "sources": ["3.A.1", "3.A.2"],
+                "filter": {
+                    "entity": ["CH4", "N2O"],
+                },
+            },
+            "3.B": {
+                "sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"],
+                "filter": {
+                    "entity": ["CO2", "CH4", "N2O"],
+                },
+            },
+            "3.C.1": {
+                "sources": ["3.C.1.b", "3.C.1.c"],
+                "filter": {
+                    "entity": ["CH4", "N2O"],
+                },
+            },
+            "M.3.C.1.AG": {
+                "sources": ["3.C.1.b", "3.C.1.c"],
+                "filter": {
+                    "entity": ["CH4", "N2O"],
+                },
+            },
+            "M.3.C.AG": {
+                "sources": ["M.3.C.1.AG", "M.3.C.45.AG", "3.C.7"],
+                "filter": {
+                    "entity": ["CH4", "N2O"],
+                },
+            },
+            "3.C": {
+                "sources": ["M.3.C.AG"],
+                "filter": {
+                    "entity": ["CH4", "N2O"],
+                },
+            },
+            "M.AG.ELV": {
+                "sources": ["M.3.C.AG"],
+                "filter": {
+                    "entity": ["CH4", "N2O"],
+                },
+            },
+            "M.3.D.LU": {
+                "sources": ["3.D.1"],
+                "filter": {
+                    "entity": ["CO2"],
+                },
+            },
+            "3.D": {
+                "sources": ["M.3.D.AG"],
+                "filter": {
+                    "entity": ["CO2"],
+                },
+            },
+            "M.AG": {  # check consistency
+                "sources": ["M.AG.ELV", "3.A"],
+                "filter": {
+                    "entity": ["CH4", "N2O"],
+                },
+            },
+            "M.LULUCF": {  # check consistency
+                "sources": ["3.B", "M.3.D.LU"],
+                "filter": {
+                    "entity": ["CO2", "CH4", "N2O"],
+                },
+            },
+            "3": {
+                "sources": ["M.AG", "M.LULUCF"],
+                "filter": {
+                    "entity": ["CO2", "CH4", "N2O"],
+                },
+            },
+            "M.0.EL": {
+                "sources": ["1", "2", "M.AG", "4"],
+            },
+            "0": {
+                "sources": ["1", "2", "3", "4"],
+            },
+            "M.BK": {
+                "sources": ["M.BK.A", "M.BK.M"],
+                "filter": {
+                    "entity": ["CO2", "CH4", "N2O"],
+                },
+            },
+        },
+    },
+    "HKG": {
+        "mapping": {
+            "0": "0",
+            "1": "1",
+            "1.A": "1.A",
+            "1.A.1": "1.A.1",
+            "1.A.2": "1.A.2",
+            "1.A.3": "1.A.3",
+            "1.A.4": "1.A.4",
+            "1.B": "1.B",
+            "1.B.1": "1.B.1",
+            "1.B.2": "1.B.2",
+            "2": "2",
+            "2.A.1": "2.A.1",
+            "2.E": "2.B.9",
+            "2.F": "2.F",
+            "3": "M.AG",
+            "3.A": "3.A.1",
+            "3.B": "3.A.2",
+            "3.C": "3.C.7",
+            "3.D": "M.3.C.45.AG",
+            "3.E": "3.C.1.c",
+            "4": "M.LULUCF",
+            "4.A.1": "3.B.1.a",
+            "4.B": "3.B.2",
+            "5": "4",
+            "5.A": "4.A",
+            "5.B": "4.D",
+            "M.0.EL": "M.0.EL",
+            "M.1.A": "M.BK.A",
+            "M.1.B": "M.BK.M",
+            "M.BIO": "M.BIO",
+            "M.DEF": "M.DEF",  #
+            "M.SR.A": "M.SR.A",
+            "M.SR.M": "M.SR.M",
+        },
+        "aggregate": {
+            "2.A": {
+                "sources": ["2.A.1"],
+                "filter": {
+                    "entity": ["CO2"],
+                },
+            },
+            "3.A": {
+                "sources": ["3.A.1", "3.A.2"],
+                "filter": {
+                    "entity": ["CH4"],
+                },
+            },
+            "3.B.1": {
+                "sources": ["3.B.1.a"],
+                "filter": {
+                    "entity": ["CO2"],
+                },
+            },
+            "3.C.1": {
+                "sources": ["3.C.1.b", "3.C.1.c"],
+                "filter": {
+                    "entity": ["CH4"],
+                },
+            },
+            "M.3.C.1.AG": {
+                "sources": ["3.C.1.c"],
+                "filter": {
+                    "entity": ["CH4"],
+                },
+            },
+            "M.3.C.AG": {
+                "sources": ["M.3.C.1.AG", "M.3.C.45.AG", "3.C.7"],
+                "filter": {
+                    "entity": ["CH4"],
+                },
+            },
+            "3.C": {
+                "sources": ["M.3.C.AG"],
+                "filter": {
+                    "entity": ["CH4"],
+                },
+            },
+            "M.AG.ELV": {
+                "sources": ["M.3.C.AG"],
+                "filter": {
+                    "entity": ["CH4"],
+                },
+            },
+            "M.AG": {  # check consistency (not consistent in table)
+                "sources": ["M.AG.ELV", "3.A"],
+                "tolerance": 0.21,
+                "filter": {
+                    "entity": ["CH4"],
+                },
+            },
+            "3": {
+                "sources": ["M.AG", "M.LULUCF"],
+                "filter": {
+                    "entity": ["CO2", "CH4", "N2O"],
+                },
+            },
+            "4": {
+                "sources": ["4.A", "4.D"],
+                "filter": {
+                    "entity": ["CO2", "CH4", "N2O"],
+                },
+            },
+            "M.0.EL": {
+                "sources": ["1", "2", "M.AG", "4"],
+                "tolerance": 0.06,  # for N2O in NC4
+            },
+            "0": {
+                "sources": ["1", "2", "3", "4"],
+                "tolerance": 0.06,  # for N2O in NC4
+            },
+            "M.BK": {
+                "sources": ["M.BK.A", "M.BK.M"],
+                "filter": {
+                    "entity": ["CO2", "CH4", "N2O"],
+                },
+            },
+        },
+    },
+    "MAC": {
+        "mapping": {
+            "0": "0",
+            "1": "1",
+            "1.A": "1.A",
+            "1.A.1": "1.A.1",
+            "1.A.2": "1.A.2",
+            "1.A.3": "1.A.3",
+            "1.A.4": "1.A.4",
+            "1.B": "1.B",
+            "2": "2",
+            "3": "M.AG",
+            "4": "M.LULUCF",
+            "5": "4",
+            "5.A": "4.A",
+            "5.B": "4.D",
+            "M.0.EL": "M.0.EL",
+            "M.1.A": "M.BK.A",
+            "M.1.B": "M.BK.M",
+            "M.BIO": "M.BIO",
+            "M.SR.A": "M.SR.A",
+            "M.SR.M": "M.SR.M",
+        },
+        "aggregate": {
+            "3": {
+                "sources": ["M.AG", "M.LULUCF"],
+            },
+            "M.0.EL": {
+                "sources": ["1", "2", "M.AG", "4"],
+                "tolerance": 0.1,  # for 2005, CH4
+            },
+            "0": {
+                "sources": ["1", "2", "3", "4"],
+                "tolerance": 0.1,  # for 2005, CH4
+            },
+        },
+    },
+}
+
+## NC4 specific config
+# Keys as used by the NC4 reader script (read_CHN_NC4_from_pdf.py):
+# * "coords_defaults" / "meta_data": update the entries of the same name
+#   taken from config_general before the conversion to interchange format
+# * "table_groups": per group of tables the pages to read for each area and
+#   the year passed on when the table is converted to long format
+# * "page_def": per page (key is the page number as string) the keyword
+#   arguments for camelot.read_pdf. The reader removes "unit",
+#   "unit_correction", "rows_to_fix" and "manual_repl_unit" before the
+#   camelot call: "unit" is the default unit, the data read are multiplied by
+#   "unit_correction", "rows_to_fix" is handed to fix_rows and
+#   "manual_repl_unit" to nir_add_unit_information
+# * "processing_info_country": per area the processing information passed to
+#   process_data_for_country
+config_nc4 = {
+    "coords_defaults": {
+        "scenario": "NC4",
+    },
+    "meta_data": {
+        "references": "https://unfccc.int/documents/636695",
+    },
+    "table_groups": {
+        # "overview": {  # inconsistent with other tables due to rounding
+        #     "pages": {
+        #         "CHN": [30],
+        #         "MAC": [218],
+        #         "HKG": [190],
+        #     },
+        #     "year": 2017,
+        # },
+        "inventory": {
+            "pages": {
+                "CHN": [31, 32],
+                "MAC": [219],
+                "HKG": [191],
+            },
+            "year": 2017,
+        },
+        "fgas": {
+            "pages": {
+                "CHN": [33],
+                "MAC": [],
+                "HKG": [192],
+            },
+            "year": 2017,
+        },
+    },
+    "page_def": {
+        # "30": {  # CHN overview 2017
+        #     "table_areas": ["77,756,520,582"],
+        #     "split_text": True,
+        #     "flavor": "stream",
+        #     "unit": "MtCO2eq",
+        # },
+        "31": {  # CHN detail 2017
+            "table_areas": ["73,451,518,74"],
+            "columns": ["294,383,470"],
+            "split_text": True,
+            "flavor": "stream",
+            "unit": "kt",
+        },
+        "32": {  # CHN detail 2017
+            "table_areas": ["73,777,518,563"],
+            "columns": ["290,379,468"],
+            "split_text": True,
+            "flavor": "stream",
+            "unit": "kt",
+        },
+        "33": {  # CHN f-gases 2017
+            "table_areas": ["74,508,766,235"],
+            "columns": ["152,198,241,289,342,393,448,505,560,620,682,707,737"],
+            "split_text": False,
+            "flavor": "stream",
+            "row_tol": 10,
+            "rows_to_fix": {
+                2: ["Industrial", "- Chemical", "Source"],
+                3: ["- Consumption of"],
+                4: ["- Non-Energy"],
+            },
+            "unit": "kt",
+        },
+        # "190": {  # HKG overview 2017
+        #     "table_areas": ["50,427,546,268"],
+        #     "split_text": True,
+        #     "flavor": "stream",
+        #     "unit": "MtCO2eq",
+        # },
+        "191": {  # HKG detail 2017
+            "table_areas": ["73,754,520,178"],
+            "split_text": True,
+            "flavor": "stream",
+            "unit": "kt",
+        },
+        "192": {  # HKG f-gases 2017
+            "table_areas": ["74,756,517,472"],
+            "columns": ["181,237,282,332,384,440,480"],
+            "split_text": True,
+            "flavor": "stream",
+            "rows_to_fix": {
+                -3: ["GHG source"],
+                3: ["- Production of", "- Consumption of"],
+            },
+            "unit": "t",
+            "unit_correction": 100,
+            "manual_repl_unit": {"PFCs": "tCO2eq"},
+        },
+        # "218": {  # MAC overview 2017
+        #     "table_areas": ["73,501,489,341"],
+        #     "split_text": True,
+        #     "flavor": "stream",
+        #     "unit": "ktCO2eq",
+        #     "unit_correction": 10,
+        # },
+        "219": {  # MAC detail 2017
+            "table_areas": ["73,754,520,339"],
+            "columns": ["291,393,458"],
+            "split_text": True,
+            "flavor": "stream",
+            "unit": "t",
+            "unit_correction": 100,
+        },
+    },
+    "processing_info_country": {
+        "HKG": {
+            "basket_copy": {  # needed for 2005, 2017, 2018
+                "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
+                "entities": ["PFCS"],
+                "source_GWP": gwp_to_use,
+            },
+        },
+        "CHN": None,
+        "MAC": None,
+    },
+}
+
+## BUR3 specific config
+# overview tables are not read (except for 2005 where no other data is available)
+# because the data are not consistent with the detailed tables due to rounding
+# errors
+# Keys as used by the BUR3 reader script (read_CHN_BUR3_from_pdf.py):
+# * "coords_defaults" / "meta_data": update the entries of the same name
+#   taken from config_general before the conversion to interchange format
+# * "table_groups": per group of tables the pages to read for each area and
+#   the year passed on when the table is converted to long format
+# * "page_def": per page (key is the page number as string) the keyword
+#   arguments for camelot.read_pdf. The reader removes "unit",
+#   "unit_correction", "rows_to_fix" and "manual_repl_unit" before the
+#   camelot call: "unit" is the default unit, the data read are multiplied by
+#   "unit_correction", "rows_to_fix" is handed to fix_rows and
+#   "manual_repl_unit" to nir_add_unit_information
+# * "remove_data": per area named filters; the reader hands each filter to
+#   set_to_nan_in_ds before processing ("entity" selects the variables)
+# * "processing_info_country": per area the processing information passed to
+#   process_data_for_country
+config_bur3 = {
+    "coords_defaults": {
+        "scenario": "BUR3",
+    },
+    "meta_data": {
+        "references": "https://unfccc.int/documents/636696",
+    },
+    "table_groups": {
+        # "overview": {
+        #     "pages": {
+        #         #"CHN": [11],
+        #         "MAC": [63],
+        #         #"HKG": [43],
+        #     },
+        #     "year": 2018,
+        # },
+        "inventory": {
+            "pages": {
+                "CHN": [13, 14],
+                "MAC": [64],
+                "HKG": [44],
+            },
+            "year": 2018,
+        },
+        "fgas": {
+            "pages": {
+                "CHN": [15],
+                # "MAC": [],
+                "HKG": [45],
+            },
+            "year": 2018,
+        },
+        "recalc": {
+            "pages": {
+                "CHN": [18],
+                "MAC": [67],
+                "HKG": [50],
+            },
+            "year": 2005,
+        },
+    },
+    "page_def": {
+        # "11": {  # CHN overview 2018
+        #     "table_areas": ["67,584,525,482"],
+        #     "split_text": True,
+        #     "flavor": "stream",
+        #     "unit": "MtCO2eq"
+        # },
+        "13": {  # CHN detail 2018
+            "table_areas": ["71,565,523,76"],
+            "columns": ["325,389,453"],
+            "split_text": False,
+            "flavor": "stream",
+            "row_tol": 10,
+            "unit": "kt",
+        },
+        "14": {  # CHN detail 2018
+            "table_areas": ["69,771,526,526"],
+            "columns": ["331,388,453"],
+            "split_text": False,
+            "flavor": "stream",
+            "unit": "kt",
+        },
+        "15": {  # CHN fgases 2018
+            "table_areas": ["62,493,778,226"],
+            "columns": ["133,180,226,276,334,384,442,502,560,620,687,712,752"],
+            "split_text": False,
+            "flavor": "stream",
+            "row_tol": 10,
+            "rows_to_fix": {
+                2: ["Sources", "2. Industrial", "⎯ Mineral", "⎯ Chemical", "⎯ Metal"],
+                3: ["⎯ Consumption"],
+                4: ["⎯ Non-energy"],
+            },
+            "unit": "kt",
+        },
+        "18": {  # CHN overview 2005
+            "table_areas": ["84,615,507,503"],
+            "split_text": False,
+            "flavor": "stream",
+            "unit": "MtCO2eq",
+        },
+        # "43": {  # HKG overview 2018
+        #     "table_areas": ["86,319,501,220"],
+        #     "split_text": False,
+        #     "flavor": "stream",
+        #     "unit": "ktCO2eq"
+        # },
+        "44": {  # HKG detail 2018
+            "table_areas": ["83,743,508,171"],
+            "split_text": False,
+            "flavor": "stream",
+            "unit": "kt",
+        },
+        "45": {  # HKG f-gases 2018
+            "table_areas": ["83,752,508,495"],
+            "split_text": False,
+            "flavor": "stream",
+            "row_tol": 10,
+            "rows_to_fix": {
+                3: ["GHG source and sink"],
+            },
+            "unit": "t",
+            "unit_correction": 100,
+            "manual_repl_unit": {"PFCs": "tCO2eq"},
+        },
+        "50": {  # HKG overview 2005
+            "table_areas": ["84,753,499,651"],
+            "split_text": False,
+            "flavor": "stream",
+            "unit": "ktCO2eq",
+        },
+        # "63": {  # MAC overview 2018
+        #     "table_areas": ["67,336,514,168"],
+        #     "columns": ["198,231,275,316,366,408,447"],
+        #     "split_text": False,
+        #     "flavor": "stream",
+        #     "row_tol": 10,
+        #     "strip_text": ".\n",
+        #     "rows_to_fix": {
+        #         2: ["Land-UseChangeand"],
+        #     },
+        #     "unit": "ktCO2eq",
+        # },
+        "64": {  # MAC detail 2018
+            "table_areas": ["66,754,526,387"],
+            "columns": ["308,389,458"],
+            "split_text": False,
+            "flavor": "stream",
+            "unit": "t",
+            "unit_correction": 100,
+        },
+        "67": {  # MAC overview 2005
+            "table_areas": ["65,549,520,438"],
+            "split_text": False,
+            "flavor": "stream",
+            "unit": "ktCO2eq",
+        },
+    },
+    "remove_data": {
+        "HKG": {
+            "f1": {"time": ["2018"], "category": ["0", "M.0.EL"], "entity": ["CH4"]},
+            "f2": {
+                "time": ["2018"],
+                "category": ["3.B", "3.D", "3.E"],
+                "entity": ["N2O"],
+            },
+        }
+    },
+    "processing_info_country": {
+        "HKG": {
+            "basket_copy": {  # needed for 2005, 2017, 2018
+                "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
+                "entities": ["HFCS", "PFCS"],
+                "source_GWP": gwp_to_use,
+            },
+        },
+        "CHN": {  # TODO: doesn't work for NC4. Change config
+            "basket_copy": {  # needed for 2005
+                "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
+                "entities": ["HFCS", "PFCS"],
+                "source_GWP": gwp_to_use,
+            },
+        },
+        "MAC": {
+            "basket_copy": {  # needed for 2005, 2017,
+                "GWPs_to_add": ["AR4GWP100", "AR5GWP100", "AR6GWP100"],
+                "entities": ["PFCS"],
+                "source_GWP": gwp_to_use,
+            },
+        },
+    },
+}

+ 230 - 0
src/unfccc_ghg_data/unfccc_reader/China/read_CHN_BUR3_from_pdf.py

@@ -0,0 +1,230 @@
+"""
+Read data from China's BUR3.
+
+Data are read from pdf. The file contains a detailed inventory for 2018 and
+recalculated 2005 data for the main sectors and gases.
+
+Inventories for mainland China (CHN), Hong Kong (HKG) and Macau (MAC) are reported in
+individual inventories.
+"""
+
+
+from copy import deepcopy
+
+import camelot
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import (
+    compression,
+    downloaded_data_path,
+    extracted_data_path,
+    fix_rows,
+    gas_baskets,
+    process_data_for_country,
+    set_to_nan_in_ds,
+)
+from unfccc_ghg_data.unfccc_reader.China.config_chn_bur3_nc4 import (
+    category_conversion,
+    config_bur3,
+    config_general,
+    gwp_to_use,
+    terminology_proc,
+)
+
+if __name__ == "__main__":
+    # ###
+    # configuration
+    # ###
+    input_folder = downloaded_data_path / "UNFCCC" / "China" / "BUR3"
+    output_folder = extracted_data_path / "UNFCCC" / "China"
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    output_filename = "CHN_BUR3_2023_"
+    inventory_file = "China_BUR3_English.pdf"
+
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    # ###
+    # read the tables from pdf
+    # ###
+    all_data = None
+    for table_group in config_bur3["table_groups"].keys():
+        current_group = config_bur3["table_groups"][table_group]
+        for country in current_group["pages"].keys():
+            for page in current_group["pages"][country]:
+                print(f"Reading {country}, {table_group}, page {page}")
+                page_str = str(page)
+                page_def = config_bur3["page_def"][page_str]
+                if "rows_to_fix" in page_def:
+                    rows_to_fix = page_def.pop("rows_to_fix")
+                else:
+                    rows_to_fix = {}
+                if "unit_correction" in page_def:
+                    unit_correction = page_def.pop("unit_correction")
+                else:
+                    unit_correction = None
+                unit = page_def.pop("unit")
+                if "manual_repl_unit" in page_def:
+                    manual_repl_unit = page_def.pop("manual_repl_unit")
+                else:
+                    manual_repl_unit = None
+
+                tables_read = camelot.read_pdf(
+                    str(input_folder / inventory_file), pages=page_str, **page_def
+                )
+                table_df = tables_read[0].df
+                # fix split rows
+                if rows_to_fix:
+                    for n_rows in rows_to_fix.keys():
+                        table_df = fix_rows(table_df, rows_to_fix[n_rows], 0, n_rows)
+                # remove unwanted characters
+                table_df[0] = table_df[0].str.replace("\n", " ")
+                table_df[0] = table_df[0].str.replace("⎯ ", "")
+                table_df[0] = table_df[0].str.replace("♦", "")
+                table_df.iloc[0] = table_df.iloc[0].str.strip()
+
+                table_df = pm2.pm2io.nir_add_unit_information(
+                    table_df,
+                    unit_row=0,
+                    entity_row=0,
+                    regexp_entity=".*",
+                    default_unit=unit,
+                    manual_repl_unit=manual_repl_unit,
+                )
+                table_df = table_df.set_index(table_df.columns[0])
+                table_long = pm2.pm2io.nir_convert_df_to_long(
+                    table_df,
+                    year=current_group["year"],
+                    header_long=["category", "entity", "unit", "time", "data"],
+                )
+                # clean data and metadata
+                table_long["entity"] = table_long["entity"].str.strip()
+                table_long["data"] = table_long["data"].str.strip()
+                table_long["data"] = table_long["data"].str.replace(",", "")
+                table_long["data"] = table_long["data"].str.replace(" ", "")
+
+                # convert to primap2 format
+                coords_defaults = config_general["coords_defaults"].copy()
+                coords_defaults.update(config_bur3["coords_defaults"])
+                coords_defaults.update({"area": country})
+                coords_value_mapping = deepcopy(config_general["coords_value_mapping"])
+                if "CO2eq" in unit:
+                    coords_value_mapping["entity"].update(
+                        {
+                            "CH4": f"CH4 ({gwp_to_use})",
+                            "N2O": f"N2O ({gwp_to_use})",
+                            "SF6": f"SF6 ({gwp_to_use})",
+                        }
+                    )
+
+                meta_data = config_general["meta_data"].copy()
+                meta_data.update(config_bur3["meta_data"])
+                data_if = pm2.pm2io.convert_long_dataframe_if(
+                    table_long,
+                    coords_cols=config_general["coords_cols"],
+                    coords_defaults=coords_defaults,
+                    coords_terminologies=config_general["coords_terminologies"],
+                    coords_value_mapping=coords_value_mapping,
+                    filter_remove=config_general["filter_remove"],
+                    meta_data=meta_data,
+                    time_format=config_general["time_format"],
+                )
+                print(data_if["entity"].unique())
+                data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+                if unit_correction is not None:
+                    for entity in data_pm2.data_vars:
+                        data_pm2[entity].data = data_pm2[entity].data * unit_correction
+
+                if all_data is None:
+                    all_data = data_pm2
+                else:
+                    all_data = all_data.pr.merge(data_pm2)
+
+    # ###
+    # save data to IF and native format
+    # ###
+    all_data_if = all_data.pr.to_interchange_format()
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder
+        / (output_filename + config_general["coords_terminologies"]["category"]),
+        all_data_if,
+    )
+
+    encoding = {var: compression for var in all_data.data_vars}
+    all_data.pr.to_netcdf(
+        output_folder
+        / (
+            output_filename + config_general["coords_terminologies"]["category"] + ".nc"
+        ),
+        encoding=encoding,
+    )
+
+    ### processing
+    data_proc_pm2 = None
+    remove_data = config_bur3["remove_data"]
+
+    # actual processing
+    for country in all_data.coords["area (ISO3)"].to_numpy():
+        print(f"Processing data for {country}")
+        data_country = all_data.pr.loc[{"area (ISO3)": [country]}]
+        # remove wrong and conflicting data
+        if country in remove_data:
+            for filter_name in remove_data[country]:
+                filter = remove_data[country][filter_name].copy()
+                entities = data_country.data_vars
+                if "entity" in filter:
+                    entities_current = filter.pop("entity")
+                    entities = [
+                        entity for entity in entities if entity in entities_current
+                    ]
+
+                data_country = set_to_nan_in_ds(
+                    data_country,
+                    entities=entities,
+                    filter=filter,
+                )
+                # ds_mask = xr.zeros_like(
+                #     data_country[entities].pr.loc[filter]
+                # ).combine_first(xr.ones_like(data_country))
+                #
+                # data_country = data_country.where(ds_mask)
+
+        data_proc_pm2_new = process_data_for_country(
+            data_country,
+            entities_to_ignore=[],
+            gas_baskets=gas_baskets,
+            processing_info_country=config_bur3["processing_info_country"][country],
+            cat_terminology_out=terminology_proc,
+            category_conversion=category_conversion[country],
+        )
+        if data_proc_pm2 is None:
+            data_proc_pm2 = data_proc_pm2_new
+        else:
+            data_proc_pm2 = data_proc_pm2.pr.merge(data_proc_pm2_new)
+
+    # adapt source and metadata
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
+    data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.loc[{"source": ["BUR_NIR"]}]
+
+    # ###
+    # save data to IF and native format
+    # ###
+    data_proc_if = data_proc_pm2.pr.to_interchange_format()
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc),
+        data_proc_if,
+    )
+
+    encoding = {var: compression for var in data_proc_pm2.data_vars}
+    data_proc_pm2.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"),
+        encoding=encoding,
+    )

+ 227 - 0
src/unfccc_ghg_data/unfccc_reader/China/read_CHN_NC4_from_pdf.py

@@ -0,0 +1,227 @@
+"""
+Read data from China's NC4.
+
+Data are read from pdf. The file contains a detailed inventory for 2017.
+
+Inventories for mainland China (CHN), Hong Kong (HKG) and Macau (MAC) are reported in
+individual inventories.
+"""
+
+
+from copy import deepcopy
+
+import camelot
+import primap2 as pm2
+import xarray as xr
+
+from unfccc_ghg_data.helper import (
+    compression,
+    downloaded_data_path,
+    extracted_data_path,
+    fix_rows,
+    gas_baskets,
+    process_data_for_country,
+)
+from unfccc_ghg_data.unfccc_reader.China.config_chn_bur3_nc4 import (
+    category_conversion,
+    config_general,
+    config_nc4,
+    gwp_to_use,
+    terminology_proc,
+)
+
+if __name__ == "__main__":
+    # ###
+    # configuration
+    # ###
+    input_folder = downloaded_data_path / "UNFCCC" / "China" / "NC4"
+    output_folder = extracted_data_path / "UNFCCC" / "China"
+    if not output_folder.exists():
+        output_folder.mkdir()
+
+    output_filename = "CHN_NC4_2023_"
+    inventory_file = "China_NC4_English.pdf"
+
+    def repl(m):  # noqa: D103
+        return m.group("code")
+
+    # ###
+    # read the tables from pdf
+    # ###
+    all_data = None
+    for table_group in config_nc4["table_groups"].keys():
+        current_group = config_nc4["table_groups"][table_group]
+        for country in current_group["pages"].keys():
+            for page in current_group["pages"][country]:
+                print(f"Reading {country}, {table_group}, page {page}")
+                page_str = str(page)
+                page_def = config_nc4["page_def"][page_str]
+                if "rows_to_fix" in page_def:
+                    rows_to_fix = page_def.pop("rows_to_fix")
+                else:
+                    rows_to_fix = {}
+                if "unit_correction" in page_def:
+                    unit_correction = page_def.pop("unit_correction")
+                else:
+                    unit_correction = None
+                unit = page_def.pop("unit")
+                if "manual_repl_unit" in page_def:
+                    manual_repl_unit = page_def.pop("manual_repl_unit")
+                else:
+                    manual_repl_unit = None
+
+                tables_read = camelot.read_pdf(
+                    str(input_folder / inventory_file), pages=page_str, **page_def
+                )
+                table_df = tables_read[0].df
+                # fix split rows
+                if rows_to_fix:
+                    for n_rows in rows_to_fix.keys():
+                        table_df = fix_rows(table_df, rows_to_fix[n_rows], 0, n_rows)
+                # remove unwanted characters
+                table_df[0] = table_df[0].str.replace("\n", " ")
+                table_df[0] = table_df[0].str.replace("⎯ ", "")
+                table_df[0] = table_df[0].str.replace("♦", "")
+                table_df[0] = table_df[0].str.replace("^-", "", regex=True)
+                table_df[0] = table_df[0].str.replace("", "")
+                table_df[0] = table_df[0].str.strip()
+
+                table_df.iloc[0] = table_df.iloc[0].str.strip()
+
+                table_df = pm2.pm2io.nir_add_unit_information(
+                    table_df,
+                    unit_row=0,
+                    entity_row=0,
+                    regexp_entity=".*",
+                    default_unit=unit,
+                    manual_repl_unit=manual_repl_unit,
+                )
+                table_df = table_df.set_index(table_df.columns[0])
+                table_long = pm2.pm2io.nir_convert_df_to_long(
+                    table_df,
+                    year=current_group["year"],
+                    header_long=["category", "entity", "unit", "time", "data"],
+                )
+                # clean data and metadata
+                table_long["entity"] = table_long["entity"].str.strip()
+                table_long["data"] = table_long["data"].str.strip()
+                table_long["data"] = table_long["data"].str.replace(",", "")
+                table_long["data"] = table_long["data"].str.replace(" ", "")
+
+                # convert to primap2 format
+                coords_defaults = config_general["coords_defaults"].copy()
+                coords_defaults.update(config_nc4["coords_defaults"])
+                coords_defaults.update({"area": country})
+                coords_value_mapping = deepcopy(config_general["coords_value_mapping"])
+                if "CO2eq" in unit:
+                    coords_value_mapping["entity"].update(
+                        {
+                            "CH4": f"CH4 ({gwp_to_use})",
+                            "N2O": f"N2O ({gwp_to_use})",
+                            "SF6": f"SF6 ({gwp_to_use})",
+                        }
+                    )
+
+                meta_data = config_general["meta_data"].copy()
+                meta_data.update(config_nc4["meta_data"])
+                data_if = pm2.pm2io.convert_long_dataframe_if(
+                    table_long,
+                    coords_cols=config_general["coords_cols"],
+                    coords_defaults=coords_defaults,
+                    coords_terminologies=config_general["coords_terminologies"],
+                    coords_value_mapping=coords_value_mapping,
+                    filter_remove=config_general["filter_remove"],
+                    meta_data=meta_data,
+                    time_format=config_general["time_format"],
+                )
+                data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+                if unit_correction is not None:
+                    for entity in data_pm2.data_vars:
+                        data_pm2[entity].data = data_pm2[entity].data * unit_correction
+
+                if all_data is None:
+                    all_data = data_pm2
+                else:
+                    all_data = all_data.pr.merge(data_pm2)
+
+    # ###
+    # save data to IF and native format
+    # ###
+    all_data_if = all_data.pr.to_interchange_format()
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder
+        / (output_filename + config_general["coords_terminologies"]["category"]),
+        all_data_if,
+    )
+
+    encoding = {var: compression for var in all_data.data_vars}
+    all_data.pr.to_netcdf(
+        output_folder
+        / (
+            output_filename + config_general["coords_terminologies"]["category"] + ".nc"
+        ),
+        encoding=encoding,
+    )
+
+    ### processing
+    data_proc_pm2 = None
+    remove_data = []
+
+    # actual processing
+    for country in all_data.coords["area (ISO3)"].to_numpy():
+        print(f"Processing data for {country}")
+        data_country = all_data.pr.loc[{"area (ISO3)": [country]}]
+        # remove wrong and conflicting data
+        if country in remove_data:
+            for filter_name in remove_data[country]:
+                filter = remove_data[country][filter_name].copy()
+                entities = data_country.data_vars
+                if "entity" in filter:
+                    entities_current = filter.pop("entity")
+                    entities = [
+                        entity for entity in entities if entity in entities_current
+                    ]
+
+                ds_mask = xr.zeros_like(
+                    data_country[entities].pr.loc[filter]
+                ).combine_first(xr.ones_like(data_country))
+
+                data_country = data_country.where(ds_mask)
+
+        data_proc_pm2_new = process_data_for_country(
+            data_country,
+            entities_to_ignore=[],
+            gas_baskets=gas_baskets,
+            processing_info_country=config_nc4["processing_info_country"][country],
+            cat_terminology_out=terminology_proc,
+            category_conversion=category_conversion[country],
+        )
+        if data_proc_pm2 is None:
+            data_proc_pm2 = data_proc_pm2_new
+        else:
+            data_proc_pm2 = data_proc_pm2.pr.merge(data_proc_pm2_new)
+
+    # adapt source and metadata
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
+    data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.loc[{"source": ["BUR_NIR"]}]
+
+    # ###
+    # save data to IF and native format
+    # ###
+    data_proc_if = data_proc_pm2.pr.to_interchange_format()
+    if not output_folder.exists():
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc),
+        data_proc_if,
+    )
+
+    encoding = {var: compression for var in data_proc_pm2.data_vars}
+    data_proc_pm2.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"),
+        encoding=encoding,
+    )

+ 20 - 8
src/unfccc_ghg_data/unfccc_reader/Guinea/config_gin_bur1.py

@@ -328,20 +328,32 @@ country_processing_step1 = {
                 "3.C.7",
                 "3.C.8",
             ],
-            "name": "Aggregate sources and non-CO2 emissions sources on land "
-            "(Agriculture)",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land "
+            # "(Agriculture)",
+        },
+        "M.3.D.AG": {
+            "sources": ["3.D.2"],
+            # "name": "Other (Agriculture)"
         },
-        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
         "M.AG.ELV": {
             "sources": ["M.3.C.AG", "M.3.D.AG"],
-            "name": "Agriculture excluding livestock",
+            # "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {
+            "sources": ["3.A", "M.AG.ELV"],
+            # "name": "Agriculture"
+        },
+        "M.3.D.LU": {
+            "sources": ["3.D.1"],
+            # "name": "Other (LULUCF)"
+        },
+        "M.LULUCF": {
+            "sources": ["3.B", "M.3.D.LU"],
+            # "name": "LULUCF"
         },
-        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
-        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
-        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
         "M.0.EL": {
             "sources": ["1", "2", "M.AG", "4"],
-            "name": "National total emissions excluding LULUCF",
+            # "name": "National total emissions excluding LULUCF",
         },
     },
     "basket_copy": {

+ 17 - 10
src/unfccc_ghg_data/unfccc_reader/Indonesia/read_IDN_BUR3_from_pdf.py

@@ -156,29 +156,32 @@ if __name__ == "__main__":
     aggregate_cats = {
         "1.A.4": {
             "sources": ["1.A.4.a", "1.A.4.b"],
-            "name": "Other Sectors (calculated)",
+            "orig_cat_name": "Other Sectors (calculated)",
         },
         "2.A.4": {
             "sources": ["2.A.4.a", "2.A.4.b", "2.A.4.d"],
-            "name": "Other Process uses of Carbonates (calculated)",
+            "orig_cat_name": "Other Process uses of Carbonates (calculated)",
         },
         "2.B.8": {
             "sources": ["2.B.8.a", "2.B.8.b", "2.B.8.c", "2.B.8.f"],
-            "name": "Petrochemical and Carbon Black production (calculated)",
+            "orig_cat_name": "Petrochemical and Carbon Black production (calculated)",
         },
         "4.A": {
             "sources": ["4.A.2", "M.4.A.Ind"],
-            "name": "Solid Waste Disposal (calculated)",
+            "orig_cat_name": "Solid Waste Disposal (calculated)",
         },
     }
 
     aggregate_cats_N2O = {
-        "3.A.2": {"sources": ["3.A.2.b"], "name": "3A2 Manure Management"},
-        "3.A": {"sources": ["3.A.2"], "name": "3A Livestock"},
+        "3.A.2": {"sources": ["3.A.2.b"], "orig_cat_name": "3A2 Manure Management"},
+        "3.A": {"sources": ["3.A.2"], "orig_cat_name": "3A Livestock"},
     }
 
     aggregate_cats_CO2CH4N2O = {
-        "3.A.2": {"sources": ["3.A.2", "3.A.2.b"], "name": "3A2 Manure Management"},
+        "3.A.2": {
+            "sources": ["3.A.2", "3.A.2.b"],
+            "orig_cat_name": "3A2 Manure Management",
+        },
     }
 
     df_all = None
@@ -306,7 +309,9 @@ if __name__ == "__main__":
             ).sum(min_count=1)
 
             df_combine.insert(0, cat_label, cat_to_agg)
-            df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
+            df_combine.insert(
+                1, "orig_cat_name", aggregate_cats[cat_to_agg]["orig_cat_name"]
+            )
 
             df_combine = df_combine.reset_index()
 
@@ -353,7 +358,7 @@ if __name__ == "__main__":
 
             df_combine.insert(0, cat_label, cat_to_agg)
             df_combine.insert(
-                1, "orig_cat_name", aggregate_cats_N2O[cat_to_agg]["name"]
+                1, "orig_cat_name", aggregate_cats_N2O[cat_to_agg]["orig_cat_name"]
             )
 
             df_combine = df_combine.reset_index()
@@ -401,7 +406,9 @@ if __name__ == "__main__":
 
             df_combine.insert(0, cat_label, cat_to_agg)
             df_combine.insert(
-                1, "orig_cat_name", aggregate_cats_CO2CH4N2O[cat_to_agg]["name"]
+                1,
+                "orig_cat_name",
+                aggregate_cats_CO2CH4N2O[cat_to_agg]["orig_cat_name"],
             )
 
             df_combine = df_combine.reset_index()

+ 55 - 22
src/unfccc_ghg_data/unfccc_reader/Israel/config_isr_bur2.py

@@ -253,20 +253,38 @@ meta_data = {
 #### for processing
 # aggregate categories
 cats_to_agg = {
-    "1": {"sources": ["1.A"], "name": "Energy"},  # for trends
+    "1": {
+        "sources": ["1.A"],
+        # "name": "Energy"
+    },  # for trends
     "1.A.4": {
         "sources": ["1.A.4.a", "1.A.4.b", "1.A.4.c", "1.A.4.ab"],
-        "name": "Other sectors",
+        # "name": "Other sectors",
+    },
+    "2.A.4": {
+        "sources": ["2.A.4.b"],
+        # "name": "Soda Ash"
+    },
+    "2.A.7": {
+        "sources": ["2.A.7.a"],
+        # "name": "Other"
     },
-    "2.A.4": {"sources": ["2.A.4.b"], "name": "Soda Ash"},
-    "2.A.7": {"sources": ["2.A.7.a"], "name": "Other"},
     "2.A": {
         "sources": ["2.A.1", "2.A.2", "2.A.4", "2.A.6", "2.A.7"],
-        "name": "Mineral Products",
+        # "name": "Mineral Products",
+    },
+    "2.B.5": {
+        "sources": ["2.B.5.f", "2.B.5.g"],
+        # "name": "Other"
+    },
+    "2.B": {
+        "sources": ["2.B.2", "2.B.5"],
+        # "name": "Chemical Industry"
+    },
+    "6.D": {
+        "sources": ["6.D", "6X.B"],
+        # "name": "Wastewater Treatment and Discharge"
     },
-    "2.B.5": {"sources": ["2.B.5.f", "2.B.5.g"], "name": "Other"},
-    "2.B": {"sources": ["2.B.2", "2.B.5"], "name": "Chemical Industry"},
-    "6.D": {"sources": ["6.D", "6X.B"], "name": "Wastewater Treatment and Discharge"},
     #'6.E': {'sources': ['6.E', '6X.D'], 'Other'}, # currently empty
 }
 
@@ -396,45 +414,60 @@ cat_conversion = {
         "7": "5",
     },  # 5.A-D ignored as not fitting 2006 cats
     "aggregate": {
-        "2.A.4": {"sources": ["2.A.4.b"], "name": "Other uses of soda ashes"},
+        "2.A.4": {
+            "sources": ["2.A.4.b"],
+            # "name": "Other uses of soda ashes"
+        },
         "2.B.8": {
             "sources": ["2.B.8.b"],
-            "name": "Petrochemical and Carbon Black production",
+            # "name": "Petrochemical and Carbon Black production",
+        },
+        "2.B.10": {
+            "sources": ["M.2.B.10.a", "M.2.B.10.b"],
+            # "name": "Other"
         },
-        "2.B.10": {"sources": ["M.2.B.10.a", "M.2.B.10.b"], "name": "Other"},
         "2.B": {
             "sources": ["2.B.2", "2.B.8", "2.B.9", "2.B.10"],
-            "name": "Chemical Industry",
+            # "name": "Chemical Industry",
+        },
+        "2.H": {
+            "sources": ["M.2.H.1_2", "2.H.3"],
+            # "name": "Other"
         },
-        "2.H": {"sources": ["M.2.H.1_2", "2.H.3"], "name": "Other"},
         # '2': {'sources': ['2.A', '2.B', '2.C', '2.F', '2.H'],
         #       'name': 'Industrial Processes and Product Use'},
-        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+        "3.A": {
+            "sources": ["3.A.1", "3.A.2"],
+            # "name": "Livestock"
+        },
         "3.C.1": {
             "sources": ["3.C.1.b", "3.C.1.c"],
-            "name": "Emissions from biomass burning",
+            # "name": "Emissions from biomass burning",
         },
         "M.3.C.1.AG": {
             "sources": ["3.C.1.b", "3.C.1.c"],
-            "name": "Emissions from biomass burning (Agriculture)",
+            # "name": "Emissions from biomass burning (Agriculture)",
         },
         "3.C": {
             "sources": ["3.C.1", "M.3.C.45.AG", "3.C.7", "3.C.8"],
-            "name": "Aggregate sources and non-CO2 emissions sources on land",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land",
         },
         "M.3.C.AG": {
             "sources": ["M.3.C.1.AG", "M.3.C.45.AG", "3.C.7", "3.C.8"],
-            "name": "Aggregate sources and non-CO2 emissions sources on land ("
-            "Agriculture)",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land ("
+            # "Agriculture)",
         },
         "M.AG.ELV": {
             "sources": ["M.3.C.AG"],
-            "name": "Agriculture excluding livestock",
+            # "name": "Agriculture excluding livestock",
+        },
+        "3": {
+            "sources": ["M.AG", "M.LULUCF"],
+            # "name": "AFOLU"
         },
-        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
         "M.0.EL": {
             "sources": ["1", "2", "M.AG", "4", "5"],
-            "name": "National total " "excluding LULUCF",
+            # "name": "National total " "excluding LULUCF",
         },
     },
 }

+ 20 - 8
src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur3.py

@@ -960,20 +960,32 @@ country_processing_step1 = {
                 "3.C.7",
                 "3.C.8",
             ],
-            "name": "Aggregate sources and non-CO2 emissions sources on land "
-            "(Agriculture)",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land "
+            # "(Agriculture)",
+        },
+        "M.3.D.AG": {
+            "sources": ["3.D.2"],
+            # "name": "Other (Agriculture)"
         },
-        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
         "M.AG.ELV": {
             "sources": ["M.3.C.AG", "M.3.D.AG"],
-            "name": "Agriculture excluding livestock",
+            # "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {
+            "sources": ["3.A", "M.AG.ELV"],
+            # "name": "Agriculture"
+        },
+        "M.3.D.LU": {
+            "sources": ["3.D.1"],
+            # "name": "Other (LULUCF)"
+        },
+        "M.LULUCF": {
+            "sources": ["3.B", "M.3.D.LU"],
+            # "name": "LULUCF"
         },
-        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
-        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
-        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
         "M.0.EL": {
             "sources": ["1", "2", "M.AG", "4", "5"],
-            "name": "National total emissions excluding LULUCF",
+            # "name": "National total emissions excluding LULUCF",
         },
     },
     "basket_copy": {

+ 20 - 8
src/unfccc_ghg_data/unfccc_reader/Malaysia/config_mys_bur4.py

@@ -371,20 +371,32 @@ country_processing_step1 = {
                 "3.C.7",
                 "3.C.8",
             ],
-            "name": "Aggregate sources and non-CO2 emissions sources on land "
-            "(Agriculture)",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land "
+            # "(Agriculture)",
+        },
+        "M.3.D.AG": {
+            "sources": ["3.D.2"],
+            # "name": "Other (Agriculture)"
         },
-        "M.3.D.AG": {"sources": ["3.D.2"], "name": "Other (Agriculture)"},
         "M.AG.ELV": {
             "sources": ["M.3.C.AG", "M.3.D.AG"],
-            "name": "Agriculture excluding livestock",
+            # "name": "Agriculture excluding livestock",
+        },
+        "M.AG": {
+            "sources": ["3.A", "M.AG.ELV"],
+            # "name": "Agriculture"
+        },
+        "M.3.D.LU": {
+            "sources": ["3.D.1"],
+            # "name": "Other (LULUCF)"
+        },
+        "M.LULUCF": {
+            "sources": ["3.B", "M.3.D.LU"],
+            # "name": "LULUCF"
         },
-        "M.AG": {"sources": ["3.A", "M.AG.ELV"], "name": "Agriculture"},
-        "M.3.D.LU": {"sources": ["3.D.1"], "name": "Other (LULUCF)"},
-        "M.LULUCF": {"sources": ["3.B", "M.3.D.LU"], "name": "LULUCF"},
         "M.0.EL": {
             "sources": ["1", "2", "M.AG", "4", "5"],
-            "name": "National total emissions excluding LULUCF",
+            # "name": "National total emissions excluding LULUCF",
         },
     },
     "basket_copy": {

+ 35 - 17
src/unfccc_ghg_data/unfccc_reader/Morocco/config_mar_bur3.py

@@ -150,7 +150,7 @@ cat_mapping = {
 aggregate_cats = {
     "1.B.2.a.iii": {
         "sources": ["1.B.2.a.iii.4", "1.B.2.a.iii.5", "1.B.2.a.iii.6"],
-        "name": "All Other",
+        # "name": "All Other",
     },
     "1.B.2.b.iii": {
         "sources": [
@@ -159,26 +159,32 @@ aggregate_cats = {
             "1.B.2.b.iii.5",
             "1.B.2.b.iii.6",
         ],
-        "name": "All Other",
+        # "name": "All Other",
+    },
+    "1.B.2.a": {
+        "sources": ["1.B.2.a.iii"],
+        # "name": "Oil"
     },
-    "1.B.2.a": {"sources": ["1.B.2.a.iii"], "name": "Oil"},
     "1.B.2.b": {
         "sources": ["1.B.2.b.i", "1.B.2.b.ii", "1.B.2.b.iii"],
-        "name": "Natural Gas",
+        # "name": "Natural Gas",
     },
     "2.D": {
         "sources": ["2.D.4"],
-        "name": "Non-Energy Products from Fuels and Solvent Use",
+        # "name": "Non-Energy Products from Fuels and Solvent Use",
     },
     "2.F.1": {
         "sources": ["2.F.1.a", "2.F.1.b"],
-        "name": "Refrigeration and Air Conditioning",
+        # "name": "Refrigeration and Air Conditioning",
     },
     "2.F": {
         "sources": ["2.F.1", "2.F.2", "2.F.3", "2.F.4", "2.F.5", "2.F.6"],
-        "name": "Product uses as Substitutes for Ozone Depleting Substances",
+        # "name": "Product uses as Substitutes for Ozone Depleting Substances",
+    },
+    "2.H": {
+        "sources": ["2.H.1", "2.H.2", "2.H.3"],
+        # "name": "Other"
     },
-    "2.H": {"sources": ["2.H.1", "2.H.2", "2.H.3"], "name": "Other"},
     "3.A.2": {
         "sources": [
             "3.A.2.a",
@@ -190,28 +196,40 @@ aggregate_cats = {
             "3.A.2.h",
             "3.A.2.i",
         ],
-        "name": "Manure Management",
+        # "name": "Manure Management",
+    },
+    "3.A": {
+        "sources": ["3.A.1", "3.A.2"],
+        # "name": "Livestock"
     },
-    "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
     "3.B": {
         "sources": ["3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.B.6"],
-        "name": "Land",
+        # "name": "Land",
     },
     "3.C": {
         "sources": ["3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
-        "name": "Aggregate sources and non-CO2 emissions sources on land",
+        # "name": "Aggregate sources and non-CO2 emissions sources on land",
     },
     "M.3.C.AG": {
         "sources": ["3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
-        "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+        # "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+    },
+    "M.AG": {
+        "sources": ["3.A", "M.3.C.AG"],
+        # "name": "Agriculture"
+    },
+    "3": {
+        "sources": ["M.AG", "M.LULUCF"],
+        # "name": "AFOLU"
     },
-    "M.AG": {"sources": ["3.A", "M.3.C.AG"], "name": "Agriculture"},
-    "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
     "M.AG.ELV": {
         "sources": ["M.3.C.AG"],
-        "name": "Agriculture excluding livestock emissions",
+        # "name": "Agriculture excluding livestock emissions",
+    },
+    "4": {
+        "sources": ["4.A", "4.D"],
+        # "name": "Waste"
     },
-    "4": {"sources": ["4.A", "4.D"], "name": "Waste"},
     "0": {"sources": ["1", "2", "3", "4"]},
     "M.0.EL": {"sources": ["1", "2", "M.AG", "4"]},
 }

+ 11 - 5
src/unfccc_ghg_data/unfccc_reader/Nigeria/config_nga_bur2.py

@@ -385,18 +385,24 @@ processing_info_step2 = {
     "aggregate_cats": {
         "M.AG.ELV": {
             "sources": ["3.C"],
-            "name": "Agriculture excluding livestock emissions",
+            # "name": "Agriculture excluding livestock emissions",
+        },
+        "M.AG": {
+            "sources": ["M.AG.ELV", "3.A"],
+            # "name": "Agriculture"
         },
-        "M.AG": {"sources": ["M.AG.ELV", "3.A"], "name": "Agriculture"},
         "M.LULUCF": {
             "sources": ["3.B", "3.D"],
-            "name": "Land Use, Land Use Change, and Forestry",
+            # "name": "Land Use, Land Use Change, and Forestry",
         },
         "M.0.EL": {
             "sources": ["1", "2", "M.AG", "4", "5"],
-            "name": "National Total Excluding LULUCF",
+            # "name": "National Total Excluding LULUCF",
+        },
+        "0": {
+            "sources": ["1", "2", "3", "4", "5"],
+            # "name": "National Total"
         },
-        "0": {"sources": ["1", "2", "3", "4", "5"], "name": "National Total"},
     },
     "downscale": {
         "sectors": {

+ 11 - 5
src/unfccc_ghg_data/unfccc_reader/Peru/config_per_bur3.py

@@ -527,18 +527,24 @@ cat_conversion = {
     "aggregate": {
         "2": {
             "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
-            "name": "IPPU",
+            # "name": "IPPU",
         },
         "M.3.C.AG": {
             "sources": ["3.C"],
-            "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
         },
         "M.AG.ELV": {
             "sources": ["M.3.C.AG"],
-            "name": "Agriculture excluding livestock emissions",
+            # "name": "Agriculture excluding livestock emissions",
+        },
+        "3.D": {
+            "sources": ["3.D.1"],
+            # "name": "Other"
+        },
+        "3": {
+            "sources": ["M.AG", "M.LULUCF"],
+            # "name": "AFOLU"
         },
-        "3.D": {"sources": ["3.D.1"], "name": "Other"},
-        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
     },
 }
 

+ 16 - 10
src/unfccc_ghg_data/unfccc_reader/Singapore/config_sgp_bur5.py

@@ -478,15 +478,15 @@ meta_data = {
 aggregate_sectors = {
     "2": {
         "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
-        "name": "IPPU",
+        # "name": "IPPU",
     },
     "M.3.C.1.AG": {
         "sources": ["3.C.1.b", "3.C.1.c"],
-        "name": "Emissions from Biomass Burning (Agriculture)",
+        # "name": "Emissions from Biomass Burning (Agriculture)",
     },
     "M.3.C.1.LU": {
         "sources": ["3.C.1.a", "3.C.1.d"],
-        "name": "Emissions from Biomass Burning (LULUCF)",
+        # "name": "Emissions from Biomass Burning (LULUCF)",
     },
     "M.3.C.AG": {
         "sources": [
@@ -499,22 +499,28 @@ aggregate_sectors = {
             "3.C.7",
             "3.C.8",
         ],
-        "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+        # "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
     },
     "M.AG.ELV": {
         "sources": ["M.3.C.AG"],
-        "name": "Agriculture excluding livestock emissions",
+        # "name": "Agriculture excluding livestock emissions",
+    },
+    "M.AG": {
+        "sources": ["M.AG.ELV", "3.A"],
+        # "name": "Agriculture"
     },
-    "M.AG": {"sources": ["M.AG.ELV", "3.A"], "name": "Agriculture"},
     "M.LULUCF": {
         "sources": ["M.3.C.1.LU", "3.B", "3.D"],
-        "name": "Land Use, Land Use Change, and Forestry",
+        # "name": "Land Use, Land Use Change, and Forestry",
     },
     "M.0.EL": {
         "sources": ["1", "2", "M.AG", "4", "5"],
-        "name": "National Total Excluding LULUCF",
+        # "name": "National Total Excluding LULUCF",
+    },
+    "0": {
+        "sources": ["1", "2", "3", "4", "5"],
+        # "name": "National Total"
     },
-    "0": {"sources": ["1", "2", "3", "4", "5"], "name": "National Total"},
 }
 
 
@@ -524,7 +530,7 @@ processing_info_step1 = {
     "aggregate_cats": {
         "2": {
             "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
-            "name": "IPPU",
+            # "name": "IPPU",
         },
     },
     "tolerance": 1,  # because ch4 is inconsistent

+ 30 - 12
src/unfccc_ghg_data/unfccc_reader/Taiwan/config_twn_nir2023.py

@@ -329,7 +329,7 @@ table_defs_skip = {
 
 
 ##### primap2 metadata
-cat_code_regexp = r"(?P<UNFCCC_GHG_data>^[a-zA-Z0-9\.]{1,7})\s.*"
+cat_code_regexp = r"(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*"
 
 time_format = "%Y"
 
@@ -422,30 +422,48 @@ cat_conversion = {
     "aggregate": {
         "1.A": {
             "sources": ["1.A.1", "1.A.2", "1.A.3", "1.A.4"],
-            "name": "Fuel Combustion Activities",
+            # "name": "Fuel Combustion Activities",
+        },
+        "1.B": {
+            "sources": ["1.B.1", "1.B.2"],
+            # "name": "Fugitive Emissions from Fuels"
         },
-        "1.B": {"sources": ["1.B.1", "1.B.2"], "name": "Fugitive Emissions from Fuels"},
         "2": {
             "sources": ["2.A", "2.B", "2.C", "2.D", "2.E", "2.F", "2.G", "2.H"],
-            "name": "Industrial Process and Product Use Sector",
+            # "name": "Industrial Process and Product Use Sector",
+        },
+        "3.A": {
+            "sources": ["3.A.1", "3.A.2"],
+            # "name": "Livestock"
+        },
+        "3.B": {
+            "sources": ["M.LULUCF"],
+            # "name": "Land"
+        },
+        "3.C.1": {
+            "sources": ["3.C.1.b"],
+            # "name": "Emissions from Biomass Burning"
         },
-        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
-        "3.B": {"sources": ["M.LULUCF"], "name": "Land"},
-        "3.C.1": {"sources": ["3.C.1.b"], "name": "Emissions from Biomass Burning"},
         "3.C.5": {
             "sources": ["3.C.5.a", "3.C.5.b"],
-            "name": "Indirect N2O Emissions from Managed Soils",
+            # "name": "Indirect N2O Emissions from Managed Soils",
         },
         "3.C": {
             "sources": ["3.C.1", "3.C.3", "M.3.AS", "3.C.7"],
-            "name": "Aggregate sources and non-CO2 emissions sources on land",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land",
         },
         "M.AG.ELV": {
             "sources": ["3.C"],
-            "name": "Agriculture excluding livestock emissions",
+            # "name": "Agriculture excluding livestock emissions",
+        },
+        "M.AG": {
+            "sources": ["3.A", "3.C"],
+            # "name": "Agriculture"
         },
-        "M.AG": {"sources": ["3.A", "3.C"], "name": "Agriculture"},
-        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},  # consistency check
+        "3": {
+            "sources": ["M.AG", "M.LULUCF"],
+            # "name": "AFOLU"
+        },  # consistency check
         "M.0.EL": {"sources": ["1", "2", "M.AG", "4"]},  # consistency check
         "0": {"sources": ["1", "2", "3", "4"]},  # consistency check
     },

+ 1 - 1
src/unfccc_ghg_data/unfccc_reader/Taiwan/read_TWN_2023_Inventory_from_pdf.py

@@ -48,7 +48,7 @@ if __name__ == "__main__":
     inventory_file = "2023_NIR_executive_summary_english.pdf"
 
     def repl(m):  # noqa: D103
-        return m.group("UNFCCC_GHG_data")
+        return m.group("code")
 
     # ###
     # read the tables from pdf

+ 34 - 13
src/unfccc_ghg_data/unfccc_reader/Thailand/config_tha_bur3.py

@@ -169,7 +169,7 @@ country_processing_step1 = {
     "aggregate_cats": {
         "2.A.4": {
             "sources": ["2.A.4.b", "2.A.4.d"],
-            "name": "Other Process uses of Carbonates",
+            # "name": "Other Process uses of Carbonates",
         },
     },
     "aggregate_gases": {
@@ -427,14 +427,17 @@ cat_conversion = {
         "M.BIO": "M.BIO",
     },
     "aggregate": {
-        "3.A": {"sources": ["3.A.1", "3.A.2"], "name": "Livestock"},
+        "3.A": {
+            "sources": ["3.A.1", "3.A.2"],
+            # "name": "Livestock"
+        },
         "3.C.1": {
             "sources": ["M.3.C.1.AG", "M.3.C.1.LU"],
-            "name": "Emissions from Biomass Burning",
+            # "name": "Emissions from Biomass Burning",
         },
         "3.C": {
             "sources": ["3.C.1", "3.C.2", "3.C.3", "3.C.4", "3.C.5", "3.C.6", "3.C.7"],
-            "name": "Aggregate sources and non-CO2 emissions sources on land",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land",
         },
         "M.3.C.AG": {
             "sources": [
@@ -446,22 +449,40 @@ cat_conversion = {
                 "3.C.6",
                 "3.C.7",
             ],
-            "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land (Agriculture)",
         },
         "M.AG.ELV": {
             "sources": ["M.3.C.AG"],
-            "name": "Agriculture excluding livestock emissions",
+            # "name": "Agriculture excluding livestock emissions",
         },
         "M.3.C.LU": {
             "sources": ["M.3.C.1.LU"],
-            "name": "Aggregate sources and non-CO2 emissions sources on land (Land use)",
+            # "name": "Aggregate sources and non-CO2 emissions sources on land (Land use)",
+        },
+        "3.B.1": {
+            "sources": ["3.B.1.a"],
+            # "name": "Forest Land"
+        },
+        "3.B.2": {
+            "sources": ["3.B.2.a", "3.B.2.b"],
+            # "name": "Cropland"
+        },
+        "3.B.6": {
+            "sources": ["3.B.6.b"],
+            # "name": "Other Land"
+        },
+        "3.B": {
+            "sources": ["3.B.1", "3.B.2", "3.B.6"],
+            # "name": "Land"
+        },
+        "M.LULUCF": {
+            "sources": ["3.B", "N.3.C.LU"],
+            # "name": "LULUCF"
+        },
+        "3": {
+            "sources": ["M.AG", "M.LULUCF"],
+            # "name": "AFOLU"
         },
-        "3.B.1": {"sources": ["3.B.1.a"], "name": "Forest Land"},
-        "3.B.2": {"sources": ["3.B.2.a", "3.B.2.b"], "name": "Cropland"},
-        "3.B.6": {"sources": ["3.B.6.b"], "name": "Other Land"},
-        "3.B": {"sources": ["3.B.1", "3.B.2", "3.B.6"], "name": "Land"},
-        "M.LULUCF": {"sources": ["3.B", "N.3.C.LU"], "name": "LULUCF"},
-        "3": {"sources": ["M.AG", "M.LULUCF"], "name": "AFOLU"},
     },
 }
 

+ 30 - 0
src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/__init__.py

@@ -0,0 +1,30 @@
+"""Read United Arab Emirates' BURs, NIRs, NCs
+
+Scripts and configurations to read United Arab Emirates' submissions to the UNFCCC.
+Currently, the following submissions and datasets are available (all datasets
+including DI (read using the DI-reader) and legacy BUR/NIR (no code)):
+
+.. exec_code::
+    :hide_code:
+
+    from unfccc_ghg_data.helper.functions import (get_country_datasets,
+                                                  get_country_submissions)
+    country = 'ARE'
+    # print available submissions
+    print("="*15 + " Available submissions " + "="*15)
+    get_country_submissions(country, True)
+    print("")
+
+    #print available datasets
+    print("="*15 + " Available datasets " + "="*15)
+    get_country_datasets(country, True)
+
+You can also obtain this information by running
+
+.. code-block:: bash
+
+    poetry run doit country_info country=ARE
+
+See below for a listing of scripts for BUR/NIR reading including links.
+
+"""

+ 201 - 0
src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/config_are_bur1.py

@@ -0,0 +1,201 @@
+"""Config for United Arab Emirates BUR1
+
+Full configuration including PRIMAP2 conversion config and metadata
+
+"""
+
+#### configuration for PM2 format
+gwp_to_use = "AR4GWP100"
+
+coords_cols = {
+    "category": "category",
+    "entity": "entity",
+    "unit": "unit",
+}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC1996_2006_ARE_Inv",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "ARE-GHG-Inventory",
+    "provenance": "measured",
+    "area": "ARE",
+    "scenario": "BUR1",
+}
+
+coords_value_mapping = {
+    "unit": "PRIMAP1",
+    "category": {
+        "Total": "0",
+        "Energy": "1",
+        "Fuel Combustion Activities": "1.A",
+        "Fugitive Emissions": "1.B",
+        "Venting": "1.B.2.c.1",
+        "Flaring": "1.B.2.c.2",
+        "Other Fugitives": "M.1.B.2.OF",
+        "IPPU": "2",
+        "Mineral Industry": "2.A",
+        "Cement": "2.A.1",
+        "Chemical Industry": "2.B",
+        "Ammonia": "2.B.1",
+        "Metal Industry": "2.C",
+        "Iron & Steel": "2.C.1",
+        "Aluminum": "2.C.3",
+        "Agriculture": "3",
+        "Enteric Fermentation": "3.A",
+        "Manure Management": "3.B",
+        "Managed Soils": "3.D",
+        "LUCF": "4",
+        "Waste": "5",  # waste is more or less in 2006 categories
+        "Solid Waste Disposal": "5.A",
+        "Landfill": "M.5.A.LF",
+        "Biological treatment": "5.B",
+        "Composting": "M.5.B.COMP",
+        "Incineration": "5.C.1",
+        "Wastewater": "5.D",
+        "Memo Items": "IGNORE",
+        "Aviation": "M.1.A",
+        "Marine bunker": "M.1.B",
+    },
+    "entity": {
+        "CO2": "CO2",
+        "CH4": "CH4",
+        "N2O": "N2O",
+        "CH4.1": f"CH4 ({gwp_to_use})",
+        "N2O.1": f"N2O ({gwp_to_use})",
+        "HFCs": f"HFCS ({gwp_to_use})",
+        "PFCs": f"PFCS ({gwp_to_use})",
+        "Total GHG": f"KYOTOGHG ({gwp_to_use})",
+    },
+}
+
+filter_remove = {
+    "rem_cat": {"category": ["Memo Items"]},
+}
+
+filter_keep = {}
+
+meta_data = {
+    "references": "https://unfccc.int/documents/635318",
+    "rights": "",
+    "contact": "mail@johannes-guestchow.de",
+    "title": "United Arab Emirates. National Communication (NC). NC 5. Biennial Update Report (BUR). BUR 1.",
+    "comment": "Read fom pdf by Johannes Gütschow",
+    "institution": "UNFCCC",
+}
+
+## processing config
+terminology_proc = "IPCC2006_PRIMAP"
+
+category_conversion = {
+    "mapping": {
+        "0": "0",
+        "1": "1",
+        "1.A": "1.A",
+        "1.B": "1.B",
+        "2": "2",
+        "2.A": "2.A",
+        "2.A.1": "2.A.1",
+        "2.B": "2.B",
+        "2.B.1": "2.B.1",
+        "2.C": "2.C",
+        "2.C.1": "2.C.1",
+        "2.C.3": "2.C.3",
+        "3": "M.AG",
+        "3.A": "3.A.1",
+        "3.B": "3.A.2",
+        "3.D": "M.3.C.45.AG",
+        "4": "M.LULUCF",
+        "5": "4",
+        "5.A": "4.A",
+        "5.B": "4.B",
+        "5.C.1": "4.C.1",
+        "5.D": "4.D",
+        "M.1.A": "M.BK.A",
+        "M.1.B": "M.BK.M",
+        "1.B.2.c.1": "M.1.B.2.VEN",
+        "1.B.2.c.2": "M.1.B.2.FL",
+        "M.1.B.2.OF": "M.1.B.2.OF",
+        # "M.5.A.LF": "",
+        # "M.5.B.COMP": "",
+    },
+    "aggregate": {
+        "1.B.2": {
+            "sources": ["M.1.B.2.VEN", "M.1.B.2.FL", "M.1.B.2.OF"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "2": {
+            "sources": ["2.G"],
+            "filter": {
+                "entity": ["HFCS"],
+            },
+        },
+        "3.A": {
+            "sources": ["3.A.1", "3.A.2"],
+            "filter": {
+                "entity": ["CH4", "N2O"],
+            },
+        },
+        "3.C": {
+            "sources": ["M.3.C.45.AG"],
+            "filter": {
+                "entity": ["N2O"],
+            },
+        },
+        "M.AG.ELV": {
+            "sources": ["3.C"],
+            "filter": {
+                "entity": ["N2O"],
+            },
+        },
+        "M.AG": {  # consistency check
+            "sources": ["3.A", "M.AG.ELV"],
+            "filter": {
+                "entity": ["N2O", "CH4"],
+            },
+        },
+        "3": {
+            "sources": ["M.AG", "M.LULUCF"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "4.C": {
+            "sources": ["4.C.1"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "M.BK": {
+            "sources": ["M.BK.A", "M.BK.M"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O"],
+            },
+        },
+        "M.0.EL": {
+            "sources": ["1", "2", "M.AG", "4"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O", "HFCS", "PFCS"],
+            },
+        },
+        "0": {  # consistency check
+            "sources": ["1", "2", "3", "4"],
+            "filter": {
+                "entity": ["CO2", "CH4", "N2O", "HFCS", "PFCS"],
+            },
+        },
+    },
+}
+
+processing_info_country = {
+    "basket_copy": {
+        "GWPs_to_add": ["SARGWP100", "AR5GWP100", "AR6GWP100"],
+        "entities": ["PFCS", "HFCS"],
+        "source_GWP": gwp_to_use,
+    },
+}

+ 155 - 0
src/unfccc_ghg_data/unfccc_reader/United_Arab_Emirates/read_ARE_BUR1_from_csv.py

@@ -0,0 +1,155 @@
+"""
+Read data from United Arab Emirates' BUR1.
+
+Data are read from a csv file which contains data manually copied from the pdf,
+which was necessary as multiple tables are not machine readable.
+The file contains an inventory for 2021.
+
+"""
+
+import pandas as pd
+import primap2 as pm2
+
+from unfccc_ghg_data.helper import (
+    compression,
+    downloaded_data_path,
+    extracted_data_path,
+    gas_baskets,
+    process_data_for_country,
+    set_to_nan_in_ds,
+)
+from unfccc_ghg_data.unfccc_reader.United_Arab_Emirates.config_are_bur1 import (
+    category_conversion,
+    coords_cols,
+    coords_defaults,
+    coords_terminologies,
+    coords_value_mapping,
+    filter_remove,
+    gwp_to_use,
+    meta_data,
+    processing_info_country,
+    terminology_proc,
+)
+
+if __name__ == "__main__":
+    # ###
+    # configuration: input / output locations, file names and inventory year
+    # ###
+    input_folder = downloaded_data_path / "UNFCCC" / "United_Arab_Emirates" / "BUR1"
+    output_folder = extracted_data_path / "UNFCCC" / "United_Arab_Emirates"
+    if not output_folder.exists():
+        output_folder.mkdir()  # NOTE(review): no parents=True, parent must exist
+
+    output_filename = "ARE_BUR1_2023_"  # prefix; terminology name is appended below
+    inventory_file = "all_data_manual.csv"
+
+    year = 2021
+    time_format = "%Y"
+
+    # ###
+    # read the manually compiled table from csv and bring it into long format
+    # ###
+    data_pd = pd.read_csv(input_folder / inventory_file)
+
+    data_pd = pm2.pm2io.nir_add_unit_information(
+        data_pd,
+        unit_row=0,
+        entity_row="header",
+        regexp_entity=".*",
+        regexp_unit=".*",
+        default_unit="",
+    )
+    data_pd = data_pd.set_index(data_pd.columns[0])  # first column becomes the index
+    table_long = pm2.pm2io.nir_convert_df_to_long(
+        data_pd,
+        year=year,
+        header_long=["category", "entity", "unit", "time", "data"],
+    )
+
+    # drop CH4, N2O with GWP (".1" suffix presumably from duplicated csv headers)
+    idx_gwp = table_long[table_long["entity"].isin(["CH4.1", "N2O.1"])].index
+    table_long = table_long.drop(index=idx_gwp)
+
+    data_if = pm2.pm2io.convert_long_dataframe_if(
+        table_long,
+        coords_cols=coords_cols,
+        coords_defaults=coords_defaults,
+        coords_terminologies=coords_terminologies,
+        coords_value_mapping=coords_value_mapping,
+        filter_remove=filter_remove,
+        meta_data=meta_data,
+        time_format=time_format,
+    )
+
+    data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+
+    # ###
+    # save the data as read (before processing) to IF and native format
+    # ###
+    data_if = data_pm2.pr.to_interchange_format()
+    if not output_folder.exists():  # repeats the check from the configuration part
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + coords_terminologies["category"]),
+        data_if,
+    )
+
+    encoding = {var: compression for var in data_pm2.data_vars}
+    data_pm2.pr.to_netcdf(
+        output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+        encoding=encoding,
+    )
+
+    ### processing
+    data_proc_pm2 = data_pm2.copy()
+
+    # move HFCs from energy to IPPU as their use in electrical
+    # equipment is reported there (category "1" values are written to "2.G")
+    da_HFCs = data_proc_pm2[f"HFCS ({gwp_to_use})"].pr.loc[{"category": "1"}]
+    ds_HFCs = data_proc_pm2[f"HFCS ({gwp_to_use})"].pr.set(
+        "category", "2.G", da_HFCs, existing="overwrite"
+    )
+    data_proc_pm2 = data_proc_pm2.pr.merge(ds_HFCs)
+    data_proc_pm2 = set_to_nan_in_ds(  # clear HFCS in the totals of sectors 1 and 2
+        data_proc_pm2,
+        entities=[f"HFCS ({gwp_to_use})"],
+        filter={"category": ["1", "2"]},
+    )
+    data_proc_pm2 = set_to_nan_in_ds(  # same for the KYOTOGHG basket
+        data_proc_pm2,
+        entities=[f"KYOTOGHG ({gwp_to_use})"],
+        filter={"category": ["1", "2"]},
+    )
+
+    # actual processing (category conversion and gas baskets via the shared helper)
+    data_proc_pm2 = process_data_for_country(
+        data_proc_pm2,
+        entities_to_ignore=[],
+        gas_baskets=gas_baskets,
+        processing_info_country=processing_info_country,
+        cat_terminology_out=terminology_proc,
+        category_conversion=category_conversion,
+    )
+
+    # adapt source and metadata: store the first source's data as "BUR_NIR" only
+    current_source = data_proc_pm2.coords["source"].to_numpy()[0]
+    data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
+    data_proc_pm2 = data_proc_pm2.pr.set("source", "BUR_NIR", data_temp)
+    data_proc_pm2 = data_proc_pm2.pr.loc[{"source": ["BUR_NIR"]}]
+
+    # ###
+    # save the processed data to IF and native format
+    # ###
+    data_proc_if = data_proc_pm2.pr.to_interchange_format()
+    if not output_folder.exists():  # repeats the check from the configuration part
+        output_folder.mkdir()
+    pm2.pm2io.write_interchange_format(
+        output_folder / (output_filename + terminology_proc),
+        data_proc_if,
+    )
+
+    encoding = {var: compression for var in data_proc_pm2.data_vars}
+    data_proc_pm2.pr.to_netcdf(
+        output_folder / (output_filename + terminology_proc + ".nc"),
+        encoding=encoding,
+    )

+ 4 - 1
src/unfccc_ghg_data/unfccc_reader/folder_mapping.json

@@ -1,7 +1,9 @@
 {
+    "ARE": "United_Arab_Emirates",
     "ARG": "Argentina",
     "BDI": "Burundi",
     "CHL": "Chile",
+    "CHN": "China",
     "COL": "Colombia",
     "GIN": "Guinea",
     "IDN": "Indonesia",
@@ -10,10 +12,11 @@
     "MAR": "Morocco",
     "MEX": "Mexico",
     "MNE": "Montenegro",
+    "MNG": "Mongolia",
     "MYS": "Malaysia",
     "NGA": "Nigeria",
     "PER": "Peru",
     "SGP": "Singapore",
     "THA": "Thailand",
     "TWN": "Taiwan"
-}
+}

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است